Pranoy Mukherjee committed
Commit 9237011 · 1 Parent(s): a495074

Update SwarmAudit Space demo

Files changed (44):
  1. .env.example +16 -1
  2. .gitignore +1 -0
  3. AMD_VLLM_RUNBOOK.md +190 -0
  4. HF_SPACES_DEPLOY.md +80 -51
  5. README.md +219 -99
  6. app/agents/config_agent.py +114 -0
  7. app/agents/cuda_migration_agent.py +106 -0
  8. app/agents/dependency_agent.py +347 -0
  9. app/agents/docs_agent.py +14 -2
  10. app/agents/error_handling_agent.py +183 -0
  11. app/agents/graph.py +121 -47
  12. app/agents/llm_enrichment.py +85 -0
  13. app/agents/observability_agent.py +155 -0
  14. app/agents/performance_agent.py +39 -11
  15. app/agents/quality_agent.py +14 -2
  16. app/agents/security_agent.py +7 -5
  17. app/agents/synthesizer_agent.py +181 -2
  18. app/config.py +6 -1
  19. app/main.py +7 -1
  20. app/schemas.py +32 -0
  21. app/services/benchmark.py +67 -0
  22. app/services/json_parser.py +42 -0
  23. app/services/llm_client.py +90 -4
  24. app/services/repo_crawler.py +13 -2
  25. app/services/report_formatter.py +288 -0
  26. app/ui/gradio_app.py +1427 -35
  27. tests/test_agent_llm_enrichment.py +104 -0
  28. tests/test_api.py +12 -0
  29. tests/test_benchmark.py +29 -0
  30. tests/test_config_agent.py +56 -0
  31. tests/test_cuda_migration_agent.py +54 -0
  32. tests/test_dependency_agent.py +80 -0
  33. tests/test_docs_agent.py +6 -4
  34. tests/test_error_handling_agent.py +82 -0
  35. tests/test_gradio_app.py +298 -1
  36. tests/test_graph_progress.py +50 -1
  37. tests/test_json_parser.py +44 -0
  38. tests/test_llm_client.py +59 -0
  39. tests/test_observability_agent.py +85 -0
  40. tests/test_repo_crawler.py +11 -0
  41. tests/test_report_exports.py +130 -0
  42. tests/test_security_report.py +67 -1
  43. tests/test_synthesizer_agent.py +94 -3
  44. tests/test_v2_schemas.py +93 -0
.env.example CHANGED
@@ -1,10 +1,25 @@
 APP_NAME=SwarmAudit
 LLM_PROVIDER=mock
-LLM_BASE_URL=http://localhost:8000/v1
+LLM_BASE_URL=http://localhost:9000/v1
 LLM_API_KEY=not-needed-for-mock
 LLM_MODEL=Qwen/Qwen2.5-Coder-32B-Instruct
+ENABLE_LLM_ENRICHMENT=false
+ENABLE_DEPENDENCY_CVE_LOOKUP=false
+DEPENDENCY_OSV_TIMEOUT_SECONDS=20
+MAX_LLM_CHUNKS=5
+LLM_TIMEOUT_SECONDS=120
 MAX_FILES=200
 MAX_FILE_SIZE_KB=250
 MAX_CHARS_PER_CHUNK=12000
 CLONE_TIMEOUT_SECONDS=60
 CLONE_BASE_DIR=.swarm_audit_tmp
+
+# Credit-safe AMD/vLLM first test overrides:
+# LLM_PROVIDER=vllm
+# LLM_BASE_URL=http://YOUR_VLLM_ENDPOINT/v1
+# LLM_API_KEY=swarm-audit-demo-key
+# ENABLE_LLM_ENRICHMENT=true
+# MAX_FILES=100
+# MAX_FILE_SIZE_KB=150
+# MAX_CHARS_PER_CHUNK=8000
+# MAX_LLM_CHUNKS=2
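
These are plain environment variables, so the credit-safe profile can be sanity-checked before an AMD session. A minimal standalone sketch (hypothetical helper, not part of this commit; names and values are taken from the overrides above):

```python
# preflight_env.py - hypothetical helper, not part of this commit.
# Compares the current environment against the credit-safe AMD
# overrides documented in .env.example above.
import os

CREDIT_SAFE = {
    "LLM_PROVIDER": "vllm",
    "ENABLE_LLM_ENRICHMENT": "true",
    "MAX_FILES": "100",
    "MAX_FILE_SIZE_KB": "150",
    "MAX_CHARS_PER_CHUNK": "8000",
    "MAX_LLM_CHUNKS": "2",
}

for key, expected in CREDIT_SAFE.items():
    actual = os.getenv(key, "<unset>")
    status = "ok" if actual == expected else "DIFFERS"
    print(f"{status:8} {key}={actual} (credit-safe: {expected})")
```
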
.gitignore CHANGED
@@ -208,6 +208,7 @@ tempCodeRunnerFile.py
 
 # SwarmAudit local test artifacts
 .tmp_pytest*/
+.tmp_test_exports/
 pytest-cache-files-*
 .swarm_audit_tmp/
 
AMD_VLLM_RUNBOOK.md ADDED
@@ -0,0 +1,190 @@
+# AMD vLLM Runbook
+
+SwarmAudit is AMD-ready through an HTTP-only vLLM integration. The app does not install vLLM. It calls an OpenAI-compatible endpoint that can be hosted on AMD Developer Cloud with ROCm.
+
+## What Was Validated
+
+During development, SwarmAudit was tested against:
+
+- AMD Developer Cloud GPU instance
+- ROCm visible through `rocm-smi`
+- Docker-based vLLM environment
+- `Qwen/Qwen2.5-Coder-32B-Instruct`
+- OpenAI-compatible routes:
+  - `/v1/models`
+  - `/v1/chat/completions`
+- SwarmAudit Diagnostics tab
+- SwarmAudit Benchmark tab
+- real audit run with `ENABLE_LLM_ENRICHMENT=true` and `MAX_LLM_CHUNKS=2`
+
+The AMD instance was destroyed afterward to avoid credit burn.
+
+## Safe Default
+
+Use this locally and on Hugging Face Spaces when AMD is not running:
+
+```text
+LLM_PROVIDER=mock
+ENABLE_LLM_ENRICHMENT=false
+ENABLE_DEPENDENCY_CVE_LOOKUP=false
+```
+
+## Credit-Safe AMD Settings
+
+Use these for the first AMD session:
+
+```text
+LLM_PROVIDER=vllm
+LLM_BASE_URL=http://YOUR_VLLM_ENDPOINT/v1
+LLM_API_KEY=not-needed-if-open
+LLM_MODEL=Qwen/Qwen2.5-Coder-32B-Instruct
+ENABLE_LLM_ENRICHMENT=false
+ENABLE_DEPENDENCY_CVE_LOOKUP=false
+MAX_FILES=100
+MAX_FILE_SIZE_KB=150
+MAX_CHARS_PER_CHUNK=8000
+MAX_LLM_CHUNKS=2
+```
+
+Only switch this after Diagnostics passes:
+
+```text
+ENABLE_LLM_ENRICHMENT=true
+```
+
+## AMD Session Flow
+
+1. Create/start the AMD GPU instance.
+2. SSH into the instance.
+3. Confirm GPU visibility:
+
+```bash
+rocm-smi
+```
+
+4. If the image provides a vLLM container, enter it:
+
+```bash
+docker exec -it rocm /bin/bash
+```
+
+5. Start vLLM:
+
+```bash
+vllm serve Qwen/Qwen2.5-Coder-32B-Instruct \
+  --host 0.0.0.0 \
+  --port 8000 \
+  --dtype float16 \
+  --max-model-len 8192 \
+  --gpu-memory-utilization 0.90
+```
+
+If the provided AMD image recommends different flags, use the provided image guidance first. The important part is that `/v1/models` and `/v1/chat/completions` are reachable.
+
+## Endpoint Checks
+
+From a machine that can reach the endpoint:
+
+```bash
+curl http://YOUR_VLLM_ENDPOINT/v1/models
+```
+
+Then:
+
+```bash
+curl http://YOUR_VLLM_ENDPOINT/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "Qwen/Qwen2.5-Coder-32B-Instruct",
+    "messages": [
+      {"role": "user", "content": "Reply with exactly: SwarmAudit LLM OK"}
+    ],
+    "max_tokens": 16,
+    "temperature": 0
+  }'
+```
+
+Expected completion:
+
+```text
+SwarmAudit LLM OK
+```
+
+## Connect SwarmAudit
+
+Set local `.env` or Space secrets:
+
+```text
+LLM_PROVIDER=vllm
+LLM_BASE_URL=http://YOUR_VLLM_ENDPOINT/v1
+LLM_API_KEY=not-needed-if-open
+LLM_MODEL=Qwen/Qwen2.5-Coder-32B-Instruct
+ENABLE_LLM_ENRICHMENT=false
+MAX_LLM_CHUNKS=2
+```
+
+Run:
+
+```bash
+python app.py
+```
+
+Open the Diagnostics tab and confirm:
+
+- provider is `vllm`
+- model is `Qwen/Qwen2.5-Coder-32B-Instruct`
+- `/v1/models` succeeds
+- chat completion succeeds
+
+Then enable:
+
+```text
+ENABLE_LLM_ENRICHMENT=true
+```
+
+Restart the app after changing env vars.
+
+## Credit-Safe Demo Order
+
+1. Local mock test.
+2. HF Space mock test.
+3. Start AMD GPU.
+4. Start vLLM.
+5. Run Diagnostics once.
+6. Run Benchmark once.
+7. Enable enrichment with `MAX_LLM_CHUNKS=2`.
+8. Audit:
+
+```text
+https://github.com/pallets/itsdangerous
+```
+
+9. If good, audit:
+
+```text
+https://github.com/psf/requests
+```
+
+10. Capture screenshots:
+    - `rocm-smi`
+    - vLLM startup/model logs
+    - Diagnostics OK
+    - Benchmark result
+    - SwarmAudit report
+11. Destroy the GPU instance when done.
+
+## Important Billing Note
+
+For AMD GPU droplets, powering off may still reserve billable resources. Destroy the instance when finished unless the provider explicitly says billing stops.
+
+## Fallback
+
+If anything fails, use:
+
+```text
+LLM_PROVIDER=mock
+ENABLE_LLM_ENRICHMENT=false
+```
+
+SwarmAudit still runs the static multi-agent audit and remains demo-ready.
+
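
The two curl probes in "Endpoint Checks" can also be scripted. A minimal sketch, assuming `httpx` (already used by the new dependency agent) and the same placeholder endpoint; it mirrors the documented curl checks rather than the app's own Diagnostics code:

```python
# check_vllm.py - hypothetical script mirroring the curl checks above.
import httpx

BASE_URL = "http://YOUR_VLLM_ENDPOINT/v1"  # placeholder, as in the runbook

# 1. /v1/models should list the served model.
models = httpx.get(f"{BASE_URL}/models", timeout=10).json()
print([entry["id"] for entry in models.get("data", [])])

# 2. A temperature-0 chat completion should echo the probe string.
payload = {
    "model": "Qwen/Qwen2.5-Coder-32B-Instruct",
    "messages": [{"role": "user", "content": "Reply with exactly: SwarmAudit LLM OK"}],
    "max_tokens": 16,
    "temperature": 0,
}
reply = httpx.post(f"{BASE_URL}/chat/completions", json=payload, timeout=60).json()
print(reply["choices"][0]["message"]["content"])  # expected: SwarmAudit LLM OK
```
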
HF_SPACES_DEPLOY.md CHANGED
@@ -1,46 +1,31 @@
-# Hugging Face Spaces Deployment Checklist
+# Hugging Face Spaces Deployment
 
-## Local Preflight
+Use this checklist when updating the SwarmAudit Space.
 
-Run these from the repo root:
+## Recommended Public Demo Mode
 
-```bash
-pip install -r requirements.txt
-python -m pytest
-python app.py
-```
-
-Open:
+Keep the public Space reliable unless a stable AMD/vLLM endpoint will remain online for judging.
 
 ```text
-http://127.0.0.1:7860
-```
-
-Test a small repo first:
-
-```text
-https://github.com/pallets/itsdangerous
+LLM_PROVIDER=mock
+ENABLE_LLM_ENRICHMENT=false
+ENABLE_DEPENDENCY_CVE_LOOKUP=false
 ```
 
-## Create The Space
-
-1. Go to Hugging Face Spaces.
-2. Create a new Space.
-3. Choose SDK: `Gradio`.
-4. Choose hardware: CPU basic for the mock MVP.
-5. Use the AMD hackathon organization if the event requires it.
+This still runs the static multi-agent audit and produces exportable reports.
 
 ## Required Files
 
-These must be at the repo root:
+These files must be at the Space repo root:
 
 ```text
 app.py
 requirements.txt
 README.md
+app/
 ```
 
-The README includes the Space metadata:
+The README front matter tells Spaces how to start the app:
 
 ```yaml
 sdk: gradio
@@ -48,50 +33,94 @@ sdk_version: 6.14.0
 app_file: app.py
 ```
 
-## Environment Variables
+## Local Preflight
 
-For the public mock demo:
+From the repo root:
+
+```bash
+pip install -r requirements.txt
+python -m compileall -q app tests app.py
+python -m pytest --basetemp=.tmp_pytest -p no:cacheprovider
+python app.py
+```
+
+Open the local URL printed by Gradio.
+
+Test:
 
 ```text
-LLM_PROVIDER=mock
+https://github.com/pallets/itsdangerous
 ```
 
-For a later AMD/vLLM deployment:
+Then:
 
 ```text
-LLM_PROVIDER=vllm
-LLM_BASE_URL=http://YOUR_VLLM_ENDPOINT/v1
-LLM_API_KEY=not-needed-if-your-endpoint-does-not-require-one
-LLM_MODEL=Qwen/Qwen2.5-Coder-32B-Instruct
+https://github.com/psf/requests
 ```
 
-## First Hosted Smoke Test
+Confirm:
+
+- agent progress appears
+- findings render
+- severity filters work
+- finding detail panel updates when clicking rows
+- Markdown download works
+- JSON download works
+- Diagnostics tab shows `Provider: mock` and `Status: OK`
+- Benchmark tab works in mock mode
 
-In the deployed Space, test:
+## Space Settings
+
+- SDK: Gradio
+- Hardware: CPU basic for public mock mode
+- App file: `app.py`
+- License: MIT
+- Suggested short description:
 
 ```text
-https://github.com/pallets/itsdangerous
+Multi-agent production-readiness scanner for AI-generated code
 ```
 
-Then test:
+## Deploy / Update
+
+Push the same project code to the hackathon organization Space repo.
+
+After the build starts:
+
+1. Open the Space logs.
+2. Wait for the Gradio startup message.
+3. Open the app.
+4. Run the small repo smoke test.
+5. Keep a screenshot of the working report for submission material.
+
+## Optional AMD/vLLM Mode
+
+Only use this if the endpoint is stable:
 
 ```text
-https://github.com/psf/requests
+LLM_PROVIDER=vllm
+LLM_BASE_URL=http://YOUR_VLLM_ENDPOINT/v1
+LLM_API_KEY=stored-as-space-secret
+LLM_MODEL=Qwen/Qwen2.5-Coder-32B-Instruct
+ENABLE_LLM_ENRICHMENT=false
+MAX_LLM_CHUNKS=2
 ```
 
-Expected behavior:
+Run the Diagnostics tab before enabling enrichment.
+
+After Diagnostics passes:
+
+```text
+ENABLE_LLM_ENRICHMENT=true
+```
 
-- Crawler maps files.
-- Chunker creates chunks.
-- Security, Performance, Quality, and Docs agents run.
-- Synthesizer returns a report.
-- Report shows a prioritized subset while preserving total finding counts.
+If the endpoint is temporary, switch back to mock mode after recording demo proof.
 
-## If The Space Fails
+## Common Issues
 
-Check the Space logs first. Common issues:
-
-- Dependency install failure: verify `requirements.txt`.
-- App import failure: verify root `app.py`.
-- GitHub clone failure: verify Space has outbound internet access.
-- Large repo timeout: test `pallets/itsdangerous` before larger repos.
+- **Build error**: check `requirements.txt` and root `app.py`.
+- **No logs**: verify the code is pushed to the actual Space remote, not only GitHub.
+- **Clone error**: test a smaller public repo first.
+- **Port issue locally**: `python app.py` tries `7860` first and falls back locally when no explicit port env var is set.
+- **Secrets**: never put real API keys in README, screenshots, or `.env.example`.
 
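For the "Deploy / Update" step above, pushing to the Space git remote is the documented path. As an alternative sketch, the `huggingface_hub` client can upload the same tree; that library is not referenced in this commit, and the Space id below is a placeholder:

```python
# deploy_space.py - hypothetical alternative to a git push.
from huggingface_hub import HfApi

api = HfApi()  # expects a write token, e.g. via the HF_TOKEN env var
api.upload_folder(
    folder_path=".",            # repo root: app.py, requirements.txt, README.md, app/
    repo_id="ORG/SwarmAudit",   # placeholder Space id
    repo_type="space",
    ignore_patterns=[".git/*", ".swarm_audit_tmp/*", ".tmp_pytest/*"],
)
```
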
README.md CHANGED
@@ -1,4 +1,4 @@
----
+---
 title: SwarmAudit
 sdk: gradio
 sdk_version: 6.14.0
@@ -9,58 +9,159 @@ license: mit
 
 # SwarmAudit
 
-Paste any public GitHub URL. Get a structured multi-agent code audit in minutes.
+SwarmAudit is a multi-agent production-readiness scanner for AI-generated code.
 
-SwarmAudit is an AI-agent code review system for the AMD Developer Hackathon. It clones a public repository, filters and chunks source files, runs specialized review agents, and returns a severity-ranked report with file references and suggested fixes.
+Paste a public GitHub repository URL and SwarmAudit clones the repo, maps source files, runs specialized static and optional LLM-enriched agents, then returns a prioritized audit report with severity filters, file references, remediation guidance, scores, and Markdown/JSON exports.
 
-The local MVP runs in mock-first mode, so the demo works without waiting for ROCm, vLLM, or MI300X infrastructure. The inference layer is designed to switch to a vLLM-compatible Qwen2.5-Coder endpoint later.
+The project was built for the AMD Developer Hackathon Track 1: AI Agents & Agentic Workflows. It is designed to run reliably in mock/static mode for public demos and switch to AMD Developer Cloud + ROCm + vLLM + Qwen2.5-Coder when GPU credits are available.
 
-## MVP
+## Why It Exists
 
-SwarmAudit currently runs with a mock-first LLM interface so the demo is not blocked by ROCm, vLLM, or AMD MI300X setup. The current graph is:
+AI coding tools are fast, but they often miss production concerns: broken security assumptions, unsafe configuration, missing timeouts, swallowed exceptions, weak observability, dependency risk, and GPU portability issues. SwarmAudit turns those review concerns into a coordinated agent workflow.
+
+The goal is not to replace linters. The goal is to give teams a fast second-pass review for code that might be functionally correct but not production-ready.
+
+## Current Status
+
+Working now:
+
+- Gradio dashboard with agent progress, activity log, summary cards, clickable severity filters, finding inspector, and report downloads.
+- FastAPI backend with `/health`, `/llm/health`, and `/audit`.
+- GitHub repo cloning with file limits and Windows-safe temp paths.
+- Static multi-agent audit path that works without GPU access.
+- Optional vLLM/Qwen enrichment behind config.
+- LLM Diagnostics tab for `/v1/models` and chat-completion checks.
+- Benchmark tab for latency checks against mock or vLLM backends.
+- Markdown and JSON report export.
+- Hugging Face Spaces entrypoint through root `app.py`.
+- AMD/vLLM runbook for credit-safe MI300X testing.
+
+Validated during development:
+
+- Hugging Face Space running in mock/static mode.
+- AMD Developer Cloud GPU instance with ROCm visible through `rocm-smi`.
+- vLLM serving `Qwen/Qwen2.5-Coder-32B-Instruct` through an OpenAI-compatible `/v1` API.
+- SwarmAudit Diagnostics and Benchmark tabs connected successfully to the AMD-hosted vLLM endpoint.
+
+## Agent Workflow
 
 ```text
-GitHub URL -> Crawler -> Chunker -> [Security Agent + Performance Agent + Quality Agent + Docs Agent] -> Synthesizer -> Report
+GitHub URL
+  -> Crawler Agent
+  -> Chunker
+  -> Parallel Analysis Agents
+       Security
+       Performance
+       Quality
+       Docs
+       Config
+       Dependency
+       Error Handling
+       Observability
+       CUDA-to-ROCm
+  -> Synthesizer
+  -> Scores + Roadmap + Report
 ```
 
-## Demo Status
-
-Working locally:
-
-- Gradio UI with live agent progress
-- FastAPI `/health` and `/audit` endpoints
-- GitHub clone and repo scan on public repos
-- Four analysis agents plus synthesizer
-- Prioritized report display with full raw finding totals preserved
-- Hugging Face Spaces-style `app.py` entrypoint
-
-Smoke-tested repos:
-
-- `https://github.com/psf/requests`
-- `https://github.com/pallets/itsdangerous`
-
-Example output is available in [`examples/requests_report_excerpt.md`](examples/requests_report_excerpt.md).
-
-## Architecture
-
-```mermaid
-flowchart LR
-    U[User enters GitHub URL] --> API[FastAPI / Gradio]
-    API --> C[Crawler Agent]
-    C --> F[File Filter]
-    F --> K[Chunker]
-    K --> S[Security Agent]
-    K --> P[Performance Agent]
-    K --> Q[Quality Agent]
-    K --> D[Docs Agent]
-    S --> Y[Synthesizer Agent]
-    P --> Y
-    Q --> Y
-    D --> Y
-    Y --> R[Structured Audit Report]
-```
-
-The graph is intentionally modular: each agent returns strict Pydantic findings, and the synthesizer merges, deduplicates, prioritizes, and formats the final report.
+## Agents
+
+- **Security Agent**: hardcoded secrets, disabled TLS verification, dynamic execution, insecure dependency version ranges.
+- **Performance Agent**: missing HTTP timeouts, blocking work in async paths, nested loops, repeated file reads, synchronous hot-path operations.
+- **Quality Agent**: long functions, high branch density, very short identifiers, TODO/FIXME/HACK comments, maintainability signals.
+- **Docs Agent**: README gaps, missing install/run/test guidance, public Python symbols without docstrings.
+- **Config Agent**: production-dangerous defaults such as debug mode, open CORS, disabled TLS checks, weak secrets, unsafe config patterns.
+- **Dependency Agent**: parses manifests and optionally queries OSV.dev for CVE data when enabled.
+- **Error Handling Agent**: swallowed exceptions, missing timeouts, missing retry/fallback behavior, resilience gaps.
+- **Observability Agent**: `print` logging, sensitive data in logs, missing health checks, missing metrics/tracing signals.
+- **CUDA-to-ROCm Agent**: flags CUDA/NVIDIA-specific assumptions such as `torch.cuda`, `.cuda()`, `pynvml`, `nvidia-smi`, `cudaMalloc`, and `cudaMemcpy`, then suggests ROCm/generic alternatives.
+- **Synthesizer Agent**: deduplicates findings, ranks by severity, computes scores, groups categories, and builds the remediation roadmap.
+
+## Report Output
+
+Each audit report includes:
+
+- Repository URL
+- scanned/skipped file counts
+- severity summary
+- total/displayed/hidden finding counts
+- agent finding counts
+- category summary
+- security score
+- production readiness score
+- remediation roadmap:
+  - This Week
+  - Next Sprint
+  - Backlog
+- structured findings with:
+  - title
+  - severity
+  - file path and line range
+  - explanation
+  - why it matters
+  - suggested fix
+  - agent source
+  - category
+  - confidence when available
+- Markdown export
+- JSON export
+
+The UI displays a prioritized subset for readability while preserving full totals in the structured report.
+
+## AMD + Qwen Integration
+
+SwarmAudit uses Qwen through an OpenAI-compatible vLLM endpoint. The app does not install or run vLLM directly; it calls vLLM over HTTP.
+
+The AMD path improves the project by allowing the same agent workflow to use a stronger code model on AMD GPU infrastructure:
+
+- AMD Developer Cloud provides the GPU runtime.
+- ROCm exposes AMD GPU acceleration.
+- vLLM serves Qwen2.5-Coder as an OpenAI-compatible API.
+- SwarmAudit uses that endpoint for Diagnostics, Benchmark, and optional LLM enrichment.
+- Static agents remain the reliable fallback if the endpoint is unavailable.
+
+Default public/demo mode stays cheap and reliable:
+
+```text
+LLM_PROVIDER=mock
+ENABLE_LLM_ENRICHMENT=false
+```
+
+Credit-safe AMD test mode:
+
+```text
+LLM_PROVIDER=vllm
+LLM_BASE_URL=http://YOUR_VLLM_ENDPOINT/v1
+LLM_API_KEY=swarm-audit-demo-key
+LLM_MODEL=Qwen/Qwen2.5-Coder-32B-Instruct
+ENABLE_LLM_ENRICHMENT=true
+MAX_FILES=100
+MAX_FILE_SIZE_KB=150
+MAX_CHARS_PER_CHUNK=8000
+MAX_LLM_CHUNKS=2
+```
+
+See [`AMD_VLLM_RUNBOOK.md`](AMD_VLLM_RUNBOOK.md) for the exact AMD setup and shutdown checklist.
+
+## Hugging Face Spaces
+
+SwarmAudit is deployable as a Gradio Space using the root `app.py`.
+
+Recommended public Space settings:
+
+- SDK: Gradio
+- Hardware: CPU basic
+- App file: `app.py`
+- Environment:
+
+```text
+LLM_PROVIDER=mock
+ENABLE_LLM_ENRICHMENT=false
+ENABLE_DEPENDENCY_CVE_LOOKUP=false
+```
+
+Keep the public Space in mock/static mode unless a stable vLLM endpoint is available for the full judging window. Do not expose private endpoint keys in the README or UI.
+
+See [`HF_SPACES_DEPLOY.md`](HF_SPACES_DEPLOY.md) for the deployment checklist.
 
 ## Quick Start
 
@@ -70,115 +171,134 @@ python -m venv .venv
 pip install -r requirements.txt
 ```
 
+Run the Gradio app:
+
+```bash
+python app.py
+```
+
+Open the URL printed by Gradio. The app tries port `7860` first and falls back to another local Gradio port if `7860` is busy.
+
 Run the FastAPI backend:
 
 ```bash
 uvicorn app.main:app --reload
 ```
 
-If port 8000 is busy on Windows, use:
+If port `8000` is busy:
 
 ```bash
 uvicorn app.main:app --reload --port 8001
 ```
 
-Health check:
+Health checks:
 
 ```bash
 curl http://127.0.0.1:8000/health
+curl http://127.0.0.1:8000/llm/health
 ```
 
-Audit endpoint:
+Audit API:
 
 ```bash
 curl -X POST http://127.0.0.1:8000/audit \
   -H "Content-Type: application/json" \
-  -d '{"repo_url":"https://github.com/psf/requests"}'
-```
-
-Run the Gradio demo:
-
-```bash
-python -m app.ui.gradio_app
+  -d '{"repo_url":"https://github.com/pallets/itsdangerous"}'
 ```
 
-For Hugging Face Spaces-style startup:
+Recommended first test repos:
 
-```bash
-python app.py
+```text
+https://github.com/pallets/itsdangerous
+https://github.com/psf/requests
 ```
 
-The Gradio app includes example repos, a live agent progress panel, and a structured markdown report panel.
-The launcher binds to `0.0.0.0` and uses `PORT` when provided, which matches hosted Gradio deployment expectations.
-
 ## Configuration
 
-Copy `.env.example` to `.env` for local overrides. Default inference mode is:
-
-```text
-LLM_PROVIDER=mock
-```
-
-Later, set `LLM_PROVIDER=vllm` and point `LLM_BASE_URL` at an OpenAI-compatible vLLM endpoint running Qwen2.5-Coder.
+Copy `.env.example` to `.env` for local overrides.
 
-Key safety limits:
+Important settings:
 
 ```text
+LLM_PROVIDER=mock
+LLM_BASE_URL=http://localhost:9000/v1
+LLM_API_KEY=not-needed-for-mock
+LLM_MODEL=Qwen/Qwen2.5-Coder-32B-Instruct
+ENABLE_LLM_ENRICHMENT=false
+ENABLE_DEPENDENCY_CVE_LOOKUP=false
+MAX_LLM_CHUNKS=5
+LLM_TIMEOUT_SECONDS=120
 MAX_FILES=200
 MAX_FILE_SIZE_KB=250
 MAX_CHARS_PER_CHUNK=12000
+CLONE_TIMEOUT_SECONDS=60
 CLONE_BASE_DIR=.swarm_audit_tmp
 ```
 
-## Report Schema
-
-Each finding includes:
-
-- title
-- severity: CRITICAL, HIGH, MEDIUM, LOW
-- file path and line range
-- description
-- why it matters
-- suggested fix
-- agent source
-
-Reports preserve full finding totals while displaying a prioritized subset for readability. High-severity findings are shown first, repeated low-severity findings are summarized, and warnings explain when lower-priority findings are hidden from the demo report.
-
-## Current Agents
-
-- Security Agent: flags hardcoded secrets, disabled TLS verification, and dynamic code execution.
-- Performance Agent: flags HTTP calls without timeouts, blocking sleep inside async functions, nested loops, file reads in loops, and synchronous Node.js filesystem calls.
-- Quality Agent: flags long functions, high branch density, large source sections, unresolved TODO/FIXME/HACK comments, and very short symbol names.
-- Docs Agent: flags incomplete README guidance and public Python symbols missing docstrings.
-- Synthesizer Agent: deduplicates findings, sorts by severity, and builds the final report.
+Dependency CVE lookup is off by default so demos do not depend on network calls beyond cloning the target repo:
 
-## Hugging Face Spaces
+```text
+ENABLE_DEPENDENCY_CVE_LOOKUP=false
+```
 
-SwarmAudit is ready to launch as a Gradio Space with the root `app.py` entrypoint. Keep `LLM_PROVIDER=mock` for a reliable public demo, then switch to `LLM_PROVIDER=vllm` when an AMD MI300X-hosted Qwen2.5-Coder endpoint is available.
+Enable it only when you want OSV.dev CVE checks:
 
-See [`HF_SPACES_DEPLOY.md`](HF_SPACES_DEPLOY.md) for the deployment checklist.
+```text
+ENABLE_DEPENDENCY_CVE_LOOKUP=true
+```
 
-Recommended Space settings:
+## Tests
 
-- SDK: Gradio
-- App file: `app.py`
-- Python: 3.11 or newer
-- Default env: `LLM_PROVIDER=mock`
+```bash
+python -m compileall -q app tests app.py
+python -m pytest --basetemp=.tmp_pytest -p no:cacheprovider
+```
 
-## AMD MI300X Roadmap
+Current local suite:
 
-The current code path is intentionally mock-first. The next inference phase is:
+```text
+104 tests
+```
 
-1. Start a Qwen2.5-Coder vLLM server on AMD Developer Cloud.
-2. Expose an OpenAI-compatible `/v1/chat/completions` endpoint.
-3. Set `LLM_PROVIDER=vllm`, `LLM_BASE_URL`, and `LLM_MODEL`.
-4. Add LLM enrichment to agent findings while keeping static rules as deterministic guardrails.
-5. Add a benchmark tab with MI300X latency and throughput numbers.
+## Project Structure
 
-## Tests
+```text
+app.py                      # Hugging Face/Gradio entrypoint
+app/
+  main.py                   # FastAPI API
+  config.py                 # environment settings
+  schemas.py                # Pydantic models
+  agents/
+    graph.py                # orchestration
+    security_agent.py
+    performance_agent.py
+    quality_agent.py
+    docs_agent.py
+    config_agent.py
+    dependency_agent.py
+    error_handling_agent.py
+    observability_agent.py
+    cuda_migration_agent.py
+    synthesizer_agent.py
+    llm_enrichment.py
+  services/
+    llm_client.py
+    benchmark.py
+    report_formatter.py
+  ui/
+    gradio_app.py
+tests/
+examples/
+```
 
-```bash
-python -m pytest
-```
+## Submission Notes
+
+For the hackathon submission, highlight:
+
+- agentic workflow with multiple specialized agents
+- Qwen2.5-Coder integration through vLLM
+- AMD Developer Cloud + ROCm validation
+- Hugging Face Space deployment
+- practical business value: production readiness for AI-generated code
+- originality: combining security, operations, dependency, and CUDA-to-ROCm portability checks in one audit workflow
 
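The "Agent Workflow" block in the new README describes a fan-out/fan-in graph. A minimal illustrative sketch of that shape (the real wiring lives in `app/agents/graph.py`, which this commit modifies but does not show; agent names are taken from the README):

```python
# workflow_sketch.py - illustrative fan-out/fan-in, not the actual graph.py.
import asyncio

AGENTS = [
    "Security", "Performance", "Quality", "Docs", "Config",
    "Dependency", "Error Handling", "Observability", "CUDA-to-ROCm",
]

async def run_agent(name: str, chunks: list[str]) -> dict:
    # Stand-in for SecurityAgent.analyze(chunks) and friends.
    return {"agent": name, "findings": []}

async def audit(chunks: list[str]) -> dict:
    # Fan out: every agent sees the same chunks concurrently.
    outputs = await asyncio.gather(*(run_agent(name, chunks) for name in AGENTS))
    # Fan in: a synthesizer step merges, deduplicates, and scores.
    return {"agents": [o["agent"] for o in outputs], "report": "..."}

print(asyncio.run(audit(["chunk-1", "chunk-2"])))
```
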
app/agents/config_agent.py ADDED
@@ -0,0 +1,114 @@
+import re
+
+from app.schemas import AgentOutput, CodeChunk, Finding, Severity
+
+
+CONFIG_PATTERNS = [
+    (
+        re.compile(r"(?i)\bdebug\s*=\s*true\b"),
+        "Debug mode enabled",
+        Severity.high,
+        "Debug mode can expose stack traces, environment details, and interactive debugger behavior.",
+        "Disable debug mode in production and load it from an environment-specific setting.",
+        0.9,
+    ),
+    (
+        re.compile(r"(?i)(allow_origins|cors_allowed_origins)\s*=\s*\[[^\]]*['\"]\*['\"]"),
+        "Wildcard CORS origin",
+        Severity.medium,
+        "A wildcard CORS policy can allow untrusted origins to interact with browser-protected resources.",
+        "Replace '*' with an explicit allowlist of trusted production origins.",
+        0.86,
+    ),
+    (
+        re.compile(r"(?i)access-control-allow-origin['\"]?\s*[:=]\s*['\"]\*['\"]"),
+        "Wildcard Access-Control-Allow-Origin",
+        Severity.medium,
+        "A wildcard Access-Control-Allow-Origin header weakens browser origin protections.",
+        "Set Access-Control-Allow-Origin to specific trusted domains.",
+        0.86,
+    ),
+    (
+        re.compile(r"(?i)verify\s*=\s*false\b"),
+        "TLS verification disabled in configuration",
+        Severity.high,
+        "Disabling TLS verification lets attackers intercept traffic that should be protected.",
+        "Remove verify=False and configure a trusted CA bundle if custom certificates are required.",
+        0.91,
+    ),
+    (
+        re.compile(r"(?i)node_tls_reject_unauthorized\s*=\s*['\"]?0['\"]?"),
+        "Node TLS certificate checks disabled",
+        Severity.high,
+        "Disabling Node.js TLS verification makes HTTPS connections vulnerable to interception.",
+        "Remove NODE_TLS_REJECT_UNAUTHORIZED=0 and fix certificate trust at the environment level.",
+        0.92,
+    ),
+    (
+        re.compile(r"(?i)(secret_key|jwt_secret|session_secret)\s*=\s*['\"](secret|changeme|change-me|password|django-insecure[^'\"]*)['\"]"),
+        "Weak default secret configured",
+        Severity.high,
+        "Default secrets are easy to guess and can compromise sessions, JWTs, or signed cookies.",
+        "Generate a strong secret and load it from a secret manager or environment variable.",
+        0.9,
+    ),
+]
+
+
+class ConfigAgent:
+    name = "Config Agent"
+
+    async def analyze(self, chunks: list[CodeChunk]) -> AgentOutput:
+        findings: list[Finding] = []
+        for chunk in chunks:
+            findings.extend(self._scan_chunk(chunk))
+
+        return AgentOutput(
+            agent_name=self.name,
+            findings=findings,
+            metadata={"chunks_scanned": len(chunks), "mode": "static-rules"},
+        )
+
+    def _scan_chunk(self, chunk: CodeChunk) -> list[Finding]:
+        findings: list[Finding] = []
+        for offset, line in enumerate(chunk.content.splitlines()):
+            actual_line = chunk.line_start + offset
+            for pattern, title, severity, description, fix, confidence in CONFIG_PATTERNS:
+                if pattern.search(line):
+                    findings.append(
+                        self._finding(
+                            title=title,
+                            severity=severity,
+                            chunk=chunk,
+                            line_number=actual_line,
+                            description=description,
+                            suggested_fix=fix,
+                            confidence=confidence,
+                        )
+                    )
+
+        return findings
+
+    def _finding(
+        self,
+        title: str,
+        severity: Severity,
+        chunk: CodeChunk,
+        line_number: int,
+        description: str,
+        suggested_fix: str,
+        confidence: float,
+    ) -> Finding:
+        return Finding(
+            title=title,
+            severity=severity,
+            file_path=chunk.file_path,
+            line_start=line_number,
+            line_end=line_number,
+            description=description,
+            why_it_matters="Development-safe configuration often becomes production risk when copied into deployed environments.",
+            suggested_fix=suggested_fix,
+            agent_source=self.name,
+            category="config",
+            confidence=confidence,
+        )
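
A quick smoke-test sketch for the new agent. It assumes `CodeChunk` can be built from just the fields the scanner reads (`file_path`, `content`, `line_start`); the settings-file content is invented for the example:

```python
# Hypothetical ConfigAgent smoke test; CodeChunk fields are assumed.
import asyncio

from app.agents.config_agent import ConfigAgent
from app.schemas import CodeChunk

chunk = CodeChunk(
    file_path="settings.py",
    content="DEBUG = True\nverify = False\n",
    line_start=1,
)
output = asyncio.run(ConfigAgent().analyze([chunk]))
for finding in output.findings:
    # Expect "Debug mode enabled" and "TLS verification disabled in configuration".
    print(finding.severity, finding.title, finding.line_start)
```
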
app/agents/cuda_migration_agent.py ADDED
@@ -0,0 +1,106 @@
+import re
+
+from app.schemas import AgentOutput, CodeChunk, Finding, Severity
+
+
+CUDA_PATTERNS = [
+    (
+        re.compile(r"\btorch\.cuda\b|\.cuda\s*\("),
+        "PyTorch CUDA-specific API usage",
+        "Use device-agnostic PyTorch code such as torch.device('cuda' if torch.cuda.is_available() else 'cpu') only when portability is intended, and validate the same path under ROCm where PyTorch maps CUDA APIs to HIP.",
+        0.82,
+    ),
+    (
+        re.compile(r"\bpynvml\b|\bnvidia-smi\b"),
+        "NVIDIA-specific GPU monitoring",
+        "Replace NVIDIA-specific monitoring with ROCm tools such as rocm-smi or a metrics adapter that supports AMD GPUs.",
+        0.9,
+    ),
+    (
+        re.compile(r"\bcuda(Malloc|Free|Memcpy|Memset|DeviceSynchronize|GetDevice|SetDevice)\b"),
+        "CUDA runtime API call",
+        "Map CUDA runtime calls to HIP/ROCm equivalents and validate memory transfer semantics on AMD hardware.",
+        0.88,
+    ),
+    (
+        re.compile(r"\b(cublas|cudnn|cufft|curand)\w*\b", re.IGNORECASE),
+        "CUDA library dependency",
+        "Review ROCm equivalents such as rocBLAS, MIOpen, rocFFT, or rocRAND before running on AMD GPUs.",
+        0.86,
+    ),
+    (
+        re.compile(r"\bnccl\w*\b", re.IGNORECASE),
+        "NCCL-specific distributed GPU dependency",
+        "Use RCCL or a framework abstraction that supports AMD GPU collectives.",
+        0.84,
+    ),
+]
+
+
+class CudaMigrationAgent:
+    name = "CUDA-to-ROCm Agent"
+
+    async def analyze(self, chunks: list[CodeChunk]) -> AgentOutput:
+        findings: list[Finding] = []
+        for chunk in chunks:
+            findings.extend(self._scan_chunk(chunk))
+
+        return AgentOutput(
+            agent_name=self.name,
+            findings=findings,
+            metadata={"chunks_scanned": len(chunks), "mode": "static-rules"},
+        )
+
+    def _scan_chunk(self, chunk: CodeChunk) -> list[Finding]:
+        findings: list[Finding] = []
+        seen_titles: set[str] = set()
+
+        for offset, line in enumerate(chunk.content.splitlines()):
+            actual_line = chunk.line_start + offset
+            for pattern, title, fix, confidence in CUDA_PATTERNS:
+                if title in seen_titles:
+                    continue
+                if pattern.search(line):
+                    seen_titles.add(title)
+                    findings.append(
+                        self._finding(
+                            title=title,
+                            chunk=chunk,
+                            line_number=actual_line,
+                            matched_line=line,
+                            suggested_fix=fix,
+                            confidence=confidence,
+                        )
+                    )
+
+        return findings
+
+    def _finding(
+        self,
+        title: str,
+        chunk: CodeChunk,
+        line_number: int,
+        matched_line: str,
+        suggested_fix: str,
+        confidence: float,
+    ) -> Finding:
+        snippet = self._snippet(matched_line)
+        return Finding(
+            title=title,
+            severity=Severity.medium,
+            file_path=chunk.file_path,
+            line_start=line_number,
+            line_end=line_number,
+            description=f"`{snippet}` references a CUDA/NVIDIA-specific API that needs review before AMD ROCm deployment.",
+            why_it_matters="This exact GPU assumption can fail or reduce portability when the app moves from NVIDIA CUDA environments to AMD MI300X/ROCm.",
+            suggested_fix=suggested_fix,
+            agent_source=self.name,
+            category="cuda_migration",
+            confidence=confidence,
+        )
+
+    def _snippet(self, line: str, max_length: int = 96) -> str:
+        normalized = " ".join(line.strip().split())
+        if len(normalized) <= max_length:
+            return normalized
+        return f"{normalized[: max_length - 3]}..."
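
Note the `seen_titles` set: each CUDA pattern fires at most once per chunk, so repeated hits collapse into one finding. A sketch of that behavior, under the same `CodeChunk` field assumptions as the ConfigAgent example above:

```python
# Hypothetical check: two PyTorch CUDA hits in one chunk yield one finding.
import asyncio

from app.agents.cuda_migration_agent import CudaMigrationAgent
from app.schemas import CodeChunk

chunk = CodeChunk(
    file_path="train.py",
    content="model.cuda()\nx = torch.cuda.current_device()\n",
    line_start=10,
)
output = asyncio.run(CudaMigrationAgent().analyze([chunk]))
print(len(output.findings))  # expected: 1, deduplicated by title within the chunk
```
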
app/agents/dependency_agent.py ADDED
@@ -0,0 +1,347 @@
+import json
+import re
+import tomllib
+from dataclasses import dataclass
+from typing import Any
+
+import httpx
+
+from app.config import Settings
+from app.schemas import AgentOutput, CodeChunk, Finding, Severity
+
+
+@dataclass(frozen=True)
+class Dependency:
+    name: str
+    version: str | None
+    ecosystem: str
+    manifest_path: str
+    line_number: int
+    source: str
+
+
+class DependencyAgent:
+    name = "Dependency Agent"
+
+    def __init__(self, settings: Settings):
+        self.settings = settings
+
+    async def analyze(self, chunks: list[CodeChunk]) -> AgentOutput:
+        dependencies = self._parse_dependencies(chunks)
+        findings: list[Finding] = []
+        cves: list[dict[str, Any]] = []
+        warnings: list[str] = []
+
+        if self.settings.enable_dependency_cve_lookup and dependencies:
+            cves, warnings = await self._lookup_cves(dependencies)
+            findings.extend(self._cve_findings(cves))
+
+        return AgentOutput(
+            agent_name=self.name,
+            findings=findings,
+            metadata={
+                "mode": "manifest-parse+optional-osv",
+                "dependency_count": len(dependencies),
+                "manifests": sorted({dependency.manifest_path for dependency in dependencies}),
+                "dependency_cves": cves,
+                "warnings": warnings,
+            },
+        )
+
+    def _parse_dependencies(self, chunks: list[CodeChunk]) -> list[Dependency]:
+        dependencies: list[Dependency] = []
+        seen: set[tuple[str, str, str, str | None]] = set()
+
+        for chunk in chunks:
+            parsed = self._parse_chunk(chunk)
+            for dependency in parsed:
+                key = (
+                    dependency.ecosystem,
+                    dependency.name.lower(),
+                    dependency.manifest_path,
+                    dependency.version,
+                )
+                if key in seen:
+                    continue
+                seen.add(key)
+                dependencies.append(dependency)
+
+        return dependencies
+
+    def _parse_chunk(self, chunk: CodeChunk) -> list[Dependency]:
+        path = chunk.file_path.lower()
+        if path.endswith("requirements.txt"):
+            return self._parse_requirements(chunk)
+        if path.endswith("package.json"):
+            return self._parse_package_json(chunk)
+        if path.endswith("pyproject.toml"):
+            return self._parse_pyproject(chunk)
+        if path.endswith("go.mod"):
+            return self._parse_go_mod(chunk)
+        if path.endswith("cargo.toml"):
+            return self._parse_cargo_toml(chunk)
+        return []
+
+    def _parse_requirements(self, chunk: CodeChunk) -> list[Dependency]:
+        dependencies: list[Dependency] = []
+        for offset, raw_line in enumerate(chunk.content.splitlines()):
+            line = raw_line.split("#", 1)[0].strip()
+            if not line or line.startswith(("-", "git+", "http://", "https://")):
+                continue
+            match = re.match(r"([A-Za-z0-9_.-]+)\s*(?:\[.*?\])?\s*(==|~=|>=|<=|>|<)?\s*([A-Za-z0-9_.*!+-][A-Za-z0-9_.*!+-]*)?", line)
+            if not match:
+                continue
+            name = match.group(1)
+            version = self._clean_version(match.group(3))
+            dependencies.append(
+                Dependency(
+                    name=name,
+                    version=version,
+                    ecosystem="PyPI",
+                    manifest_path=chunk.file_path,
+                    line_number=chunk.line_start + offset,
+                    source=line,
+                )
+            )
+        return dependencies
+
+    def _parse_package_json(self, chunk: CodeChunk) -> list[Dependency]:
+        try:
+            data = json.loads(chunk.content)
+        except json.JSONDecodeError:
+            return []
+
+        dependencies: list[Dependency] = []
+        for section in ("dependencies", "devDependencies", "optionalDependencies"):
+            section_dependencies = data.get(section, {})
+            if not isinstance(section_dependencies, dict):
+                continue
+            for name, raw_version in section_dependencies.items():
+                dependencies.append(
+                    Dependency(
+                        name=name,
+                        version=self._clean_version(str(raw_version)),
+                        ecosystem="npm",
+                        manifest_path=chunk.file_path,
+                        line_number=self._line_for_text(chunk, f'"{name}"'),
+                        source=section,
+                    )
+                )
+        return dependencies
+
+    def _parse_pyproject(self, chunk: CodeChunk) -> list[Dependency]:
+        try:
+            data = tomllib.loads(chunk.content)
+        except tomllib.TOMLDecodeError:
+            return []
+
+        dependencies: list[Dependency] = []
+        project_dependencies = data.get("project", {}).get("dependencies", [])
+        if isinstance(project_dependencies, list):
+            for value in project_dependencies:
+                dependency = self._python_dependency_from_string(str(value), chunk)
+                if dependency:
+                    dependencies.append(dependency)
+
+        poetry_dependencies = data.get("tool", {}).get("poetry", {}).get("dependencies", {})
+        if isinstance(poetry_dependencies, dict):
+            for name, value in poetry_dependencies.items():
+                if name.lower() == "python":
+                    continue
+                dependencies.append(
+                    Dependency(
+                        name=name,
+                        version=self._clean_version(str(value)),
+                        ecosystem="PyPI",
+                        manifest_path=chunk.file_path,
+                        line_number=self._line_for_text(chunk, name),
+                        source="tool.poetry.dependencies",
+                    )
+                )
+        return dependencies
+
+    def _parse_go_mod(self, chunk: CodeChunk) -> list[Dependency]:
+        dependencies: list[Dependency] = []
+        in_require_block = False
+        for offset, raw_line in enumerate(chunk.content.splitlines()):
+            line = raw_line.strip()
+            if line.startswith("require ("):
+                in_require_block = True
+                continue
+            if in_require_block and line == ")":
+                in_require_block = False
+                continue
+            if line.startswith("require "):
+                line = line.removeprefix("require ").strip()
+            elif not in_require_block:
+                continue
+            parts = line.split()
+            if len(parts) < 2:
+                continue
+            dependencies.append(
+                Dependency(
+                    name=parts[0],
+                    version=self._clean_version(parts[1]),
+                    ecosystem="Go",
+                    manifest_path=chunk.file_path,
+                    line_number=chunk.line_start + offset,
+                    source=line,
+                )
+            )
+        return dependencies
+
+    def _parse_cargo_toml(self, chunk: CodeChunk) -> list[Dependency]:
+        try:
+            data = tomllib.loads(chunk.content)
+        except tomllib.TOMLDecodeError:
+            return []
+
+        dependencies: list[Dependency] = []
+        for section in ("dependencies", "dev-dependencies", "build-dependencies"):
+            section_dependencies = data.get(section, {})
+            if not isinstance(section_dependencies, dict):
+                continue
+            for name, value in section_dependencies.items():
+                version = value.get("version") if isinstance(value, dict) else str(value)
+                dependencies.append(
+                    Dependency(
+                        name=name,
+                        version=self._clean_version(str(version)),
+                        ecosystem="crates.io",
+                        manifest_path=chunk.file_path,
+                        line_number=self._line_for_text(chunk, name),
+                        source=section,
+                    )
+                )
+        return dependencies
+
+    def _python_dependency_from_string(self, value: str, chunk: CodeChunk) -> Dependency | None:
+        match = re.match(r"([A-Za-z0-9_.-]+)\s*(?:\[.*?\])?\s*(?:==|~=|>=|<=|>|<)?\s*([A-Za-z0-9_.*!+-]+)?", value)
+        if not match:
+            return None
+        return Dependency(
+            name=match.group(1),
+            version=self._clean_version(match.group(2)),
+            ecosystem="PyPI",
+            manifest_path=chunk.file_path,
+            line_number=self._line_for_text(chunk, match.group(1)),
+            source="project.dependencies",
+        )
+
+    async def _lookup_cves(self, dependencies: list[Dependency]) -> tuple[list[dict[str, Any]], list[str]]:
+        query_dependencies = [dependency for dependency in dependencies if dependency.version]
+        if not query_dependencies:
+            return [], []
+
+        queries = [
+            {
+                "package": {"name": dependency.name, "ecosystem": dependency.ecosystem},
+                "version": dependency.version,
+            }
+            for dependency in query_dependencies
+        ]
+        try:
+            async with httpx.AsyncClient(timeout=self.settings.dependency_osv_timeout_seconds) as client:
+                response = await client.post("https://api.osv.dev/v1/querybatch", json={"queries": queries})
+                response.raise_for_status()
+                payload = response.json()
+        except Exception as exc:
+            return [], [f"Dependency CVE lookup failed gracefully: {exc}"]
+
+        cves: list[dict[str, Any]] = []
+        results = payload.get("results", [])
+        for dependency, result in zip(query_dependencies, results, strict=False):
+            for vuln in result.get("vulns", []):
+                cves.append(self._cve_record(dependency, vuln))
+        return cves, []
+
+    def _cve_record(self, dependency: Dependency, vuln: dict[str, Any]) -> dict[str, Any]:
+        severity = self._severity_from_vuln(vuln)
+        return {
+            "id": vuln.get("id", "UNKNOWN"),
+            "package": dependency.name,
+            "version": dependency.version,
+            "ecosystem": dependency.ecosystem,
+            "severity": severity.value,
+            "summary": vuln.get("summary") or vuln.get("details", "Known vulnerability reported by OSV.dev."),
+            "manifest_path": dependency.manifest_path,
+            "line_number": dependency.line_number,
+            "fixed_version": self._fixed_version(vuln),
+        }
+
+    def _cve_findings(self, cves: list[dict[str, Any]]) -> list[Finding]:
+        findings: list[Finding] = []
+        for cve in cves:
+            package = cve["package"]
+            version = cve.get("version") or "unknown"
+            cve_id = cve["id"]
+            fixed_version = cve.get("fixed_version") or "a non-vulnerable version"
+            findings.append(
+                Finding(
+                    title=f"Vulnerable dependency: {package}",
+                    severity=Severity(cve["severity"]),
+                    file_path=cve["manifest_path"],
+                    line_start=cve["line_number"],
+                    line_end=cve["line_number"],
+                    description=f"{package}@{version} is associated with {cve_id}: {cve['summary']}",
+                    why_it_matters="Known vulnerable dependencies can expose the application to publicly documented exploits.",
+                    suggested_fix=f"Upgrade {package} to {fixed_version} after checking compatibility and lockfile updates.",
+                    agent_source=self.name,
+                    category="dependency",
+                    confidence=0.95,
+                )
+            )
+        return findings
+
+    def _severity_from_vuln(self, vuln: dict[str, Any]) -> Severity:
+        database_severity = str(vuln.get("database_specific", {}).get("severity", "")).upper()
+        if database_severity in Severity._value2member_map_:
+            return Severity(database_severity)
+
+        scores = []
+        for severity in vuln.get("severity", []):
+            score = self._cvss_score(str(severity.get("score", "")))
+            if score is not None:
+                scores.append(score)
+        max_score = max(scores, default=0.0)
+        if max_score >= 9:
+            return Severity.critical
+        if max_score >= 7:
+            return Severity.high
+        if max_score >= 4:
+            return Severity.medium
+        return Severity.low
+
+    def _cvss_score(self, score: str) -> float | None:
+        match = re.search(r"/AV:|CVSS:", score)
+        if match:
+            return None
+        try:
+            return float(score)
+        except ValueError:
+            return None
+
+    def _fixed_version(self, vuln: dict[str, Any]) -> str | None:
+        for affected in vuln.get("affected", []):
+            for range_data in affected.get("ranges", []):
+                for event in range_data.get("events", []):
+                    fixed = event.get("fixed")
+                    if fixed:
+                        return fixed
+        return None
+
+    def _clean_version(self, value: str | None) -> str | None:
+        if not value:
+            return None
+        version = value.strip().strip('"').strip("'")
+        version = re.sub(r"^[\^~<>=!\s]+", "", version)
+        version = version.split(",", 1)[0].strip()
+        if not version or version == "*" or any(char in version for char in "{}"):
+            return None
+        return version
+
+    def _line_for_text(self, chunk: CodeChunk, text: str) -> int:
+        for offset, line in enumerate(chunk.content.splitlines()):
+            if text in line:
+                return chunk.line_start + offset
+        return chunk.line_start
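
The optional CVE lookup posts to OSV.dev's public batch endpoint, exactly as `_lookup_cves()` does above. A standalone sketch of the same request shape for one pinned PyPI package (querybatch returns compact records; full details require a follow-up `/v1/vulns/{id}` call, which is likely why the agent falls back to a generic summary):

```python
# Standalone OSV.dev query matching the shape _lookup_cves() builds.
import httpx

query = {
    "queries": [
        {"package": {"name": "requests", "ecosystem": "PyPI"}, "version": "2.19.0"}
    ]
}
response = httpx.post("https://api.osv.dev/v1/querybatch", json=query, timeout=20)
response.raise_for_status()
for result in response.json().get("results", []):
    for vuln in result.get("vulns", []):
        print(vuln.get("id"))  # advisory ids affecting requests 2.19.0, if any
```
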
app/agents/docs_agent.py CHANGED
@@ -1,6 +1,9 @@
 import re
 
+from app.agents.llm_enrichment import LLMEnrichmentMixin
+from app.config import Settings
 from app.schemas import AgentOutput, CodeChunk, Finding, Severity
+from app.services.llm_client import LLMClient
 
 
 PYTHON_PUBLIC_DEF = re.compile(r"^(\s*)(async\s+def|def|class)\s+([A-Za-z][A-Za-z0-9_]*)")
@@ -9,9 +12,12 @@ README_TEST_TERMS = ("test", "pytest", "unittest")
 README_CONFIG_TERMS = ("config", "environment", ".env", "settings")
 
 
-class DocsAgent:
+class DocsAgent(LLMEnrichmentMixin):
     name = "Docs Agent"
 
+    def __init__(self, llm_client: LLMClient | None = None):
+        self.llm_client = llm_client or LLMClient(Settings())
+
     async def analyze(self, chunks: list[CodeChunk]) -> AgentOutput:
         findings: list[Finding] = []
         readme_seen = False
@@ -37,10 +43,16 @@ class DocsAgent:
             )
         )
 
+        llm_output = await self._run_llm_enrichment(
+            chunks,
+            "Review these code and README chunks for high-confidence documentation gaps, unclear setup instructions, missing usage guidance, or missing public API documentation.",
+        )
+        findings.extend(llm_output.findings)
+
         return AgentOutput(
            agent_name=self.name,
            findings=findings,
-           metadata={"chunks_scanned": len(chunks), "mode": "static-rules"},
+           metadata=self._llm_metadata(chunks, llm_output),
        )
 
     def _scan_readme(self, chunk: CodeChunk) -> list[Finding]:
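
The unchanged `PYTHON_PUBLIC_DEF` pattern is what the Docs Agent's docstring check appears to key on; names must start with a letter, so underscore-prefixed (private) symbols never match. A tiny self-contained sketch of what it captures:

```python
# What PYTHON_PUBLIC_DEF (shown above) captures per line.
import re

PYTHON_PUBLIC_DEF = re.compile(r"^(\s*)(async\s+def|def|class)\s+([A-Za-z][A-Za-z0-9_]*)")

samples = ["def fetch(url):", "    async def run(self):", "class Report:", "def _helper():"]
for line in samples:
    match = PYTHON_PUBLIC_DEF.match(line)
    print(f"{line!r} -> {match.groups() if match else None}")
```
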
app/agents/error_handling_agent.py ADDED
@@ -0,0 +1,183 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
+ import re
+
+ from app.schemas import AgentOutput, CodeChunk, Finding, Severity
+
+
+ EXCEPT_LINE = re.compile(r"^\s*except(?:\s+([\w.]+))?.*:")
+ REQUEST_WITHOUT_TIMEOUT = re.compile(r"\brequests\.(get|post|put|patch|delete)\s*\((?!.*\btimeout\s*=)")
+ JS_FETCH_WITHOUT_ABORT = re.compile(r"\bfetch\s*\([^,\n)]+\)")
+ LOGGING_SIGNALS = ("logging.", "logger.", ".exception(", ".error(", ".warning(", "console.error", "console.warn")
+
+
+ class ErrorHandlingAgent:
+     name = "Error Handling Agent"
+
+     async def analyze(self, chunks: list[CodeChunk]) -> AgentOutput:
+         findings: list[Finding] = []
+         for chunk in chunks:
+             findings.extend(self._scan_chunk(chunk))
+
+         return AgentOutput(
+             agent_name=self.name,
+             findings=findings,
+             metadata={"chunks_scanned": len(chunks), "mode": "static-rules"},
+         )
+
+     def _scan_chunk(self, chunk: CodeChunk) -> list[Finding]:
+         findings: list[Finding] = []
+         lines = chunk.content.splitlines()
+
+         for index, line in enumerate(lines):
+             actual_line = chunk.line_start + index
+             stripped = line.strip()
+
+             except_match = EXCEPT_LINE.match(line)
+             if except_match:
+                 findings.extend(self._scan_except_block(chunk, lines, index, actual_line, except_match.group(1)))
+
+             if REQUEST_WITHOUT_TIMEOUT.search(line):
+                 call_snippet = self._snippet(line)
+                 findings.append(
+                     self._finding(
+                         "External HTTP call without timeout",
+                         Severity.medium,
+                         chunk,
+                         actual_line,
+                         f"`{call_snippet}` makes an external request without an explicit timeout.",
+                         f"Add `timeout=` to `{call_snippet}` and handle timeout exceptions with logging or retry policy.",
+                         0.84,
+                         why_it_matters=(
+                             "This exact call can hold the worker until the operating system or remote service gives up, "
+                             "which makes downstream outages spread into the app."
+                         ),
+                     )
+                 )
+
+             if JS_FETCH_WITHOUT_ABORT.search(line) and "AbortController" not in chunk.content:
+                 call_snippet = self._snippet(line)
+                 findings.append(
+                     self._finding(
+                         "Fetch call has no cancellation timeout",
+                         Severity.low,
+                         chunk,
+                         actual_line,
+                         f"`{call_snippet}` uses fetch without an AbortController or deadline in this scanned chunk.",
+                         "Wrap this fetch in an AbortController timeout or a shared HTTP client that enforces request deadlines.",
+                         0.76,
+                         why_it_matters="A stuck fetch can leave the user action or server-side request waiting with no bounded failure path.",
+                     )
+                 )
+
+         return findings
+
+     def _scan_except_block(
+         self,
+         chunk: CodeChunk,
+         lines: list[str],
+         except_index: int,
+         actual_line: int,
+         exception_name: str | None,
+     ) -> list[Finding]:
+         block_lines = self._collect_block(lines, except_index)
+         normalized = "\n".join(line.strip() for line in block_lines)
+         findings: list[Finding] = []
+
+         if exception_name in (None, "Exception", "BaseException"):
+             exception_label = exception_name or "bare except"
+             findings.append(
+                 self._finding(
+                     "Broad exception handler",
+                     Severity.medium,
+                     chunk,
+                     actual_line,
+                     f"The handler catches `{exception_label}`, which can group unrelated failures into the same recovery path.",
+                     f"Replace `{exception_label}` with the narrow exception type expected here, and let unexpected failures surface with context.",
+                     0.82,
+                     why_it_matters="Broad handlers make different failure modes look identical during incident triage.",
+                 )
+             )
+
+         if not block_lines:
+             return findings
+
+         has_logging = any(signal in normalized for signal in LOGGING_SIGNALS)
+         reraises = re.search(r"(^|\n)raise(\s|$)", normalized) is not None
+         silent_body = normalized in {"pass", "..."} or normalized.startswith("return None")
+
+         if silent_body:
+             body_preview = self._snippet(normalized.splitlines()[0] if normalized else "empty handler")
+             findings.append(
+                 self._finding(
+                     "Exception swallowed without recovery",
+                     Severity.high,
+                     chunk,
+                     actual_line,
+                     f"The except block uses `{body_preview}` and suppresses the failure without logging, retrying, or returning a meaningful fallback.",
+                     "Log the exception with local context, re-raise when the caller must handle it, or return a deliberate typed fallback.",
+                     0.9,
+                     why_it_matters="This handler erases the original failure at the exact point where debugging context is still available.",
+                 )
+             )
+         elif not has_logging and not reraises:
+             first_action = self._snippet(normalized.splitlines()[0] if normalized else "handler body")
+             findings.append(
+                 self._finding(
+                     "Exception handled without logging or re-raise",
+                     Severity.medium,
+                     chunk,
+                     actual_line,
+                     f"The except block continues with `{first_action}` but does not log or re-raise the exception.",
+                     "Add structured logging before this recovery path, or re-raise after adding recovery-specific context.",
+                     0.82,
+                     why_it_matters="The recovery branch may keep execution going while hiding why the branch was needed.",
+                 )
+             )
+
+         return findings
+
+     def _collect_block(self, lines: list[str], except_index: int) -> list[str]:
+         except_line = lines[except_index]
+         except_indent = len(except_line) - len(except_line.lstrip(" "))
+         block: list[str] = []
+
+         for line in lines[except_index + 1 :]:
+             if not line.strip():
+                 continue
+             indent = len(line) - len(line.lstrip(" "))
+             if indent <= except_indent:
+                 break
+             block.append(line)
+
+         return block
+
+     def _finding(
+         self,
+         title: str,
+         severity: Severity,
+         chunk: CodeChunk,
+         line_number: int,
+         description: str,
+         suggested_fix: str,
+         confidence: float,
+         why_it_matters: str | None = None,
+     ) -> Finding:
+         return Finding(
+             title=title,
+             severity=severity,
+             file_path=chunk.file_path,
+             line_start=line_number,
+             line_end=line_number,
+             description=description,
+             why_it_matters=why_it_matters
+             or "Weak error handling turns small downstream failures into outages that are hard to diagnose and recover from.",
+             suggested_fix=suggested_fix,
+             agent_source=self.name,
+             category="error_handling",
+             confidence=confidence,
+         )
+
+     def _snippet(self, line: str, max_length: int = 96) -> str:
+         normalized = " ".join(line.strip().split())
+         if len(normalized) <= max_length:
+             return normalized
+         return f"{normalized[: max_length - 3]}..."
app/agents/graph.py CHANGED
@@ -1,10 +1,16 @@
  from collections.abc import AsyncIterator
+ from dataclasses import dataclass
  from operator import add
- from typing import Annotated, TypedDict
+ from typing import Annotated, Protocol, TypedDict

  from langgraph.graph import END, StateGraph

+ from app.agents.config_agent import ConfigAgent
+ from app.agents.cuda_migration_agent import CudaMigrationAgent
+ from app.agents.dependency_agent import DependencyAgent
  from app.agents.docs_agent import DocsAgent
+ from app.agents.error_handling_agent import ErrorHandlingAgent
+ from app.agents.observability_agent import ObservabilityAgent
  from app.agents.performance_agent import PerformanceAgent
  from app.agents.quality_agent import QualityAgent
  from app.agents.security_agent import SecurityAgent
@@ -16,6 +22,22 @@ from app.services.llm_client import LLMClient
  from app.services.repo_crawler import RepoCrawler


+ class AnalysisAgent(Protocol):
+     name: str
+
+     async def analyze(self, chunks: list[CodeChunk]) -> AgentOutput:
+         ...
+
+
+ @dataclass(frozen=True)
+ class AnalysisAgentSpec:
+     node_name: str
+     state_key: str
+     progress_label: str
+     start_message: str
+     agent: AnalysisAgent
+
+
  class AuditState(TypedDict, total=False):
      repo_url: str
      repo: RepoScanResult
@@ -24,6 +46,11 @@ class AuditState(TypedDict, total=False):
      performance_output: AgentOutput
      quality_output: AgentOutput
      docs_output: AgentOutput
+     config_output: AgentOutput
+     dependency_output: AgentOutput
+     error_handling_output: AgentOutput
+     observability_output: AgentOutput
+     cuda_migration_output: AgentOutput
      report: AuditReport
      progress: Annotated[list[str], add]
@@ -34,32 +61,103 @@ class AuditGraph:
          self.crawler = RepoCrawler(self.settings)
          self.chunker = Chunker(self.settings)
          self.llm_client = LLMClient(self.settings)
-         self.security_agent = SecurityAgent(self.llm_client)
-         self.performance_agent = PerformanceAgent()
-         self.quality_agent = QualityAgent()
-         self.docs_agent = DocsAgent()
+         self.analysis_agents = self._build_agent_registry()
          self.synthesizer = SynthesizerAgent()
          self.graph = self._build_graph()

+     def _build_agent_registry(self) -> list[AnalysisAgentSpec]:
+         return [
+             AnalysisAgentSpec(
+                 node_name="security",
+                 state_key="security_output",
+                 progress_label="Security Agent",
+                 start_message="Security Agent: scanning for risky patterns...",
+                 agent=SecurityAgent(self.llm_client),
+             ),
+             AnalysisAgentSpec(
+                 node_name="performance",
+                 state_key="performance_output",
+                 progress_label="Performance Agent",
+                 start_message="Performance Agent: scanning for slow-path patterns...",
+                 agent=PerformanceAgent(self.llm_client),
+             ),
+             AnalysisAgentSpec(
+                 node_name="quality",
+                 state_key="quality_output",
+                 progress_label="Quality Agent",
+                 start_message="Quality Agent: scanning maintainability signals...",
+                 agent=QualityAgent(self.llm_client),
+             ),
+             AnalysisAgentSpec(
+                 node_name="docs",
+                 state_key="docs_output",
+                 progress_label="Docs Agent",
+                 start_message="Docs Agent: scanning README and public documentation...",
+                 agent=DocsAgent(self.llm_client),
+             ),
+             AnalysisAgentSpec(
+                 node_name="config",
+                 state_key="config_output",
+                 progress_label="Config Agent",
+                 start_message="Config Agent: scanning production configuration risk...",
+                 agent=ConfigAgent(),
+             ),
+             AnalysisAgentSpec(
+                 node_name="dependency",
+                 state_key="dependency_output",
+                 progress_label="Dependency Agent",
+                 start_message="Dependency Agent: parsing manifests and optional CVE data...",
+                 agent=DependencyAgent(self.settings),
+             ),
+             AnalysisAgentSpec(
+                 node_name="error_handling",
+                 state_key="error_handling_output",
+                 progress_label="Error Handling Agent",
+                 start_message="Error Handling Agent: scanning resilience and failure paths...",
+                 agent=ErrorHandlingAgent(),
+             ),
+             AnalysisAgentSpec(
+                 node_name="observability",
+                 state_key="observability_output",
+                 progress_label="Observability Agent",
+                 start_message="Observability Agent: scanning logs, health checks, and telemetry gaps...",
+                 agent=ObservabilityAgent(),
+             ),
+             AnalysisAgentSpec(
+                 node_name="cuda_migration",
+                 state_key="cuda_migration_output",
+                 progress_label="CUDA-to-ROCm Agent",
+                 start_message="CUDA-to-ROCm Agent: scanning NVIDIA-specific GPU assumptions...",
+                 agent=CudaMigrationAgent(),
+             ),
+         ]
+
      def _build_graph(self):
          graph = StateGraph(AuditState)
          graph.add_node("crawl", self._crawl)
          graph.add_node("chunk", self._chunk)
-         graph.add_node("security", self._security)
-         graph.add_node("performance", self._performance)
-         graph.add_node("quality", self._quality)
-         graph.add_node("docs", self._docs)
+         for spec in self.analysis_agents:
+             graph.add_node(spec.node_name, self._make_agent_node(spec))
          graph.add_node("synthesize", self._synthesize)
          graph.set_entry_point("crawl")
          graph.add_edge("crawl", "chunk")
-         graph.add_edge("chunk", "security")
-         graph.add_edge("chunk", "performance")
-         graph.add_edge("chunk", "quality")
-         graph.add_edge("chunk", "docs")
-         graph.add_edge(["security", "performance", "quality", "docs"], "synthesize")
+         agent_node_names = [spec.node_name for spec in self.analysis_agents]
+         for node_name in agent_node_names:
+             graph.add_edge("chunk", node_name)
+         graph.add_edge(agent_node_names, "synthesize")
          graph.add_edge("synthesize", END)
          return graph.compile()

+     def _make_agent_node(self, spec: AnalysisAgentSpec):
+         async def run_agent(state: AuditState) -> AuditState:
+             output = await spec.agent.analyze(state["chunks"])
+             return {
+                 spec.state_key: output,
+                 "progress": [f"{spec.progress_label}: found {len(output.findings)} findings."],
+             }
+
+         return run_agent
+
      async def run(self, repo_url: str) -> AuditReport:
          result = await self.graph.ainvoke({"repo_url": repo_url, "progress": []})
          return result["report"]
@@ -75,26 +173,17 @@ class AuditGraph:
          chunks = self.chunker.chunk_files(repo.files)
          yield f"Chunker: created {len(chunks)} code chunks."

-         yield "Security Agent: scanning for risky patterns..."
-         security_output = await self.security_agent.analyze(chunks)
-         yield f"Security Agent: found {len(security_output.findings)} findings."
-
-         yield "Performance Agent: scanning for slow-path patterns..."
-         performance_output = await self.performance_agent.analyze(chunks)
-         yield f"Performance Agent: found {len(performance_output.findings)} findings."
-
-         yield "Quality Agent: scanning maintainability signals..."
-         quality_output = await self.quality_agent.analyze(chunks)
-         yield f"Quality Agent: found {len(quality_output.findings)} findings."
-
-         yield "Docs Agent: scanning README and public documentation..."
-         docs_output = await self.docs_agent.analyze(chunks)
-         yield f"Docs Agent: found {len(docs_output.findings)} findings."
+         outputs: list[AgentOutput] = []
+         for spec in self.analysis_agents:
+             yield spec.start_message
+             output = await spec.agent.analyze(chunks)
+             outputs.append(output)
+             yield f"{spec.progress_label}: found {len(output.findings)} findings."

          yield "Synthesizer Agent: ranking findings and formatting report..."
          report = await self.synthesizer.synthesize(
              repo,
-             [security_output, performance_output, quality_output, docs_output],
+             outputs,
          )
          yield "Synthesizer Agent: final report generated."
          yield report
@@ -109,26 +198,11 @@ class AuditGraph:
          chunks = self.chunker.chunk_files(state["repo"].files)
          return {"chunks": chunks, "progress": [f"Chunker: created {len(chunks)} code chunks."]}

-     async def _security(self, state: AuditState) -> AuditState:
-         output = await self.security_agent.analyze(state["chunks"])
-         return {"security_output": output, "progress": [f"Security Agent: found {len(output.findings)} findings."]}
-
-     async def _performance(self, state: AuditState) -> AuditState:
-         output = await self.performance_agent.analyze(state["chunks"])
-         return {"performance_output": output, "progress": [f"Performance Agent: found {len(output.findings)} findings."]}
-
-     async def _quality(self, state: AuditState) -> AuditState:
-         output = await self.quality_agent.analyze(state["chunks"])
-         return {"quality_output": output, "progress": [f"Quality Agent: found {len(output.findings)} findings."]}
-
-     async def _docs(self, state: AuditState) -> AuditState:
-         output = await self.docs_agent.analyze(state["chunks"])
-         return {"docs_output": output, "progress": [f"Docs Agent: found {len(output.findings)} findings."]}
-
      async def _synthesize(self, state: AuditState) -> AuditState:
+         outputs = [state[spec.state_key] for spec in self.analysis_agents]
          report = await self.synthesizer.synthesize(
              state["repo"],
-             [state["security_output"], state["performance_output"], state["quality_output"], state["docs_output"]],
+             outputs,
          )
          self.crawler.cleanup(state["repo"])
          return {"report": report, "progress": ["Synthesizer Agent: final report generated."]}
app/agents/llm_enrichment.py ADDED
@@ -0,0 +1,85 @@
+ from app.schemas import AgentOutput, CodeChunk
+ from app.services.json_parser import parse_agent_output
+ from app.services.llm_client import LLMClient
+
+
+ FINDING_SCHEMA_INSTRUCTIONS = (
+     "Return JSON matching this schema exactly:\n"
+     "{\n"
+     '  "findings": [\n'
+     "    {\n"
+     '      "title": "short title",\n'
+     '      "severity": "CRITICAL|HIGH|MEDIUM|LOW",\n'
+     '      "file_path": "path from input",\n'
+     '      "line_start": 1,\n'
+     '      "line_end": 1,\n'
+     '      "description": "what is wrong",\n'
+     '      "why_it_matters": "impact",\n'
+     '      "suggested_fix": "specific fix",\n'
+     '      "agent_source": "agent name"\n'
+     "    }\n"
+     "  ]\n"
+     "}\n"
+ )
+
+ CONTEXTUAL_REVIEW_INSTRUCTIONS = (
+     "Make each finding specific to the exact code shown. "
+     "Reference the concrete function, call, config value, exception handler, or line pattern when visible. "
+     "Do not reuse generic boilerplate language across findings. "
+     "Do not report duplicates of the same issue in the same file unless the risk or fix is meaningfully different. "
+     "Descriptions should explain what this exact code does wrong; suggested_fix should name the specific API, guard, timeout, logger, or config change to use."
+ )
+
+
+ class LLMEnrichmentMixin:
+     name: str
+     llm_client: LLMClient
+
+     async def _run_llm_enrichment(self, chunks: list[CodeChunk], review_instruction: str) -> AgentOutput:
+         if not self.llm_client.settings.enable_llm_enrichment:
+             return AgentOutput(agent_name=self.name)
+
+         selected_chunks = chunks[: self.llm_client.settings.max_llm_chunks]
+         if not selected_chunks:
+             return AgentOutput(agent_name=self.name)
+
+         try:
+             raw_output = await self.llm_client.complete_json(
+                 f"You are a senior {self.name.lower()}. Return only JSON.",
+                 self._build_llm_prompt(selected_chunks, review_instruction),
+             )
+             return parse_agent_output(raw_output, self.name)
+         except Exception as exc:
+             return AgentOutput(
+                 agent_name=self.name,
+                 metadata={"llm_error": str(exc)},
+             )
+
+     def _llm_metadata(self, chunks: list[CodeChunk], llm_output: AgentOutput) -> dict[str, object]:
+         return {
+             "chunks_scanned": len(chunks),
+             "mode": "static-rules-plus-optional-llm",
+             "llm_enrichment_enabled": self.llm_client.settings.enable_llm_enrichment,
+             "llm_findings": len(llm_output.findings),
+             **llm_output.metadata,
+         }
+
+     def _build_llm_prompt(self, chunks: list[CodeChunk], review_instruction: str) -> str:
+         chunk_text = "\n\n".join(
+             [
+                 f"File: {chunk.file_path}\n"
+                 f"Lines: {chunk.line_start}-{chunk.line_end}\n"
+                 "```code\n"
+                 f"{chunk.content[:4000]}\n"
+                 "```"
+                 for chunk in chunks
+             ]
+         )
+         return (
+             f"{review_instruction}\n"
+             f"{CONTEXTUAL_REVIEW_INSTRUCTIONS}\n"
+             f"{FINDING_SCHEMA_INSTRUCTIONS}\n"
+             f'Every finding must set "agent_source" to "{self.name}". '
+             "Only include findings that are specific, actionable, and tied to the provided files.\n\n"
+             f"{chunk_text}"
+         )
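Any agent that sets `self.name` and `self.llm_client` can opt into this mixin; with the default mock provider and `enable_llm_enrichment=False`, `_run_llm_enrichment` short-circuits to an empty `AgentOutput`, so agents degrade cleanly to their static rules. A minimal sketch (the `CodeChunk` field names are inferred from usage in this diff):

```python
import asyncio

from app.agents.llm_enrichment import LLMEnrichmentMixin
from app.config import Settings
from app.schemas import AgentOutput, CodeChunk
from app.services.llm_client import LLMClient


class TinyAgent(LLMEnrichmentMixin):
    name = "Tiny Agent"

    def __init__(self) -> None:
        # Default Settings: provider=mock, enable_llm_enrichment=False.
        self.llm_client = LLMClient(Settings())

    async def analyze(self, chunks: list[CodeChunk]) -> AgentOutput:
        llm_output = await self._run_llm_enrichment(chunks, "Review for anything risky.")
        return AgentOutput(
            agent_name=self.name,
            findings=list(llm_output.findings),
            metadata=self._llm_metadata(chunks, llm_output),
        )


chunk = CodeChunk(file_path="a.py", content="print('hi')\n", line_start=1, line_end=1)
report = asyncio.run(TinyAgent().analyze([chunk]))
print(report.metadata["llm_enrichment_enabled"])  # False: the LLM pass was skipped
```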
app/agents/observability_agent.py ADDED
@@ -0,0 +1,155 @@
+ import re
+ from collections import Counter
+
+ from app.schemas import AgentOutput, CodeChunk, Finding, Severity
+
+
+ PRINT_CALL = re.compile(r"\bprint\s*\(")
+ LOGGER_CALL = re.compile(r"\b(logging|logger|log)\.(debug|info|warning|error|exception|critical)\s*\(")
+ ROUTE_DECLARATION = re.compile(r"@\w*(app|router)\.(get|post|put|patch|delete|route)\s*\(\s*['\"]([^'\"]+)['\"]")
+ JS_ROUTE_DECLARATION = re.compile(r"\b(app|router)\.(get|post|put|patch|delete)\s*\(\s*['\"]([^'\"]+)['\"]")
+ SENSITIVE_LOG_LINE = re.compile(r"(?i)(print|logging|logger|console)\S*\s*\(.*(password|passwd|secret|token|api[_-]?key)")
+ HEALTH_PATHS = {"/health", "/healthz", "/ready", "/readiness", "/live", "/liveness", "/ping"}
+
+
+ class ObservabilityAgent:
+     name = "Observability Agent"
+
+     async def analyze(self, chunks: list[CodeChunk]) -> AgentOutput:
+         findings: list[Finding] = []
+         route_paths: set[str] = set()
+         print_counts: Counter[str] = Counter()
+         logger_seen = False
+
+         for chunk in chunks:
+             chunk_findings, chunk_routes, chunk_prints, chunk_has_logger = self._scan_chunk(chunk)
+             findings.extend(chunk_findings)
+             route_paths.update(chunk_routes)
+             print_counts[chunk.file_path] += chunk_prints
+             logger_seen = logger_seen or chunk_has_logger
+
+         findings.extend(self._print_overuse_findings(chunks, print_counts, logger_seen))
+         if route_paths and not any(path in HEALTH_PATHS for path in route_paths):
+             findings.append(self._missing_health_finding(chunks[0]))
+
+         return AgentOutput(
+             agent_name=self.name,
+             findings=findings,
+             metadata={
+                 "chunks_scanned": len(chunks),
+                 "mode": "static-rules",
+                 "routes_seen": len(route_paths),
+                 "logging_seen": logger_seen,
+             },
+         )
+
+     def _scan_chunk(self, chunk: CodeChunk) -> tuple[list[Finding], set[str], int, bool]:
+         findings: list[Finding] = []
+         routes: set[str] = set()
+         print_count = 0
+         has_logger = False
+
+         for offset, line in enumerate(chunk.content.splitlines()):
+             actual_line = chunk.line_start + offset
+             if PRINT_CALL.search(line):
+                 print_count += 1
+             if LOGGER_CALL.search(line):
+                 has_logger = True
+             if SENSITIVE_LOG_LINE.search(line):
+                 sensitive_term = self._sensitive_term(line)
+                 log_snippet = self._snippet(line)
+                 findings.append(
+                     self._finding(
+                         "Sensitive value may be written to logs",
+                         Severity.high,
+                         chunk,
+                         actual_line,
+                         f"`{log_snippet}` appears to log credential-like data containing `{sensitive_term}`.",
+                         f"Remove `{sensitive_term}` from this log statement and log a masked value or stable identifier instead.",
+                         0.86,
+                         why_it_matters="This exact log statement can put sensitive data into terminal output, CI logs, or hosted application logs.",
+                     )
+                 )
+
+             routes.update(match.group(3) for match in ROUTE_DECLARATION.finditer(line))
+             routes.update(match.group(3) for match in JS_ROUTE_DECLARATION.finditer(line))
+
+         return findings, routes, print_count, has_logger
+
+     def _print_overuse_findings(
+         self,
+         chunks: list[CodeChunk],
+         print_counts: Counter[str],
+         logger_seen: bool,
+     ) -> list[Finding]:
+         if logger_seen:
+             return []
+
+         findings: list[Finding] = []
+         first_chunk_by_path = {chunk.file_path: chunk for chunk in chunks}
+         for file_path, count in print_counts.items():
+             if count < 3:
+                 continue
+             chunk = first_chunk_by_path[file_path]
+             findings.append(
+                 self._finding(
+                     "Print statements used instead of structured logging",
+                     Severity.low,
+                     chunk,
+                     chunk.line_start,
+                     f"This file has {count} print statements and no structured logging was detected in the scanned repo.",
+                     "Use a logger with levels and structured context such as request_id, route, and operation.",
+                     0.72,
+                     why_it_matters=f"`{file_path}` will be harder to filter and correlate in production logs because print output has no severity or structured context.",
+                 )
+             )
+
+         return findings
+
+     def _missing_health_finding(self, chunk: CodeChunk) -> Finding:
+         return self._finding(
+             "Web service has routes but no health endpoint detected",
+             Severity.medium,
+             chunk,
+             chunk.line_start,
+             "The scanned code defines web routes but no /health, /ready, /live, or /ping endpoint was detected.",
+             "Add a lightweight health endpoint that returns process readiness and dependency status appropriate for your deployment.",
+             0.74,
+             why_it_matters="Deployments and uptime checks need a predictable endpoint to tell whether this service process is alive and ready.",
+         )
+
+     def _finding(
+         self,
+         title: str,
+         severity: Severity,
+         chunk: CodeChunk,
+         line_number: int,
+         description: str,
+         suggested_fix: str,
+         confidence: float,
+         why_it_matters: str | None = None,
+     ) -> Finding:
+         return Finding(
+             title=title,
+             severity=severity,
+             file_path=chunk.file_path,
+             line_start=line_number,
+             line_end=line_number,
+             description=description,
+             why_it_matters=why_it_matters
+             or "Without basic observability, production failures are harder to detect, triage, and explain during incidents.",
+             suggested_fix=suggested_fix,
+             agent_source=self.name,
+             category="observability",
+             confidence=confidence,
+         )
+
+     def _sensitive_term(self, line: str) -> str:
+         match = re.search(r"(?i)(password|passwd|secret|token|api[_-]?key)", line)
+         return match.group(1) if match else "secret"
+
+     def _snippet(self, line: str, max_length: int = 96) -> str:
+         normalized = " ".join(line.strip().split())
+         if len(normalized) <= max_length:
+             return normalized
+         return f"{normalized[: max_length - 3]}..."
app/agents/performance_agent.py CHANGED
@@ -1,6 +1,9 @@
  import re

+ from app.agents.llm_enrichment import LLMEnrichmentMixin
+ from app.config import Settings
  from app.schemas import AgentOutput, CodeChunk, Finding, Severity
+ from app.services.llm_client import LLMClient


  REQUEST_WITHOUT_TIMEOUT = re.compile(r"\brequests\.(get|post|put|patch|delete)\s*\((?!.*\btimeout\s*=)")
@@ -9,18 +12,27 @@ PYTHON_LOOP = re.compile(r"^(\s*)(for|while)\b")
  PYTHON_FILE_READ = re.compile(r"\b(open\s*\(|Path\s*\([^)]*\)\.read_(text|bytes)\s*\()")


- class PerformanceAgent:
+ class PerformanceAgent(LLMEnrichmentMixin):
      name = "Performance Agent"

+     def __init__(self, llm_client: LLMClient | None = None):
+         self.llm_client = llm_client or LLMClient(Settings())
+
      async def analyze(self, chunks: list[CodeChunk]) -> AgentOutput:
          findings: list[Finding] = []
          for chunk in chunks:
              findings.extend(self._scan_chunk(chunk))

+         llm_output = await self._run_llm_enrichment(
+             chunks,
+             "Review these code chunks for high-confidence performance issues such as algorithmic bottlenecks, blocking I/O, inefficient repeated work, or expensive hot paths.",
+         )
+         findings.extend(llm_output.findings)
+
          return AgentOutput(
              agent_name=self.name,
              findings=findings,
-             metadata={"chunks_scanned": len(chunks), "mode": "static-rules"},
+             metadata=self._llm_metadata(chunks, llm_output),
          )

      def _scan_chunk(self, chunk: CodeChunk) -> list[Finding]:
@@ -56,50 +68,58 @@ class PerformanceAgent:
                  loop_stack.append(len(loop_match.group(1)))

              if REQUEST_WITHOUT_TIMEOUT.search(line):
+                 call_snippet = self._snippet(line)
                  findings.append(
                      self._finding(
                          "HTTP request without timeout",
                          Severity.medium,
                          chunk,
                          actual_line,
-                         "Network calls without timeouts can hang workers and make the app appear frozen under bad network conditions.",
-                         "Pass an explicit timeout, for example requests.get(url, timeout=10).",
+                         f"`{call_snippet}` does not pass `timeout=`, so this request can wait indefinitely.",
+                         f"Add a bounded timeout to this call, for example `{call_snippet.rstrip(')')}, timeout=10)` if the arguments fit that shape.",
+                         why_it_matters="This specific network call can tie up a worker or thread when the remote service stalls.",
                      )
                  )

              if async_indent_stack and "time.sleep(" in line:
+                 sleep_snippet = self._snippet(line)
                  findings.append(
                      self._finding(
                          "Blocking sleep inside async function",
                          Severity.medium,
                          chunk,
                          actual_line,
-                         "time.sleep blocks the event loop, delaying unrelated async work.",
-                         "Use await asyncio.sleep(...) inside async functions.",
+                         f"`{sleep_snippet}` runs inside an async scope and blocks the event loop.",
+                         "Replace this call with `await asyncio.sleep(...)` or move blocking work out of the async path.",
+                         why_it_matters="Blocking the event loop here delays unrelated coroutines that should be able to keep running.",
                      )
                  )

              if loop_stack and PYTHON_FILE_READ.search(line):
+                 read_snippet = self._snippet(line)
                  findings.append(
                      self._finding(
                          "File read inside loop",
                          Severity.low,
                          chunk,
                          actual_line,
-                         "Repeated disk reads inside loops can dominate runtime and slow audits on larger inputs.",
-                         "Read once before the loop, cache results, or stream data deliberately.",
+                         f"`{read_snippet}` appears inside a loop, so the same path may hit disk repeatedly.",
+                         "Read once before the loop, cache by file path, or stream deliberately if every iteration needs fresh data.",
+                         why_it_matters="Repeated disk I/O in this loop can dominate runtime as the input size grows.",
                      )
                  )

              if SYNC_FS_JS.search(line):
+                 fs_snippet = self._snippet(line)
                  findings.append(
                      self._finding(
                          "Synchronous filesystem call",
                          Severity.low,
                          chunk,
                          actual_line,
-                         "Synchronous filesystem APIs block the Node.js event loop and can hurt request latency.",
-                         "Use async fs.promises APIs or move blocking work outside latency-sensitive paths.",
+                         f"`{fs_snippet}` uses a synchronous filesystem API.",
+                         "Use `fs.promises` or move this filesystem work outside latency-sensitive request paths.",
+                         why_it_matters="This call blocks the Node.js event loop while disk I/O completes.",
                      )
                  )

@@ -113,6 +133,7 @@ class PerformanceAgent:
          line_number: int,
          description: str,
          suggested_fix: str,
+         why_it_matters: str | None = None,
      ) -> Finding:
          return Finding(
              title=title,
@@ -121,7 +142,14 @@ class PerformanceAgent:
              line_start=line_number,
              line_end=line_number,
              description=description,
-             why_it_matters="Performance issues in hot paths can increase latency, resource usage, and demo analysis time.",
+             why_it_matters=why_it_matters
+             or "Performance issues in hot paths can increase latency, resource usage, and demo analysis time.",
              suggested_fix=suggested_fix,
              agent_source=self.name,
          )
+
+     def _snippet(self, line: str, max_length: int = 96) -> str:
+         normalized = " ".join(line.strip().split())
+         if len(normalized) <= max_length:
+             return normalized
+         return f"{normalized[: max_length - 3]}..."
app/agents/quality_agent.py CHANGED
@@ -1,6 +1,9 @@
  import re

+ from app.agents.llm_enrichment import LLMEnrichmentMixin
+ from app.config import Settings
  from app.schemas import AgentOutput, CodeChunk, Finding, Severity
+ from app.services.llm_client import LLMClient


  PYTHON_DEF = re.compile(r"^\s*(async\s+def|def|class)\s+([A-Za-z_][A-Za-z0-9_]*)")
@@ -16,18 +19,27 @@ MAX_BRANCHES_PER_CHUNK = 25
  MIN_MEANINGFUL_NAME_LENGTH = 3


- class QualityAgent:
+ class QualityAgent(LLMEnrichmentMixin):
      name = "Quality Agent"

+     def __init__(self, llm_client: LLMClient | None = None):
+         self.llm_client = llm_client or LLMClient(Settings())
+
      async def analyze(self, chunks: list[CodeChunk]) -> AgentOutput:
          findings: list[Finding] = []
          for chunk in chunks:
              findings.extend(self._scan_chunk(chunk))

+         llm_output = await self._run_llm_enrichment(
+             chunks,
+             "Review these code chunks for high-confidence code quality issues such as overly complex structure, risky abstractions, poor naming, or maintainability problems.",
+         )
+         findings.extend(llm_output.findings)
+
          return AgentOutput(
              agent_name=self.name,
              findings=findings,
-             metadata={"chunks_scanned": len(chunks), "mode": "static-rules"},
+             metadata=self._llm_metadata(chunks, llm_output),
          )

      def _scan_chunk(self, chunk: CodeChunk) -> list[Finding]:
app/agents/security_agent.py CHANGED
@@ -1,6 +1,7 @@
  import re

  from app.schemas import AgentOutput, CodeChunk, Finding, Severity
+ from app.agents.llm_enrichment import LLMEnrichmentMixin
  from app.services.llm_client import LLMClient


@@ -29,7 +30,7 @@ SECURITY_PATTERNS = [
  ]


- class SecurityAgent:
+ class SecurityAgent(LLMEnrichmentMixin):
      name = "Security Agent"

      def __init__(self, llm_client: LLMClient):
@@ -41,15 +42,16 @@
          for chunk in chunks:
              findings.extend(self._scan_chunk(chunk))

-         await self.llm_client.complete_json(
-             "You are a security code review agent. Return JSON findings only.",
-             f"Review {len(chunks)} chunks for security issues.",
+         llm_output = await self._run_llm_enrichment(
+             chunks,
+             "Review these code chunks for high-confidence security issues.",
          )
+         findings.extend(llm_output.findings)

          return AgentOutput(
              agent_name=self.name,
              findings=findings,
-             metadata={"chunks_scanned": len(chunks), "mode": "static-rules-plus-llm-interface"},
+             metadata=self._llm_metadata(chunks, llm_output),
          )

      def _scan_chunk(self, chunk: CodeChunk) -> list[Finding]:
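With the old discarded-result `complete_json` call gone, the LLM pass now contributes findings only when enrichment is switched on; under default settings the agent records that fact in its metadata instead of failing. A small smoke test, sketched under the default mock `Settings` and the inferred `CodeChunk` fields:

```python
import asyncio

from app.agents.security_agent import SecurityAgent
from app.config import Settings
from app.schemas import CodeChunk
from app.services.llm_client import LLMClient

async def main() -> None:
    agent = SecurityAgent(LLMClient(Settings()))
    chunk = CodeChunk(file_path="app.py", content="eval(user_input)\n", line_start=1, line_end=1)
    output = await agent.analyze([chunk])
    # Static rules still run; the metadata flags show the LLM pass was skipped.
    print(output.metadata["llm_enrichment_enabled"], output.metadata["llm_findings"])

asyncio.run(main())
```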
app/agents/synthesizer_agent.py CHANGED
@@ -13,9 +13,59 @@ MAX_DISPLAY_FINDINGS_BY_AGENT = {
      "Security Agent": 20,
      "Performance Agent": 12,
      "Quality Agent": 10,
-     "Docs Agent": 8,
+     "Docs Agent": 12,
+ }
+ MAX_DISPLAY_BY_SEVERITY = {
+     Severity.critical: None,
+     Severity.high: 30,
+     Severity.medium: 18,
+     Severity.low: 12,
+ }
+
+ SECURITY_CATEGORIES = {
+     "security",
+     "config",
+     "dependency",
+     "cuda_migration",
+ }
+
+ PRODUCTION_CATEGORIES = {
+     "performance",
+     "quality",
+     "docs",
+     "error_handling",
+     "observability",
+ }
+
+ AGENT_CATEGORY_DEFAULTS = {
+     "Security Agent": "security",
+     "Config Agent": "config",
+     "Dependency Agent": "dependency",
+     "CUDA-to-ROCm Agent": "cuda_migration",
+     "Performance Agent": "performance",
+     "Quality Agent": "quality",
+     "Docs Agent": "docs",
+     "Error Handling Agent": "error_handling",
+     "Observability Agent": "observability",
  }

+ SECURITY_WEIGHTS = {
+     Severity.critical: 24,
+     Severity.high: 12,
+     Severity.medium: 5,
+     Severity.low: 1,
+ }
+
+ PRODUCTION_WEIGHTS = {
+     Severity.critical: 16,
+     Severity.high: 9,
+     Severity.medium: 4,
+     Severity.low: 1,
+ }
+
+ MAX_SECURITY_CATEGORY_PENALTY = 35
+ MAX_PRODUCTION_CATEGORY_PENALTY = 28
+

  class SynthesizerAgent:
      name = "Synthesizer Agent"
@@ -30,6 +80,21 @@ class SynthesizerAgent:

          agent_counts = {output.agent_name: len(output.findings) for output in outputs}
          display_findings, hidden_count, warnings = self._select_display_findings(all_findings, agent_counts)
+         category_summary = self._category_summary(all_findings)
+         security_score, production_score = self._compute_scores(all_findings)
+         roadmap = self._build_roadmap(all_findings)
+         dependency_cves = [
+             cve
+             for output in outputs
+             for cve in output.metadata.get("dependency_cves", [])
+             if isinstance(cve, dict)
+         ]
+         dependency_warnings = [
+             warning
+             for output in outputs
+             for warning in output.metadata.get("warnings", [])
+             if isinstance(warning, str)
+         ]

          return AuditReport(
              repo_url=repo.repo_url,
@@ -41,8 +106,13 @@
              displayed_findings_count=len(display_findings),
              hidden_findings_count=hidden_count,
              agent_finding_counts=agent_counts,
+             category_summary=category_summary,
+             security_score=security_score,
+             production_score=production_score,
+             remediation_roadmap=roadmap,
+             dependency_cves=dependency_cves,
              agents_run=[output.agent_name for output in outputs] + [self.name],
-             warnings=repo.warnings + warnings,
+             warnings=repo.warnings + dependency_warnings + warnings,
          )

      def _dedupe(self, findings: list[Finding]) -> list[Finding]:
@@ -63,16 +133,47 @@
      ) -> tuple[list[Finding], int, list[str]]:
          selected: list[Finding] = []
          selected_by_agent = {agent_name: 0 for agent_name in agent_counts}
+         selected_by_severity = {severity: 0 for severity in Severity}

          for finding in findings:
              agent_limit = MAX_DISPLAY_FINDINGS_BY_AGENT.get(finding.agent_source, MAX_DISPLAY_FINDINGS)
+             severity_limit = MAX_DISPLAY_BY_SEVERITY[finding.severity]
+             if severity_limit is not None and selected_by_severity[finding.severity] >= severity_limit:
+                 continue
              if selected_by_agent.get(finding.agent_source, 0) >= agent_limit:
                  continue
              if len(selected) >= MAX_DISPLAY_FINDINGS:
                  break
              selected.append(finding)
              selected_by_agent[finding.agent_source] = selected_by_agent.get(finding.agent_source, 0) + 1
+             selected_by_severity[finding.severity] += 1
+
+         if not any(finding.severity == Severity.low for finding in selected):
+             low_findings = [finding for finding in findings if finding.severity == Severity.low]
+             low_slots = MAX_DISPLAY_BY_SEVERITY[Severity.low] or 0
+             for finding in low_findings[:low_slots]:
+                 if finding in selected:
+                     continue
+                 if len(selected) >= MAX_DISPLAY_FINDINGS:
+                     replace_index = self._replaceable_display_index(selected)
+                     if replace_index is None:
+                         break
+                     replaced = selected[replace_index]
+                     selected_by_agent[replaced.agent_source] = max(
+                         0,
+                         selected_by_agent.get(replaced.agent_source, 0) - 1,
+                     )
+                     selected_by_severity[replaced.severity] = max(
+                         0,
+                         selected_by_severity[replaced.severity] - 1,
+                     )
+                     selected[replace_index] = finding
+                 else:
+                     selected.append(finding)
+                     selected_by_agent[finding.agent_source] = selected_by_agent.get(finding.agent_source, 0) + 1
+                     selected_by_severity[finding.severity] += 1

+         selected.sort(key=self._sort_key)
          hidden_count = max(0, len(findings) - len(selected))
          warnings: list[str] = []
          if hidden_count:
@@ -89,6 +190,13 @@

          return selected, hidden_count, warnings

+     def _replaceable_display_index(self, selected: list[Finding]) -> int | None:
+         for severity in (Severity.low, Severity.medium):
+             for index in range(len(selected) - 1, -1, -1):
+                 if selected[index].severity == severity:
+                     return index
+         return None
+
      def _sort_key(self, finding: Finding) -> tuple[int, int, str, int]:
          test_file_penalty = 1 if self._is_test_file(finding.file_path) and finding.severity != Severity.critical else 0
          return (SEVERITY_ORDER[finding.severity], test_file_penalty, finding.file_path, finding.line_start)
@@ -96,3 +204,74 @@
      def _is_test_file(self, file_path: str) -> bool:
          normalized = file_path.lower().replace("\\", "/")
          return "/test" in normalized or normalized.startswith("test") or "_test." in normalized
+
+     def _category_for(self, finding: Finding) -> str:
+         if finding.category:
+             return finding.category
+         return AGENT_CATEGORY_DEFAULTS.get(finding.agent_source, finding.agent_source.replace(" Agent", "").lower())
+
+     def _category_summary(self, findings: list[Finding]) -> dict[str, int]:
+         summary: dict[str, int] = {}
+         for finding in findings:
+             category = self._category_for(finding)
+             summary[category] = summary.get(category, 0) + 1
+         return dict(sorted(summary.items(), key=lambda item: (-item[1], item[0])))
+
+     def _compute_scores(self, findings: list[Finding]) -> tuple[int, int]:
+         security_penalties: dict[str, int] = {}
+         production_penalties: dict[str, int] = {}
+
+         for finding in findings:
+             category = self._category_for(finding)
+             if category in SECURITY_CATEGORIES or finding.agent_source in {
+                 "Security Agent",
+                 "Config Agent",
+                 "Dependency Agent",
+                 "CUDA-to-ROCm Agent",
+             }:
+                 security_penalties[category] = security_penalties.get(category, 0) + SECURITY_WEIGHTS[finding.severity]
+             if category in PRODUCTION_CATEGORIES or finding.agent_source in {
+                 "Performance Agent",
+                 "Quality Agent",
+                 "Docs Agent",
+                 "Error Handling Agent",
+                 "Observability Agent",
+             }:
+                 production_penalties[category] = (
+                     production_penalties.get(category, 0) + PRODUCTION_WEIGHTS[finding.severity]
+                 )

+         security_penalty = sum(min(value, MAX_SECURITY_CATEGORY_PENALTY) for value in security_penalties.values())
+         production_penalty = sum(min(value, MAX_PRODUCTION_CATEGORY_PENALTY) for value in production_penalties.values())
+         return self._score_from_penalty(security_penalty), self._score_from_penalty(production_penalty)
+
+     def _score_from_penalty(self, penalty: int) -> int:
+         if penalty <= 0:
+             return 100
+         return max(1, round(10000 / (100 + penalty)))
+
+     def _build_roadmap(self, findings: list[Finding]) -> dict[str, list[dict[str, str]]]:
+         critical = [finding for finding in findings if finding.severity == Severity.critical]
+         high = [finding for finding in findings if finding.severity == Severity.high]
+         medium = [finding for finding in findings if finding.severity == Severity.medium]
+         low = [finding for finding in findings if finding.severity == Severity.low]
+
+         this_week = critical + high[:5]
+         next_sprint = high[5:] + medium[:10]
+         backlog = medium[10:] + low
+
+         return {
+             "this_week": [self._roadmap_item(finding) for finding in this_week],
+             "next_sprint": [self._roadmap_item(finding) for finding in next_sprint],
+             "backlog": [self._roadmap_item(finding) for finding in backlog],
+         }
+
+     def _roadmap_item(self, finding: Finding) -> dict[str, str]:
+         return {
+             "title": finding.title,
+             "severity": finding.severity.value,
+             "category": self._category_for(finding),
+             "file_path": finding.file_path,
+             "line_start": str(finding.line_start),
+             "agent_source": finding.agent_source,
+         }
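The scoring curve is hyperbolic rather than linear: `score = max(1, round(10000 / (100 + penalty)))`, with per-category caps so a single noisy category cannot drag the score to zero. Two worked examples under the constants above:

```python
# One critical security finding: penalty 24, so 10000 / 124 = 80.6 -> 81.
print(max(1, round(10000 / (100 + 24))))  # 81

# Forty low config findings would accrue a raw penalty of 40, but
# MAX_SECURITY_CATEGORY_PENALTY = 35 caps the category: 10000 / 135 -> 74.
print(max(1, round(10000 / (100 + 35))))  # 74
```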
app/config.py CHANGED
@@ -6,9 +6,14 @@ from pydantic_settings import BaseSettings, SettingsConfigDict
  class Settings(BaseSettings):
      app_name: str = "SwarmAudit"
      llm_provider: str = "mock"
-     llm_base_url: str = "http://localhost:8000/v1"
+     llm_base_url: str = "http://localhost:9000/v1"
      llm_api_key: str = "not-needed-for-mock"
      llm_model: str = "Qwen/Qwen2.5-Coder-32B-Instruct"
+     enable_llm_enrichment: bool = False
+     enable_dependency_cve_lookup: bool = False
+     dependency_osv_timeout_seconds: int = 20
+     max_llm_chunks: int = 5
+     llm_timeout_seconds: int = 120
      max_files: int = 200
      max_file_size_kb: int = 250
      max_chars_per_chunk: int = 12000
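Because `Settings` is a pydantic `BaseSettings` model, each new field is overridable from the environment (or the `.env` file), which is how the credit-safe overrides in `.env.example` reach the agents. A minimal sketch, assuming the standard case-insensitive env parsing of pydantic-settings:

```python
import os

from app.config import Settings

os.environ["ENABLE_LLM_ENRICHMENT"] = "true"
os.environ["MAX_LLM_CHUNKS"] = "2"

settings = Settings()
print(settings.enable_llm_enrichment)  # True, parsed from the env string
print(settings.max_llm_chunks)         # 2
```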
app/main.py CHANGED
@@ -2,7 +2,8 @@ from fastapi import FastAPI, HTTPException

  from app.agents.graph import AuditGraph
  from app.config import get_settings
- from app.schemas import AuditReport, AuditRequest
+ from app.schemas import AuditReport, AuditRequest, LLMHealth
+ from app.services.llm_client import LLMClient

  app = FastAPI(title="SwarmAudit", version="0.1.0")

@@ -12,6 +13,11 @@ async def health() -> dict[str, str]:
      return {"status": "ok", "app": get_settings().app_name}


+ @app.get("/llm/health", response_model=LLMHealth)
+ async def llm_health() -> LLMHealth:
+     return await LLMClient(get_settings()).health_check()
+
+
  @app.post("/audit", response_model=AuditReport)
  async def audit(request: AuditRequest) -> AuditReport:
      try:
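The new endpoint is side-effect free under the mock provider, so it can be exercised directly with FastAPI's test client and no network access:

```python
from fastapi.testclient import TestClient

from app.main import app

client = TestClient(app)
payload = client.get("/llm/health").json()
# With LLM_PROVIDER=mock, health_check() answers ok=True locally.
print(payload["provider"], payload["ok"], payload["completion_preview"])
```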
app/schemas.py CHANGED
@@ -41,6 +41,8 @@ class Finding(BaseModel):
      why_it_matters: str
      suggested_fix: str
      agent_source: str
+     category: str | None = None
+     confidence: float | None = Field(default=None, ge=0, le=1)


  class AgentOutput(BaseModel):
@@ -67,6 +69,11 @@ class AuditReport(BaseModel):
      displayed_findings_count: int = 0
      hidden_findings_count: int = 0
      agent_finding_counts: dict[str, int] = Field(default_factory=dict)
+     category_summary: dict[str, int] = Field(default_factory=dict)
+     security_score: int | None = Field(default=None, ge=0, le=100)
+     production_score: int | None = Field(default=None, ge=0, le=100)
+     remediation_roadmap: dict[str, Any] = Field(default_factory=dict)
+     dependency_cves: list[dict[str, Any]] = Field(default_factory=list)
      generated_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
      agents_run: list[str]
      warnings: list[str] = Field(default_factory=list)
@@ -75,3 +82,28 @@
  class AuditProgress(BaseModel):
      message: str
      stage: str
+
+
+ class LLMHealth(BaseModel):
+     provider: str
+     model: str
+     base_url: str
+     ok: bool
+     latency_ms: float | None = None
+     models: list[str] = Field(default_factory=list)
+     completion_preview: str | None = None
+     error: str | None = None
+
+
+ class BenchmarkResult(BaseModel):
+     provider: str
+     model: str
+     backend: str
+     hardware: str
+     ok: bool
+     latency_ms: float | None = None
+     prompt_chars: int
+     completion_chars: int = 0
+     chars_per_second: float | None = None
+     completion_preview: str | None = None
+     error: str | None = None
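The `ge`/`le` bounds mean a malformed confidence is rejected at construction time rather than surfacing later in the report. A sketch (the required field list is taken from the `Finding` hunks in this diff):

```python
from pydantic import ValidationError

from app.schemas import Finding, Severity

data = dict(
    title="HTTP request without timeout",
    severity=Severity.medium,
    file_path="app.py",
    line_start=10,
    line_end=10,
    description="requests.get(url) has no timeout.",
    why_it_matters="A stalled remote can hang the worker.",
    suggested_fix="Pass timeout=10.",
    agent_source="Performance Agent",
    category="performance",
    confidence=0.84,
)
Finding(**data)  # valid

try:
    Finding(**{**data, "confidence": 1.5})
except ValidationError:
    print("confidence must stay within 0..1")
```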
app/services/benchmark.py ADDED
@@ -0,0 +1,67 @@
+ import time
+
+ from app.config import Settings
+ from app.schemas import BenchmarkResult
+ from app.services.llm_client import LLMClient
+
+
+ BENCHMARK_PROMPT = (
+     "Review this Python snippet for one security issue and answer in one concise sentence:\n"
+     "user_code = input('code: ')\n"
+     "eval(user_code)\n"
+ )
+
+
+ class BenchmarkService:
+     def __init__(self, settings: Settings):
+         self.settings = settings
+         self.llm_client = LLMClient(settings)
+
+     async def run_llm_benchmark(self) -> BenchmarkResult:
+         start = time.perf_counter()
+         try:
+             completion = await self.llm_client.test_completion()
+             latency_ms = round((time.perf_counter() - start) * 1000, 2)
+             completion_chars = len(completion)
+             chars_per_second = self._chars_per_second(completion_chars, latency_ms)
+             return BenchmarkResult(
+                 provider=self.settings.llm_provider,
+                 model=self.settings.llm_model,
+                 backend=self._backend_name(),
+                 hardware=self._hardware_label(),
+                 ok=True,
+                 latency_ms=latency_ms,
+                 prompt_chars=len(BENCHMARK_PROMPT),
+                 completion_chars=completion_chars,
+                 chars_per_second=chars_per_second,
+                 completion_preview=completion,
+             )
+         except Exception as exc:
+             latency_ms = round((time.perf_counter() - start) * 1000, 2)
+             return BenchmarkResult(
+                 provider=self.settings.llm_provider,
+                 model=self.settings.llm_model,
+                 backend=self._backend_name(),
+                 hardware=self._hardware_label(),
+                 ok=False,
+                 latency_ms=latency_ms,
+                 prompt_chars=len(BENCHMARK_PROMPT),
+                 error=str(exc),
+             )
+
+     def _backend_name(self) -> str:
+         if self.settings.llm_provider == "mock":
+             return "Mock local backend"
+         if self.settings.llm_provider == "vllm":
+             return "vLLM OpenAI-compatible endpoint"
+         return self.settings.llm_provider
+
+     def _hardware_label(self) -> str:
+         if self.settings.llm_provider == "vllm":
+             return "AMD MI300X target"
+         return "Local/mock"
+
+     def _chars_per_second(self, completion_chars: int, latency_ms: float) -> float | None:
+         if latency_ms <= 0:
+             return None
+         return round(completion_chars / (latency_ms / 1000), 2)
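The throughput figure is plain unit arithmetic over the measured latency, so it can be checked by hand:

```python
# A 420-character completion in 1234.56 ms:
# 420 / (1234.56 / 1000) = 340.202... -> 340.2 after rounding to 2 places.
latency_ms = 1234.56
completion_chars = 420
print(round(completion_chars / (latency_ms / 1000), 2))  # 340.2
```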
app/services/json_parser.py ADDED
@@ -0,0 +1,42 @@
+ import json
+ import re
+ from typing import Any
+
+ from pydantic import ValidationError
+
+ from app.schemas import AgentOutput
+
+
+ FENCED_JSON_PATTERN = re.compile(r"```(?:json)?\s*(.*?)```", re.DOTALL | re.IGNORECASE)
+
+
+ def parse_json_object(raw: str | dict[str, Any]) -> dict[str, Any]:
+     if isinstance(raw, dict):
+         return raw
+
+     text = raw.strip()
+     fence_match = FENCED_JSON_PATTERN.search(text)
+     if fence_match:
+         text = fence_match.group(1).strip()
+
+     try:
+         return json.loads(text)
+     except json.JSONDecodeError:
+         start = text.find("{")
+         end = text.rfind("}")
+         if start == -1 or end == -1 or end <= start:
+             raise
+         return json.loads(text[start : end + 1])
+
+
+ def parse_agent_output(raw: str | dict[str, Any], agent_name: str) -> AgentOutput:
+     try:
+         data = parse_json_object(raw)
+         data.setdefault("agent_name", agent_name)
+         return AgentOutput.model_validate(data)
+     except (json.JSONDecodeError, ValidationError, TypeError, ValueError):
+         return AgentOutput(
+             agent_name=agent_name,
+             findings=[],
+             metadata={"parse_error": True},
+         )
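The parser tries three progressively looser strategies: pass-through for dicts, unwrapping a fenced block, then slicing from the first `{` to the last `}`; anything still unparseable becomes an empty `AgentOutput` tagged with `parse_error`. A quick demonstration (the fence string is built at runtime so this example does not collide with its own code block):

```python
from app.services.json_parser import parse_agent_output, parse_json_object

fence = "`" * 3  # a literal triple backtick would terminate this example's fence
raw = f'{fence}json\n{{"findings": [], "metadata": {{"note": "clean"}}}}\n{fence}'

print(parse_json_object(raw))  # fenced wrapper stripped before json.loads

# Garbage degrades to a safe empty output instead of raising:
print(parse_agent_output("not json at all", "Security Agent").metadata)
```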
app/services/llm_client.py CHANGED
@@ -1,9 +1,11 @@
  import json
+ import time
  from typing import Any

  import httpx

  from app.config import Settings
+ from app.schemas import LLMHealth


  class LLMClient:
@@ -29,13 +31,97 @@ class LLMClient:
              "temperature": 0.1,
              "response_format": {"type": "json_object"},
          }
-         headers = {"Authorization": f"Bearer {self.settings.llm_api_key}"}
-         async with httpx.AsyncClient(timeout=120) as client:
-             response = await client.post(
-                 f"{self.settings.llm_base_url.rstrip('/')}/chat/completions",
-                 json=payload,
-                 headers=headers,
-             )
-             response.raise_for_status()
-             content = response.json()["choices"][0]["message"]["content"]
-             return json.loads(content)
+         response = await self._client_post("/chat/completions", payload)
+         content = response.json()["choices"][0]["message"]["content"]
+         return json.loads(content)
+
+     async def health_check(self) -> LLMHealth:
+         if self.settings.llm_provider == "mock":
+             return LLMHealth(
+                 provider=self.settings.llm_provider,
+                 model=self.settings.llm_model,
+                 base_url=self.settings.llm_base_url,
+                 ok=True,
+                 latency_ms=0,
+                 models=[self.settings.llm_model],
+                 completion_preview="Mock LLM is active.",
+             )
+
+         if self.settings.llm_provider != "vllm":
+             return LLMHealth(
+                 provider=self.settings.llm_provider,
+                 model=self.settings.llm_model,
+                 base_url=self.settings.llm_base_url,
+                 ok=False,
+                 error=f"Unsupported LLM_PROVIDER={self.settings.llm_provider}",
+             )
+
+         start = time.perf_counter()
+         try:
+             models = await self.list_models()
+             preview = await self.test_completion()
+             latency_ms = round((time.perf_counter() - start) * 1000, 2)
+             return LLMHealth(
+                 provider=self.settings.llm_provider,
+                 model=self.settings.llm_model,
+                 base_url=self.settings.llm_base_url,
+                 ok=True,
+                 latency_ms=latency_ms,
+                 models=models,
+                 completion_preview=preview,
+             )
+         except Exception as exc:
+             latency_ms = round((time.perf_counter() - start) * 1000, 2)
+             return LLMHealth(
+                 provider=self.settings.llm_provider,
+                 model=self.settings.llm_model,
+                 base_url=self.settings.llm_base_url,
+                 ok=False,
+                 latency_ms=latency_ms,
+                 error=str(exc),
+             )
+
+     async def list_models(self) -> list[str]:
+         if self.settings.llm_provider == "mock":
+             return [self.settings.llm_model]
+
+         response = await self._client_get("/models")
+         data = response.json()
+         return [model.get("id", "unknown") for model in data.get("data", [])]
+
+     async def test_completion(self) -> str:
+         if self.settings.llm_provider == "mock":
+             return "Mock LLM is active."
+
+         payload = {
+             "model": self.settings.llm_model,
+             "messages": [
+                 {"role": "system", "content": "You are a concise diagnostics assistant."},
+                 {"role": "user", "content": "Reply with exactly: SwarmAudit LLM OK"},
+             ],
+             "temperature": 0,
+             "max_tokens": 16,
+         }
+         response = await self._client_post("/chat/completions", payload)
+         return response.json()["choices"][0]["message"]["content"].strip()
+
+     async def _client_get(self, path: str) -> httpx.Response:
+         headers = {"Authorization": f"Bearer {self.settings.llm_api_key}"}
+         async with httpx.AsyncClient(timeout=30) as client:
+             response = await client.get(
+                 f"{self.settings.llm_base_url.rstrip('/')}{path}",
+                 headers=headers,
+             )
+             response.raise_for_status()
+             return response
+
+     async def _client_post(self, path: str, payload: dict[str, Any]) -> httpx.Response:
+         headers = {"Authorization": f"Bearer {self.settings.llm_api_key}"}
+         async with httpx.AsyncClient(timeout=self.settings.llm_timeout_seconds) as client:
+             response = await client.post(
+                 f"{self.settings.llm_base_url.rstrip('/')}{path}",
123
  json=payload,
124
  headers=headers,
125
  )
126
  response.raise_for_status()
127
+ return response
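
A quick smoke-test sketch for the health check above: with LLM_PROVIDER=mock it short-circuits instantly, while the vllm path exercises /models plus a 16-token completion (endpoint and key come from your .env).

import asyncio

from app.config import get_settings
from app.services.llm_client import LLMClient

async def main() -> None:
    # Mock provider returns ok=True with latency_ms=0; vllm hits the real endpoint.
    health = await LLMClient(get_settings()).health_check()
    print(health.ok, health.latency_ms, health.models, health.error)

asyncio.run(main())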
 
app/services/repo_crawler.py CHANGED
@@ -40,6 +40,13 @@ SUPPORTED_EXTENSIONS = {
40
  }
41
 
42
  README_FILENAMES = {"readme", "readme.md", "readme.rst", "readme.txt"}
43
 
44
 
45
  def validate_github_url(repo_url: str) -> str:
@@ -87,7 +94,8 @@ class RepoCrawler:
87
  skipped += 1
88
  continue
89
  readme_language = self._readme_language(rel_path)
90
- if path.suffix.lower() not in SUPPORTED_EXTENSIONS and readme_language is None:
 
91
  skipped += 1
92
  continue
93
  size = path.stat().st_size
@@ -99,7 +107,7 @@ class RepoCrawler:
99
  skipped += 1
100
  continue
101
 
102
- language = readme_language or SUPPORTED_EXTENSIONS[path.suffix.lower()]
103
  files.append(
104
  SourceFile(
105
  path=str(rel_path).replace("\\", "/"),
@@ -125,6 +133,9 @@ class RepoCrawler:
125
  return None
126
  return "Markdown" if rel_path.suffix.lower() == ".md" else "Documentation"
127

128
  def cleanup(self, scan_result: RepoScanResult | None) -> None:
129
  if scan_result is None:
130
  return
 
40
  }
41
 
42
  README_FILENAMES = {"readme", "readme.md", "readme.rst", "readme.txt"}
43
+ DEPENDENCY_MANIFESTS = {
44
+ "requirements.txt": "Python Requirements",
45
+ "pyproject.toml": "Python Project",
46
+ "package.json": "Node Package",
47
+ "go.mod": "Go Module",
48
+ "cargo.toml": "Rust Package",
49
+ }
50
 
51
 
52
  def validate_github_url(repo_url: str) -> str:
 
94
  skipped += 1
95
  continue
96
  readme_language = self._readme_language(rel_path)
97
+ manifest_language = self._manifest_language(rel_path)
98
+ if path.suffix.lower() not in SUPPORTED_EXTENSIONS and readme_language is None and manifest_language is None:
99
  skipped += 1
100
  continue
101
  size = path.stat().st_size
 
107
  skipped += 1
108
  continue
109
 
110
+ language = readme_language or manifest_language or SUPPORTED_EXTENSIONS[path.suffix.lower()]
111
  files.append(
112
  SourceFile(
113
  path=str(rel_path).replace("\\", "/"),
 
133
  return None
134
  return "Markdown" if rel_path.suffix.lower() == ".md" else "Documentation"
135
 
136
+ def _manifest_language(self, rel_path: Path) -> str | None:
137
+ return DEPENDENCY_MANIFESTS.get(rel_path.name.lower())
138
+
139
  def cleanup(self, scan_result: RepoScanResult | None) -> None:
140
  if scan_result is None:
141
  return
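
The manifest check above matches on filename only, lower-cased, so nested manifests like backend/Cargo.toml still count; a small sketch (paths are hypothetical):

from pathlib import Path

from app.services.repo_crawler import DEPENDENCY_MANIFESTS

for candidate in ("requirements.txt", "backend/Cargo.toml", "docs/notes.rst"):
    label = DEPENDENCY_MANIFESTS.get(Path(candidate).name.lower())
    print(candidate, "->", label)  # docs/notes.rst -> None (not a manifest)
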
app/services/report_formatter.py CHANGED
@@ -1,3 +1,8 @@
1
  from app.schemas import AuditReport, Severity
2
 
3
 
@@ -10,6 +15,11 @@ def format_report_markdown(report: AuditReport) -> str:
10
  f"Files skipped: `{report.skipped_file_count}`",
11
  f"Findings shown: `{report.displayed_findings_count}` of `{report.total_findings_count}`",
12
  "",
 
 
 
 
 
13
  "## Severity Summary",
14
  "",
15
  ]
@@ -22,6 +32,40 @@ def format_report_markdown(report: AuditReport) -> str:
22
  for agent_name, count in report.agent_finding_counts.items():
23
  lines.append(f"- **{agent_name}**: {count}")
24

25
  if report.warnings:
26
  lines.extend(["", "## Warnings", ""])
27
  lines.extend(f"- {warning}" for warning in report.warnings)
@@ -53,3 +97,247 @@ def format_report_markdown(report: AuditReport) -> str:
53
  )
54
 
55
  return "\n".join(lines)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import tempfile
2
+ from html import escape
3
+ from pathlib import Path
4
+ from urllib.parse import quote
5
+
6
  from app.schemas import AuditReport, Severity
7
 
8
 
 
15
  f"Files skipped: `{report.skipped_file_count}`",
16
  f"Findings shown: `{report.displayed_findings_count}` of `{report.total_findings_count}`",
17
  "",
18
+ "## Readiness Scores",
19
+ "",
20
+ f"- **Security Score**: `{_score_label(report.security_score)}`",
21
+ f"- **Production Readiness Score**: `{_score_label(report.production_score)}`",
22
+ "",
23
  "## Severity Summary",
24
  "",
25
  ]
 
32
  for agent_name, count in report.agent_finding_counts.items():
33
  lines.append(f"- **{agent_name}**: {count}")
34
 
35
+ if report.category_summary:
36
+ lines.extend(["", "## Category Summary", ""])
37
+ for category, count in report.category_summary.items():
38
+ lines.append(f"- **{_label(category)}**: {count}")
39
+
40
+ if report.remediation_roadmap:
41
+ lines.extend(["", "## Remediation Roadmap", ""])
42
+ for key, label in [
43
+ ("this_week", "This Week"),
44
+ ("next_sprint", "Next Sprint"),
45
+ ("backlog", "Backlog"),
46
+ ]:
47
+ items = report.remediation_roadmap.get(key, [])
48
+ lines.extend(["", f"### {label}", ""])
49
+ if not items:
50
+ lines.append("No items in this lane.")
51
+ continue
52
+ for item in items:
53
+ lines.append(
54
+ f"- **[{item.get('severity', 'LOW')}] {item.get('title', 'Finding')}** "
55
+ f"({_label(item.get('category', 'general'))}) - "
56
+ f"`{item.get('file_path', 'unknown')}:{item.get('line_start', '?')}`"
57
+ )
58
+
59
+ if report.dependency_cves:
60
+ lines.extend(["", "## Dependency CVEs", ""])
61
+ for cve in report.dependency_cves:
62
+ fixed_version = cve.get("fixed_version") or "a patched version"
63
+ lines.append(
64
+ f"- **[{cve.get('severity', 'LOW')}] {cve.get('id', 'UNKNOWN')}** "
65
+ f"`{cve.get('package', 'package')}@{cve.get('version', 'unknown')}` "
66
+ f"({cve.get('ecosystem', 'unknown')}) - upgrade to {fixed_version}"
67
+ )
68
+
69
  if report.warnings:
70
  lines.extend(["", "## Warnings", ""])
71
  lines.extend(f"- {warning}" for warning in report.warnings)
 
97
  )
98
 
99
  return "\n".join(lines)
100
+
101
+
102
+ def format_report_html(report: AuditReport) -> str:
103
+ findings = report.findings
104
+ critical = report.severity_summary.get(Severity.critical, 0)
105
+ high = report.severity_summary.get(Severity.high, 0)
106
+ medium = report.severity_summary.get(Severity.medium, 0)
107
+ low = report.severity_summary.get(Severity.low, 0)
108
+
109
+ if not findings:
110
+ return """
111
+ <section class="audit-console">
112
+ <div class="audit-console-header">
113
+ <div class="audit-console-title">Audit report</div>
114
+ <div class="audit-filter-row"><span class="filter-pill">All 0</span></div>
115
+ </div>
116
+ <div class="audit-empty">
117
+ <h3>No findings detected</h3>
118
+ <p>The current agent set did not raise findings for the displayed report.</p>
119
+ </div>
120
+ </section>
121
+ """
122
+
123
+ selected = findings[0]
124
+ list_items = "\n".join(_finding_list_item(finding, index + 1) for index, finding in enumerate(findings[:12]))
125
+
126
+ filter_items = _severity_filter_items(
127
+ {
128
+ Severity.critical: critical,
129
+ Severity.high: high,
130
+ Severity.medium: medium,
131
+ Severity.low: low,
132
+ }
133
+ )
134
+
135
+ return f"""
136
+ <section class="audit-console">
137
+ <div class="audit-console-header">
138
+ <div class="audit-console-title">Audit report</div>
139
+ <div class="audit-filter-row">
140
+ <span class="filter-pill active">All {report.displayed_findings_count}</span>
141
+ {filter_items}
142
+ </div>
143
+ </div>
144
+ <div class="audit-console-body">
145
+ <div class="finding-list">
146
+ {list_items}
147
+ </div>
148
+ <div class="finding-detail">
149
+ {_finding_detail(selected, 1)}
150
+ </div>
151
+ </div>
152
+ </section>
153
+ """
154
+
155
+
156
+ def format_empty_report_html() -> str:
157
+ return """
158
+ <section class="audit-console">
159
+ <div class="audit-console-header">
160
+ <div class="audit-console-title">Audit report</div>
161
+ <div class="audit-filter-row"><span class="filter-pill active">All 0</span></div>
162
+ </div>
163
+ <div class="audit-empty">
164
+ <h3>Run an audit to populate findings</h3>
165
+ <p>The report panel will show ranked findings with file references and suggested fixes.</p>
166
+ </div>
167
+ </section>
168
+ """
169
+
170
+
171
+ def format_report_overview_html(report: AuditReport | None) -> str:
172
+ if report is None:
173
+ return """
174
+ <section class="report-overview">
175
+ <div class="overview-column">
176
+ <span>Security Score</span>
177
+ <strong>-</strong>
178
+ </div>
179
+ <div class="overview-column">
180
+ <span>Production Readiness</span>
181
+ <strong>-</strong>
182
+ </div>
183
+ </section>
184
+ """
185
+
186
+ categories = "".join(
187
+ f"<span>{escape(_label(category))}: {count}</span>"
188
+ for category, count in list(report.category_summary.items())[:6]
189
+ )
190
+ roadmap = report.remediation_roadmap or {}
191
+ return f"""
192
+ <section class="report-overview">
193
+ <div class="overview-column">
194
+ <span>Security Score</span>
195
+ <strong>{_score_label(report.security_score)}</strong>
196
+ </div>
197
+ <div class="overview-column">
198
+ <span>Production Readiness</span>
199
+ <strong>{_score_label(report.production_score)}</strong>
200
+ </div>
201
+ <div class="overview-column overview-wide">
202
+ <span>Category Summary</span>
203
+ <div class="overview-tags">{categories or "<span>No categories raised</span>"}</div>
204
+ </div>
205
+ <div class="overview-column overview-wide">
206
+ <span>Roadmap</span>
207
+ <div class="overview-tags">
208
+ <span>This Week: {len(roadmap.get("this_week", []))}</span>
209
+ <span>Next Sprint: {len(roadmap.get("next_sprint", []))}</span>
210
+ <span>Backlog: {len(roadmap.get("backlog", []))}</span>
211
+ </div>
212
+ </div>
213
+ </section>
214
+ """
215
+
216
+
217
+ def format_finding_detail_html(report: AuditReport | None, index: int = 0) -> str:
218
+ if report is None or not report.findings:
219
+ return format_empty_finding_detail_html()
220
+
221
+ safe_index = min(max(index, 0), len(report.findings) - 1)
222
+ return f"""
223
+ <section class="finding-detail-panel">
224
+ {_finding_detail(report.findings[safe_index], safe_index + 1, report.repo_url)}
225
+ </section>
226
+ """
227
+
228
+
229
+ def format_empty_finding_detail_html() -> str:
230
+ return """
231
+ <section class="finding-detail-panel empty-detail">
232
+ <div class="audit-empty">
233
+ <h3>Select a finding</h3>
234
+ <p>Run an audit, then click any row in the findings list to inspect its explanation and suggested fix.</p>
235
+ </div>
236
+ </section>
237
+ """
238
+
239
+
240
+ def _finding_list_item(finding, index: int) -> str:
241
+ severity = finding.severity.value
242
+ severity_class = severity.lower()
243
+ reference = f"{finding.file_path}:{finding.line_start}"
244
+ return f"""
245
+ <article class="finding-row severity-{severity_class}">
246
+ <div class="finding-row-meta">
247
+ <span class="severity-badge">{escape(severity)}</span>
248
+ <span>F-{index:03d}</span>
249
+ </div>
250
+ <div class="finding-row-title">{escape(finding.title)}</div>
251
+ <div class="finding-row-path">{escape(reference)}</div>
252
+ </article>
253
+ """
254
+
255
+
256
+ def _severity_filter_items(counts: dict[Severity, int]) -> str:
257
+ items: list[str] = []
258
+ for severity, css_class, label in [
259
+ (Severity.critical, "dot-critical", "Critical"),
260
+ (Severity.high, "dot-high", "High"),
261
+ (Severity.medium, "dot-medium", "Medium"),
262
+ (Severity.low, "dot-low", "Low"),
263
+ ]:
264
+ count = counts.get(severity, 0)
265
+ if count <= 0:
266
+ continue
267
+ items.append(f'<span class="filter-dot {css_class}"></span><span>{label} {count}</span>')
268
+ return "\n".join(items)
269
+
270
+
271
+ def _finding_detail(finding, index: int, repo_url: str | None = None) -> str:
272
+ severity = finding.severity.value
273
+ severity_class = severity.lower()
274
+ reference = f"{finding.file_path}:{finding.line_start}-{finding.line_end}"
275
+ category = finding.category or finding.agent_source.replace(" Agent", "").lower()
276
+ file_url = _github_file_url(repo_url, finding.file_path, finding.line_start)
277
+ open_html = (
278
+ f'<a href="{escape(file_url)}" target="_blank" rel="noopener noreferrer">open -></a>'
279
+ if file_url
280
+ else "<span>open -></span>"
281
+ )
282
+ return f"""
283
+ <div class="finding-detail-meta">
284
+ <span>F-{index:03d}</span>
285
+ <span>></span>
286
+ <span>{escape(category.upper())}</span>
287
+ <span>></span>
288
+ <span>{escape(reference)}</span>
289
+ </div>
290
+ <div class="finding-detail-title">
291
+ <span class="severity-badge severity-{severity_class}">{escape(severity)}</span>
292
+ <h3>{escape(finding.title)}</h3>
293
+ </div>
294
+ <div class="detail-section">
295
+ <span>Explanation</span>
296
+ <p>{escape(finding.description)}</p>
297
+ </div>
298
+ <div class="detail-section">
299
+ <span>Why it matters</span>
300
+ <p>{escape(finding.why_it_matters)}</p>
301
+ </div>
302
+ <div class="detail-section">
303
+ <span>Suggested fix</span>
304
+ <pre>{escape(finding.suggested_fix)}</pre>
305
+ </div>
306
+ <div class="reference-card">
307
+ <code>{escape(reference)}</code>
308
+ {open_html}
309
+ </div>
310
+ """
311
+
312
+
313
+ def write_report_exports(report: AuditReport, output_dir: Path | None = None) -> tuple[str, str]:
314
+ export_dir = output_dir or Path(tempfile.mkdtemp(prefix="swarm_audit_export_"))
315
+ export_dir.mkdir(parents=True, exist_ok=True)
316
+
317
+ markdown_path = export_dir / "swarm_audit_report.md"
318
+ json_path = export_dir / "swarm_audit_report.json"
319
+
320
+ markdown_path.write_text(format_report_markdown(report), encoding="utf-8")
321
+ json_path.write_text(report.model_dump_json(indent=2), encoding="utf-8")
322
+
323
+ return str(markdown_path), str(json_path)
324
+
325
+
326
+ def _score_label(score: int | None) -> str:
327
+ if score is None:
328
+ return "-"
329
+ return f"{score}/100"
330
+
331
+
332
+ def _label(value: str | None) -> str:
333
+ if not value:
334
+ return "General"
335
+ return value.replace("_", " ").replace("-", " ").title()
336
+
337
+
338
+ def _github_file_url(repo_url: str | None, file_path: str, line_start: int) -> str | None:
339
+ if not repo_url or "github.com/" not in repo_url:
340
+ return None
341
+ normalized_repo = repo_url.removesuffix(".git").rstrip("/")
342
+ quoted_path = quote(file_path.replace("\\", "/"))
343
+ return f"{normalized_repo}/blob/HEAD/{quoted_path}#L{line_start}"
app/ui/gradio_app.py CHANGED
@@ -1,10 +1,19 @@
1
  import os
 
2
 
3
  import gradio as gr
4
 
5
  from app.agents.graph import AuditGraph
6
- from app.schemas import AuditReport
7
- from app.services.report_formatter import format_report_markdown
8
 
9
 
10
  EXAMPLE_REPOS = {
@@ -13,69 +22,1452 @@ EXAMPLE_REPOS = {
13
  "Flask": "https://github.com/pallets/flask",
14
  }
15

16
 
17
  async def analyze_repo(repo_url: str):
18
  if not repo_url.strip():
19
- yield "Paste a public GitHub repository URL to start.", ""
 
 
 
 
 
 
 
 
 
 
 
 
20
  return
21
 
22
  progress: list[str] = []
23
- report_markdown = ""
24
  try:
25
  async for event in AuditGraph().run_with_progress(repo_url.strip()):
26
  if isinstance(event, AuditReport):
27
- report_markdown = format_report_markdown(event)
28
  else:
29
  progress.append(event)
30
- yield "\n".join(progress), report_markdown
 
 
 
 
 
 
 
 
 
 
 
 
 
31
  except Exception as exc:
32
  progress.append(f"Audit failed: {exc}")
33
- yield "\n".join(progress), report_markdown
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
 
35
 
36
  def choose_example(example_name: str) -> str:
37
  return EXAMPLE_REPOS.get(example_name, "")
38
 
39

40
  def build_app() -> gr.Blocks:
41
- with gr.Blocks(title="SwarmAudit") as demo:
42
- gr.Markdown(
43
- "# SwarmAudit\n"
44
- "Paste a public GitHub URL and get a structured multi-agent audit report."
 
 
 
 
 
 
 
 
 
45
  )
 
 
46
 
47
- with gr.Row():
48
- repo_url = gr.Textbox(
49
- label="GitHub Repository URL",
50
- placeholder="https://github.com/owner/repo",
51
- scale=4,
52
- )
53
- analyze = gr.Button("Analyze", variant="primary", scale=1)
54
 
55
- example = gr.Dropdown(
56
- label="Example repos",
57
- choices=list(EXAMPLE_REPOS.keys()),
58
- value=None,
59
- interactive=True,
60
- )
61
- example.change(choose_example, inputs=example, outputs=repo_url)
62
 
63
- with gr.Row():
64
- progress_output = gr.Textbox(
65
- label="Agent Progress",
66
- lines=10,
67
- interactive=False,
68
- )
69
- report_output = gr.Markdown(label="Audit Report")
70
 
71
- analyze.click(analyze_repo, inputs=repo_url, outputs=[progress_output, report_output])
72
  return demo
73
 
74
 
75
  def launch_app() -> None:
76
  server_name = os.getenv("GRADIO_SERVER_NAME", "0.0.0.0")
77
- server_port = int(os.getenv("PORT", os.getenv("GRADIO_SERVER_PORT", "7860")))
78
- build_app().queue().launch(server_name=server_name, server_port=server_port)
 
 
 
 
 
 
79
 
80
 
81
  if __name__ == "__main__":
 
1
  import os
2
+ import warnings
3
 
4
  import gradio as gr
5
 
6
  from app.agents.graph import AuditGraph
7
+ from app.config import get_settings
8
+ from app.schemas import AuditReport, Severity
9
+ from app.services.llm_client import LLMClient
10
+ from app.services.benchmark import BenchmarkService
11
+ from app.services.report_formatter import (
12
+ format_empty_finding_detail_html,
13
+ format_finding_detail_html,
14
+ format_report_overview_html,
15
+ write_report_exports,
16
+ )
17
 
18
 
19
  EXAMPLE_REPOS = {
 
22
  "Flask": "https://github.com/pallets/flask",
23
  }
24
 
25
+ AGENT_SWARM = [
26
+ ("Crawler", "Fetch repository tree", "Crawler Agent", "mapped"),
27
+ ("Chunker", "Tokenize and segment files", "Chunker", "created"),
28
+ ("Security", "CVE and secret scanning", "Security Agent", "found"),
29
+ ("Performance", "Hot-path and complexity", "Performance Agent", "found"),
30
+ ("Quality", "Lint, types, smells", "Quality Agent", "found"),
31
+ ("Docs", "Coverage and accuracy", "Docs Agent", "found"),
32
+ ("Config", "Production config risk", "Config Agent", "found"),
33
+ ("Dependency", "Manifest and CVE checks", "Dependency Agent", "found"),
34
+ ("Errors", "Resilience paths", "Error Handling Agent", "found"),
35
+ ("Observability", "Logs and health checks", "Observability Agent", "found"),
36
+ ("ROCm", "CUDA portability", "CUDA-to-ROCm Agent", "found"),
37
+ ("Synthesizer", "Merge findings into report", "Synthesizer Agent", "final report"),
38
+ ]
39
+
40
+
41
+ APP_CSS = """
42
+ @import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&family=JetBrains+Mono:wght@400;500&display=swap');
43
+
44
+ :root {
45
+ --sa-bg: #080d14;
46
+ --sa-surface: #0d141d;
47
+ --sa-panel: #101923;
48
+ --sa-panel-high: #162233;
49
+ --sa-panel-higher: #1b293a;
50
+ --sa-border: #26364a;
51
+ --sa-border-strong: #33465e;
52
+ --sa-text: #e6f0ff;
53
+ --sa-muted: #8aa0b8;
54
+ --sa-primary: #60a5fa;
55
+ --sa-primary-soft: rgba(96, 165, 250, 0.14);
56
+ --sa-blue: #06b6d4;
57
+ --sa-orange: #f97316;
58
+ --sa-yellow: #eab308;
59
+ --sa-red: #ef4444;
60
+ --sa-green: #22c55e;
61
+ --sa-info: #64748b;
62
+ --sa-card-shadow: 0 18px 60px rgba(0, 0, 0, 0.24);
63
+ }
64
+
65
+ * {
66
+ scrollbar-width: thin;
67
+ scrollbar-color: #475569 #0f172a;
68
+ }
69
+
70
+ *::-webkit-scrollbar {
71
+ width: 8px;
72
+ height: 8px;
73
+ }
74
+
75
+ *::-webkit-scrollbar-track {
76
+ background: #0f172a;
77
+ }
78
+
79
+ *::-webkit-scrollbar-thumb {
80
+ background: #475569;
81
+ border-radius: 999px;
82
+ border: 2px solid #0f172a;
83
+ }
84
+
85
+ *::-webkit-scrollbar-thumb:hover {
86
+ background: #64748b;
87
+ }
88
+
89
+ .gradio-container {
90
+ background:
91
+ radial-gradient(circle at 18% -10%, rgba(96, 165, 250, 0.08), transparent 30%),
92
+ linear-gradient(180deg, #0a1018 0%, var(--sa-bg) 38%, #070b11 100%) !important;
93
+ color: var(--sa-text) !important;
94
+ font-family: Inter, system-ui, sans-serif !important;
95
+ }
96
+
97
+ #swarm-shell {
98
+ max-width: 1440px;
99
+ margin: 0 auto;
100
+ }
101
+
102
+ .swarm-topbar {
103
+ border: 1px solid rgba(96, 165, 250, 0.18);
104
+ background:
105
+ linear-gradient(135deg, rgba(16, 25, 35, 0.94), rgba(13, 20, 29, 0.86)),
106
+ rgba(16, 25, 35, 0.86);
107
+ border-radius: 10px;
108
+ padding: 14px 16px 13px;
109
+ margin-bottom: 12px;
110
+ box-shadow: 0 18px 70px rgba(0, 0, 0, 0.18), inset 0 1px 0 rgba(230, 240, 255, 0.04);
111
+ backdrop-filter: blur(10px);
112
+ }
113
+
114
+ .swarm-brand-row {
115
+ display: flex;
116
+ align-items: center;
117
+ justify-content: space-between;
118
+ gap: 16px;
119
+ margin-bottom: 10px;
120
+ }
121
+
122
+ .swarm-brand {
123
+ font-size: 19px;
124
+ line-height: 24px;
125
+ font-weight: 700;
126
+ letter-spacing: 0;
127
+ }
128
+
129
+ .swarm-tagline {
130
+ color: var(--sa-muted);
131
+ font-size: 12px;
132
+ line-height: 18px;
133
+ }
134
+
135
+ .swarm-status {
136
+ color: var(--sa-muted);
137
+ font: 600 11px/16px JetBrains Mono, monospace;
138
+ text-transform: uppercase;
139
+ }
140
+
141
+ .swarm-progressbar {
142
+ height: 3px;
143
+ border-radius: 999px;
144
+ background: rgba(38, 54, 74, 0.7);
145
+ overflow: hidden;
146
+ }
147
+
148
+ .swarm-progressbar span {
149
+ display: block;
150
+ width: 100%;
151
+ height: 100%;
152
+ background: linear-gradient(90deg, var(--sa-primary), #22c55e);
153
+ box-shadow: 0 0 18px rgba(96, 165, 250, 0.24);
154
+ }
155
+
156
+ .swarm-summary-grid {
157
+ display: grid;
158
+ grid-template-columns: repeat(auto-fit, minmax(150px, 1fr));
159
+ gap: 10px;
160
+ margin: 12px 0;
161
+ }
162
+
163
+ .swarm-metric {
164
+ border: 1px solid rgba(38, 54, 74, 0.95);
165
+ background: linear-gradient(180deg, rgba(22, 34, 51, 0.86), rgba(16, 25, 35, 0.9));
166
+ border-radius: 8px;
167
+ padding: 12px;
168
+ box-shadow: inset 0 1px 0 rgba(230, 240, 255, 0.035);
169
+ transition: border-color 160ms ease, transform 160ms ease, background 160ms ease;
170
+ }
171
+
172
+ .swarm-metric:hover {
173
+ border-color: rgba(96, 165, 250, 0.34);
174
+ transform: translateY(-1px);
175
+ }
176
+
177
+ .swarm-metric span {
178
+ display: block;
179
+ color: var(--sa-muted);
180
+ font: 600 11px/16px JetBrains Mono, monospace;
181
+ text-transform: uppercase;
182
+ letter-spacing: 0;
183
+ }
184
+
185
+ .swarm-metric strong {
186
+ display: block;
187
+ color: var(--sa-text);
188
+ font-size: 22px;
189
+ line-height: 28px;
190
+ margin-top: 2px;
191
+ }
192
+
193
+ .metric-critical strong,
194
+ .metric-critical span {
195
+ color: var(--sa-red);
196
+ }
197
+
198
+ .metric-high strong,
199
+ .metric-high span {
200
+ color: var(--sa-orange);
201
+ }
202
+
203
+ .metric-medium strong,
204
+ .metric-medium span {
205
+ color: var(--sa-yellow);
206
+ }
207
+
208
+ .metric-low strong,
209
+ .metric-low span {
210
+ color: var(--sa-blue);
211
+ }
212
+
213
+ .swarm-card,
214
+ .swarm-panel,
215
+ .swarm-export {
216
+ border: 1px solid var(--sa-border) !important;
217
+ background: rgba(16, 25, 35, 0.92) !important;
218
+ border-radius: 8px !important;
219
+ box-shadow: inset 0 1px 0 rgba(230, 240, 255, 0.03);
220
+ }
221
+
222
+ .agent-card {
223
+ border: 1px solid rgba(38, 54, 74, 0.95);
224
+ background: linear-gradient(180deg, rgba(16, 25, 35, 0.95), rgba(13, 20, 29, 0.96));
225
+ border-radius: 9px;
226
+ overflow: hidden;
227
+ margin-bottom: 12px;
228
+ box-shadow: inset 0 1px 0 rgba(230, 240, 255, 0.035);
229
+ }
230
+
231
+ .agent-card-header,
232
+ .audit-console-header {
233
+ min-height: 42px;
234
+ border-bottom: 1px solid var(--sa-border);
235
+ display: flex;
236
+ align-items: center;
237
+ justify-content: space-between;
238
+ padding: 0 14px;
239
+ }
240
+
241
+ .agent-card-title,
242
+ .audit-console-title {
243
+ color: var(--sa-text);
244
+ font-size: 13px;
245
+ line-height: 18px;
246
+ font-weight: 700;
247
+ }
248
+
249
+ .agent-card-count,
250
+ .audit-filter-row {
251
+ color: var(--sa-muted);
252
+ font: 500 11px/16px JetBrains Mono, monospace;
253
+ }
254
+
255
+ .agent-list {
256
+ padding: 12px 14px 14px;
257
+ }
258
+
259
+ .agent-item {
260
+ display: grid;
261
+ grid-template-columns: 28px 1fr auto;
262
+ gap: 10px;
263
+ align-items: center;
264
+ padding: 8px 0;
265
+ border-radius: 7px;
266
+ transition: background 150ms ease, border-color 150ms ease;
267
+ }
268
+
269
+ .agent-icon {
270
+ width: 28px;
271
+ height: 28px;
272
+ border-radius: 6px;
273
+ border: 1px solid var(--sa-border);
274
+ background: rgba(27, 41, 58, 0.88);
275
+ display: flex;
276
+ align-items: center;
277
+ justify-content: center;
278
+ color: var(--sa-muted);
279
+ font: 700 11px/16px JetBrains Mono, monospace;
280
+ }
281
+
282
+ .agent-name {
283
+ color: var(--sa-text);
284
+ font-size: 13px;
285
+ line-height: 18px;
286
+ font-weight: 700;
287
+ }
288
+
289
+ .agent-desc {
290
+ color: var(--sa-muted);
291
+ font-size: 11px;
292
+ line-height: 16px;
293
+ }
294
+
295
+ .agent-status {
296
+ font: 600 11px/16px JetBrains Mono, monospace;
297
+ }
298
+
299
+ .agent-status.done {
300
+ color: var(--sa-green);
301
+ }
302
+
303
+ .agent-status.running {
304
+ color: var(--sa-primary);
305
+ }
306
+
307
+ .agent-status.idle {
308
+ color: var(--sa-muted);
309
+ }
310
+
311
+ .agent-item.running {
312
+ background: rgba(34, 197, 94, 0.08);
313
+ border: 1px solid rgba(34, 197, 94, 0.22);
314
+ border-radius: 7px;
315
+ margin: 2px -6px;
316
+ padding: 8px 6px;
317
+ }
318
+
319
+ .swarm-card textarea,
320
+ .swarm-card input,
321
+ .swarm-card select {
322
+ font-family: JetBrains Mono, monospace !important;
323
+ }
324
+
325
+ .swarm-progress textarea {
326
+ min-height: 285px !important;
327
+ font-family: JetBrains Mono, monospace !important;
328
+ font-size: 12px !important;
329
+ line-height: 20px !important;
330
+ color: #d8e3ef !important;
331
+ background: #0b1118 !important;
332
+ }
333
+
334
+ .swarm-report {
335
+ min-height: 560px;
336
+ }
337
+
338
+ .swarm-report h1,
339
+ .swarm-report h2,
340
+ .swarm-report h3 {
341
+ color: var(--sa-text) !important;
342
+ }
343
+
344
+ .swarm-report code,
345
+ .swarm-report pre {
346
+ font-family: JetBrains Mono, monospace !important;
347
+ }
348
+
349
+ .swarm-export {
350
+ padding: 12px !important;
351
+ }
352
+
353
+ .audit-actionbar {
354
+ border: 1px solid rgba(38, 54, 74, 0.95) !important;
355
+ background: linear-gradient(180deg, rgba(16, 25, 35, 0.92), rgba(13, 20, 29, 0.94)) !important;
356
+ border-radius: 10px !important;
357
+ padding: 7px 8px !important;
358
+ margin-bottom: 12px !important;
359
+ box-shadow: inset 0 1px 0 rgba(230, 240, 255, 0.035);
360
+ }
361
+
362
+ .audit-actionbar .form,
363
+ .audit-actionbar .block {
364
+ min-height: 0 !important;
365
+ }
366
+
367
+ .audit-actionbar .gradio-row,
368
+ .audit-actionbar .row {
369
+ align-items: center !important;
370
+ gap: 8px !important;
371
+ }
372
+
373
+ .repo-input {
374
+ min-width: min(560px, 100%) !important;
375
+ }
376
+
377
+ .audit-actionbar label {
378
+ color: var(--sa-muted) !important;
379
+ font: 600 11px/16px JetBrains Mono, monospace !important;
380
+ text-transform: lowercase !important;
381
+ }
382
+
383
+ .audit-actionbar input {
384
+ background: #111a25 !important;
385
+ border: 1px solid var(--sa-border) !important;
386
+ border-radius: 7px !important;
387
+ color: var(--sa-text) !important;
388
+ font-family: JetBrains Mono, monospace !important;
389
+ min-height: 34px !important;
390
+ height: 34px !important;
391
+ padding: 6px 10px !important;
392
+ transition: border-color 150ms ease, box-shadow 150ms ease, background 150ms ease;
393
+ }
394
+
395
+ .audit-actionbar input:focus {
396
+ border-color: rgba(96, 165, 250, 0.7) !important;
397
+ box-shadow: 0 0 0 3px rgba(96, 165, 250, 0.11) !important;
398
+ background: #132033 !important;
399
+ }
400
+
401
+ .example-label {
402
+ display: flex;
403
+ align-items: center;
404
+ color: var(--sa-muted);
405
+ font: 600 11px/16px JetBrains Mono, monospace;
406
+ letter-spacing: 0.02em;
407
+ text-transform: uppercase;
408
+ height: 34px;
409
+ padding: 0 8px 0 12px;
410
+ }
411
+
412
+ .example-chip button {
413
+ background: rgba(74, 91, 113, 0.82) !important;
414
+ border: 1px solid rgba(100, 116, 139, 0.42) !important;
415
+ border-radius: 12px !important;
416
+ color: var(--sa-text) !important;
417
+ font: 700 13px/18px Inter, system-ui, sans-serif !important;
418
+ min-width: 0 !important;
419
+ height: 40px !important;
420
+ min-height: 40px !important;
421
+ padding: 0 20px !important;
422
+ margin: 0 4px !important;
423
+ transition: border-color 150ms ease, background 150ms ease, color 150ms ease, transform 150ms ease;
424
+ }
425
+
426
+ .example-chip button:hover {
427
+ background: rgba(87, 108, 135, 0.92) !important;
428
+ border-color: rgba(96, 165, 250, 0.36) !important;
429
+ color: var(--sa-text) !important;
430
+ transform: translateY(-1px);
431
+ }
432
+
433
+ button.primary,
434
+ .gradio-button.primary {
435
+ background: linear-gradient(180deg, #7bb8ff, var(--sa-primary)) !important;
436
+ color: #08111d !important;
437
+ border: 1px solid rgba(147, 197, 253, 0.48) !important;
438
+ border-radius: 8px !important;
439
+ font-weight: 700 !important;
440
+ box-shadow: 0 0 0 1px rgba(96, 165, 250, 0.08), 0 10px 26px rgba(96, 165, 250, 0.14);
441
+ min-height: 34px !important;
442
+ height: 34px !important;
443
+ padding: 0 14px !important;
444
+ transition: filter 150ms ease, transform 150ms ease, box-shadow 150ms ease;
445
+ }
446
+
447
+ button.primary:hover,
448
+ .gradio-button.primary:hover {
449
+ filter: brightness(1.04);
450
+ transform: translateY(-1px);
451
+ box-shadow: 0 0 0 1px rgba(96, 165, 250, 0.1), 0 14px 30px rgba(96, 165, 250, 0.18);
452
+ }
453
+
454
+ .tabs {
455
+ border: 1px solid var(--sa-border) !important;
456
+ border-radius: 10px !important;
457
+ background: rgba(8, 13, 20, 0.74) !important;
458
+ padding: 8px !important;
459
+ box-shadow: inset 0 1px 0 rgba(230, 240, 255, 0.03);
460
+ }
461
+
462
+ .tab-nav button {
463
+ border-radius: 7px !important;
464
+ font-weight: 600 !important;
465
+ }
466
+
467
+ .tab-nav button.selected,
468
+ .tab-nav button[aria-selected="true"] {
469
+ color: var(--sa-primary) !important;
470
+ box-shadow: inset 0 -1px 0 var(--sa-primary), 0 10px 24px rgba(96, 165, 250, 0.08);
471
+ }
472
+
473
+ .swarm-note {
474
+ color: var(--sa-muted);
475
+ font-size: 13px;
476
+ line-height: 20px;
477
+ margin: 0 0 10px;
478
+ }
479
+
480
+ .swarm-report a {
481
+ color: var(--sa-primary) !important;
482
+ }
483
+
484
+ .swarm-report blockquote {
485
+ border-left: 3px solid var(--sa-border-strong) !important;
486
+ color: var(--sa-muted) !important;
487
+ }
488
+
489
+ .audit-console {
490
+ border: 1px solid var(--sa-border);
491
+ background: rgba(16, 25, 35, 0.92);
492
+ border-radius: 9px;
493
+ overflow: hidden;
494
+ min-height: 700px;
495
+ }
496
+
497
+ .findings-list-radio,
498
+ .finding-detail-panel {
499
+ border: 1px solid rgba(38, 54, 74, 0.95);
500
+ background: rgba(16, 25, 35, 0.94);
501
+ border-radius: 0;
502
+ overflow: hidden;
503
+ }
504
+
505
+ .findings-list-radio {
506
+ height: 690px;
507
+ max-height: 690px;
508
+ overflow-y: auto !important;
509
+ border-right: 0;
510
+ border-radius: 0 0 0 8px;
511
+ scrollbar-gutter: auto;
512
+ }
513
+
514
+ .report-toolbar {
515
+ min-height: 41px;
516
+ border: 1px solid rgba(38, 54, 74, 0.95);
517
+ border-bottom: 0;
518
+ background: linear-gradient(180deg, rgba(16, 25, 35, 0.98), rgba(13, 20, 29, 0.96));
519
+ border-radius: 9px 9px 0 0;
520
+ display: flex;
521
+ align-items: center;
522
+ justify-content: space-between;
523
+ gap: 12px;
524
+ padding: 0 13px;
525
+ }
526
+
527
+ .report-header-row {
528
+ border: 1px solid rgba(38, 54, 74, 0.95) !important;
529
+ border-bottom: 0 !important;
530
+ background: linear-gradient(180deg, rgba(16, 25, 35, 0.98), rgba(13, 20, 29, 0.96)) !important;
531
+ border-radius: 9px 9px 0 0 !important;
532
+ align-items: center !important;
533
+ gap: 8px !important;
534
+ padding: 7px 10px !important;
535
+ }
536
+
537
+ .report-header-row .report-toolbar {
538
+ border: 0 !important;
539
+ background: transparent !important;
540
+ min-height: 28px !important;
541
+ padding: 0 !important;
542
+ }
543
+
544
+ .severity-filter-radio {
545
+ min-width: 360px !important;
546
+ }
547
+
548
+ .severity-filter-radio .wrap,
549
+ .severity-filter-radio .block,
550
+ .severity-filter-radio fieldset {
551
+ background: transparent !important;
552
+ border: 0 !important;
553
+ padding: 0 !important;
554
+ }
555
+
556
+ .severity-filter-radio label {
557
+ border: 1px solid transparent !important;
558
+ border-radius: 6px !important;
559
+ background: transparent !important;
560
+ padding: 5px 7px !important;
561
+ margin: 0 1px !important;
562
+ color: var(--sa-muted) !important;
563
+ transition: background 140ms ease, border-color 140ms ease, color 140ms ease;
564
+ }
565
+
566
+ .severity-filter-radio label:hover,
567
+ .severity-filter-radio label:has(input:checked) {
568
+ background: rgba(22, 34, 51, 0.92) !important;
569
+ border-color: rgba(96, 165, 250, 0.18) !important;
570
+ color: var(--sa-text) !important;
571
+ }
572
+
573
+ .severity-filter-radio label:has(input[value^="Critical"]) span { color: var(--sa-red) !important; }
574
+ .severity-filter-radio label:has(input[value^="High"]) span { color: var(--sa-orange) !important; }
575
+ .severity-filter-radio label:has(input[value^="Medium"]) span { color: var(--sa-yellow) !important; }
576
+ .severity-filter-radio label:has(input[value^="Low"]) span { color: var(--sa-blue) !important; }
577
+
578
+ .severity-filter-radio span {
579
+ font: 700 10px/14px JetBrains Mono, monospace !important;
580
+ white-space: nowrap !important;
581
+ }
582
+
583
+ .severity-filter-radio input {
584
+ display: none !important;
585
+ }
586
+
587
+ .report-download button {
588
+ height: 30px !important;
589
+ min-height: 30px !important;
590
+ border-radius: 7px !important;
591
+ border: 1px solid var(--sa-border) !important;
592
+ background: rgba(22, 34, 51, 0.82) !important;
593
+ color: var(--sa-text) !important;
594
+ font: 700 11px/16px Inter, system-ui, sans-serif !important;
595
+ padding: 0 10px !important;
596
+ }
597
+
598
+ .report-download button:hover {
599
+ border-color: rgba(96, 165, 250, 0.34) !important;
600
+ background: rgba(27, 41, 58, 0.96) !important;
601
+ }
602
+
603
+ .report-overview {
604
+ border: 1px solid rgba(38, 54, 74, 0.95);
605
+ border-top: 0;
606
+ background: rgba(16, 25, 35, 0.88);
607
+ display: grid;
608
+ grid-template-columns: repeat(2, minmax(120px, 0.7fr)) repeat(2, minmax(170px, 1fr));
609
+ gap: 0;
610
+ }
611
+
612
+ .overview-column {
613
+ border-right: 1px solid var(--sa-border);
614
+ padding: 10px 12px;
615
+ }
616
+
617
+ .overview-column:last-child {
618
+ border-right: 0;
619
+ }
620
+
621
+ .overview-column span {
622
+ color: var(--sa-muted);
623
+ font: 600 10px/15px JetBrains Mono, monospace;
624
+ text-transform: uppercase;
625
+ }
626
+
627
+ .overview-column strong {
628
+ display: block;
629
+ color: var(--sa-text);
630
+ font-size: 18px;
631
+ line-height: 24px;
632
+ margin-top: 2px;
633
+ }
634
+
635
+ .overview-tags {
636
+ display: flex;
637
+ flex-wrap: wrap;
638
+ gap: 6px;
639
+ margin-top: 4px;
640
+ }
641
+
642
+ .overview-tags span {
643
+ border: 1px solid var(--sa-border);
644
+ border-radius: 6px;
645
+ background: rgba(22, 34, 51, 0.82);
646
+ color: #cbd5e1;
647
+ padding: 3px 6px;
648
+ text-transform: none;
649
+ }
650
+
651
+ .report-body {
652
+ border: 1px solid var(--sa-border) !important;
653
+ border-top: 0 !important;
654
+ background: rgba(16, 25, 35, 0.94) !important;
655
+ border-radius: 0 0 9px 9px !important;
656
+ overflow: hidden !important;
657
+ }
658
+
659
+ .report-body > .form {
660
+ gap: 0 !important;
661
+ }
662
+
663
+ .report-title {
664
+ color: var(--sa-text);
665
+ font-size: 13px;
666
+ line-height: 18px;
667
+ font-weight: 700;
668
+ }
669
+
670
+ .report-title span {
671
+ color: var(--sa-muted);
672
+ font: 600 11px/16px JetBrains Mono, monospace;
673
+ margin-right: 6px;
674
+ }
675
+
676
+ .report-subnote {
677
+ color: var(--sa-muted);
678
+ font: 500 10px/14px JetBrains Mono, monospace;
679
+ margin-top: 1px;
680
+ opacity: 0.74;
681
+ }
682
+
683
+ .findings-list-radio .wrap,
684
+ .findings-list-radio .block,
685
+ .findings-list-radio fieldset {
686
+ background: transparent !important;
687
+ border: 0 !important;
688
+ padding: 0 !important;
689
+ }
690
+
691
+ .findings-list-radio label {
692
+ border-bottom: 1px solid rgba(38, 54, 74, 0.72) !important;
693
+ background: rgba(16, 25, 35, 0.5) !important;
694
+ padding: 12px 13px !important;
695
+ margin: 0 !important;
696
+ align-items: flex-start !important;
697
+ cursor: pointer !important;
698
+ transition: background 140ms ease, box-shadow 140ms ease, border-color 140ms ease;
699
+ }
700
+
701
+ .findings-list-radio label:hover {
702
+ background: rgba(22, 34, 51, 0.86) !important;
703
+ }
704
+
705
+ .findings-list-radio input:checked + span,
706
+ .findings-list-radio label:has(input:checked) {
707
+ background: linear-gradient(90deg, rgba(96, 165, 250, 0.14), rgba(22, 34, 51, 0.86)) !important;
708
+ box-shadow: inset 2px 0 0 var(--sa-primary);
709
+ }
710
+
711
+ .findings-list-radio span {
712
+ color: #dce4ee !important;
713
+ font: 600 12px/18px Inter, system-ui, sans-serif !important;
714
+ white-space: pre-wrap !important;
715
+ }
716
+
717
+ .findings-list-radio label:has(input[value^="CRIT"]) span { color: var(--sa-red) !important; }
718
+ .findings-list-radio label:has(input[value^="HIGH"]) span { color: var(--sa-orange) !important; }
719
+ .findings-list-radio label:has(input[value^="MED"]) span { color: var(--sa-yellow) !important; }
720
+ .findings-list-radio label:has(input[value^="LOW"]) span { color: var(--sa-blue) !important; }
721
+
722
+ .findings-list-radio label:has(input[value^="LOW"]) {
723
+ background: rgba(6, 182, 212, 0.055) !important;
724
+ }
725
+
726
+ .findings-list-radio input {
727
+ margin-top: 4px !important;
728
+ accent-color: var(--sa-primary) !important;
729
+ }
730
+
731
+ .finding-detail-panel {
732
+ height: 690px;
733
+ max-height: 690px;
734
+ overflow-y: auto;
735
+ border-radius: 0 0 8px 0;
736
+ scrollbar-gutter: auto;
737
+ }
738
+
739
+ .swarm-report .finding-detail-panel {
740
+ border: 0;
741
+ background: transparent;
742
+ border-radius: 0;
743
+ }
744
+
745
+ .audit-filter-row {
746
+ display: flex;
747
+ align-items: center;
748
+ gap: 10px;
749
+ white-space: nowrap;
750
+ }
751
+
752
+ .filter-pill {
753
+ background: rgba(32, 42, 54, 0.9);
754
+ border-radius: 6px;
755
+ padding: 5px 10px;
756
+ color: var(--sa-muted);
757
+ }
758
+
759
+ .filter-pill.active {
760
+ color: var(--sa-text);
761
+ }
762
+
763
+ .filter-dot {
764
+ width: 6px;
765
+ height: 6px;
766
+ border-radius: 999px;
767
+ display: inline-block;
768
+ }
769
+
770
+ .dot-critical { background: var(--sa-red); }
771
+ .dot-high { background: var(--sa-orange); }
772
+ .dot-medium { background: var(--sa-yellow); }
773
+ .dot-low { background: var(--sa-blue); }
774
+
775
+ .audit-console-body {
776
+ display: grid;
777
+ grid-template-columns: minmax(280px, 42%) 1fr;
778
+ min-height: 657px;
779
+ }
780
+
781
+ .finding-list {
782
+ border-right: 1px solid var(--sa-border);
783
+ background: #121922;
784
+ }
785
+
786
+ .finding-row {
787
+ padding: 14px 16px;
788
+ border-bottom: 1px solid var(--sa-border);
789
+ background: #121922;
790
+ }
791
+
792
+ .finding-row:first-child {
793
+ background: #1b232d;
794
+ }
795
+
796
+ .finding-row-meta {
797
+ display: flex;
798
+ align-items: center;
799
+ gap: 8px;
800
+ color: var(--sa-muted);
801
+ font: 500 11px/16px JetBrains Mono, monospace;
802
+ margin-bottom: 7px;
803
+ }
804
+
805
+ .severity-badge {
806
+ border: 1px solid currentColor;
807
+ border-radius: 5px;
808
+ padding: 2px 7px;
809
+ font: 700 10px/14px JetBrains Mono, monospace;
810
+ color: var(--sa-muted);
811
+ letter-spacing: 0.01em;
812
+ }
813
+
814
+ .severity-critical .severity-badge,
815
+ .severity-badge.severity-critical {
816
+ color: #fecaca;
817
+ background: rgba(239, 68, 68, 0.13);
818
+ border-color: rgba(239, 68, 68, 0.55);
819
+ }
820
+ .severity-high .severity-badge,
821
+ .severity-badge.severity-high {
822
+ color: #fed7aa;
823
+ background: rgba(249, 115, 22, 0.13);
824
+ border-color: rgba(249, 115, 22, 0.55);
825
+ }
826
+ .severity-medium .severity-badge,
827
+ .severity-badge.severity-medium {
828
+ color: #fde68a;
829
+ background: rgba(234, 179, 8, 0.13);
830
+ border-color: rgba(234, 179, 8, 0.55);
831
+ }
832
+ .severity-low .severity-badge,
833
+ .severity-badge.severity-low {
834
+ color: #a5f3fc;
835
+ background: rgba(6, 182, 212, 0.13);
836
+ border-color: rgba(6, 182, 212, 0.55);
837
+ }
838
+ .severity-info .severity-badge,
839
+ .severity-badge.severity-info {
840
+ color: #cbd5e1;
841
+ background: rgba(100, 116, 139, 0.16);
842
+ border-color: rgba(100, 116, 139, 0.55);
843
+ }
844
+
845
+ .finding-row-title {
846
+ color: var(--sa-text);
847
+ font-size: 13px;
848
+ line-height: 19px;
849
+ font-weight: 700;
850
+ }
851
+
852
+ .finding-row-path {
853
+ color: var(--sa-muted);
854
+ font: 500 11px/16px JetBrains Mono, monospace;
855
+ margin-top: 3px;
856
+ }
857
+
858
+ .finding-detail {
859
+ padding: 22px 22px 26px;
860
+ background: transparent;
861
+ }
862
+
863
+ .finding-detail-meta {
864
+ display: flex;
865
+ gap: 8px;
866
+ color: var(--sa-muted);
867
+ font: 500 11px/16px JetBrains Mono, monospace;
868
+ margin-bottom: 12px;
869
+ }
870
+
871
+ .finding-detail-title {
872
+ display: flex;
873
+ align-items: center;
874
+ gap: 10px;
875
+ margin-bottom: 22px;
876
+ }
877
+
878
+ .finding-detail-title h3 {
879
+ margin: 0;
880
+ color: var(--sa-text);
881
+ font-size: 18px;
882
+ line-height: 26px;
883
+ }
884
+
885
+ .detail-section {
886
+ margin-bottom: 20px;
887
+ }
888
+
889
+ .detail-section span {
890
+ display: block;
891
+ color: var(--sa-muted);
892
+ font: 600 11px/16px JetBrains Mono, monospace;
893
+ text-transform: uppercase;
894
+ margin-bottom: 8px;
895
+ }
896
+
897
+ .detail-section p {
898
+ color: #dbeafe;
899
+ font-size: 13px;
900
+ line-height: 21px;
901
+ margin: 0;
902
+ }
903
+
904
+ .detail-section pre,
905
+ .reference-card {
906
+ border: 0;
907
+ background: rgba(22, 34, 51, 0.48);
908
+ border-radius: 7px;
909
+ }
910
+
911
+ .detail-section pre {
912
+ color: #f1f5f9;
913
+ white-space: pre-wrap;
914
+ font: 500 12px/20px JetBrains Mono, monospace;
915
+ padding: 14px;
916
+ box-shadow: inset 0 1px 0 rgba(230, 240, 255, 0.03);
917
+ }
918
+
919
+ .reference-card {
920
+ display: flex;
921
+ align-items: center;
922
+ justify-content: space-between;
923
+ padding: 12px 14px;
924
+ color: var(--sa-muted);
925
+ transition: border-color 150ms ease, background 150ms ease;
926
+ }
927
+
928
+ .reference-card:hover {
929
+ background: rgba(27, 41, 58, 0.9);
930
+ border-color: rgba(96, 165, 250, 0.34);
931
+ }
932
+
933
+ .reference-card code {
934
+ color: #dce4ee;
935
+ font: 600 12px/18px JetBrains Mono, monospace;
936
+ }
937
+
938
+ .reference-card a {
939
+ color: var(--sa-text) !important;
940
+ text-decoration: none !important;
941
+ font: 700 12px/18px Inter, system-ui, sans-serif;
942
+ }
943
+
944
+ .audit-empty {
945
+ padding: 72px 24px;
946
+ text-align: center;
947
+ color: var(--sa-muted);
948
+ }
949
+
950
+ .audit-empty h3 {
951
+ color: var(--sa-text);
952
+ margin: 0 0 8px;
953
+ }
954
+
955
+ @media (max-width: 900px) {
956
+ .swarm-summary-grid {
957
+ grid-template-columns: repeat(2, minmax(0, 1fr));
958
+ }
959
+ .audit-console-body {
960
+ grid-template-columns: 1fr;
961
+ }
962
+ .finding-list {
963
+ border-right: 0;
964
+ }
965
+ .report-overview {
966
+ grid-template-columns: 1fr 1fr;
967
+ }
968
+ }
969
+ """
970
+
971
+
972
+ def render_workspace_header() -> str:
973
+ return """
974
+ <section class="swarm-topbar">
975
+ <div class="swarm-brand-row">
976
+ <div>
977
+ <div class="swarm-brand">SwarmAudit</div>
978
+ <div class="swarm-tagline">AI-generated code production-readiness scanner</div>
979
+ </div>
980
+ <div class="swarm-status">mock-first / vLLM-ready</div>
981
+ </div>
982
+ <div class="swarm-progressbar"><span></span></div>
983
+ </section>
984
+ """
985
+
986
+
987
+ def render_agent_swarm(progress: list[str] | None = None) -> str:
988
+ progress = progress or []
989
+ done_count = sum(1 for _, _, token, done_token in AGENT_SWARM if _agent_status(progress, token, done_token) == "done")
990
+ items = "\n".join(
991
+ f"""
992
+ <div class="agent-item {status}">
993
+ <div class="agent-icon">{name[:2].upper()}</div>
994
+ <div>
995
+ <div class="agent-name">{name}</div>
996
+ <div class="agent-desc">{desc}</div>
997
+ </div>
998
+ <div class="agent-status {status}">{status}</div>
999
+ </div>
1000
+ """
1001
+ for name, desc, token, done_token in AGENT_SWARM
1002
+ for status in [_agent_status(progress, token, done_token)]
1003
+ )
1004
+ return f"""
1005
+ <section class="agent-card">
1006
+ <div class="agent-card-header">
1007
+ <div class="agent-card-title">Agent swarm</div>
1008
+ <div class="agent-card-count">{done_count}/{len(AGENT_SWARM)} done</div>
1009
+ </div>
1010
+ <div class="agent-list">{items}</div>
1011
+ </section>
1012
+ """
1013
+
1014
+
1015
+ def _agent_status(progress: list[str], token: str, done_token: str) -> str:
1016
+ matching_events = [event for event in progress if token in event]
1017
+ if any(done_token in event for event in matching_events):
1018
+ return "done"
1019
+ if matching_events:
1020
+ return "running"
1021
+ return "idle"
1022
+
1023
+
1024
+ def render_empty_summary() -> str:
1025
+ return render_summary_cards(
1026
+ files_scanned="-",
1027
+ total_findings="-",
1028
+ severity_counts={},
1029
+ )
1030
+
1031
+
1032
+ def render_report_summary(report: AuditReport) -> str:
1033
+ return render_summary_cards(
1034
+ files_scanned=str(report.scanned_file_count),
1035
+ total_findings=str(report.total_findings_count),
1036
+ severity_counts={
1037
+ Severity.critical: report.severity_summary.get(Severity.critical, 0),
1038
+ Severity.high: report.severity_summary.get(Severity.high, 0),
1039
+ Severity.medium: report.severity_summary.get(Severity.medium, 0),
1040
+ Severity.low: report.severity_summary.get(Severity.low, 0),
1041
+ },
1042
+ )
1043
+
1044
+
1045
+ def render_report_toolbar(report: AuditReport | None) -> str:
1046
+ return f"""
1047
+ <section class="report-toolbar">
1048
+ <div>
1049
+ <div class="report-title"><span>DOC</span>Audit report</div>
1050
+ <div class="report-subnote">Visible rows prioritize important findings; downloads keep full report data.</div>
1051
+ </div>
1052
+ </section>
1053
+ """
1054
+
1055
+
1056
+ def build_severity_filter_choices(report: AuditReport | None) -> list[str]:
1057
+ if report is None:
1058
+ return ["All 0"]
1059
+
1060
+ displayed_counts = {severity: 0 for severity in Severity}
1061
+ for finding in report.findings:
1062
+ displayed_counts[finding.severity] += 1
1063
+
1064
+ choices = [f"All {len(report.findings)}"]
1065
+ for severity, label in [
1066
+ (Severity.critical, "Critical"),
1067
+ (Severity.high, "High"),
1068
+ (Severity.medium, "Medium"),
1069
+ (Severity.low, "Low"),
1070
+ ]:
1071
+ count = displayed_counts.get(severity, 0)
1072
+ if count > 0:
1073
+ choices.append(f"{label} {count}")
1074
+ return choices
1075
+
1076
+
1077
+ def render_summary_cards(
1078
+ files_scanned: str,
1079
+ total_findings: str,
1080
+ severity_counts: dict[Severity, int],
1081
+ ) -> str:
1082
+ severity_cards = []
1083
+ for severity, css_class in [
1084
+ (Severity.critical, "metric-critical"),
1085
+ (Severity.high, "metric-high"),
1086
+ (Severity.medium, "metric-medium"),
1087
+ (Severity.low, "metric-low"),
1088
+ ]:
1089
+ count = severity_counts.get(severity, 0)
1090
+ if count <= 0:
1091
+ continue
1092
+ severity_cards.append(
1093
+ f'<div class="swarm-metric {css_class}"><span>{severity.value.title()}</span><strong>{count}</strong></div>'
1094
+ )
1095
+
1096
+ severity_html = "\n".join(severity_cards)
1097
+ return f"""
1098
+ <section class="swarm-summary-grid">
1099
+ <div class="swarm-metric"><span>Files scanned</span><strong>{files_scanned}</strong></div>
1100
+ <div class="swarm-metric"><span>Findings</span><strong>{total_findings}</strong></div>
1101
+ {severity_html}
1102
+ </section>
1103
+ """
1104
+
1105
 
1106
  async def analyze_repo(repo_url: str):
1107
  if not repo_url.strip():
1108
+ yield (
1109
+ "Paste a public GitHub repository URL to start.",
1110
+ render_agent_swarm(),
1111
+ render_empty_summary(),
1112
+ render_report_toolbar(None),
1113
+ gr.update(choices=["All 0"], value="All 0"),
1114
+ format_report_overview_html(None),
1115
+ gr.update(choices=[], value=None),
1116
+ format_empty_finding_detail_html(),
1117
+ None,
1118
+ None,
1119
+ None,
1120
+ )
1121
  return
1122
 
1123
  progress: list[str] = []
1124
+ agent_html = render_agent_swarm(progress)
1125
+ summary_html = render_empty_summary()
1126
+ report_toolbar_html = render_report_toolbar(None)
1127
+ severity_filter_update = gr.update(choices=["All 0"], value="All 0")
1128
+ report_overview_html = format_report_overview_html(None)
1129
+ finding_choice_update = gr.update(choices=[], value=None)
1130
+ finding_detail_html = format_empty_finding_detail_html()
1131
+ markdown_export = None
1132
+ json_export = None
1133
+ report_state = None
1134
  try:
1135
  async for event in AuditGraph().run_with_progress(repo_url.strip()):
1136
  if isinstance(event, AuditReport):
1137
+ report_state = event
1138
+ filter_choices = build_severity_filter_choices(event)
1139
+ selected_filter = filter_choices[0]
1140
+ severity_filter_update = gr.update(choices=filter_choices, value=selected_filter)
1141
+ finding_choices = build_finding_choices(event, selected_filter)
1142
+ finding_choice_update = gr.update(
1143
+ choices=finding_choices,
1144
+ value=finding_choices[0] if finding_choices else None,
1145
+ )
1146
+ finding_detail_html = format_finding_detail_html(event, 0)
1147
+ summary_html = render_report_summary(event)
1148
+ report_toolbar_html = render_report_toolbar(event)
1149
+ report_overview_html = format_report_overview_html(event)
1150
+ markdown_export, json_export = write_report_exports(event)
1151
  else:
1152
  progress.append(event)
1153
+ agent_html = render_agent_swarm(progress)
1154
+ yield (
1155
+ "\n".join(progress),
1156
+ agent_html,
1157
+ summary_html,
1158
+ report_toolbar_html,
1159
+ severity_filter_update,
1160
+ report_overview_html,
1161
+ finding_choice_update,
1162
+ finding_detail_html,
1163
+ markdown_export,
1164
+ json_export,
1165
+ report_state,
1166
+ )
1167
  except Exception as exc:
1168
  progress.append(f"Audit failed: {exc}")
1169
+ yield (
1170
+ "\n".join(progress),
1171
+ render_agent_swarm(progress),
1172
+ render_empty_summary(),
1173
+ render_report_toolbar(None),
1174
+ gr.update(choices=["All 0"], value="All 0"),
1175
+ format_report_overview_html(None),
1176
+ gr.update(choices=[], value=None),
1177
+ format_empty_finding_detail_html(),
1178
+ None,
1179
+ None,
1180
+ None,
1181
+ )
1182
+
1183
+
1184
+ def build_finding_rows(report: AuditReport | None) -> list[list[str]]:
1185
+ if report is None:
1186
+ return []
1187
+
1188
+ rows: list[list[str]] = []
1189
+ for index, finding in enumerate(report.findings, start=1):
1190
+ rows.append(
1191
+ [
1192
+ f"F-{index:03d}",
1193
+ finding.severity.value,
1194
+ finding.title,
1195
+ f"{finding.file_path}:{finding.line_start}",
1196
+ finding.agent_source,
1197
+ ]
1198
+ )
1199
+ return rows
1200
+
1201
+
1202
+ def _severity_from_filter(filter_label: str | None) -> Severity | None:
1203
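+ # Filter labels look like "High 2" or "All 7"; prefix-matching maps them back to a Severity, and "All" maps to None.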
+ if not filter_label:
1204
+ return None
1205
+ normalized = filter_label.lower()
1206
+ for severity in Severity:
1207
+ if normalized.startswith(severity.value.lower()):
1208
+ return severity
1209
+ return None
1210
+
1211
+
1212
+ def _severity_marker(severity: Severity) -> str:
1213
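+ # Short fixed-width badges keep finding titles aligned in the radio list.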
+ return {
1214
+ Severity.critical: "CRIT",
1215
+ Severity.high: "HIGH",
1216
+ Severity.medium: "MED",
1217
+ Severity.low: "LOW",
1218
+ }.get(severity, "INFO")
1219
+
1220
+
1221
+ def build_finding_choices(report: AuditReport | None, severity_filter: str | None = None) -> list[str]:
1222
+ if report is None:
1223
+ return []
1224
+
1225
+ selected_severity = _severity_from_filter(severity_filter)
1226
+ choices: list[str] = []
1227
+ for index, finding in enumerate(report.findings, start=1):
1228
+ if selected_severity is not None and finding.severity != selected_severity:
1229
+ continue
1230
+ marker = _severity_marker(finding.severity)
1231
+ choices.append(
1232
+ f"{marker:<4} {finding.title}\n"
1233
+ f"{finding.file_path}:{finding.line_start} | {finding.agent_source}"
1234
+ )
1235
+ return choices
1236
+
1237
+
1238
+ def filter_findings(severity_filter: str | None, report: AuditReport | None):
1239
+ choices = build_finding_choices(report, severity_filter)
1240
+ selected = choices[0] if choices else None
1241
+ detail_html = select_finding(selected, report) if selected else format_empty_finding_detail_html()
1242
+ return gr.update(choices=choices, value=selected), detail_html
1243
+
1244
+
1245
+ def select_finding(choice: str | None, report: AuditReport | None) -> str:
1246
+ if report is None or not report.findings:
1247
+ return format_empty_finding_detail_html()
1248
+
1249
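+ # The unfiltered choice list preserves report.findings order, so the matched index maps directly onto the findings list.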
+ row_index = 0
1250
+ if choice:
1251
+ choices = build_finding_choices(report)
1252
+ if choice in choices:
1253
+ row_index = choices.index(choice)
1254
+
1255
+ return format_finding_detail_html(report, row_index)
1256
 
1257
 
1258
  def choose_example(example_name: str) -> str:
1259
  return EXAMPLE_REPOS.get(example_name, "")
1260
 
1261
 
1262
+ async def run_llm_diagnostics() -> str:
1263
+ health = await LLMClient(get_settings()).health_check()
1264
+ lines = [
1265
+ "# LLM Diagnostics",
1266
+ "",
1267
+ f"- Provider: `{health.provider}`",
1268
+ f"- Model: `{health.model}`",
1269
+ f"- Base URL: `{health.base_url}`",
1270
+ f"- Status: `{'OK' if health.ok else 'FAILED'}`",
1271
+ ]
1272
+
1273
+ if health.latency_ms is not None:
1274
+ lines.append(f"- Latency: `{health.latency_ms} ms`")
1275
+ if health.models:
1276
+ lines.extend(["", "## Models", ""])
1277
+ lines.extend(f"- `{model}`" for model in health.models)
1278
+ if health.completion_preview:
1279
+ lines.extend(["", "## Completion Preview", "", health.completion_preview])
1280
+ if health.error:
1281
+ lines.extend(["", "## Error", "", f"```text\n{health.error}\n```"])
1282
+
1283
+ return "\n".join(lines)
1284
+
1285
+
1286
+ async def run_benchmark() -> str:
1287
+ result = await BenchmarkService(get_settings()).run_llm_benchmark()
1288
+ lines = [
1289
+ "# LLM Benchmark",
1290
+ "",
1291
+ f"- Provider: `{result.provider}`",
1292
+ f"- Backend: `{result.backend}`",
1293
+ f"- Model: `{result.model}`",
1294
+ f"- Hardware: `{result.hardware}`",
1295
+ f"- Status: `{'OK' if result.ok else 'FAILED'}`",
1296
+ f"- Prompt chars: `{result.prompt_chars}`",
1297
+ f"- Completion chars: `{result.completion_chars}`",
1298
+ ]
1299
+
1300
+ if result.latency_ms is not None:
1301
+ lines.append(f"- Latency: `{result.latency_ms} ms`")
1302
+ if result.chars_per_second is not None:
1303
+ lines.append(f"- Approx chars/sec: `{result.chars_per_second}`")
1304
+ if result.completion_preview:
1305
+ lines.extend(["", "## Completion Preview", "", result.completion_preview])
1306
+ if result.error:
1307
+ lines.extend(["", "## Error", "", f"```text\n{result.error}\n```"])
1308
+
1309
+ lines.extend(
1310
+ [
1311
+ "",
1312
+ "## Notes",
1313
+ "",
1314
+ "This scaffold uses character counts until a real vLLM endpoint exposes token usage. "
1315
+ "When running on AMD MI300X, record latency/tokens-per-second here for the final demo.",
1316
+ ]
1317
+ )
1318
+ return "\n".join(lines)
1319
+
1320
+
1321
  def build_app() -> gr.Blocks:
1322
+ theme = gr.themes.Base(
1323
+ primary_hue="blue",
1324
+ secondary_hue="cyan",
1325
+ neutral_hue="slate",
1326
+ font=[gr.themes.GoogleFont("Inter"), "system-ui", "sans-serif"],
1327
+ font_mono=[gr.themes.GoogleFont("JetBrains Mono"), "monospace"],
1328
+ )
1329
+
1330
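+ # Suppress only the known Gradio notice about Blocks-constructor parameters moving to launch() while the UI is assembled.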
+ with warnings.catch_warnings():
1331
+ warnings.filterwarnings(
1332
+ "ignore",
1333
+ message="The parameters have been moved from the Blocks constructor to the launch.*",
1334
+ category=UserWarning,
1335
  )
1336
+ with gr.Blocks(title="SwarmAudit", theme=theme, css=APP_CSS, elem_id="swarm-shell") as demo:
1337
+ gr.HTML(render_workspace_header())
1338
 
1339
+ with gr.Tab("Audit"):
1340
+ with gr.Group(elem_classes=["audit-actionbar"]):
1341
+ with gr.Row(equal_height=False):
1342
+ repo_url = gr.Textbox(
1343
+ label="",
1344
+ placeholder="repo https://github.com/owner/repo",
1345
+ scale=8,
1346
+ min_width=420,
1347
+ show_label=False,
1348
+ elem_classes=["repo-input"],
1349
+ )
1350
+ analyze = gr.Button("Analyze", variant="primary", scale=0, min_width=112)
1351
+ gr.HTML('<div class="example-label">Examples</div>', scale=0)
1352
+ for example_name, example_url in EXAMPLE_REPOS.items():
1353
+ example_button = gr.Button(
1354
+ example_name,
1355
+ scale=0,
1356
+ min_width=124,
1357
+ elem_classes=["example-chip"],
1358
+ )
1359
+ example_button.click(lambda url=example_url: url, outputs=repo_url)
1360
 
1361
+ summary_output = gr.HTML(render_empty_summary())
1362
+ report_state = gr.State(None)
1363
 
1364
+ with gr.Row():
1365
+ with gr.Column(scale=1):
1366
+ agent_output = gr.HTML(render_agent_swarm())
1367
+ progress_output = gr.Textbox(
1368
+ label="Activity log",
1369
+ lines=12,
1370
+ interactive=False,
1371
+ elem_classes=["swarm-panel", "swarm-progress"],
1372
+ )
1373
+ with gr.Column(scale=3):
1374
+ with gr.Row(elem_classes=["report-header-row"]):
1375
+ report_toolbar = gr.HTML(render_report_toolbar(None), scale=1)
1376
+ severity_filter = gr.Radio(
1377
+ choices=["All 0"],
1378
+ value="All 0",
1379
+ interactive=True,
1380
+ show_label=False,
1381
+ scale=0,
1382
+ min_width=360,
1383
+ elem_classes=["severity-filter-radio"],
1384
+ )
1385
+ markdown_export = gr.DownloadButton(
1386
+ "Markdown",
1387
+ value=None,
1388
+ size="sm",
1389
+ scale=0,
1390
+ min_width=96,
1391
+ elem_classes=["report-download"],
1392
+ )
1393
+ json_export = gr.DownloadButton(
1394
+ "JSON",
1395
+ value=None,
1396
+ size="sm",
1397
+ scale=0,
1398
+ min_width=76,
1399
+ elem_classes=["report-download"],
1400
+ )
1401
+ report_overview = gr.HTML(format_report_overview_html(None))
1402
+ with gr.Row(equal_height=True, elem_classes=["report-body"]):
1403
+ with gr.Column(scale=1):
1404
+ finding_selector = gr.Radio(
1405
+ choices=[],
1406
+ value=None,
1407
+ interactive=True,
1408
+ show_label=False,
1409
+ elem_classes=["findings-list-radio"],
1410
+ )
1411
+ with gr.Column(scale=1):
1412
+ finding_detail = gr.HTML(
1413
+ format_empty_finding_detail_html(),
1414
+ elem_classes=["swarm-panel", "swarm-report"],
1415
+ )
1416
+
1417
+ analyze.click(
1418
+ analyze_repo,
1419
+ inputs=repo_url,
1420
+ outputs=[
1421
+ progress_output,
1422
+ agent_output,
1423
+ summary_output,
1424
+ report_toolbar,
1425
+ severity_filter,
1426
+ report_overview,
1427
+ finding_selector,
1428
+ finding_detail,
1429
+ markdown_export,
1430
+ json_export,
1431
+ report_state,
1432
+ ],
1433
+ )
1434
+ severity_filter.change(
1435
+ filter_findings,
1436
+ inputs=[severity_filter, report_state],
1437
+ outputs=[finding_selector, finding_detail],
1438
+ )
1439
+ finding_selector.change(select_finding, inputs=[finding_selector, report_state], outputs=finding_detail)
1440
 
1441
+ with gr.Tab("Diagnostics"):
1442
+ gr.Markdown(
1443
+ "Verify the configured LLM backend before switching from mock mode to AMD/vLLM enrichment.",
1444
+ elem_classes=["swarm-note"],
1445
+ )
1446
+ diagnostics_button = gr.Button("Test LLM Connection", variant="primary")
1447
+ diagnostics_output = gr.Markdown(elem_classes=["swarm-panel"])
1448
+ diagnostics_button.click(run_llm_diagnostics, outputs=diagnostics_output)
1449
+
1450
+ with gr.Tab("Benchmark"):
1451
+ gr.Markdown(
1452
+ "Run a small timing probe. Mock mode validates the UI path; vLLM mode records MI300X demo numbers.",
1453
+ elem_classes=["swarm-note"],
1454
+ )
1455
+ benchmark_button = gr.Button("Run Benchmark", variant="primary")
1456
+ benchmark_output = gr.Markdown(elem_classes=["swarm-panel"])
1457
+ benchmark_button.click(run_benchmark, outputs=benchmark_output)
1458
  return demo
1459
 
1460
 
1461
  def launch_app() -> None:
1462
  server_name = os.getenv("GRADIO_SERVER_NAME", "0.0.0.0")
1463
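+ # Honor an explicitly configured port (Spaces sets PORT); otherwise try 7860 and fall back to a dynamic port when it is busy locally.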
+ configured_port = os.getenv("PORT") or os.getenv("GRADIO_SERVER_PORT")
1464
+ server_port = int(configured_port or "7860")
1465
+ try:
1466
+ build_app().queue().launch(server_name=server_name, server_port=server_port)
1467
+ except OSError:
1468
+ if configured_port:
1469
+ raise
1470
+ build_app().queue().launch(server_name=server_name, server_port=None)
1471
 
1472
 
1473
  if __name__ == "__main__":
tests/test_agent_llm_enrichment.py ADDED
@@ -0,0 +1,104 @@
1
+ import pytest
2
+
3
+ from app.agents.docs_agent import DocsAgent
4
+ from app.agents.performance_agent import PerformanceAgent
5
+ from app.agents.quality_agent import QualityAgent
6
+ from app.config import Settings
7
+ from app.schemas import CodeChunk
8
+ from app.services.llm_client import LLMClient
9
+
10
+
11
+ class FakeLLMClient(LLMClient):
12
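+ # Test double: complete_json counts calls and can simulate a backend failure, so no network is touched.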
+ def __init__(self, settings: Settings, payload=None, should_fail: bool = False):
13
+ super().__init__(settings)
14
+ self.payload = payload or {"findings": []}
15
+ self.should_fail = should_fail
16
+ self.calls = 0
17
+
18
+ async def complete_json(self, system_prompt: str, user_prompt: str):
19
+ self.calls += 1
20
+ if self.should_fail:
21
+ raise RuntimeError("vLLM unavailable")
22
+ return self.payload
23
+
24
+
25
+ def make_chunk() -> CodeChunk:
26
+ return CodeChunk(
27
+ file_path="app.py",
28
+ language="Python",
29
+ line_start=1,
30
+ line_end=2,
31
+ content="def work():\n return True",
32
+ )
33
+
34
+
35
+ def make_payload(agent_name: str):
36
+ return {
37
+ "findings": [
38
+ {
39
+ "title": f"{agent_name} LLM finding",
40
+ "severity": "LOW",
41
+ "file_path": "app.py",
42
+ "line_start": 1,
43
+ "line_end": 1,
44
+ "description": "LLM detected an issue.",
45
+ "why_it_matters": "It affects maintainability or runtime behavior.",
46
+ "suggested_fix": "Review and improve the implementation.",
47
+ "agent_source": agent_name,
48
+ }
49
+ ]
50
+ }
51
+
52
+
53
+ @pytest.mark.anyio
54
+ @pytest.mark.parametrize(
55
+ ("agent_cls", "agent_name"),
56
+ [
57
+ (PerformanceAgent, "Performance Agent"),
58
+ (QualityAgent, "Quality Agent"),
59
+ (DocsAgent, "Docs Agent"),
60
+ ],
61
+ )
62
+ async def test_agent_enrichment_disabled_does_not_call_llm(agent_cls, agent_name):
63
+ llm_client = FakeLLMClient(Settings(enable_llm_enrichment=False))
64
+ output = await agent_cls(llm_client).analyze([make_chunk()])
65
+
66
+ assert llm_client.calls == 0
67
+ assert output.metadata["llm_enrichment_enabled"] is False
68
+
69
+
70
+ @pytest.mark.anyio
71
+ @pytest.mark.parametrize(
72
+ ("agent_cls", "agent_name"),
73
+ [
74
+ (PerformanceAgent, "Performance Agent"),
75
+ (QualityAgent, "Quality Agent"),
76
+ (DocsAgent, "Docs Agent"),
77
+ ],
78
+ )
79
+ async def test_agent_enrichment_merges_valid_llm_findings(agent_cls, agent_name):
80
+ llm_client = FakeLLMClient(
81
+ Settings(enable_llm_enrichment=True, max_llm_chunks=1),
82
+ make_payload(agent_name),
83
+ )
84
+ output = await agent_cls(llm_client).analyze([make_chunk()])
85
+
86
+ assert llm_client.calls == 1
87
+ assert any(finding.title == f"{agent_name} LLM finding" for finding in output.findings)
88
+ assert output.metadata["llm_findings"] == 1
89
+
90
+
91
+ @pytest.mark.anyio
92
+ @pytest.mark.parametrize(
93
+ ("agent_cls", "agent_name"),
94
+ [
95
+ (PerformanceAgent, "Performance Agent"),
96
+ (QualityAgent, "Quality Agent"),
97
+ (DocsAgent, "Docs Agent"),
98
+ ],
99
+ )
100
+ async def test_agent_enrichment_failure_is_metadata_not_exception(agent_cls, agent_name):
101
+ llm_client = FakeLLMClient(Settings(enable_llm_enrichment=True), should_fail=True)
102
+ output = await agent_cls(llm_client).analyze([make_chunk()])
103
+
104
+ assert "vLLM unavailable" in output.metadata["llm_error"]
tests/test_api.py CHANGED
@@ -1,5 +1,7 @@
1
  from fastapi.testclient import TestClient
2
 
3
  from app.main import app
4
 
5
 
@@ -8,3 +10,13 @@ def test_health_endpoint():
8
 
9
  assert response.status_code == 200
10
  assert response.json() == {"status": "ok", "app": "SwarmAudit"}
1
  from fastapi.testclient import TestClient
2
 
3
+ import app.main as main
4
+ from app.config import Settings
5
  from app.main import app
6
 
7
 
 
10
 
11
  assert response.status_code == 200
12
  assert response.json() == {"status": "ok", "app": "SwarmAudit"}
13
+
14
+
15
+ def test_llm_health_endpoint(monkeypatch):
16
+ monkeypatch.setattr(main, "get_settings", lambda: Settings(_env_file=None, llm_provider="mock"))
17
+
18
+ response = TestClient(app).get("/llm/health")
19
+
20
+ assert response.status_code == 200
21
+ assert response.json()["ok"] is True
22
+ assert response.json()["provider"] in {"mock", "vllm"}
tests/test_benchmark.py ADDED
@@ -0,0 +1,29 @@
1
+ import pytest
2
+
3
+ from app.config import Settings
4
+ from app.services.benchmark import BenchmarkService
5
+
6
+
7
+ @pytest.mark.anyio
8
+ async def test_mock_benchmark_returns_ok_result():
9
+ result = await BenchmarkService(Settings(llm_provider="mock")).run_llm_benchmark()
10
+
11
+ assert result.ok is True
12
+ assert result.provider == "mock"
13
+ assert result.backend == "Mock local backend"
14
+ assert result.hardware == "Local/mock"
15
+ assert result.completion_chars > 0
16
+
17
+
18
+ @pytest.mark.anyio
19
+ async def test_benchmark_reports_llm_errors():
20
+ service = BenchmarkService(Settings(llm_provider="mock"))
21
+
22
+ async def fail_completion():
23
+ raise RuntimeError("benchmark failed")
24
+
25
+ service.llm_client.test_completion = fail_completion
26
+ result = await service.run_llm_benchmark()
27
+
28
+ assert result.ok is False
29
+ assert "benchmark failed" in result.error
tests/test_config_agent.py ADDED
@@ -0,0 +1,56 @@
1
+ import pytest
2
+
3
+ from app.agents.config_agent import ConfigAgent
4
+ from app.schemas import CodeChunk, Severity
5
+
6
+
7
+ def make_chunk(content: str, file_path: str = "config.py") -> CodeChunk:
8
+ return CodeChunk(
9
+ file_path=file_path,
10
+ language="Python",
11
+ line_start=1,
12
+ line_end=max(1, len(content.splitlines())),
13
+ content=content,
14
+ )
15
+
16
+
17
+ @pytest.mark.anyio
18
+ async def test_config_agent_detects_debug_mode():
19
+ output = await ConfigAgent().analyze([make_chunk("DEBUG = True")])
20
+
21
+ assert output.findings[0].title == "Debug mode enabled"
22
+ assert output.findings[0].severity == Severity.high
23
+ assert output.findings[0].category == "config"
24
+ assert output.findings[0].confidence is not None
25
+
26
+
27
+ @pytest.mark.anyio
28
+ async def test_config_agent_detects_wildcard_cors():
29
+ output = await ConfigAgent().analyze([make_chunk('allow_origins=["*"]')])
30
+
31
+ assert output.findings[0].title == "Wildcard CORS origin"
32
+ assert output.findings[0].severity == Severity.medium
33
+
34
+
35
+ @pytest.mark.anyio
36
+ async def test_config_agent_detects_disabled_tls_verification():
37
+ output = await ConfigAgent().analyze([make_chunk("session.verify = False")])
38
+
39
+ assert output.findings[0].title == "TLS verification disabled in configuration"
40
+ assert output.findings[0].severity == Severity.high
41
+
42
+
43
+ @pytest.mark.anyio
44
+ async def test_config_agent_detects_weak_default_secret():
45
+ output = await ConfigAgent().analyze([make_chunk("SECRET_KEY = 'django-insecure-demo'")])
46
+
47
+ assert output.findings[0].title == "Weak default secret configured"
48
+ assert output.findings[0].severity == Severity.high
49
+
50
+
51
+ @pytest.mark.anyio
52
+ async def test_config_agent_returns_empty_output_for_clean_config():
53
+ output = await ConfigAgent().analyze([make_chunk("DEBUG = env.bool('DEBUG', default=False)")])
54
+
55
+ assert output.findings == []
56
+ assert output.metadata["mode"] == "static-rules"
tests/test_cuda_migration_agent.py ADDED
@@ -0,0 +1,54 @@
1
+ import pytest
2
+
3
+ from app.agents.cuda_migration_agent import CudaMigrationAgent
4
+ from app.schemas import CodeChunk, Severity
5
+
6
+
7
+ def make_chunk(content: str, file_path: str = "model.py") -> CodeChunk:
8
+ return CodeChunk(
9
+ file_path=file_path,
10
+ language="Python",
11
+ line_start=1,
12
+ line_end=max(1, len(content.splitlines())),
13
+ content=content,
14
+ )
15
+
16
+
17
+ @pytest.mark.anyio
18
+ async def test_cuda_migration_agent_detects_torch_cuda():
19
+ output = await CudaMigrationAgent().analyze([make_chunk("device = torch.cuda.current_device()")])
20
+
21
+ assert output.findings[0].title == "PyTorch CUDA-specific API usage"
22
+ assert output.findings[0].severity == Severity.medium
23
+ assert output.findings[0].category == "cuda_migration"
24
+
25
+
26
+ @pytest.mark.anyio
27
+ async def test_cuda_migration_agent_detects_nvidia_monitoring():
28
+ output = await CudaMigrationAgent().analyze([make_chunk("import pynvml\nsubprocess.run(['nvidia-smi'])")])
29
+
30
+ assert output.findings[0].title == "NVIDIA-specific GPU monitoring"
31
+ assert "rocm-smi" in output.findings[0].suggested_fix
32
+
33
+
34
+ @pytest.mark.anyio
35
+ async def test_cuda_migration_agent_detects_cuda_runtime_calls():
36
+ output = await CudaMigrationAgent().analyze([make_chunk("cudaMemcpy(dst, src, size, cudaMemcpyDeviceToHost);", "kernel.cu")])
37
+
38
+ assert output.findings[0].title == "CUDA runtime API call"
39
+ assert output.findings[0].confidence is not None
40
+
41
+
42
+ @pytest.mark.anyio
43
+ async def test_cuda_migration_agent_detects_cuda_libraries():
44
+ output = await CudaMigrationAgent().analyze([make_chunk("handle = cublasCreate()", "linear_algebra.cpp")])
45
+
46
+ assert output.findings[0].title == "CUDA library dependency"
47
+ assert "rocBLAS" in output.findings[0].suggested_fix
48
+
49
+
50
+ @pytest.mark.anyio
51
+ async def test_cuda_migration_agent_returns_empty_for_cpu_code():
52
+ output = await CudaMigrationAgent().analyze([make_chunk("device = torch.device('cpu')")])
53
+
54
+ assert output.findings == []
tests/test_dependency_agent.py ADDED
@@ -0,0 +1,80 @@
1
+ import pytest
2
+
3
+ from app.agents.dependency_agent import DependencyAgent
4
+ from app.config import Settings
5
+ from app.schemas import CodeChunk, Severity
6
+
7
+
8
+ def make_chunk(file_path: str, content: str) -> CodeChunk:
9
+ return CodeChunk(
10
+ file_path=file_path,
11
+ language="Manifest",
12
+ line_start=1,
13
+ line_end=max(1, len(content.splitlines())),
14
+ content=content,
15
+ )
16
+
17
+
18
+ @pytest.mark.anyio
19
+ async def test_dependency_agent_parses_common_manifests_without_network():
20
+ chunks = [
21
+ make_chunk("requirements.txt", "requests==2.28.0\nfastapi>=0.100.0\n"),
22
+ make_chunk("package.json", '{"dependencies": {"express": "^4.18.2"}}'),
23
+ make_chunk("pyproject.toml", '[project]\ndependencies = ["pydantic==2.0.0"]\n'),
24
+ make_chunk("go.mod", "module demo\n\nrequire github.com/gin-gonic/gin v1.9.1\n"),
25
+ make_chunk("Cargo.toml", '[dependencies]\nserde = "1.0.0"\n'),
26
+ ]
27
+
28
+ output = await DependencyAgent(Settings(enable_dependency_cve_lookup=False)).analyze(chunks)
29
+
30
+ assert output.agent_name == "Dependency Agent"
31
+ assert output.findings == []
32
+ assert output.metadata["dependency_count"] == 6
33
+ assert "requirements.txt" in output.metadata["manifests"]
34
+ assert output.metadata["dependency_cves"] == []
35
+
36
+
37
+ @pytest.mark.anyio
38
+ async def test_dependency_agent_turns_cves_into_findings(monkeypatch):
39
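+ # Stand-in for DependencyAgent._lookup_cves returning the same (cve_records, warnings) shape, so no OSV network call happens.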
+ async def fake_lookup_cves(dependencies):
40
+ return (
41
+ [
42
+ {
43
+ "id": "GHSA-test",
44
+ "package": "requests",
45
+ "version": "2.28.0",
46
+ "ecosystem": "PyPI",
47
+ "severity": "HIGH",
48
+ "summary": "Demo vulnerability",
49
+ "manifest_path": "requirements.txt",
50
+ "line_number": 1,
51
+ "fixed_version": "2.32.0",
52
+ }
53
+ ],
54
+ [],
55
+ )
56
+
57
+ agent = DependencyAgent(Settings(enable_dependency_cve_lookup=True))
58
+ monkeypatch.setattr(agent, "_lookup_cves", fake_lookup_cves)
59
+
60
+ output = await agent.analyze([make_chunk("requirements.txt", "requests==2.28.0\n")])
61
+
62
+ assert output.findings[0].severity == Severity.high
63
+ assert output.findings[0].category == "dependency"
64
+ assert output.findings[0].agent_source == "Dependency Agent"
65
+ assert output.metadata["dependency_cves"][0]["id"] == "GHSA-test"
66
+
67
+
68
+ @pytest.mark.anyio
69
+ async def test_dependency_agent_fails_gracefully_when_osv_is_unavailable(monkeypatch):
70
+ async def fake_lookup_cves(dependencies):
71
+ return [], ["Dependency CVE lookup failed gracefully: network unavailable"]
72
+
73
+ agent = DependencyAgent(Settings(enable_dependency_cve_lookup=True))
74
+ monkeypatch.setattr(agent, "_lookup_cves", fake_lookup_cves)
75
+
76
+ output = await agent.analyze([make_chunk("requirements.txt", "requests==2.28.0\n")])
77
+
78
+ assert output.findings == []
79
+ assert output.metadata["dependency_cves"] == []
80
+ assert "network unavailable" in output.metadata["warnings"][0]
tests/test_docs_agent.py CHANGED
@@ -1,6 +1,8 @@
1
  import pytest
2
 
3
  from app.agents.docs_agent import DocsAgent
4
  from app.schemas import CodeChunk, Severity
5
 
6
 
@@ -14,7 +16,7 @@ async def test_docs_agent_flags_incomplete_readme():
14
  content="# Demo\nShort description only.",
15
  )
16
 
17
- output = await DocsAgent().analyze([chunk])
18
 
19
  titles = {finding.title for finding in output.findings}
20
  assert "README missing usage/setup guidance" in titles
@@ -32,7 +34,7 @@ async def test_docs_agent_accepts_useful_readme():
32
  content="# Demo\n\n## Quick Start\nInstall and run it.\n## Tests\nRun pytest.\n## Configuration\nCopy .env.example.",
33
  )
34
 
35
- output = await DocsAgent().analyze([chunk])
36
 
37
  assert output.findings == []
38
 
@@ -47,7 +49,7 @@ async def test_docs_agent_flags_public_python_symbol_without_docstring():
47
  content="def run_audit():\n return True",
48
  )
49
 
50
- output = await DocsAgent().analyze([chunk])
51
 
52
  assert output.findings[0].title == "Public Python symbols missing docstrings"
53
  assert output.findings[0].severity == Severity.low
@@ -64,7 +66,7 @@ async def test_docs_agent_summarizes_missing_docstrings_per_chunk():
64
  content="def first():\n pass\n\ndef second():\n pass",
65
  )
66
 
67
- output = await DocsAgent().analyze([chunk])
68
 
69
  docstring_findings = [
70
  finding for finding in output.findings if finding.title == "Public Python symbols missing docstrings"
 
1
  import pytest
2
 
3
  from app.agents.docs_agent import DocsAgent
4
+ from app.config import Settings
5
+ from app.services.llm_client import LLMClient
6
  from app.schemas import CodeChunk, Severity
7
 
8
 
 
16
  content="# Demo\nShort description only.",
17
  )
18
 
19
+ output = await DocsAgent(LLMClient(Settings(enable_llm_enrichment=False))).analyze([chunk])
20
 
21
  titles = {finding.title for finding in output.findings}
22
  assert "README missing usage/setup guidance" in titles
 
34
  content="# Demo\n\n## Quick Start\nInstall and run it.\n## Tests\nRun pytest.\n## Configuration\nCopy .env.example.",
35
  )
36
 
37
+ output = await DocsAgent(LLMClient(Settings(enable_llm_enrichment=False))).analyze([chunk])
38
 
39
  assert output.findings == []
40
 
 
49
  content="def run_audit():\n return True",
50
  )
51
 
52
+ output = await DocsAgent(LLMClient(Settings(enable_llm_enrichment=False))).analyze([chunk])
53
 
54
  assert output.findings[0].title == "Public Python symbols missing docstrings"
55
  assert output.findings[0].severity == Severity.low
 
66
  content="def first():\n pass\n\ndef second():\n pass",
67
  )
68
 
69
+ output = await DocsAgent(LLMClient(Settings(enable_llm_enrichment=False))).analyze([chunk])
70
 
71
  docstring_findings = [
72
  finding for finding in output.findings if finding.title == "Public Python symbols missing docstrings"
tests/test_error_handling_agent.py ADDED
@@ -0,0 +1,82 @@
1
+ import pytest
2
+
3
+ from app.agents.error_handling_agent import ErrorHandlingAgent
4
+ from app.schemas import CodeChunk, Severity
5
+
6
+
7
+ def make_chunk(content: str, file_path: str = "app.py") -> CodeChunk:
8
+ return CodeChunk(
9
+ file_path=file_path,
10
+ language="Python",
11
+ line_start=1,
12
+ line_end=max(1, len(content.splitlines())),
13
+ content=content,
14
+ )
15
+
16
+
17
+ @pytest.mark.anyio
18
+ async def test_error_handling_agent_detects_bare_except_and_swallow():
19
+ output = await ErrorHandlingAgent().analyze(
20
+ [
21
+ make_chunk(
22
+ "try:\n"
23
+ " work()\n"
24
+ "except:\n"
25
+ " pass\n"
26
+ )
27
+ ]
28
+ )
29
+
30
+ titles = {finding.title for finding in output.findings}
31
+ assert "Broad exception handler" in titles
32
+ assert "Exception swallowed without recovery" in titles
33
+ assert all(finding.category == "error_handling" for finding in output.findings)
34
+
35
+
36
+ @pytest.mark.anyio
37
+ async def test_error_handling_agent_detects_return_none_swallow():
38
+ output = await ErrorHandlingAgent().analyze(
39
+ [
40
+ make_chunk(
41
+ "try:\n"
42
+ " return load_user()\n"
43
+ "except ValueError:\n"
44
+ " return None\n"
45
+ )
46
+ ]
47
+ )
48
+
49
+ assert output.findings[0].title == "Exception swallowed without recovery"
50
+ assert output.findings[0].severity == Severity.high
51
+
52
+
53
+ @pytest.mark.anyio
54
+ async def test_error_handling_agent_does_not_flag_logged_specific_exception():
55
+ output = await ErrorHandlingAgent().analyze(
56
+ [
57
+ make_chunk(
58
+ "try:\n"
59
+ " return load_user()\n"
60
+ "except ValueError:\n"
61
+ " logger.exception('load failed')\n"
62
+ " raise\n"
63
+ )
64
+ ]
65
+ )
66
+
67
+ assert output.findings == []
68
+
69
+
70
+ @pytest.mark.anyio
71
+ async def test_error_handling_agent_detects_request_without_timeout():
72
+ output = await ErrorHandlingAgent().analyze([make_chunk("response = requests.get(url)")])
73
+
74
+ assert output.findings[0].title == "External HTTP call without timeout"
75
+ assert output.findings[0].severity == Severity.medium
76
+
77
+
78
+ @pytest.mark.anyio
79
+ async def test_error_handling_agent_ignores_request_with_timeout():
80
+ output = await ErrorHandlingAgent().analyze([make_chunk("response = requests.get(url, timeout=10)")])
81
+
82
+ assert output.findings == []
tests/test_gradio_app.py CHANGED
@@ -1,7 +1,27 @@
1
  import runpy
2
  from pathlib import Path
3
 
4
- from app.ui.gradio_app import build_app, choose_example, launch_app
5
 
6
 
7
  def test_choose_example_returns_repo_url():
@@ -18,6 +38,188 @@ def test_build_app_creates_gradio_blocks():
18
  assert demo is not None
19
 
20
 
21
  def test_root_app_py_exposes_demo_for_spaces():
22
  namespace = runpy.run_path(str(Path(__file__).parents[1] / "app.py"))
23
 
@@ -43,3 +245,98 @@ def test_launch_app_uses_spaces_friendly_defaults(monkeypatch):
43
  launch_app()
44
 
45
  assert calls == {"server_name": "0.0.0.0", "server_port": 7860}
1
  import runpy
2
  from pathlib import Path
3
 
4
+ import pytest
5
+
6
+ from app.ui.gradio_app import (
7
+ analyze_repo,
8
+ build_app,
9
+ build_finding_choices,
10
+ build_finding_rows,
11
+ build_severity_filter_choices,
12
+ choose_example,
13
+ filter_findings,
14
+ launch_app,
15
+ render_agent_swarm,
16
+ render_empty_summary,
17
+ render_report_toolbar,
18
+ render_report_summary,
19
+ render_workspace_header,
20
+ run_benchmark,
21
+ run_llm_diagnostics,
22
+ select_finding,
23
+ )
24
+ from app.schemas import AuditReport, Finding, Severity
25
 
26
 
27
  def test_choose_example_returns_repo_url():
 
38
  assert demo is not None
39
 
40
 
41
+ def test_render_workspace_header_contains_product_and_readiness_signals():
42
+ html = render_workspace_header()
43
+
44
+ assert "SwarmAudit" in html
45
+ assert "production-readiness scanner" in html
46
+ assert "vLLM" in html
47
+
48
+
49
+ def test_render_empty_summary_contains_placeholder_cards():
50
+ html = render_empty_summary()
51
+
52
+ assert "Files scanned" in html
53
+ assert "<strong>-</strong>" in html
54
+
55
+
56
+ def test_render_agent_swarm_contains_current_agent_panel():
57
+ html = render_agent_swarm()
58
+
59
+ assert "Agent swarm" in html
60
+ assert "Synthesizer" in html
61
+ assert "idle" in html
62
+
63
+
64
+ def test_render_agent_swarm_tracks_running_and_done_states():
65
+ html = render_agent_swarm(
66
+ [
67
+ "Crawler Agent: cloning and mapping repository...",
68
+ "Crawler Agent: mapped 4 files and skipped 1.",
69
+ "Chunker: filtering source files and creating chunks...",
70
+ ]
71
+ )
72
+
73
+ assert "1/12 done" in html
74
+ assert '<div class="agent-item done">' in html
75
+ assert '<div class="agent-item running">' in html
76
+
77
+
78
+ def test_render_report_summary_uses_report_counts():
79
+ report = AuditReport(
80
+ repo_url="https://github.com/example/project",
81
+ scanned_file_count=4,
82
+ skipped_file_count=1,
83
+ findings=[],
84
+ severity_summary={
85
+ Severity.critical: 1,
86
+ Severity.high: 2,
87
+ Severity.medium: 3,
88
+ Severity.low: 4,
89
+ },
90
+ total_findings_count=10,
91
+ security_score=76,
92
+ production_score=84,
93
+ category_summary={"security": 3},
94
+ remediation_roadmap={"this_week": [], "next_sprint": [], "backlog": []},
95
+ agents_run=["Synthesizer Agent"],
96
+ )
97
+
98
+ html = render_report_summary(report)
99
+
100
+ assert "Files scanned" in html
101
+ assert "<strong>4</strong>" in html
102
+ assert "<strong>10</strong>" in html
103
+ assert "metric-critical" in html
104
+
105
+
106
+ def test_render_report_toolbar_renders_report_title():
107
+ report = AuditReport(
108
+ repo_url="https://github.com/example/project",
109
+ scanned_file_count=4,
110
+ skipped_file_count=1,
111
+ findings=[],
112
+ severity_summary={
113
+ Severity.critical: 1,
114
+ Severity.high: 2,
115
+ Severity.medium: 0,
116
+ Severity.low: 0,
117
+ },
118
+ displayed_findings_count=3,
119
+ security_score=76,
120
+ production_score=84,
121
+ category_summary={"security": 3},
122
+ remediation_roadmap={"this_week": [1], "next_sprint": [], "backlog": []},
123
+ agents_run=["Synthesizer Agent"],
124
+ )
125
+
126
+ html = render_report_toolbar(report)
127
+
128
+ assert "Audit report" in html
129
+
130
+
131
+ def test_build_severity_filter_choices_uses_actual_counts():
132
+ report = AuditReport(
133
+ repo_url="https://github.com/example/project",
134
+ scanned_file_count=4,
135
+ skipped_file_count=1,
136
+ findings=[],
137
+ severity_summary={
138
+ Severity.critical: 1,
139
+ Severity.high: 2,
140
+ Severity.medium: 0,
141
+ Severity.low: 0,
142
+ },
143
+ displayed_findings_count=3,
144
+ agents_run=["Synthesizer Agent"],
145
+ )
146
+
147
+ assert build_severity_filter_choices(report) == ["All 3", "Critical 1", "High 2"]
148
+
149
+
150
+ def make_report_with_findings() -> AuditReport:
151
+ finding = Finding(
152
+ title="Missing timeout",
153
+ severity=Severity.medium,
154
+ file_path="app.py",
155
+ line_start=10,
156
+ line_end=10,
157
+ description="HTTP request has no timeout.",
158
+ why_it_matters="Requests can hang indefinitely.",
159
+ suggested_fix="Pass timeout=10.",
160
+ agent_source="Performance Agent",
161
+ category="performance",
162
+ )
163
+ return AuditReport(
164
+ repo_url="https://github.com/example/project",
165
+ scanned_file_count=1,
166
+ skipped_file_count=0,
167
+ findings=[finding],
168
+ severity_summary={
169
+ Severity.critical: 0,
170
+ Severity.high: 0,
171
+ Severity.medium: 1,
172
+ Severity.low: 0,
173
+ },
174
+ total_findings_count=1,
175
+ displayed_findings_count=1,
176
+ agents_run=["Performance Agent"],
177
+ )
178
+
179
+
180
+ def test_build_finding_rows_uses_actual_report_findings():
181
+ rows = build_finding_rows(make_report_with_findings())
182
+
183
+ assert rows == [["F-001", "MEDIUM", "Missing timeout", "app.py:10", "Performance Agent"]]
184
+
185
+
186
+ def test_build_finding_choices_uses_actual_report_findings():
187
+ choices = build_finding_choices(make_report_with_findings())
188
+
189
+ assert choices == ["MED  Missing timeout\napp.py:10 | Performance Agent"]
190
+
191
+
192
+ def test_filter_findings_returns_only_selected_severity():
193
+ high = Finding(
194
+ title="High risk",
195
+ severity=Severity.high,
196
+ file_path="app.py",
197
+ line_start=20,
198
+ line_end=20,
199
+ description="High issue.",
200
+ why_it_matters="Important.",
201
+ suggested_fix="Fix it.",
202
+ agent_source="Security Agent",
203
+ category="security",
204
+ )
205
+ report = make_report_with_findings()
206
+ report.findings.append(high)
207
+
208
+ update, html = filter_findings("High 1", report)
209
+
210
+ assert update["choices"] == ["HIGH High risk\napp.py:20 | Security Agent"]
211
+ assert "High risk" in html
212
+
213
+
214
+ def test_select_finding_renders_selected_actual_finding():
215
+ choices = build_finding_choices(make_report_with_findings())
216
+
217
+ html = select_finding(choices[0], make_report_with_findings())
218
+
219
+ assert "Missing timeout" in html
220
+ assert "Pass timeout=10." in html
221
+
222
+
223
  def test_root_app_py_exposes_demo_for_spaces():
224
  namespace = runpy.run_path(str(Path(__file__).parents[1] / "app.py"))
225
 
 
245
  launch_app()
246
 
247
  assert calls == {"server_name": "0.0.0.0", "server_port": 7860}
248
+
249
+
250
+ def test_launch_app_retries_dynamic_port_when_default_local_port_is_busy(monkeypatch):
251
+ calls = []
252
+
253
+ class FakeQueuedApp:
254
+ def launch(self, **kwargs):
255
+ calls.append(kwargs)
256
+ if len(calls) == 1:
257
+ raise OSError("Cannot find empty port in range: 7860-7860")
258
+
259
+ class FakeApp:
260
+ def queue(self):
261
+ return FakeQueuedApp()
262
+
263
+ monkeypatch.setattr("app.ui.gradio_app.build_app", lambda: FakeApp())
264
+ monkeypatch.delenv("PORT", raising=False)
265
+ monkeypatch.delenv("GRADIO_SERVER_PORT", raising=False)
266
+ monkeypatch.delenv("GRADIO_SERVER_NAME", raising=False)
267
+
268
+ launch_app()
269
+
270
+ assert calls == [
271
+ {"server_name": "0.0.0.0", "server_port": 7860},
272
+ {"server_name": "0.0.0.0", "server_port": None},
273
+ ]
274
+
275
+
276
+ @pytest.mark.anyio
277
+ async def test_run_llm_diagnostics_returns_provider_status(monkeypatch):
278
+ monkeypatch.setattr(
279
+ "app.ui.gradio_app.get_settings",
280
+ lambda: __import__("app.config").config.Settings(llm_provider="mock"),
281
+ )
282
+
283
+ markdown = await run_llm_diagnostics()
284
+
285
+ assert "LLM Diagnostics" in markdown
286
+ assert "Provider: `mock`" in markdown
287
+ assert "Status: `OK`" in markdown
288
+
289
+
290
+ @pytest.mark.anyio
291
+ async def test_run_benchmark_returns_mock_result(monkeypatch):
292
+ monkeypatch.setattr(
293
+ "app.ui.gradio_app.get_settings",
294
+ lambda: __import__("app.config").config.Settings(llm_provider="mock"),
295
+ )
296
+
297
+ markdown = await run_benchmark()
298
+
299
+ assert "LLM Benchmark" in markdown
300
+ assert "Provider: `mock`" in markdown
301
+ assert "Status: `OK`" in markdown
302
+
303
+
304
+ @pytest.mark.anyio
305
+ async def test_analyze_repo_empty_input_clears_report_exports():
306
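+ # Yielded tuple order: progress text, agent swarm, summary, toolbar, severity filter, overview, finding selector, finding detail, markdown export, JSON export, report state.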
+ result = await anext(analyze_repo(" "))
307
+
308
+ assert result[0] == "Paste a public GitHub repository URL to start."
309
+ assert "Agent swarm" in result[1]
310
+ assert "Files scanned" in result[2]
311
+ assert "Audit report" in result[3]
312
+ assert result[4]["choices"] == ["All 0"]
313
+ assert "Security Score" in result[5]
314
+ assert result[6]["choices"] == []
315
+ assert result[6]["value"] is None
316
+ assert "Select a finding" in result[7]
317
+ assert result[8:] == (None, None, None)
318
+
319
+
320
+ @pytest.mark.anyio
321
+ async def test_analyze_repo_failure_clears_report_exports(monkeypatch):
322
+ class FakeAuditGraph:
323
+ async def run_with_progress(self, repo_url: str):
324
+ yield "Crawler Agent: cloning and mapping repository..."
325
+ raise RuntimeError("clone failed")
326
+
327
+ monkeypatch.setattr("app.ui.gradio_app.AuditGraph", FakeAuditGraph)
328
+
329
+ updates = []
330
+ async for update in analyze_repo("https://github.com/example/project"):
331
+ updates.append(update)
332
+
333
+ assert updates[-1][0].endswith("Audit failed: clone failed")
334
+ assert "Agent swarm" in updates[-1][1]
335
+ assert "Files scanned" in updates[-1][2]
336
+ assert "Audit report" in updates[-1][3]
337
+ assert updates[-1][4]["choices"] == ["All 0"]
338
+ assert "Security Score" in updates[-1][5]
339
+ assert updates[-1][6]["choices"] == []
340
+ assert updates[-1][6]["value"] is None
341
+ assert "Select a finding" in updates[-1][7]
342
+ assert updates[-1][8:] == (None, None, None)
tests/test_graph_progress.py CHANGED
@@ -7,6 +7,44 @@ from app.config import Settings
7
  from app.schemas import AuditReport
8
 
9
10
  @pytest.mark.anyio
11
  async def test_run_with_progress_yields_real_stages_and_report(tmp_path: Path):
12
  source = tmp_path / "app.py"
@@ -29,9 +67,20 @@ async def test_run_with_progress_yields_real_stages_and_report(tmp_path: Path):
29
  assert any("Performance Agent" in event for event in events if isinstance(event, str))
30
  assert any("Quality Agent" in event for event in events if isinstance(event, str))
31
  assert any("Docs Agent" in event for event in events if isinstance(event, str))
32
  assert isinstance(events[-1], AuditReport)
33
- assert len(events[-1].findings) == 2
 
34
  assert "Security Agent" in events[-1].agents_run
35
  assert "Performance Agent" in events[-1].agents_run
36
  assert "Quality Agent" in events[-1].agents_run
37
  assert "Docs Agent" in events[-1].agents_run
7
  from app.schemas import AuditReport
8
 
9
 
10
+ def test_audit_graph_exposes_current_agents_through_registry():
11
+ graph = AuditGraph(Settings())
12
+
13
+ assert [spec.node_name for spec in graph.analysis_agents] == [
14
+ "security",
15
+ "performance",
16
+ "quality",
17
+ "docs",
18
+ "config",
19
+ "dependency",
20
+ "error_handling",
21
+ "observability",
22
+ "cuda_migration",
23
+ ]
24
+ assert [spec.state_key for spec in graph.analysis_agents] == [
25
+ "security_output",
26
+ "performance_output",
27
+ "quality_output",
28
+ "docs_output",
29
+ "config_output",
30
+ "dependency_output",
31
+ "error_handling_output",
32
+ "observability_output",
33
+ "cuda_migration_output",
34
+ ]
35
+ assert [spec.agent.name for spec in graph.analysis_agents] == [
36
+ "Security Agent",
37
+ "Performance Agent",
38
+ "Quality Agent",
39
+ "Docs Agent",
40
+ "Config Agent",
41
+ "Dependency Agent",
42
+ "Error Handling Agent",
43
+ "Observability Agent",
44
+ "CUDA-to-ROCm Agent",
45
+ ]
46
+
47
+
48
  @pytest.mark.anyio
49
  async def test_run_with_progress_yields_real_stages_and_report(tmp_path: Path):
50
  source = tmp_path / "app.py"
 
67
  assert any("Performance Agent" in event for event in events if isinstance(event, str))
68
  assert any("Quality Agent" in event for event in events if isinstance(event, str))
69
  assert any("Docs Agent" in event for event in events if isinstance(event, str))
70
+ assert any("Config Agent" in event for event in events if isinstance(event, str))
71
+ assert any("Dependency Agent" in event for event in events if isinstance(event, str))
72
+ assert any("Error Handling Agent" in event for event in events if isinstance(event, str))
73
+ assert any("Observability Agent" in event for event in events if isinstance(event, str))
74
+ assert any("CUDA-to-ROCm Agent" in event for event in events if isinstance(event, str))
75
  assert isinstance(events[-1], AuditReport)
76
+ assert len(events[-1].findings) >= 2
77
+ assert any(finding.agent_source == "Error Handling Agent" for finding in events[-1].findings)
78
  assert "Security Agent" in events[-1].agents_run
79
  assert "Performance Agent" in events[-1].agents_run
80
  assert "Quality Agent" in events[-1].agents_run
81
  assert "Docs Agent" in events[-1].agents_run
82
+ assert "Config Agent" in events[-1].agents_run
83
+ assert "Dependency Agent" in events[-1].agents_run
84
+ assert "Error Handling Agent" in events[-1].agents_run
85
+ assert "Observability Agent" in events[-1].agents_run
86
+ assert "CUDA-to-ROCm Agent" in events[-1].agents_run
tests/test_json_parser.py ADDED
@@ -0,0 +1,44 @@
1
+ from app.schemas import Severity
2
+ from app.services.json_parser import parse_agent_output, parse_json_object
3
+
4
+
5
+ def test_parse_json_object_accepts_fenced_json():
6
+ data = parse_json_object('```json\n{"findings": []}\n```')
7
+
8
+ assert data == {"findings": []}
9
+
10
+
11
+ def test_parse_json_object_extracts_object_from_extra_text():
12
+ data = parse_json_object('Here is JSON: {"findings": []} done.')
13
+
14
+ assert data == {"findings": []}
15
+
16
+
17
+ def test_parse_agent_output_returns_empty_output_for_invalid_json():
18
+ output = parse_agent_output("not json", "Security Agent")
19
+
20
+ assert output.findings == []
21
+ assert output.metadata["parse_error"] is True
22
+
23
+
24
+ def test_parse_agent_output_validates_findings():
25
+ output = parse_agent_output(
26
+ {
27
+ "findings": [
28
+ {
29
+ "title": "Unsafe eval",
30
+ "severity": "HIGH",
31
+ "file_path": "app.py",
32
+ "line_start": 1,
33
+ "line_end": 1,
34
+ "description": "eval is used",
35
+ "why_it_matters": "Arbitrary code execution",
36
+ "suggested_fix": "Remove eval",
37
+ "agent_source": "Security Agent",
38
+ }
39
+ ]
40
+ },
41
+ "Security Agent",
42
+ )
43
+
44
+ assert output.findings[0].severity == Severity.high
tests/test_llm_client.py ADDED
@@ -0,0 +1,59 @@
1
+ import httpx
2
+ import pytest
3
+
4
+ from app.config import Settings
5
+ from app.services.llm_client import LLMClient
6
+
7
+
8
+ @pytest.mark.anyio
9
+ async def test_mock_llm_health_check_is_ok():
10
+ health = await LLMClient(Settings(llm_provider="mock")).health_check()
11
+
12
+ assert health.ok is True
13
+ assert health.provider == "mock"
14
+ assert health.completion_preview == "Mock LLM is active."
15
+
16
+
17
+ @pytest.mark.anyio
18
+ async def test_vllm_health_check_lists_models_and_tests_completion(monkeypatch):
19
+ async def fake_get(self, url, headers):
20
+ return httpx.Response(
21
+ 200,
22
+ json={"data": [{"id": "Qwen/Qwen2.5-Coder-32B-Instruct"}]},
23
+ request=httpx.Request("GET", url),
24
+ )
25
+
26
+ async def fake_post(self, url, json, headers):
27
+ return httpx.Response(
28
+ 200,
29
+ json={"choices": [{"message": {"content": "SwarmAudit LLM OK"}}]},
30
+ request=httpx.Request("POST", url),
31
+ )
32
+
33
+ monkeypatch.setattr(httpx.AsyncClient, "get", fake_get)
34
+ monkeypatch.setattr(httpx.AsyncClient, "post", fake_post)
35
+
36
+ health = await LLMClient(
37
+ Settings(
38
+ llm_provider="vllm",
39
+ llm_base_url="http://amd.example:8000/v1",
40
+ llm_api_key="token",
41
+ )
42
+ ).health_check()
43
+
44
+ assert health.ok is True
45
+ assert health.models == ["Qwen/Qwen2.5-Coder-32B-Instruct"]
46
+ assert health.completion_preview == "SwarmAudit LLM OK"
47
+
48
+
49
+ @pytest.mark.anyio
50
+ async def test_vllm_health_check_reports_errors(monkeypatch):
51
+ async def fake_get(self, url, headers):
52
+ raise httpx.ConnectError("connection failed", request=httpx.Request("GET", url))
53
+
54
+ monkeypatch.setattr(httpx.AsyncClient, "get", fake_get)
55
+
56
+ health = await LLMClient(Settings(llm_provider="vllm")).health_check()
57
+
58
+ assert health.ok is False
59
+ assert "connection failed" in health.error
tests/test_observability_agent.py ADDED
@@ -0,0 +1,85 @@
1
+ import pytest
2
+
3
+ from app.agents.observability_agent import ObservabilityAgent
4
+ from app.schemas import CodeChunk, Severity
5
+
6
+
7
+ def make_chunk(content: str, file_path: str = "app.py") -> CodeChunk:
8
+ return CodeChunk(
9
+ file_path=file_path,
10
+ language="Python",
11
+ line_start=1,
12
+ line_end=max(1, len(content.splitlines())),
13
+ content=content,
14
+ )
15
+
16
+
17
+ @pytest.mark.anyio
18
+ async def test_observability_agent_detects_sensitive_logging():
19
+ output = await ObservabilityAgent().analyze([make_chunk("print(f'password={password}')")])
20
+
21
+ assert output.findings[0].title == "Sensitive value may be written to logs"
22
+ assert output.findings[0].severity == Severity.high
23
+ assert output.findings[0].category == "observability"
24
+
25
+
26
+ @pytest.mark.anyio
27
+ async def test_observability_agent_detects_print_overuse_without_logger():
28
+ output = await ObservabilityAgent().analyze(
29
+ [
30
+ make_chunk(
31
+ "print('start')\n"
32
+ "print('middle')\n"
33
+ "print('done')\n"
34
+ )
35
+ ]
36
+ )
37
+
38
+ assert output.findings[0].title == "Print statements used instead of structured logging"
39
+ assert output.findings[0].severity == Severity.low
40
+
41
+
42
+ @pytest.mark.anyio
43
+ async def test_observability_agent_does_not_flag_prints_when_logger_exists():
44
+ output = await ObservabilityAgent().analyze(
45
+ [
46
+ make_chunk("print('start')\nprint('middle')\nprint('done')\n"),
47
+ make_chunk("logger.info('service started')", "logging_setup.py"),
48
+ ]
49
+ )
50
+
51
+ assert output.findings == []
52
+
53
+
54
+ @pytest.mark.anyio
55
+ async def test_observability_agent_detects_missing_health_route():
56
+ output = await ObservabilityAgent().analyze(
57
+ [
58
+ make_chunk(
59
+ "@app.get('/users')\n"
60
+ "def users():\n"
61
+ " return []\n"
62
+ )
63
+ ]
64
+ )
65
+
66
+ assert output.findings[0].title == "Web service has routes but no health endpoint detected"
67
+ assert output.findings[0].severity == Severity.medium
68
+
69
+
70
+ @pytest.mark.anyio
71
+ async def test_observability_agent_accepts_existing_health_route():
72
+ output = await ObservabilityAgent().analyze(
73
+ [
74
+ make_chunk(
75
+ "@app.get('/users')\n"
76
+ "def users():\n"
77
+ " return []\n"
78
+ "@app.get('/health')\n"
79
+ "def health():\n"
80
+ " return {'ok': True}\n"
81
+ )
82
+ ]
83
+ )
84
+
85
+ assert output.findings == []
tests/test_repo_crawler.py CHANGED
@@ -40,6 +40,17 @@ def test_scan_local_repo_includes_readme_for_docs_agent(tmp_path: Path):
40
  assert result.files[0].language == "Markdown"
41
 
42
 
43
  def test_clone_and_scan_omits_gitpython_timeout_on_windows(tmp_path: Path):
44
  crawler = RepoCrawler(Settings(max_files=10, max_file_size_kb=1, clone_base_dir=str(tmp_path / "clones")))
45
 
 
40
  assert result.files[0].language == "Markdown"
41
 
42
 
43
+ def test_scan_local_repo_includes_dependency_manifests(tmp_path: Path):
44
+ (tmp_path / "requirements.txt").write_text("requests==2.28.0\n", encoding="utf-8")
45
+ (tmp_path / "package.json").write_text('{"dependencies": {"express": "4.18.2"}}', encoding="utf-8")
46
+
47
+ crawler = RepoCrawler(Settings(max_files=10, max_file_size_kb=10))
48
+ result = crawler.scan_local_repo("https://github.com/example/project", tmp_path)
49
+
50
+ assert {source_file.path for source_file in result.files} == {"requirements.txt", "package.json"}
51
+ assert {source_file.language for source_file in result.files} == {"Python Requirements", "Node Package"}
52
+
53
+
54
  def test_clone_and_scan_omits_gitpython_timeout_on_windows(tmp_path: Path):
55
  crawler = RepoCrawler(Settings(max_files=10, max_file_size_kb=1, clone_base_dir=str(tmp_path / "clones")))
56
 
tests/test_report_exports.py ADDED
@@ -0,0 +1,130 @@
1
+ import json
2
+ from pathlib import Path
3
+
4
+ from app.schemas import AuditReport, Finding, Severity
5
+ from app.services.report_formatter import (
6
+ format_empty_report_html,
7
+ format_finding_detail_html,
8
+ format_report_html,
9
+ write_report_exports,
10
+ )
11
+
12
+
13
+ def make_report() -> AuditReport:
14
+ finding = Finding(
15
+ title="Missing timeout",
16
+ severity=Severity.medium,
17
+ file_path="app.py",
18
+ line_start=10,
19
+ line_end=10,
20
+ description="HTTP request has no timeout.",
21
+ why_it_matters="Requests can hang indefinitely.",
22
+ suggested_fix="Pass a timeout value.",
23
+ agent_source="Performance Agent",
24
+ )
25
+ return AuditReport(
26
+ repo_url="https://github.com/example/project",
27
+ scanned_file_count=1,
28
+ skipped_file_count=0,
29
+ findings=[finding],
30
+ severity_summary={
31
+ Severity.critical: 0,
32
+ Severity.high: 0,
33
+ Severity.medium: 1,
34
+ Severity.low: 0,
35
+ },
36
+ total_findings_count=1,
37
+ displayed_findings_count=1,
38
+ hidden_findings_count=0,
39
+ agent_finding_counts={"Performance Agent": 1},
40
+ category_summary={"performance": 1},
41
+ security_score=100,
42
+ production_score=96,
43
+ remediation_roadmap={
44
+ "this_week": [],
45
+ "next_sprint": [
46
+ {
47
+ "title": "Missing timeout",
48
+ "severity": "MEDIUM",
49
+ "category": "performance",
50
+ "file_path": "app.py",
51
+ "line_start": "10",
52
+ "agent_source": "Performance Agent",
53
+ }
54
+ ],
55
+ "backlog": [],
56
+ },
57
+ dependency_cves=[
58
+ {
59
+ "id": "GHSA-test",
60
+ "package": "requests",
61
+ "version": "2.28.0",
62
+ "ecosystem": "PyPI",
63
+ "severity": "HIGH",
64
+ "fixed_version": "2.32.0",
65
+ }
66
+ ],
67
+ agents_run=["Performance Agent", "Synthesizer Agent"],
68
+ )
69
+
70
+
71
+ def test_write_report_exports_creates_markdown_and_json():
72
+ output_dir = Path.cwd() / ".tmp_test_exports" / "report_export"
73
+ output_dir.mkdir(parents=True, exist_ok=True)
74
+
75
+ markdown_path, json_path = write_report_exports(make_report(), output_dir)
76
+
77
+ markdown = output_dir.joinpath("swarm_audit_report.md").read_text(encoding="utf-8")
78
+ data = json.loads(output_dir.joinpath("swarm_audit_report.json").read_text(encoding="utf-8"))
79
+
80
+ assert markdown_path.endswith("swarm_audit_report.md")
81
+ assert json_path.endswith("swarm_audit_report.json")
82
+ assert "# SwarmAudit Report" in markdown
83
+ assert "Security Score" in markdown
84
+ assert "Production Readiness Score" in markdown
85
+ assert "Category Summary" in markdown
86
+ assert "Remediation Roadmap" in markdown
87
+ assert "Dependency CVEs" in markdown
88
+ assert "GHSA-test" in markdown
89
+ assert "Missing timeout" in markdown
90
+ assert data["repo_url"] == "https://github.com/example/project"
91
+ assert data["findings"][0]["severity"] == "MEDIUM"
92
+ assert data["total_findings_count"] == 1
93
+
94
+
95
+ def test_format_report_html_renders_console_and_escapes_content():
96
+ report = make_report()
97
+ report.findings[0].title = "<script>alert('x')</script>"
98
+
99
+ html = format_report_html(report)
100
+
101
+ assert "audit-console" in html
102
+ assert "finding-list" in html
103
+ assert "finding-detail" in html
104
+ assert "&lt;script&gt;" in html
105
+ assert "<script>" not in html
106
+
107
+
108
+ def test_format_report_html_hides_zero_count_severity_filters():
109
+ report = make_report()
110
+
111
+ html = format_report_html(report)
112
+
113
+ assert "Medium 1" in html
114
+ assert "Critical 0" not in html
115
+ assert "High 0" not in html
116
+ assert "Low 0" not in html
117
+
118
+
119
+ def test_format_empty_report_html_renders_placeholder():
120
+ html = format_empty_report_html()
121
+
122
+ assert "Run an audit to populate findings" in html
123
+ assert "audit-console" in html
124
+
125
+
126
+ def test_format_finding_detail_links_to_github_file_reference():
127
+ html = format_finding_detail_html(make_report(), 0)
128
+
129
+ assert 'href="https://github.com/example/project/blob/HEAD/app.py#L10"' in html
130
+ assert 'target="_blank"' in html
tests/test_security_report.py CHANGED
@@ -16,7 +16,7 @@ async def test_security_agent_and_synthesizer_return_structured_report():
16
  line_end=10,
17
  content="API_KEY = '1234567890abcdef'",
18
  )
19
- output = await SecurityAgent(LLMClient(Settings())).analyze([chunk])
20
  repo = RepoScanResult(repo_url="https://github.com/example/project", local_path=".", files=[], skipped_files=0)
21
 
22
  report = await SynthesizerAgent().synthesize(repo, [output])
@@ -29,3 +29,69 @@ async def test_security_agent_and_synthesizer_return_structured_report():
29
  assert report.total_findings_count == 1
30
  assert report.displayed_findings_count == 1
31
  assert report.hidden_findings_count == 0
16
  line_end=10,
17
  content="API_KEY = '1234567890abcdef'",
18
  )
19
+ output = await SecurityAgent(LLMClient(Settings(enable_llm_enrichment=False))).analyze([chunk])
20
  repo = RepoScanResult(repo_url="https://github.com/example/project", local_path=".", files=[], skipped_files=0)
21
 
22
  report = await SynthesizerAgent().synthesize(repo, [output])
 
29
  assert report.total_findings_count == 1
30
  assert report.displayed_findings_count == 1
31
  assert report.hidden_findings_count == 0
32
+
33
+
34
+ class FakeLLMClient(LLMClient):
35
+ def __init__(self, settings: Settings, payload):
36
+ super().__init__(settings)
37
+ self.payload = payload
38
+ self.calls = 0
39
+
40
+ async def complete_json(self, system_prompt: str, user_prompt: str):
41
+ self.calls += 1
42
+ return self.payload
43
+
44
+
45
+ @pytest.mark.anyio
46
+ async def test_security_agent_does_not_call_llm_when_enrichment_disabled():
47
+ llm_client = FakeLLMClient(Settings(enable_llm_enrichment=False), {"findings": []})
48
+ chunk = CodeChunk(file_path="app.py", language="Python", line_start=1, line_end=1, content="print('ok')")
49
+
50
+ output = await SecurityAgent(llm_client).analyze([chunk])
51
+
52
+ assert llm_client.calls == 0
53
+ assert output.metadata["llm_enrichment_enabled"] is False
54
+
55
+
56
+ @pytest.mark.anyio
57
+ async def test_security_agent_merges_valid_llm_findings_when_enabled():
58
+ llm_client = FakeLLMClient(
59
+ Settings(enable_llm_enrichment=True, max_llm_chunks=1),
60
+ {
61
+ "findings": [
62
+ {
63
+ "title": "LLM detected command injection",
64
+ "severity": "HIGH",
65
+ "file_path": "app.py",
66
+ "line_start": 2,
67
+ "line_end": 2,
68
+ "description": "User input reaches a shell command.",
69
+ "why_it_matters": "Attackers could execute arbitrary commands.",
70
+ "suggested_fix": "Avoid shell=True and pass argument lists.",
71
+ "agent_source": "Security Agent",
72
+ }
73
+ ]
74
+ },
75
+ )
76
+ chunk = CodeChunk(file_path="app.py", language="Python", line_start=1, line_end=2, content="run(user_input)")
77
+
78
+ output = await SecurityAgent(llm_client).analyze([chunk])
79
+
80
+ assert llm_client.calls == 1
81
+ assert output.findings[0].title == "LLM detected command injection"
82
+ assert output.metadata["llm_findings"] == 1
83
+
84
+
85
+ @pytest.mark.anyio
86
+ async def test_security_agent_survives_llm_failure_when_enabled():
87
+ class FailingLLMClient(FakeLLMClient):
88
+ async def complete_json(self, system_prompt: str, user_prompt: str):
89
+ raise RuntimeError("vLLM unavailable")
90
+
91
+ llm_client = FailingLLMClient(Settings(enable_llm_enrichment=True), {})
92
+ chunk = CodeChunk(file_path="app.py", language="Python", line_start=1, line_end=1, content="print('ok')")
93
+
94
+ output = await SecurityAgent(llm_client).analyze([chunk])
95
+
96
+ assert output.findings == []
97
+ assert "vLLM unavailable" in output.metadata["llm_error"]
tests/test_synthesizer_agent.py CHANGED
@@ -29,10 +29,10 @@ async def test_synthesizer_preserves_totals_when_display_is_truncated():
     report = await SynthesizerAgent().synthesize(repo, [output])
 
     assert report.total_findings_count == 20
-    assert report.displayed_findings_count == 8
-    assert report.hidden_findings_count == 12
+    assert report.displayed_findings_count == 12
+    assert report.hidden_findings_count == 8
     assert report.agent_finding_counts["Docs Agent"] == 20
-    assert any("displaying 8 of 20" in warning for warning in report.warnings)
+    assert any("displaying 12 of 20" in warning for warning in report.warnings)
 
 
 @pytest.mark.anyio
@@ -46,3 +46,94 @@ async def test_synthesizer_keeps_high_severity_before_low_findings():
     report = await SynthesizerAgent().synthesize(repo, outputs)
 
     assert report.findings[0].severity == Severity.high
+
+
+@pytest.mark.anyio
+async def test_synthesizer_keeps_low_findings_visible_when_report_is_noisy():
+    outputs = [
+        AgentOutput(
+            agent_name="Performance Agent",
+            findings=[make_finding(index, "Performance Agent", Severity.high) for index in range(45)],
+        ),
+        AgentOutput(
+            agent_name="Docs Agent",
+            findings=[make_finding(index + 100, "Docs Agent", Severity.low) for index in range(20)],
+        ),
+    ]
+    repo = RepoScanResult(repo_url="https://github.com/example/project", local_path=".", files=[], skipped_files=0)
+
+    report = await SynthesizerAgent().synthesize(repo, outputs)
+
+    assert any(finding.severity == Severity.low for finding in report.findings)
+    assert sum(1 for finding in report.findings if finding.severity == Severity.low) <= 12
+
+
+@pytest.mark.anyio
+async def test_synthesizer_populates_scores_categories_and_roadmap():
+    outputs = [
+        AgentOutput(
+            agent_name="Security Agent",
+            findings=[make_finding(1, "Security Agent", Severity.high)],
+        ),
+        AgentOutput(
+            agent_name="Performance Agent",
+            findings=[make_finding(2, "Performance Agent", Severity.medium)],
+        ),
+        AgentOutput(
+            agent_name="Error Handling Agent",
+            findings=[make_finding(3, "Error Handling Agent", Severity.low)],
+        ),
+    ]
+    repo = RepoScanResult(repo_url="https://github.com/example/project", local_path=".", files=[], skipped_files=0)
+
+    report = await SynthesizerAgent().synthesize(repo, outputs)
+
+    assert report.security_score == 89
+    assert report.production_score == 95
+    assert report.category_summary == {"error_handling": 1, "performance": 1, "security": 1}
+    assert report.remediation_roadmap["this_week"][0]["category"] == "security"
+    assert report.remediation_roadmap["next_sprint"][0]["category"] == "performance"
+    assert report.remediation_roadmap["backlog"][0]["category"] == "error_handling"
+
+
+@pytest.mark.anyio
+async def test_synthesizer_carries_dependency_cves_and_warnings():
+    outputs = [
+        AgentOutput(
+            agent_name="Dependency Agent",
+            findings=[],
+            metadata={
+                "dependency_cves": [{"id": "GHSA-test", "package": "requests", "severity": "HIGH"}],
+                "warnings": ["Dependency CVE lookup failed gracefully: timeout"],
+            },
+        )
+    ]
+    repo = RepoScanResult(repo_url="https://github.com/example/project", local_path=".", files=[], skipped_files=0)
+
+    report = await SynthesizerAgent().synthesize(repo, outputs)
+
+    assert report.dependency_cves == [{"id": "GHSA-test", "package": "requests", "severity": "HIGH"}]
+    assert "timeout" in report.warnings[0]
+
+
+@pytest.mark.anyio
+async def test_synthesizer_caps_score_penalties_for_noisy_repos():
+    outputs = [
+        AgentOutput(
+            agent_name="Performance Agent",
+            findings=[make_finding(index, "Performance Agent", Severity.medium) for index in range(120)],
+        ),
+        AgentOutput(
+            agent_name="Docs Agent",
+            findings=[make_finding(index + 200, "Docs Agent", Severity.low) for index in range(80)],
+        ),
+        AgentOutput(
+            agent_name="Error Handling Agent",
+            findings=[make_finding(index + 400, "Error Handling Agent", Severity.high) for index in range(20)],
+        ),
+    ]
+    repo = RepoScanResult(repo_url="https://github.com/example/project", local_path=".", files=[], skipped_files=0)
+
+    report = await SynthesizerAgent().synthesize(repo, outputs)
+
+    assert report.production_score == 54
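The noisy-repo test implies the scoring rule sums per-severity penalties and caps the total so a flood of findings cannot drive the score to zero. The real weights behind the pinned values (95, 89, 54) live in the synthesizer; the sketch below only illustrates the capped-penalty shape with invented constants and will not reproduce those numbers:

```python
# Illustrative capped-penalty scoring; weights and cap are invented here and
# deliberately do NOT match the synthesizer's pinned values (95, 89, 54).
SEVERITY_PENALTY = {"CRITICAL": 8, "HIGH": 5, "MEDIUM": 2, "LOW": 1}
MAX_TOTAL_PENALTY = 60  # gives a floor of 40 no matter how noisy the repo is


def capped_production_score(findings) -> int:
    # Assumes each finding carries a Severity enum with a string value.
    penalty = sum(SEVERITY_PENALTY.get(f.severity.value.upper(), 0) for f in findings)
    return 100 - min(penalty, MAX_TOTAL_PENALTY)
```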
tests/test_v2_schemas.py ADDED
@@ -0,0 +1,93 @@
+import json
+
+import pytest
+from pydantic import ValidationError
+
+from app.schemas import AuditReport, Finding, Severity
+from app.services.report_formatter import write_report_exports
+
+
+def make_finding(**overrides) -> Finding:
+    data = {
+        "title": "Finding",
+        "severity": Severity.low,
+        "file_path": "app.py",
+        "line_start": 1,
+        "line_end": 1,
+        "description": "Description",
+        "why_it_matters": "Why",
+        "suggested_fix": "Fix",
+        "agent_source": "Quality Agent",
+    }
+    data.update(overrides)
+    return Finding(**data)
+
+
+def make_report(**overrides) -> AuditReport:
+    data = {
+        "repo_url": "https://github.com/example/project",
+        "scanned_file_count": 1,
+        "skipped_file_count": 0,
+        "findings": [make_finding()],
+        "severity_summary": {
+            Severity.critical: 0,
+            Severity.high: 0,
+            Severity.medium: 0,
+            Severity.low: 1,
+        },
+        "agents_run": ["Quality Agent"],
+    }
+    data.update(overrides)
+    return AuditReport(**data)
+
+
+def test_finding_keeps_legacy_fields_optional_for_v2_metadata():
+    finding = make_finding()
+
+    assert finding.category is None
+    assert finding.confidence is None
+
+
+def test_finding_accepts_v2_category_and_confidence():
+    finding = make_finding(category="observability", confidence=0.91)
+
+    assert finding.category == "observability"
+    assert finding.confidence == 0.91
+
+
+@pytest.mark.parametrize("confidence", [-0.1, 1.1])
+def test_finding_rejects_invalid_confidence(confidence):
+    with pytest.raises(ValidationError):
+        make_finding(confidence=confidence)
+
+
+def test_audit_report_defaults_v2_fields_without_breaking_legacy_reports():
+    report = make_report()
+
+    assert report.category_summary == {}
+    assert report.security_score is None
+    assert report.production_score is None
+    assert report.remediation_roadmap == {}
+    assert report.dependency_cves == []
+
+
+def test_audit_report_exports_v2_fields_to_json(tmp_path):
+    report = make_report(
+        findings=[make_finding(category="config", confidence=0.8)],
+        category_summary={"config": 1},
+        security_score=88,
+        production_score=92,
+        remediation_roadmap={"this_week": [], "next_sprint": [], "backlog": []},
+        dependency_cves=[{"id": "GHSA-test", "package": "demo", "severity": "LOW"}],
+    )
+
+    _, json_path = write_report_exports(report, tmp_path)
+    data = json.loads(tmp_path.joinpath("swarm_audit_report.json").read_text(encoding="utf-8"))
+
+    assert json_path.endswith("swarm_audit_report.json")
+    assert data["findings"][0]["category"] == "config"
+    assert data["findings"][0]["confidence"] == 0.8
+    assert data["category_summary"] == {"config": 1}
+    assert data["security_score"] == 88
+    assert data["production_score"] == 92
+    assert data["dependency_cves"][0]["id"] == "GHSA-test"