Spaces:

lablab-ai-amd-developer-hackathon
/

Threat_Hunter

Running

File size: 46,501 Bytes

c8d30bc

# agents/scout.py
# 功能：Scout Agent 定義 — 威脅情報偵察員
# Harness 支柱：Constraints（系統憲法 + Skill SOP）+ Observability（verbose=True）
# 擁有者：成員 B（Scout Agent Pipeline）
#
# 使用方式：
#   from agents.scout import create_scout_agent
#
# 架構定位：
#   Pipeline 的第一環 — 收集情報 → 輸出 JSON → Analyst 接收
#   Agent = Tool（手）+ Skill（腦）+ Constitution（法）

import os
import logging
import time
from typing import Any, TYPE_CHECKING

import requests

from config import get_llm

# LLM 與 Tool 皆延遲初始化，避免純 helper / 測試 import 路徑觸發 CrewAI 副作用。

logger = logging.getLogger("ThreatHunter")

if TYPE_CHECKING:
    from crewai import Agent

# ══════════════════════════════════════════════════════════════
# Skill 載入（Phase 4D：使用 SkillLoader 熱載入系統）
# ══════════════════════════════════════════════════════════════

# ======================================================================
# v3.7: Path-Aware Skill Map
# 每種 input_type 對應一份 Skill SOP
# ======================================================================

SKILL_MAP: dict[str, str] = {
    "pkg":       "threat_intel.md",        # Path A: package CVE scan
    "code":      "source_code_audit.md",   # Path B-code: source code review
    "injection": "ai_security_audit.md",   # Path B-inject: AI security
    "config":    "config_audit.md",        # Path C: config file audit
}

# 專案根目錄（agents/ 的上一層）
PROJECT_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
SKILL_PATH = os.path.join(PROJECT_ROOT, "skills", "threat_intel.md")  # default fallback

# Phase 4D: 使用 SkillLoader 熱載入系統
try:
    from skills.skill_loader import skill_loader as _skill_loader
    _SKILL_LOADER_AVAILABLE = True
    logger.info("[Scout] Phase 4D: SkillLoader 啟用 ✓")
except ImportError:
    _skill_loader = None
    _SKILL_LOADER_AVAILABLE = False
    logger.warning("[Scout] Phase 4D: SkillLoader 不可用，使用內建 _load_skill")


# ── GHSA Severity 解析輔助（Phase 7.5）──────────────────────
# OSV vuln_dict 中的 database_specific.severity 存有 GitHub Advisory 嚴重度
# 直接解析此欄位填補 Intel Fusion 的 GHSA 維度（10% 權重）
# 來源：https://docs.github.com/en/graphql/reference/enums#securityadvisoryidentifiertype

def _extract_ghsa_severity_from_osv(vuln_dict: dict) -> str:
    """
    從 OSV vuln_dict 解析 GHSA severity。

    OSV 的 database_specific 欄位：
      {
        "severity": "HIGH",    ← GitHub Advisory severity
        "cvss": {...}
      }
    また、vuln["osv_id"].startswith("GHSA-") 也是 GHSA 直接來源。

    Returns:
        "CRITICAL" | "HIGH" | "MODERATE" | "MEDIUM" | "LOW" | "UNKNOWN"
    """
    # 優先從已解析的欄位取（如果 _parse_osv_vuln 已填入）
    if vuln_dict.get("ghsa_severity"):
        return vuln_dict["ghsa_severity"]
    # 從原始 severity 欄位取
    sev = vuln_dict.get("severity", "")
    if sev in ("CRITICAL", "HIGH", "MODERATE", "MEDIUM", "LOW"):
        return sev
    return "UNKNOWN"
# ─────────────────────────────────────────────────────────────



def _severity_from_cvss(cvss_score: float) -> str:
    """將 CVSS 分數轉成標準 severity 字串。"""
    if cvss_score >= 9.0:
        return "CRITICAL"
    if cvss_score >= 7.0:
        return "HIGH"
    if cvss_score >= 4.0:
        return "MEDIUM"
    return "LOW"


def _summarize_intel_fusion_for_task(intel_fusion_result: dict | None) -> str:
    """將 Intel Fusion 結果壓縮成給 Scout 任務描述使用的摘要。"""
    if not intel_fusion_result:
        return ""

    fusion_results = intel_fusion_result.get("fusion_results", [])
    if not fusion_results:
        return ""

    lines: list[str] = [
        "Layer 1 Intel Fusion evidence is available.",
        "Reuse this enrichment instead of re-querying EPSS or OTX.",
        "Use OSV/NVD only for CVE discovery, verification, or missing-package fallback.",
        "Intel Fusion evidence:",
    ]

    for fusion in fusion_results[:8]:
        dims = fusion.get("dimension_scores", {})
        cve_id = fusion.get("cve_id", "UNKNOWN")
        score = fusion.get("composite_score", "n/a")
        kev = bool(dims.get("kev", False))
        epss = dims.get("epss", "n/a")
        ghsa = dims.get("ghsa_severity", "UNKNOWN")
        otx = dims.get("otx_threat", "unknown")
        lines.append(
            f"- {cve_id}: composite={score}, kev={kev}, epss={epss}, ghsa={ghsa}, otx={otx}"
        )

    return "\n".join(lines)


def _merge_intel_fusion_evidence(output: dict[str, Any], intel_fusion_result: dict | None) -> dict[str, Any]:
    """把 Intel Fusion 的富化證據併入 Scout 最終漏洞清單。"""
    if not intel_fusion_result:
        return output

    fusion_results = intel_fusion_result.get("fusion_results", [])
    if not fusion_results:
        return output

    representative_fusions: list[dict[str, Any]] = []
    fusion_by_cve: dict[str, dict[str, Any]] = {}
    for fusion in fusion_results:
        cve_id = str(fusion.get("cve_id", "")).strip()
        is_representative = (
            fusion.get("evidence_type") == "representative_cve"
            or bool(fusion.get("must_not_enter_package_actions"))
            or bool(fusion.get("not_directly_observed"))
        )
        if is_representative:
            representative_fusions.append(fusion)
            continue
        if cve_id:
            fusion_by_cve[cve_id] = fusion

    if not fusion_by_cve:
        if representative_fusions:
            output["_intel_fusion_applied"] = {
                "merged_existing": 0,
                "injected_missing": 0,
                "fusion_count": 0,
                "representative_cves_skipped": len(representative_fusions),
            }
            output["representative_cve_evidence"] = representative_fusions
        return output

    vulnerabilities = output.setdefault("vulnerabilities", [])
    seen_cves = {str(v.get("cve_id", "")).strip() for v in vulnerabilities}
    merged_count = 0
    injected_count = 0

    for vuln in vulnerabilities:
        cve_id = str(vuln.get("cve_id", "")).strip()
        fusion = fusion_by_cve.get(cve_id)
        if not fusion:
            continue

        dims = fusion.get("dimension_scores", {})
        vuln["composite_score"] = fusion.get("composite_score", vuln.get("composite_score"))
        vuln["intel_confidence"] = fusion.get("confidence", vuln.get("intel_confidence", ""))
        vuln["dimensions_used"] = fusion.get("dimensions_used", vuln.get("dimensions_used", []))
        vuln["weights_used"] = fusion.get("weights_used", vuln.get("weights_used", {}))
        vuln["evidence_type"] = fusion.get("evidence_type", vuln.get("evidence_type", "direct_cve"))
        vuln["not_directly_observed"] = bool(fusion.get("not_directly_observed", False))
        vuln["must_not_enter_package_actions"] = bool(fusion.get("must_not_enter_package_actions", False))

        if dims.get("epss") is not None:
            vuln["epss_score"] = dims.get("epss")
        if "kev" in dims:
            vuln["in_cisa_kev"] = bool(dims.get("kev"))
        if dims.get("ghsa_severity"):
            vuln["ghsa_severity"] = dims.get("ghsa_severity")
        if dims.get("otx_threat"):
            vuln["otx_threat"] = dims.get("otx_threat")

        merged_count += 1

    for cve_id, fusion in fusion_by_cve.items():
        if cve_id in seen_cves:
            continue

        dims = fusion.get("dimension_scores", {})
        cvss_score = float(dims.get("cvss", 0.0) or 0.0)
        vulnerabilities.append({
            "cve_id": cve_id,
            "package": fusion.get("package", "unknown"),
            "cvss_score": cvss_score,
            "severity": _severity_from_cvss(cvss_score),
            "description": fusion.get("description", ""),
            "is_new": True,
            "source": "INTEL_FUSION",
            "evidence_type": fusion.get("evidence_type", "package_cve"),
            "not_directly_observed": bool(fusion.get("not_directly_observed", False)),
            "must_not_enter_package_actions": bool(fusion.get("must_not_enter_package_actions", False)),
            "composite_score": fusion.get("composite_score", 0.0),
            "intel_confidence": fusion.get("confidence", ""),
            "dimensions_used": fusion.get("dimensions_used", []),
            "weights_used": fusion.get("weights_used", {}),
            "epss_score": dims.get("epss"),
            "in_cisa_kev": bool(dims.get("kev", False)),
            "ghsa_severity": dims.get("ghsa_severity", "UNKNOWN"),
            "otx_threat": dims.get("otx_threat", "unknown"),
        })
        injected_count += 1

    output["_intel_fusion_applied"] = {
        "merged_existing": merged_count,
        "injected_missing": injected_count,
        "fusion_count": len(fusion_by_cve),
        "representative_cves_skipped": len(representative_fusions),
    }
    if representative_fusions:
        output["representative_cve_evidence"] = representative_fusions
    return output


def merge_intel_fusion_evidence(output: dict[str, Any], intel_fusion_result: dict | None) -> dict[str, Any]:
    """Public wrapper for post-discovery Intel Fusion enrichment."""
    return _merge_intel_fusion_evidence(output, intel_fusion_result)


def _reconcile_is_new_flags(output: dict[str, Any], historical_cves: set[str]) -> dict[str, Any]:
    """依照歷史記憶重新校正所有漏洞的 is_new 旗標。"""
    corrected = 0
    for vuln in output.get("vulnerabilities", []):
        cve_id = vuln.get("cve_id", "")
        expected_is_new = cve_id not in historical_cves
        if vuln.get("is_new") != expected_is_new:
            vuln["is_new"] = expected_is_new
            corrected += 1

    summary = output.setdefault("summary", {})
    summary["new_since_last_scan"] = sum(
        1 for vuln in output.get("vulnerabilities", []) if vuln.get("is_new")
    )
    output["_is_new_corrected"] = corrected
    return output


def _load_skill(skill_filename: str = "threat_intel.md") -> str:
    """
    Load Skill SOP file by filename (v3.7 path-aware + Phase 4D 熱載入).

    Phase 4D: 優先使用 SkillLoader 單例（支援熱載入、mtime 驗證）。
    Fallback: 直接從磁碟讀取（原有實作，確保向後相容）。
    """
    # Phase 4D: SkillLoader 熱載入路徑
    if _SKILL_LOADER_AVAILABLE and _skill_loader is not None:
        try:
            return _skill_loader.load_skill(skill_filename)
        except Exception as e:
            logger.warning("[Scout] SkillLoader 失敗，回退直接讀取: %s", e)

    # Fallback: 直接從磁碟讀取（原有實作）
    skill_path = os.path.join(PROJECT_ROOT, "skills", skill_filename)
    for encoding in ("utf-8", "utf-8-sig", "latin-1"):
        try:
            if os.path.exists(skill_path):
                with open(skill_path, "r", encoding=encoding) as f:
                    content = f.read().strip()
                if content:
                    logger.info("[OK] Skill loaded: %s (%d chars)", skill_path, len(content))
                    return content
        except (IOError, UnicodeDecodeError):
            continue

    logger.warning("[WARN] Skill file not found, using fallback: %s", skill_path)
    return _FALLBACK_SKILL



# 內嵌精簡版 Skill（Graceful Degradation — Skill 檔案遺失時的保底）
_FALLBACK_SKILL = """
# Skill: Threat Intelligence Collection (minimal fallback)

## SOP
1. First call read_memory(agent_name="scout") to read history.
2. Query vulnerabilities for each technology package with search_nvd.
3. For CVEs with CVSS >= 7.0, call search_otx for threat intelligence.
4. Compare with history and mark is_new.
5. Write the result with write_memory.
6. Output pure JSON with no extra text.

## Quality Red Lines
- CVE IDs must come from search_nvd. Do not fabricate them.
- CVSS scores must come from the NVD API.
- Output pure JSON only.
""".strip()


# ══════════════════════════════════════════════════════════════
# 系統憲法（Constraints 支柱 — 層級 A）
# ══════════════════════════════════════════════════════════════

CONSTITUTION = """
## System Constitution - Non-Negotiable Rules

1. **CVE source constraint**: every CVE ID must come from search_nvd tool results.
   Never fabricate, infer, or recall CVE IDs from memory.
   Violating this rule creates hallucinated output and causes the Sentinel fact-check to fail the pipeline.

2. **CVSS source constraint**: every CVSS score must come from the NVD API result.
   Do not estimate, adjust, or round CVSS values yourself.

3. **Output format constraint**: your Final Answer must be JSON and only JSON.
   Do not add explanations, headings, markdown, or natural-language text before or after the JSON.

4. **Tool-use constraint**: you must query vulnerabilities through the search_nvd tool.
   Do not skip tool calls and answer from training data; training data may be outdated.

5. **Honesty constraint**: if a package has no results, report count: 0 honestly.
   Do not invent vulnerabilities to make the report look useful.

6. **Memory-read constraint**: the first step after startup must be read_memory.
   Sentinel Behavior Monitor checks this behavior.

7. **Loop constraint**: run at most 15 ReAct iterations.
   If all packages are queried before 15 iterations, output the result immediately and do no extra work.

8. **Memory-write constraint (most important)**: before giving the Final Answer,
   you must call write_memory to store the complete report.
   Order: query all packages -> assemble JSON -> call write_memory -> confirm success -> then provide Final Answer.
   If you are about to answer before calling write_memory, stop and call write_memory first.
""".strip()


# ══════════════════════════════════════════════════════════════
# Agent 工廠函式
# ══════════════════════════════════════════════════════════════

def create_scout_agent(
    excluded_models: list[str] | None = None,
    input_type: str = "pkg",
) -> "Agent":
    """
    Build Scout Agent with Path-Aware Skill SOP (v3.7).

    input_type selects which Skill file to embed in backstory:
      pkg       -> threat_intel.md         (NVD CVE scan for packages)
      code      -> source_code_audit.md    (OWASP Top10 + CWE for source code)
      injection -> ai_security_audit.md    (OWASP LLM Top10 + MITRE ATLAS)
      config    -> config_audit.md         (CIS Benchmark for config files)

    Args:
        excluded_models: Models to skip (429-rate-limited)
        input_type: Path type from frontend detector

    Returns:
        CrewAI Agent instance ready for Task and Crew
    """
    skill_filename = SKILL_MAP.get(input_type, "threat_intel.md")
    skill_content = _load_skill(skill_filename)
    logger.info("[Scout] Path=%s -> Skill=%s", input_type, skill_filename)

    # Goal adapts to the input path
    _GOAL_MAP = {
        "pkg":       "Collect known CVEs for the given package list from OSV/NVD, merge Intel Fusion evidence when available, compare with history, and output structured JSON.",
        "code":      "Audit source code for OWASP Top10 / CWE vulnerabilities; extract package imports and scan NVD; output structured JSON.",
        "injection": "Classify and assess AI security threats (OWASP LLM Top10 / MITRE ATLAS) in the given input; output structured JSON with no CVE hallucination.",
        "config":    "Audit the given configuration file against CIS Benchmarks for misconfigurations and hardcoded secrets; output structured JSON.",
    }
    agent_goal = _GOAL_MAP.get(input_type, _GOAL_MAP["pkg"])

    backstory = f"""You are an expert security analyst specialized in identifying software and AI system vulnerabilities.
You are rigorous, precise, and never fabricate data.

{CONSTITUTION}

---

## Analysis Methodology (Skill SOP)

You MUST follow this Standard Operating Procedure for the current scan path ({input_type}):

{skill_content}
"""

    # 延遲匯入 CrewAI，避免純 helper / 測試路徑在 import 階段觸發本機儲存副作用。
    from crewai import Agent
    from tools.nvd_tool import search_nvd
    from tools.osv_tool import search_osv
    from tools.memory_tool import read_memory, write_memory, history_search

    llm = get_llm(exclude_models=excluded_models)
    scout = Agent(
        role="Threat Intelligence Scout",
        goal=agent_goal,
        backstory=backstory,
        tools=[
            search_osv,   # 主力：OSV.dev ecosystem-aware 精確查詢（不會返回無關 1999 CVE）
            search_nvd,   # 備用：NVD CPE 查詢（OSV 無結果時使用）
            read_memory, write_memory, history_search,
        ],
        llm=llm,
        verbose=True,
        max_iter=15,   # SOP: 最多 15 輪 ReAct（包含 6 步驟程：read_memory+search_nvd+OTX+write_memory）
        allow_delegation=False,
    )

    logger.info(
        "[OK] Scout Agent ready | input_type=%s | skill=%s | llm=%s",
        input_type,
        skill_filename,
        llm.model if hasattr(llm, 'model') else 'unknown',
    )
    return scout


# ══════════════════════════════════════════════════════════════
# CrewAI Task 工廠函式（便利函式，供 main.py 使用）
# ══════════════════════════════════════════════════════════════

def create_scout_task(
    agent,
    tech_stack: str,
    intel_fusion_result: dict | None = None,
):
    """
    v3.4: Scout Task - package-aware mode.
    When tech_stack is a short comma-separated package list (from PackageExtractor),
    explicitly enumerate each package for the LLM to query via search_nvd.
    """
    from crewai import Task

    intel_summary = _summarize_intel_fusion_for_task(intel_fusion_result)

    # Detect if input is a clean package list or raw code/long text
    is_package_list = (
        len(tech_stack) < 300
        and "\n" not in tech_stack
        and "def " not in tech_stack
        and "import " not in tech_stack
    )

    if is_package_list:
        packages = [p.strip() for p in tech_stack.split(",") if p.strip()]
        packages_display = "\n".join(f"   {i+1}. {pkg}" for i, pkg in enumerate(packages))
        _osv_cmd_lines = "\n".join(f"   - search_osv('{pkg}')" for pkg in packages[:8])
        task_desc = (
            f"You are analyzing security vulnerabilities for packages extracted from source code.\n\n"
            f"Package list to scan:\n{packages_display}\n\n"
            f"{intel_summary + chr(10) + chr(10) if intel_summary else ''}"
            f"Steps to follow (MUST call tools in order):\n\n"
            f"Step 1: Call read_memory\n"
            f"   Action: read_memory\n"
            f"   Action Input: scout\n\n"
            f"Step 2: For EACH package, call search_osv first (more precise), search_nvd as fallback:\n"
            f"{_osv_cmd_lines}\n"
            f"   If search_osv returns count=0, then try: search_nvd('<package>')\n\n"
            f"Step 3: Reuse Intel Fusion evidence when available.\n"
            f"   - Do NOT re-query EPSS or OTX from Scout.\n"
            f"   - Prefer Intel Fusion values for composite_score, KEV, EPSS, GHSA, and OTX fields.\n"
            f"   - If Intel Fusion has no matching CVE, continue with OSV/NVD-only evidence.\n\n"
            f"Step 4: Assemble JSON report from REAL tool results only\n"
            f"   - CVE IDs MUST come from search_osv or search_nvd output\n"
            f"   - Compare with read_memory history, mark is_new\n\n"
            f"Step 5: Call write_memory to save results\n"
            f"   Action: write_memory\n"
            f"   Action Input: scout|{{JSON report}}\n\n"
            f"Step 6: Output JSON report as Final Answer\n\n"
            f"FORBIDDEN:\n"
            f"- Do NOT skip tool calls\n"
            f"- Do NOT fabricate CVE IDs\n"
            f"- Do NOT use backstory examples (they are fake)\n"
            f"- write_memory MUST be called before Final Answer"
        )
    else:
        task_desc = (
            f"You are analyzing security vulnerabilities in: {tech_stack[:800]}\n\n"
            f"{intel_summary + chr(10) + chr(10) if intel_summary else ''}"
            f"Steps to follow (MUST call tools in order):\n\n"
            f"Step 1: Call read_memory\n"
            f"   Action: read_memory\n"
            f"   Action Input: scout\n\n"
            f"Step 2: Extract PACKAGE NAMES from the code, then call search_osv first:\n"
            f"   RULE: Package names come from require() or import statements ONLY.\n"
            f"   Example: require('express') -> search_osv('express')\n"
            f"   Example: require('lodash')  -> search_osv('lodash')\n"
            f"   If search_osv returns count=0 for a package, fallback: search_nvd('<package>')\n"
            f"   FORBIDDEN search terms (these are syntax, NOT packages):\n"
            f"   - eval, exec, Function, innerHTML, script, html, document\n"
            f"   - const, let, var, function, class, async, await\n"
            f"   - req, res, app, user, input (these are variable names)\n"
            f"   If no require()/import found, output empty vulnerabilities list.\n\n"
            f"Step 3: Reuse Intel Fusion evidence when available.\n"
            f"   - Do NOT re-query EPSS or OTX from Scout.\n"
            f"   - Keep Scout focused on package extraction, OSV/NVD discovery, and schema output.\n"
            f"   - If Intel Fusion has no matching CVE, continue with OSV/NVD-only evidence.\n\n"
            f"Step 4: Assemble JSON report from REAL tool results only\n\n"
            f"Step 5: Call write_memory\n"
            f"   Action: write_memory\n"
            f"   Action Input: scout|{{JSON report}}\n\n"
            f"Step 6: Output JSON report as Final Answer\n\n"
            f"FORBIDDEN:\n"
            f"- Do NOT search NVD with: eval, html, innerHTML, script, const, function\n"
            f"- Do NOT skip tool calls\n"
            f"- Do NOT fabricate CVE IDs\n"
            f"- write_memory MUST be called before Final Answer"
        )

    return Task(
        description=task_desc,
        expected_output="Structured JSON threat intel report with CVEs from search_osv (primary) or search_nvd (fallback), ready for deterministic Intel Fusion evidence merge.",
        agent=agent,
    )



def run_scout_pipeline(
    tech_stack: str,
    input_type: str = "pkg",
    intel_fusion_result: dict | None = None,
) -> dict:
    """
    Execute full Scout Pipeline with Harness code-level guarantees.

    v5.0 (Phase 7.5) 新增：
      - OSV Batch 預熱：LLM 啟動前批量查詢所有套件，結果預存快取
        → LLM 呼叫 search_osv() 時直接命中快取，無需等待 API
      - Harness 2.5：改用 OSV 資料做 LLM 遺漏補充（取代 NVD cache inject）
      - GHSA severity 維度：從 OSV database_specific.severity 直接解析

    v3.7: input_type selects the correct Skill SOP for path-aware analysis.

    Args:
        tech_stack: User input (e.g. "Django 4.2, Redis 7.0" or source code)
        input_type: Path type (pkg/code/injection/config)
        intel_fusion_result: Optional Layer 1 enrichment to merge into Scout output

    Returns:
        dict: Parsed Scout JSON report
    """
    import json
    from crewai import Crew, Process
    from config import mark_model_failed, get_current_model_name, rate_limiter
    from tools.memory_tool import write_memory
    # 新版 memory_tool 無 _write_memory_impl，使用公開 Tool 介面

    # ── Harness 0：OSV Batch 預熱快取（在 LLM 之前執行）───────────
    # 理由：LLM 呼叫 search_osv() 是一個一個套件查，無法自行批量。
    # 在 Code-level Harness 層先做 Batch 查詢，結果存入快取，
    # Agent 呼叫 search_osv() 時直接命中快取，延遲從 N×API_RTT → 1×API_RTT。
    _osv_batch_cache: dict[str, list] = {}  # pkg → [vuln_dict, ...]
    if input_type == "pkg":
        # 從 tech_stack 提取套件名稱（去版本號）
        _pkg_list = [item.strip().split()[0] for item in tech_stack.split(",") if item.strip()]
        if _pkg_list:
            try:
                from tools.osv_tool import search_osv_batch
                logger.info("[HARNESS 0] OSV Batch warmup: %s", _pkg_list)
                _osv_batch_cache = search_osv_batch(_pkg_list)
                logger.info("[HARNESS 0] OSV Batch warmup done: %d packages cached",
                            len(_osv_batch_cache))
            except Exception as _e:
                logger.warning("[HARNESS 0] OSV Batch warmup failed (non-fatal): %s", _e)
    # ────────────────────────────────────────────────────────────

    # 429 自動輪替：最多重試 MAX_LLM_RETRIES 次（每次切換模型）
    MAX_LLM_RETRIES = 2
    excluded_models: list[str] = []

    for attempt in range(MAX_LLM_RETRIES + 1):
        # v3.7: pass input_type so agent loads the correct Skill SOP
        agent = create_scout_agent(excluded_models, input_type=input_type)
        task = create_scout_task(agent, tech_stack, intel_fusion_result=intel_fusion_result)
        crew = Crew(agents=[agent], tasks=[task], process=Process.sequential, verbose=True)

        # 執行 Agent
        logger.info("[START] Scout Pipeline: %s (attempt %d/%d)", tech_stack, attempt + 1, MAX_LLM_RETRIES + 1)
        try:
            from checkpoint import recorder as _cp
            _current_model = get_current_model_name(agent.llm)
            _cp.llm_call("scout", _current_model, "openrouter", f"attempt={attempt+1}")
        except Exception:
            _current_model = "unknown"
        _t_llm = time.time()
        try:
            result = crew.kickoff()
            try:
                _cp.llm_result("scout", _current_model, "SUCCESS",
                               len(str(result)), int((time.time() - _t_llm) * 1000),
                               thinking=str(result)[:1000])
            except Exception:
                pass
            break  # 成功則跳出重試迴圈
        except Exception as e:
            error_str = str(e)
            if "429" in error_str and attempt < MAX_LLM_RETRIES:
                # 標記當前模型為冷卻中，下次迴圈會選擇其他模型
                current_model = get_current_model_name(agent.llm)
                mark_model_failed(current_model)
                excluded_models.append(current_model)
                # 解析 API 回傳的 retry_after 秒數
                import re as _re
                _m = _re.search(r'retry.{1,10}(\d+\.?\d*)s', error_str, _re.IGNORECASE)
                retry_after = float(_m.group(1)) if _m else 0.0
                logger.warning("[RETRY] Scout 429 on %s (attempt %d/%d), api_retry_after=%.0fs",
                              current_model, attempt + 1, MAX_LLM_RETRIES, retry_after)
                try:
                    _cp.llm_retry("scout", current_model, error_str[:200],
                                  attempt + 1, "next_in_waterfall")
                except Exception:
                    pass
                rate_limiter.on_429(retry_after=retry_after, caller="scout")  # 最少 30s
                continue

            try:
                _cp.llm_error("scout", _current_model, error_str[:300])
            except Exception:
                pass
            raise  # 非 429 或已超過重試次數，直接拋出

    result_str = str(result).strip()

    # 解析 JSON（處理可能的 markdown 包裝）
    json_str = result_str
    if "```json" in json_str:
        json_str = json_str.split("```json")[1].split("```")[0].strip()
    elif "```" in json_str:
        parts = json_str.split("```")
        if len(parts) >= 3:
            json_str = parts[1].strip()

    try:
        output = json.loads(json_str)
    except json.JSONDecodeError:
        logger.error("[FAIL] Agent output is not valid JSON: %s", result_str[:200])
        raise ValueError(f"Scout Agent output is not valid JSON: {result_str[:200]}")

    # ── Harness 保障 0.5：先補齊基礎 Schema，再寫入記憶 ───────────────
    from datetime import datetime, timezone

    if "scan_id" not in output:
        logger.warning("[WARN] Output missing required field: scan_id")
        output["scan_id"] = f"scan_{int(time.time())}"
    if "timestamp" not in output:
        logger.warning("[WARN] Output missing required field: timestamp")
        output["timestamp"] = datetime.now(timezone.utc).isoformat()
    if "tech_stack" not in output:
        logger.warning("[WARN] Output missing required field: tech_stack")
        output["tech_stack"] = [
            item.strip().lower()
            for item in str(tech_stack).split(",")
            if item.strip()
        ]
    if "vulnerabilities" not in output:
        logger.warning("[WARN] Output missing required field: vulnerabilities")
        output["vulnerabilities"] = []
    if "summary" not in output:
        logger.warning("[WARN] Output missing required field: summary")
        output["summary"] = {"total": 0, "critical": 0, "high": 0, "medium": 0, "low": 0}

    for vuln in output.get("vulnerabilities", []):
        severity = str(vuln.get("severity", "MEDIUM")).upper()
        if severity == "MODERATE":
            severity = "MEDIUM"
        elif severity not in {"CRITICAL", "HIGH", "MEDIUM", "LOW"}:
            severity = _severity_from_cvss(float(vuln.get("cvss_score") or 0.0))
        vuln["severity"] = severity

    # ── Harness 保障 1：強制 write_memory ──────────────────────
    memory_path = os.path.join(PROJECT_ROOT, "memory", "scout_memory.json")
    need_write = False
    if not os.path.exists(memory_path):
        need_write = True
    else:
        try:
            with open(memory_path, "r", encoding="utf-8") as f:
                content = f.read().strip()
            if not content or content == "{}":
                need_write = True
        except (IOError, json.JSONDecodeError):
            need_write = True

    if need_write:
        logger.warning("[WARN] Agent did not call write_memory -- code forcing write (Harness)")
        write_result = write_memory.run(agent_name="scout", data=json.dumps(output, ensure_ascii=False))
        logger.info("[OK] Forced memory write: %s", write_result)

    # ── Harness 保障 2：基礎 Schema 驗證 ──────────────────────
    required = ["scan_id", "timestamp", "tech_stack", "vulnerabilities", "summary"]
    for field in required:
        if field not in output:
            logger.warning("[WARN] Output missing required field: %s", field)
            if field == "vulnerabilities":
                output["vulnerabilities"] = []
            elif field == "summary":
                output["summary"] = {"total": 0, "critical": 0, "high": 0, "medium": 0, "low": 0}

    # ── Harness 保障 2.5：Cache 注入（Anti-LLM-Omission）──────
    # v5.0：改用 OSV Batch 快取資料（更精確，不會混入 1999 年 CVE）
    # 當 LLM 輸出 0 vulnerabilities，從 Batch 預熱快取直接注入
    if not output.get("vulnerabilities"):
        injected = []
        # 優先用 OSV Batch 預熱快取（最精確）
        if _osv_batch_cache:
            for pkg_name, vuln_list in _osv_batch_cache.items():
                for v in vuln_list:
                    cve_id = v.get("cve_id", "")
                    if not cve_id.startswith(("CVE-", "GHSA-")):
                        continue
                    ghsa_sev = _extract_ghsa_severity_from_osv(v)
                    injected.append({
                        "cve_id":      cve_id,
                        "package":     v.get("package", pkg_name),
                        "cvss_score":  v.get("cvss_score", 0.0),
                        "severity":    v.get("severity", "MEDIUM"),
                        "description": v.get("description", "")[:300],
                        "published":   v.get("published", ""),
                        "is_new":      True,
                        "in_cisa_kev": False,
                        "has_public_exploit": False,
                        "source":      "OSV",
                        "osv_id":      v.get("osv_id", ""),
                        # GHSA 維度：從 OSV database_specific.severity 解析
                        "ghsa_severity": ghsa_sev,
                    })
        else:
            # Fallback：NVD cache（舊路徑，OSV Batch 失敗時使用）
            from tools.nvd_tool import _search_nvd_impl
            for item in (tech_stack or "").split(","):
                pkg = item.strip().split()[0].lower()
                if not pkg:
                    continue
                try:
                    cached_result = json.loads(_search_nvd_impl(pkg))
                    for v in cached_result.get("vulnerabilities", []):
                        cve_id = v.get("cve_id") or v.get("id", "")
                        if not cve_id.startswith("CVE-"):
                            continue
                        injected.append({
                            "cve_id":      cve_id,
                            "package":     v.get("package", pkg),
                            "cvss_score":  v.get("cvss_score", 0.0),
                            "severity":    v.get("severity", "MEDIUM"),
                            "description": v.get("description", "")[:300],
                            "published":   v.get("published_date", ""),
                            "is_new":      True,
                            "in_cisa_kev": v.get("in_cisa_kev", False),
                            "has_public_exploit": v.get("has_public_exploit", False),
                        })
                except Exception as e:
                    logger.warning("[WARN] NVD cache inject failed for %s: %s", pkg, e)

        if injected:
            output["vulnerabilities"] = injected
            logger.warning(
                "[HARNESS 2.5] LLM output 0 CVEs, injected %d CVEs from %s for tech_stack=%s",
                len(injected),
                "OSV batch cache" if _osv_batch_cache else "NVD cache",
                tech_stack[:60]
            )

    # 重新查 NVD 建立真實 CVE 清單 + CVE→package 對應表
    # v5.0：優先用 OSV Batch Cache（已在 Harness 0 預熱），只有 OSV 無資料才查 NVD
    # NVD 查到的結果也加年份過濾（< 2005 → 不列入 real_cves）
    from tools.nvd_tool import _search_nvd_impl
    real_cves = set()
    cve_to_package = {}  # CVE-XXXX-YYYY → package name

    # 收集所有需要查的 package：Agent 輸出的 + tech_stack 裡的
    packages_to_check = set()
    for vuln in output.get("vulnerabilities", []):
        pkg = vuln.get("package", "").lower().strip()
        if pkg:
            packages_to_check.add(pkg)
    # 從 tech_stack 提取（去版本號）
    for item in tech_stack.split(","):
        pkg_name = item.strip().split()[0].lower()
        if pkg_name:
            packages_to_check.add(pkg_name)

    # 優先從 OSV Batch Cache 建 real_cves（最精確，不含 CVE-1999）
    for pkg_name, vuln_list in _osv_batch_cache.items():
        for v in vuln_list:
            cve_id = v.get("cve_id", "")
            if cve_id.startswith(("CVE-", "GHSA-")):
                real_cves.add(cve_id)
                cve_to_package[cve_id] = pkg_name

    # OSV 無資料的套件，嘗試 NVD（但過濾 < 2005 年份）
    for pkg in packages_to_check:
        if any(cve_to_package.get(c) == pkg for c in real_cves):
            continue  # OSV 已有該套件資料
        try:
            import tools.nvd_tool as nvd_mod
            original_page_size = nvd_mod.RESULTS_PER_PAGE
            nvd_mod.RESULTS_PER_PAGE = 100
            try:
                nvd_result = json.loads(_search_nvd_impl(pkg))
            finally:
                nvd_mod.RESULTS_PER_PAGE = original_page_size
            for v in nvd_result.get("vulnerabilities", []):
                cve_id = v["cve_id"]
                # 年份過濾：CVE-1999/2000... 遠古 CVE 不計入 real_cves
                try:
                    cve_year = int(cve_id.split("-")[1])
                    if cve_year < 2005:
                        logger.debug("[FILTER] NVD verification skip ancient CVE: %s", cve_id)
                        continue
                except (IndexError, ValueError):
                    pass
                real_cves.add(cve_id)
                cve_to_package[cve_id] = pkg
        except Exception as e:
            logger.warning("[WARN] CVE verification NVD query failed (%s): %s", pkg, e)

    if real_cves:
        original_count = len(output.get("vulnerabilities", []))
        verified_vulns = []
        suspect_vulns = []  # 可能是真的但 keywordSearch 沒找到
        for vuln in output.get("vulnerabilities", []):
            if vuln.get("cve_id") in real_cves:
                verified_vulns.append(vuln)
            else:
                suspect_vulns.append(vuln)

        # 對可疑的 CVE 做精確查詢（cveId lookup）
        hallucinated = []
        if suspect_vulns:
            import re
            for vuln in suspect_vulns:
                cve_id = vuln.get("cve_id", "")
                if not re.match(r"^CVE-\d{4}-\d{4,}$", cve_id):
                    hallucinated.append(cve_id)
                    continue
                try:
                    resp = requests.get(
                        "https://services.nvd.nist.gov/rest/json/cves/2.0",
                        params={"cveId": cve_id},
                        headers={"apiKey": os.getenv("NVD_API_KEY", "")},
                        timeout=10,
                    )
                    if resp.status_code == 200:
                        data = resp.json()
                        if data.get("totalResults", 0) > 0:
                            logger.info("[OK] CVE exact verification passed: %s", cve_id)
                            verified_vulns.append(vuln)
                            # 補 package：從 description 推斷
                            if not vuln.get("package"):
                                desc = data["vulnerabilities"][0]["cve"]["descriptions"][0]["value"].lower()
                                for pkg in packages_to_check:
                                    if pkg in desc:
                                        vuln["package"] = pkg
                                        cve_to_package[cve_id] = pkg
                                        break
                            continue
                    # NVD 明確回應但找不到 → 才算幻覺
                    hallucinated.append(cve_id)
                except Exception:
                    # NVD API 不可達（timeout/connection）→ 保守保留，不當幻覺處理
                    logger.warning("[WARN] NVD verify unreachable for %s, keeping conservatively", cve_id)
                    verified_vulns.append(vuln)


        if hallucinated:
            logger.warning(
                "[ALERT] Detected %d hallucinated CVEs, removed: %s",
                len(hallucinated), hallucinated
            )
            output["vulnerabilities"] = verified_vulns
            # 重新計算 summary
            output["summary"] = {
                "total": len(verified_vulns),
                "new_since_last_scan": sum(1 for v in verified_vulns if v.get("is_new")),
                "critical": sum(1 for v in verified_vulns if v.get("severity") == "CRITICAL"),
                "high": sum(1 for v in verified_vulns if v.get("severity") == "HIGH"),
                "medium": sum(1 for v in verified_vulns if v.get("severity") == "MEDIUM"),
                "low": sum(1 for v in verified_vulns if v.get("severity") == "LOW"),
            }
            logger.info(
                "[OK] CVE verification result: %d -> %d (removed %d hallucinated)",
                original_count, len(verified_vulns), len(hallucinated)
            )
        else:
            logger.info("[OK] All %d CVEs passed verification", original_count)
    else:
        logger.warning("[WARN] Cannot build real CVE list, skipping verification")

    # ── Harness 保障 4：補全 package 欄位 ──────────────────────
    # Agent 常忘記加 package，用 Layer 3 建好的 cve_to_package 補
    patched_count = 0
    for vuln in output.get("vulnerabilities", []):
        if not vuln.get("package"):
            cve_id = vuln.get("cve_id", "")
            if cve_id in cve_to_package:
                vuln["package"] = cve_to_package[cve_id]
                patched_count += 1
            else:
                # 最後手段：從 description 猜 package
                desc = vuln.get("description", "").lower()
                for pkg in packages_to_check:
                    if pkg in desc:
                        vuln["package"] = pkg
                        patched_count += 1
                        break
                else:
                    vuln["package"] = "unknown"
                    patched_count += 1
    if patched_count:
        logger.info("[OK] Patched %d CVE package fields", patched_count)

    # 先合併 Intel Fusion 結果，再做歷史記憶校正，避免後補漏洞被誤標成新發現。
    output = _merge_intel_fusion_evidence(output, intel_fusion_result)

    # ── Harness 保障 5：校正 is_new 標記 ──────────────────────
    # Agent 常常不正確比對歷史，程式碼代為校正
    try:
        mem_data = {}
        if os.path.exists(memory_path):
            with open(memory_path, "r", encoding="utf-8") as f:
                mem_data = json.load(f)

        # 從 memory 的所有歷史掃描中提取已知 CVE 集合
        historical_cves = set()
        # 新版 memory_tool 直接存 scan 結構
        if "vulnerabilities" in mem_data:
            for v in mem_data.get("vulnerabilities", []):
                historical_cves.add(v.get("cve_id", ""))
        # 舊版 memory 有 latest/history 結構
        elif "latest" in mem_data:
            for v in mem_data.get("latest", {}).get("vulnerabilities", []):
                historical_cves.add(v.get("cve_id", ""))

        output = _reconcile_is_new_flags(output, historical_cves)
        corrected = output.get("_is_new_corrected", 0)
        if corrected:
            logger.info("[OK] Corrected %d CVE is_new flags", corrected)
    except Exception as e:
        logger.warning("[WARN] is_new correction failed: %s", e)

    # ── Harness 保障 3.5：CVE 年份過濾器（最後防線）──────────────
    # 不管哪個 Agent 或 NVD 返回了遠古 CVE，在 Scout 輸出前一律攔截
    # 根據：CISA KEV 最早 2002 年，現代套件漏洞幾乎都 >= 2005
    # 例外：GHSA- 前綴（OSV/GitHub Advisory，不用年份過濾）
    CVE_YEAR_MIN = 2005
    ancient_removed = []
    fresh_vulns = []
    for vuln in output.get("vulnerabilities", []):
        cve_id = vuln.get("cve_id", "")
        if cve_id.startswith("GHSA-") or not cve_id.startswith("CVE-"):
            fresh_vulns.append(vuln)  # GHSA/非標準 ID 保留
            continue
        try:
            cve_year = int(cve_id.split("-")[1])
            if cve_year < CVE_YEAR_MIN:
                ancient_removed.append(cve_id)
                logger.warning(
                    "[HARNESS 3.5] Ancient CVE removed (year=%d < %d): %s",
                    cve_year, CVE_YEAR_MIN, cve_id
                )
            else:
                fresh_vulns.append(vuln)
        except (IndexError, ValueError):
            fresh_vulns.append(vuln)  # 解析失敗保留（保守）

    if ancient_removed:
        output["vulnerabilities"] = fresh_vulns
        output["ancient_cves_removed"] = ancient_removed  # 審計用
        logger.warning(
            "[HARNESS 3.5] Removed %d ancient CVEs (< %d): %s",
            len(ancient_removed), CVE_YEAR_MIN, ancient_removed
        )
    # ────────────────────────────────────────────────────────────

    # ── 最終 Summary 校正（確保一致性）──────────────────────────
    vulns = output.get("vulnerabilities", [])
    output["summary"] = {
        "total": len(vulns),
        "new_since_last_scan": sum(1 for v in vulns if v.get("is_new")),
        "critical": sum(1 for v in vulns if v.get("severity") == "CRITICAL"),
        "high": sum(1 for v in vulns if v.get("severity") == "HIGH"),
        "medium": sum(1 for v in vulns if v.get("severity") == "MEDIUM"),
        "low": sum(1 for v in vulns if v.get("severity") == "LOW"),
    }

    vuln_count = output["summary"]["total"]
    new_count = output["summary"]["new_since_last_scan"]
    logger.info(
        "[OK] Scout Pipeline complete: %d CVEs, %d new", vuln_count, new_count
    )

    return output