| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
| import json |
| import os |
| import time |
| import hashlib |
| import logging |
| from datetime import datetime, timezone |
|
|
| import requests |
|
|
| logger = logging.getLogger("ThreatHunter") |
|
|
| |
| |
| |
|
|
| NVD_API_BASE = "https://services.nvd.nist.gov/rest/json/cves/2.0" |
| RESULTS_PER_PAGE = 10 |
| REQUEST_TIMEOUT = 30 |
|
|
| |
| RATE_LIMIT_WITH_KEY = 0.6 |
| RATE_LIMIT_WITHOUT_KEY = 6.0 |
| MAX_RETRIES = 2 |
|
|
| |
| CACHE_DIR = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "data") |
| CACHE_TTL = 3600 * 24 |
|
|
| |
| PACKAGE_MAP_PATH = os.path.join(CACHE_DIR, "package_map.json") |
|
|
| |
| _last_request_time = 0.0 |
|
|
|
|
| |
| |
| |
|
|
| def _load_package_map() -> dict: |
| """่ผๅ
ฅๅฅไปถๅ็จฑๅฐๆ่กจ๏ผๅคฑๆๅๅณ็ฉบ dict""" |
| try: |
| if os.path.exists(PACKAGE_MAP_PATH): |
| with open(PACKAGE_MAP_PATH, "r", encoding="utf-8") as f: |
| return json.load(f) |
| except (json.JSONDecodeError, IOError) as e: |
| logger.warning("[WARN] package_map.json load failed: %s", e) |
| return {} |
|
|
|
|
| def _normalize_package_name(raw_name: str) -> list[str]: |
| """ |
| ๅฐไฝฟ็จ่
่ผธๅ
ฅ็ๅฅไปถๅ็จฑๆญฃ่ฆๅ๏ผๅๅณๅฏ่ฝ็ๆฅ่ฉขๅ็จฑๅ่กจใ |
| ็ฌฌไธๅๆฏๆๅฏ่ฝ็๏ผๅพ็บๆฏๅฅๅๅ้ธใ |
| |
| ไพๅฆ๏ผ |
| "postgres" โ ["postgresql", "postgres"] |
| "django" โ ["django"] |
| """ |
| name = raw_name.strip().lower() |
| |
| name = name.split()[0] if " " in name else name |
|
|
| pkg_map = _load_package_map() |
| candidates = [] |
|
|
| if name in pkg_map: |
| mapped = pkg_map[name] |
| candidates.append(mapped) |
| if mapped != name: |
| candidates.append(name) |
| else: |
| candidates.append(name) |
| |
| for alias, target in pkg_map.items(): |
| if target == name and alias not in candidates: |
| candidates.append(alias) |
|
|
| return candidates |
|
|
|
|
| def _get_cache_path(package_name: str) -> str: |
| """ๅๅพ้ข็ทๅฟซๅๆชๆก่ทฏๅพ""" |
| safe_name = hashlib.md5(package_name.encode()).hexdigest()[:12] |
| return os.path.join(CACHE_DIR, f"nvd_cache_{package_name}_{safe_name}.json") |
|
|
|
|
| def _read_cache(package_name: str, allow_stale: bool = False) -> dict | None: |
| """่ฎๅ้ข็ทๅฟซๅ๏ผ้ๆๆไธๅญๅจๅๅณ Noneใ""" |
| cache_path = _get_cache_path(package_name) |
| try: |
| if os.path.exists(cache_path): |
| with open(cache_path, "r", encoding="utf-8") as f: |
| cached = json.load(f) |
| cached_time = cached.get("_cached_at", 0) |
| cache_age = time.time() - cached_time |
| if cache_age < CACHE_TTL: |
| logger.info("[OK] NVD cache hit: %s", package_name) |
| return cached |
| if allow_stale: |
| logger.warning("[WARN] NVD stale cache fallback: %s (age=%.0fs)", package_name, cache_age) |
| return cached |
| logger.info("[INFO] NVD cache expired: %s", package_name) |
| except (json.JSONDecodeError, IOError) as e: |
| logger.warning("[WARN] NVD cache read failed: %s", e) |
| return None |
|
|
|
|
| def _write_cache(package_name: str, data: dict) -> None: |
| """ๅฏซๅ
ฅ้ข็ทๅฟซๅ""" |
| try: |
| os.makedirs(CACHE_DIR, exist_ok=True) |
| cache_path = _get_cache_path(package_name) |
| data["_cached_at"] = time.time() |
| with open(cache_path, "w", encoding="utf-8") as f: |
| json.dump(data, f, ensure_ascii=False, indent=2) |
| except (IOError, PermissionError) as e: |
| logger.warning("[WARN] NVD cache write failed: %s", e) |
|
|
|
|
| def _rate_limit() -> None: |
| """Rate limiter โ ็ขบไฟไธ่ถ
้ NVD API ้้""" |
| global _last_request_time |
| api_key = os.getenv("NVD_API_KEY", "") |
| interval = RATE_LIMIT_WITH_KEY if api_key else RATE_LIMIT_WITHOUT_KEY |
| elapsed = time.time() - _last_request_time |
| if elapsed < interval: |
| wait = interval - elapsed |
| logger.info("[WAIT] NVD rate limit: waiting %.1fs", wait) |
| time.sleep(wait) |
| _last_request_time = time.time() |
|
|
|
|
| def _extract_cvss(metrics: dict) -> tuple[float, str]: |
| """ |
| ๅพ NVD metrics ไธญๆๅ CVSS ๅๆธๅๅด้ๅบฆใ |
| ๅชๅ
v3.1 โ v3.0 โ v2 โ ้ ่จญๅผใ |
| |
| Returns: |
| (cvss_score, severity) |
| """ |
| |
| v31 = metrics.get("cvssMetricV31", []) |
| if v31: |
| data = v31[0].get("cvssData", {}) |
| score = data.get("baseScore", 0.0) |
| severity = data.get("baseSeverity", "") |
| if score and severity: |
| return float(score), severity.upper() |
|
|
| |
| v30 = metrics.get("cvssMetricV30", []) |
| if v30: |
| data = v30[0].get("cvssData", {}) |
| score = data.get("baseScore", 0.0) |
| severity = data.get("baseSeverity", "") |
| if score and severity: |
| return float(score), severity.upper() |
|
|
| |
| v2 = metrics.get("cvssMetricV2", []) |
| if v2: |
| data = v2[0].get("cvssData", {}) |
| score = data.get("baseScore", 0.0) |
| if score: |
| severity = _cvss_to_severity(float(score)) |
| return float(score), severity |
|
|
| return 0.0, "LOW" |
|
|
|
|
| def _cvss_to_severity(score: float) -> str: |
| """CVSS ๅๆธ โ ๅด้ๅบฆ็ญ็ด่ฝๆ๏ผๅ
ๅจ API ๆชๆไพ severity ๆไฝฟ็จ๏ผ""" |
| if score >= 9.0: |
| return "CRITICAL" |
| elif score >= 7.0: |
| return "HIGH" |
| elif score >= 4.0: |
| return "MEDIUM" |
| else: |
| return "LOW" |
|
|
|
|
| def _extract_affected_versions(configurations: list) -> str: |
| """ๅ่ฉฆๅพ NVD configurations ๆๅๅๅฝฑ้ฟ็ๆฌ็ฏๅ""" |
| versions = [] |
| try: |
| for config in configurations: |
| for node in config.get("nodes", []): |
| for cpe_match in node.get("cpeMatch", []): |
| if cpe_match.get("vulnerable", False): |
| cpe = cpe_match.get("criteria", "") |
| version_start = cpe_match.get("versionStartIncluding", "") |
| version_end = cpe_match.get("versionEndExcluding", "") |
| version_end_incl = cpe_match.get("versionEndIncluding", "") |
|
|
| if version_end: |
| versions.append(f"< {version_end}") |
| elif version_end_incl: |
| versions.append(f"<= {version_end_incl}") |
| elif version_start: |
| versions.append(f">= {version_start}") |
| elif cpe: |
| |
| parts = cpe.split(":") |
| if len(parts) > 5 and parts[5] not in ("*", "-"): |
| versions.append(parts[5]) |
| except (KeyError, IndexError, TypeError): |
| pass |
|
|
| return ", ".join(versions[:3]) if versions else "" |
|
|
|
|
| def _extract_description(descriptions: list) -> str: |
| """ๆๅ่ฑๆๆ่ฟฐ๏ผๅชๅ
en๏ผfallback ๅฐ็ฌฌไธๅ""" |
| for desc in descriptions: |
| if desc.get("lang", "") == "en": |
| return desc.get("value", "") |
| if descriptions: |
| return descriptions[0].get("value", "") |
| return "" |
|
|
|
|
| |
| |
| |
|
|
| def _query_nvd_api(keyword: str) -> dict | None: |
| """ |
| ๅผๅซ NVD API๏ผไปฅ keywordSearch ๅ
จๆๆๅฐใ |
| ๅคฑๆๅๅณ Noneใ |
| """ |
| api_key = os.getenv("NVD_API_KEY", "") |
| headers = {"apiKey": api_key} if api_key else {} |
| params = { |
| "keywordSearch": keyword, |
| "resultsPerPage": RESULTS_PER_PAGE, |
| } |
| for attempt in range(1, MAX_RETRIES + 1): |
| _rate_limit() |
| try: |
| logger.info("[QUERY] NVD keywordSearch: %s (attempt %d)", keyword, attempt) |
| response = requests.get(NVD_API_BASE, params=params, headers=headers, timeout=REQUEST_TIMEOUT) |
| if response.status_code == 200: |
| return response.json() |
| if response.status_code == 403: |
| logger.warning("[WARN] NVD API 403 (rate limited), retrying...") |
| time.sleep(RATE_LIMIT_WITHOUT_KEY * 2) |
| continue |
| if response.status_code >= 500: |
| logger.warning("[WARN] NVD API %d (server error)", response.status_code) |
| time.sleep(2) |
| continue |
| logger.warning("[WARN] NVD API returned %d: %s", response.status_code, response.text[:200]) |
| return None |
| except requests.exceptions.Timeout: |
| logger.warning("[WARN] NVD API timeout (%ds)", REQUEST_TIMEOUT) |
| continue |
| except requests.exceptions.ConnectionError: |
| logger.warning("[WARN] NVD API connection failed (network issue)") |
| continue |
| except requests.exceptions.RequestException as e: |
| logger.warning("[WARN] NVD API request error: %s", e) |
| return None |
| return None |
|
|
|
|
| def _query_nvd_api_cpe(cpe_name: str) -> dict | None: |
| """ |
| ๅผๅซ NVD API๏ผไปฅ cpeName ็ฒพ็ขบๆๅฐใ |
| ๆฏ keywordSearch ็ฒพ็ขบ โ ๅชๅๅณๅๅฝฑ้ฟ CPE ๆฏๅฐๆๅ็ CVE๏ผ |
| ้ฟๅ
่ชๆณ้้ตๅญ๏ผevalใhtml ็ญ๏ผๆฑกๆ็ตๆใ |
| ๅคฑๆๅๅณ Noneใ |
| """ |
| api_key = os.getenv("NVD_API_KEY", "") |
| headers = {"apiKey": api_key} if api_key else {} |
| params = { |
| "cpeName": cpe_name, |
| "resultsPerPage": RESULTS_PER_PAGE, |
| } |
| for attempt in range(1, MAX_RETRIES + 1): |
| _rate_limit() |
| try: |
| logger.info("[QUERY] NVD cpeName: %s (attempt %d)", cpe_name, attempt) |
| response = requests.get(NVD_API_BASE, params=params, headers=headers, timeout=REQUEST_TIMEOUT) |
| if response.status_code == 200: |
| return response.json() |
| if response.status_code == 403: |
| logger.warning("[WARN] NVD API 403 (rate limited), retrying...") |
| time.sleep(RATE_LIMIT_WITHOUT_KEY * 2) |
| continue |
| if response.status_code >= 500: |
| time.sleep(2) |
| continue |
| logger.warning("[WARN] NVD cpeName returned %d", response.status_code) |
| return None |
| except requests.exceptions.Timeout: |
| continue |
| except requests.exceptions.ConnectionError: |
| continue |
| except requests.exceptions.RequestException as e: |
| logger.warning("[WARN] NVD cpe request error: %s", e) |
| return None |
| return None |
|
|
|
|
| def _extract_cpe_vendors(configurations: list) -> list[str]: |
| """ |
| ๅพ NVD configurations ๆๅๅๅฝฑ้ฟ CPE ็ vendor:product ็ตๅใ |
| ไพ Analyst CPE ็ธ้ๆง้ๆฟพไฝฟ็จใ |
| ๅๅณๆ ผๅผๅฆ๏ผ["nodejs:node.js", "expressjs:express"] |
| """ |
| vendors = [] |
| try: |
| for config in configurations: |
| for node in config.get("nodes", []): |
| for cpe_match in node.get("cpeMatch", []): |
| if cpe_match.get("vulnerable", False): |
| cpe = cpe_match.get("criteria", "") |
| parts = cpe.split(":") |
| |
| if len(parts) >= 5: |
| vendor_product = f"{parts[3]}:{parts[4]}" |
| if vendor_product not in vendors: |
| vendors.append(vendor_product) |
| except (KeyError, IndexError, TypeError): |
| pass |
| return vendors[:10] |
|
|
|
|
| def _parse_nvd_response(raw: dict, package_name: str) -> dict: |
| """ |
| ๅฐ NVD API ๅๅง response ่ฝๆ็บ Tool ่ผธๅบๆ ผๅผใ |
| v3.8: ่ผธๅบ cpe_vendors ไพ Analyst ๅ็ธ้ๆง้ฉ่ญใ |
| """ |
| vulnerabilities = [] |
| raw_vulns = raw.get("vulnerabilities", []) |
|
|
| for item in raw_vulns: |
| cve = item.get("cve", {}) |
| cve_id = cve.get("id", "") |
|
|
| |
| if not cve_id.startswith("CVE-"): |
| continue |
|
|
| description = _extract_description(cve.get("descriptions", [])) |
| metrics = cve.get("metrics", {}) |
| cvss_score, severity = _extract_cvss(metrics) |
| published = cve.get("published", "") |
| configurations = cve.get("configurations", []) |
| affected_versions = _extract_affected_versions(configurations) |
| cpe_vendors = _extract_cpe_vendors(configurations) |
|
|
| vulnerabilities.append({ |
| "cve_id": cve_id, |
| "cvss_score": cvss_score, |
| "severity": severity, |
| "description": description[:500], |
| "published": published, |
| "affected_versions": affected_versions, |
| "cpe_vendors": cpe_vendors, |
| }) |
|
|
| |
| vulnerabilities.sort(key=lambda v: v["cvss_score"], reverse=True) |
|
|
| return { |
| "package": package_name, |
| "source": "NVD", |
| "count": len(vulnerabilities), |
| "vulnerabilities": vulnerabilities, |
| } |
|
|
|
|
| |
| |
| PACKAGE_CPE_MAP: dict[str, str] = { |
| |
| "express": "cpe:2.3:a:expressjs:express:*:*:*:*:*:*:*:*", |
| "node": "cpe:2.3:a:nodejs:node.js:*:*:*:*:*:*:*:*", |
| "nodejs": "cpe:2.3:a:nodejs:node.js:*:*:*:*:*:*:*:*", |
| "lodash": "cpe:2.3:a:lodash:lodash:*:*:*:*:*:*:*:*", |
| "axios": "cpe:2.3:a:axios:axios:*:*:*:*:*:node.js:*:*", |
| "webpack": "cpe:2.3:a:webpack:webpack:*:*:*:*:*:node.js:*:*", |
| "moment": "cpe:2.3:a:momentjs:moment.js:*:*:*:*:*:node.js:*:*", |
| "next": "cpe:2.3:a:vercel:next.js:*:*:*:*:*:node.js:*:*", |
| "nextjs": "cpe:2.3:a:vercel:next.js:*:*:*:*:*:node.js:*:*", |
| "react": "cpe:2.3:a:facebook:react:*:*:*:*:*:node.js:*:*", |
| "vue": "cpe:2.3:a:vuejs:vue.js:*:*:*:*:*:node.js:*:*", |
| "angular": "cpe:2.3:a:google:angular.js:*:*:*:*:*:node.js:*:*", |
| |
| "django": "cpe:2.3:a:djangoproject:django:*:*:*:*:*:*:*:*", |
| "flask": "cpe:2.3:a:palletsprojects:flask:*:*:*:*:*:*:*:*", |
| "requests": "cpe:2.3:a:python-requests:requests:*:*:*:*:*:*:*:*", |
| "pillow": "cpe:2.3:a:python:pillow:*:*:*:*:*:*:*:*", |
| "pyyaml": "cpe:2.3:a:pyyaml:pyyaml:*:*:*:*:*:*:*:*", |
| "cryptography": "cpe:2.3:a:cryptography.io:cryptography:*:*:*:*:*:python:*:*", |
| "jinja2": "cpe:2.3:a:palletsprojects:jinja:*:*:*:*:*:python:*:*", |
| "werkzeug": "cpe:2.3:a:palletsprojects:werkzeug:*:*:*:*:*:python:*:*", |
| "sqlalchemy": "cpe:2.3:a:sqlalchemy:sqlalchemy:*:*:*:*:*:*:*:*", |
| |
| "log4j": "cpe:2.3:a:apache:log4j:*:*:*:*:*:*:*:*", |
| "spring": "cpe:2.3:a:pivotal_software:spring_framework:*:*:*:*:*:*:*:*", |
| "struts": "cpe:2.3:a:apache:struts:*:*:*:*:*:*:*:*", |
| |
| "go": "cpe:2.3:a:golang:go:*:*:*:*:*:*:*:*", |
| |
| "redis": "cpe:2.3:a:redis:redis:*:*:*:*:*:*:*:*", |
| "postgresql": "cpe:2.3:a:postgresql:postgresql:*:*:*:*:*:*:*:*", |
| "postgres": "cpe:2.3:a:postgresql:postgresql:*:*:*:*:*:*:*:*", |
| "mysql": "cpe:2.3:a:mysql:mysql:*:*:*:*:*:*:*:*", |
| "mongodb": "cpe:2.3:a:mongodb:mongodb:*:*:*:*:*:*:*:*", |
| "nginx": "cpe:2.3:a:nginx:nginx:*:*:*:*:*:*:*:*", |
| "openssl": "cpe:2.3:a:openssl:openssl:*:*:*:*:*:*:*:*", |
| } |
|
|
|
|
| def _search_nvd_impl(package_name: str) -> str: |
| """ |
| search_nvd ๆ ธๅฟๅฏฆไฝ๏ผv3.8๏ผใ |
| |
| ๆๅฐ็ญ็ฅๅชๅ
้ ๅบ๏ผ |
| 1. ๅฟซๅๅฝไธญ โ ็ดๆฅๅๅณ๏ผCache-First๏ผ |
| 2. CPE ็ฒพ็ขบๆๅฐ๏ผPACKAGE_CPE_MAP ๅฝไธญๆ๏ผโ ๅชๅๅณ็ๆญฃๅฝฑ้ฟ่ฉฒๅฅไปถ็ CVE |
| 3. Keyword ๅ
จๆๆๅฐ๏ผCPE ๆชๅฝไธญ fallback๏ผ |
| 4. ้ข็ทๅฟซๅ fallback |
| 5. ๅๅณ็ฉบ็ตๆ๏ผ็ตไธ crash๏ผ |
| """ |
| try: |
| candidates = _normalize_package_name(package_name) |
| logger.info("[QUERY] NVD package: %s -> candidates: %s", package_name, candidates) |
|
|
| |
| for keyword in candidates: |
| cached = _read_cache(keyword) |
| if cached: |
| cached.pop("_cached_at", None) |
| cached["fallback_used"] = False |
| logger.info("[OK] NVD cache hit: %s -> %d CVEs", |
| keyword, len(cached.get("vulnerabilities", []))) |
| return json.dumps(cached, ensure_ascii=False, indent=2) |
|
|
| |
| primary = candidates[0] |
| cpe_name = PACKAGE_CPE_MAP.get(primary) |
| if cpe_name: |
| raw = _query_nvd_api_cpe(cpe_name) |
| if raw is not None: |
| result = _parse_nvd_response(raw, package_name) |
| result["search_mode"] = "cpe" |
| if result["count"] > 0: |
| _write_cache(primary, result) |
| logger.info("[OK] NVD CPE query: %s -> %d CVEs", package_name, result["count"]) |
| return json.dumps(result, ensure_ascii=False, indent=2) |
| logger.info("[INFO] NVD CPE no results for: %s", primary) |
|
|
| |
| for keyword in candidates: |
| raw = _query_nvd_api(keyword) |
| if raw is not None: |
| result = _parse_nvd_response(raw, package_name) |
| result["search_mode"] = "keyword" |
| if result["count"] > 0: |
| _write_cache(keyword, result) |
| logger.info("[OK] NVD keyword query: %s -> %d CVEs", package_name, result["count"]) |
| return json.dumps(result, ensure_ascii=False, indent=2) |
| logger.info("[INFO] NVD keyword no results for: %s, trying next alias", keyword) |
| continue |
| cached = _read_cache(keyword, allow_stale=True) |
| if cached: |
| cached.pop("_cached_at", None) |
| cached["fallback_used"] = True |
| cached["cache_stale"] = True |
| cached["error"] = f"NVD API unavailable, using cached data for '{keyword}'" |
| return json.dumps(cached, ensure_ascii=False, indent=2) |
|
|
| |
| empty_result = { |
| "package": package_name, |
| "source": "NVD", |
| "count": 0, |
| "vulnerabilities": [], |
| "search_mode": "none", |
| "error": f"No vulnerabilities found for '{package_name}' (tried: {candidates})", |
| "fallback_used": False, |
| } |
| logger.info("[INFO] NVD no results for: %s", package_name) |
| return json.dumps(empty_result, ensure_ascii=False, indent=2) |
|
|
| except Exception as e: |
| logger.error("[FAIL] NVD Tool unexpected error: %s", e, exc_info=True) |
| return json.dumps({ |
| "package": package_name, "source": "NVD", "count": 0, |
| "vulnerabilities": [], "error": f"Unexpected error: {str(e)}", |
| "fallback_used": False, |
| }, ensure_ascii=False, indent=2) |
|
|
|
|
| |
| |
| |
|
|
| def _create_tool(): |
| """ๅปถ้ฒๅปบ็ซ CrewAI Tool๏ผๅ
ๅจ Agent ๅฏฆ้ไฝฟ็จๆๆ import""" |
| from crewai.tools import tool |
|
|
| @tool("search_nvd") |
| def search_nvd(package_name: str) -> str: |
| """ๆฅ่ฉข NVD (National Vulnerability Database) ไธญๆๅฎๅฅไปถ็ๅทฒ็ฅๆผๆดใ |
| ่ผธๅ
ฅๅฅไปถๅ็จฑ๏ผๅฆ djangoใredisใpostgresql๏ผ๏ผๅๅณ่ฉฒๅฅไปถ็ CVE ๆผๆดๆธ
ๅฎ๏ผJSON ๆ ผๅผ๏ผใ |
| ๅ
ๅซ CVE ็ทจ่ใCVSS ๅๆธใๅด้ๅบฆใๆ่ฟฐใๅๅฝฑ้ฟ็ๆฌ็ญ่ณ่จใ |
| ่ฅ API ไธๅฏ็จๆ่ชๅไฝฟ็จ้ข็ทๅฟซๅใ""" |
| return _search_nvd_impl(package_name) |
|
|
| return search_nvd |
|
|
|
|
| |
|
|
| class _LazyToolLoader: |
| def __init__(self): |
| self._tool = None |
|
|
| def _load(self): |
| if self._tool is None: |
| self._tool = _create_tool() |
|
|
| @property |
| def search_nvd(self): |
| self._load() |
| return self._tool |
|
|
|
|
| _loader = _LazyToolLoader() |
|
|
|
|
| def __getattr__(name): |
| """ๆจก็ตๅฑค็ด __getattr__๏ผๆฏๆด from tools.nvd_tool import search_nvd""" |
| if name == "search_nvd": |
| return _loader.search_nvd |
| raise AttributeError(f"module 'tools.nvd_tool' has no attribute {name!r}") |
|
|