# Repository path: Threat_Hunter / tools / package_extractor.py
# Committer: EricChen2005
# Commit message: Deploy ThreatHunter - AMD MI300X + Qwen2.5-32B
# Commit: c8d30bc
# tools/package_extractor.py
# Purpose: deterministic package-extraction bridge layer
# Architecture basis: Harness Engineering — Constrain + Graceful Degradation
#
# Core responsibilities:
# Extract third-party package names from the imports list produced by Security Guard.
# Filter out the Python standard library, relative imports, and noise modules.
# Return at most MAX_PACKAGES packages (protects the rate limit).
#
# Key design principles:
# - Fully deterministic, no LLM dependency
# - Return an empty list when the input is bad (never crash)
# - This module never generates CVEs and makes no security judgements
import logging
import re
from typing import Any
logger = logging.getLogger("ThreatHunter.package_extractor")
# ══════════════════════════════════════════════════════════════
# Python standard-library blacklist (CPython 3.12; only commonly used modules are listed)
# Full list: https://docs.python.org/3/library/index.html
# ══════════════════════════════════════════════════════════════
# Python standard-library module names that must never be reported as
# third-party packages.  Lookups are performed with the lower-cased top-level
# module name, so any module whose canonical spelling contains upper-case
# letters (e.g. "cProfile") is listed in lower case as well.
STDLIB_BLACKLIST: frozenset[str] = frozenset({
    # Built-ins
    "__future__", "__main__", "builtins",
    # Text and strings
    "string", "re", "difflib", "textwrap", "unicodedata", "readline",
    "rlcompleter", "codecs", "encodings",
    # Data types
    "datetime", "calendar", "collections", "heapq", "bisect",
    "array", "weakref", "types", "copy", "pprint", "reprlib",
    "enum", "graphlib", "dataclasses",
    # Math
    "numbers", "math", "cmath", "decimal", "fractions", "random",
    "statistics",
    # Functional programming
    "itertools", "functools", "operator",
    # Files and I/O
    "io", "time", "logging", "os", "os.path", "pathlib",
    "fileinput", "stat", "filecmp", "shutil", "tempfile",
    "glob", "fnmatch", "linecache", "pickle", "shelve",
    "marshal", "dbm", "sqlite3", "csv", "configparser",
    "tomllib", "netrc", "plistlib",
    # Compression
    "zlib", "gzip", "bz2", "lzma", "zipfile", "tarfile",
    # Data formats
    "json", "html", "html.parser", "xml", "xml.etree",
    "xml.etree.ElementTree", "xml.dom", "xml.sax", "struct",
    # Cryptography
    "hashlib", "hmac", "secrets",
    # Generic runtime / OS services
    "sys", "sysconfig", "warnings", "contextlib",
    "abc", "atexit", "traceback", "gc", "inspect", "site",
    "codeop", "code", "zipimport", "pkgutil", "modulefinder",
    "importlib", "ast", "dis", "py_compile",
    # Concurrency
    "threading", "multiprocessing", "concurrent",
    "concurrent.futures", "subprocess", "sched", "queue",
    "asyncio", "socket", "ssl", "select", "selectors",
    "signal", "mmap", "ctypes",
    # Networking
    "urllib", "urllib.parse", "urllib.request", "urllib.error",
    "urllib.response", "urllib.robotparser",
    "http", "http.client", "http.server", "http.cookies",
    "http.cookiejar", "ftplib", "poplib", "imaplib",
    "smtplib", "uuid", "socketserver", "xmlrpc",
    "email", "mailbox", "mimetypes",
    # Testing, debugging and profiling
    # ("cprofile" is the lower-cased form actually used for lookups;
    #  "cProfile" is kept for callers that compare the raw spelling.)
    "unittest", "doctest", "pdb", "profile", "cProfile", "cprofile",
    "timeit", "trace", "tracemalloc",
    # Typing
    "typing", "typing_extensions",
    # Other common modules
    "platform", "errno", "locale", "gettext",
    "argparse", "getopt", "getpass", "curses", "turtle",
    "base64", "binascii", "quopri", "uu",
})
# Module-name values that indicate a relative import: such imports start with
# "." in source, but after parsing the module field is sometimes an empty
# string (or missing) with only a numeric level left.
_RELATIVE_IMPORT_MODULE_PREFIXES = frozenset(("", None))

# Upper bound on the number of packages returned (protects the LLM rate limit).
MAX_PACKAGES = 8

# Implausible package names are rejected: names that are too short or that
# contain anything other than ASCII letters, digits, "_" and "-".
_MIN_PACKAGE_NAME_LEN = 2
_INVALID_NAME_RE = re.compile(r"[^a-zA-Z0-9_\-]")
# Node.js built-in modules (must not be treated as npm packages when querying NVD).
# Source: https://nodejs.org/api/ (Node.js 20 LTS)
NODEJS_BUILTIN_BLACKLIST: frozenset[str] = frozenset({
    # File system and paths (including the two listed "node:" specifiers)
    "fs", "path", "node:fs", "node:path",
    # Networking and URLs
    "http", "https", "http2", "net", "tls", "dns", "dgram",
    "url", "querystring", "punycode",
    # Streams, buffers and data handling
    "stream", "buffer", "string_decoder", "zlib", "crypto", "events",
    # Process and runtime
    "os", "child_process", "cluster", "worker_threads", "process",
    "timers", "module", "vm", "v8", "wasi", "domain",
    # Console and terminal
    "console", "readline", "repl", "tty",
    # Utilities and diagnostics
    "assert", "util", "perf_hooks", "async_hooks", "inspector",
    "trace_events", "diagnostics_channel",
})
# Go standard-library blacklist (must not be treated as third-party packages
# when querying NVD).
# Source: https://pkg.go.dev/std (Go 1.22)
# Format: both the first segment of a Go import path and common full paths,
# because Go separates path segments with "/" rather than ".".
GO_STDLIB_BLACKLIST: frozenset[str] = frozenset({
    # Top-level package names (alphabetical).
    "archive", "builtin", "bytes", "cmp", "compress", "container",
    "context", "crypto", "database", "debug", "embed", "encoding",
    "errors", "expvar", "flag", "fmt", "go", "hash", "html", "image",
    "index", "internal", "io", "iter", "log", "maps", "math", "mime",
    "net", "os", "path", "plugin", "reflect", "regexp", "runtime",
    "slices", "sort", "strconv", "strings", "sync", "testing", "text",
    "time", "unicode", "unsafe",
    # Common full import paths (alphabetical).  The extractor compares the
    # raw, stripped module string against this set, so imports that arrive
    # as complete paths such as "net/http" are matched here.
    "crypto/md5", "crypto/rand", "crypto/sha256", "crypto/tls",
    "database/sql",
    "encoding/base64", "encoding/csv", "encoding/json", "encoding/xml",
    "html/template", "io/ioutil", "log/slog",
    "net/http", "net/url", "os/exec", "os/signal",
    "path/filepath", "sync/atomic", "testing/fstest", "text/template",
})
# Java JDK standard-library blacklist (must not be treated as third-party
# packages when querying NVD).
# e.g. `import java.io.ObjectInputStream` and `import java.sql.Statement`
# are both JDK built-ins.  Querying NVD for such names only produces noise,
# or makes the Intel Fusion forceRun fail.
JAVA_STDLIB_BLACKLIST: frozenset[str] = frozenset({
    # Top-level prefixes: java.* and javax.*
    "java", "javax",
    # Common java.* sub-packages (guards direct comparison against the
    # full module string)
    "java.applet", "java.awt", "java.beans", "java.io", "java.lang",
    "java.management", "java.math", "java.net", "java.nio", "java.rmi",
    "java.security", "java.sql", "java.swing", "java.text", "java.time",
    "java.util",
    # Common javax.* sub-packages
    "javax.crypto", "javax.naming", "javax.net", "javax.security",
    "javax.sql", "javax.swing", "javax.xml",
    # Android / Kotlin built-ins
    "android", "dalvik", "kotlin",
})
# Import-path prefixes that are filtered as JVM platform namespaces; the
# extractor tests the cleaned module string with str.startswith against each.
JVM_STDLIB_PREFIXES: tuple[str, ...] = ("java.", "javax.", "org.w3c.", "org.xml.sax.")
# Java/C#-style source imports are usually namespaces, not package names that
# can be looked up for CVEs directly.  Real package CVEs should preferably come
# from manifests such as pom.xml, package.json or requirements.txt.
GENERIC_NAMESPACE_ROOTS: frozenset[str] = frozenset(
    ("com", "org", "net", "io", "edu", "gov", "mil")
)
def _is_valid_package_name(name: str) -> bool:
    """
    Decide whether ``name`` looks like a plausible PyPI/npm package name.

    A name is accepted only when all of the following hold:
    - it is non-empty and at least ``_MIN_PACKAGE_NAME_LEN`` characters long
    - it is not made up solely of digits
    - it contains no character matched by ``_INVALID_NAME_RE`` (anything
      other than ASCII letters, digits, "_" and "-")
    """
    if not name:
        return False
    long_enough = len(name) >= _MIN_PACKAGE_NAME_LEN
    return (
        long_enough
        and not name.isdigit()
        and _INVALID_NAME_RE.search(name) is None
    )
def _normalize_package_name(module_str: str) -> str | None:
"""
ๅฐ‡ๆจก็ต„่ทฏๅพ‘ๆญฃ่ฆๅŒ–็‚บ้ ‚ๅฑคๅฅ—ไปถๅ็จฑใ€‚
ไพ‹ๅฆ‚๏ผš
"flask.views" โ†’ "flask"
"PIL.Image" โ†’ "PIL"
"requests" โ†’ "requests"
"os.path" โ†’ "os"๏ผˆๅพŒ็บŒ็”ฑ้ป‘ๅๅ–ฎ้Žๆฟพ๏ผ‰
"" โ†’ None๏ผˆ็›ธๅฐๅŒฏๅ…ฅ๏ผ‰
"""
if not module_str:
return None
# ๅŽปๆމ็‰ˆๆœฌ่™Ÿ๏ผˆๅฆ‚ "django 4.2" โ†’ "django"๏ผ‰
module_str = module_str.strip().split()[0]
# ๅ–้ ‚ๅฑคๆจก็ต„
top_level = module_str.split(".")[0].strip()
if not top_level:
return None
return top_level.lower()
def extract_third_party_packages(
    imports: list[dict[str, Any]],
    max_packages: int = MAX_PACKAGES,
) -> list[str]:
    """
    Extract third-party package names from a list of import entries.

    Harness design:
    - deterministic logic, no LLM dependency
    - standard-library / built-in modules of Python, Node.js, Go and the JVM
      are filtered out
    - the number of results is capped (protects the rate limit)
    - malformed entries are skipped instead of raising

    Args:
        imports: The imports list documented as coming from Security Guard's
            extract_code_surface(); each element is expected to be a dict such
            as {"module": "requests", "items": [...], "line": 1, ...}.
            Entries that are not dicts are ignored.  An optional "level" key
            greater than 0 marks a relative import, which is skipped.
        max_packages: Maximum number of packages to return (default
            MAX_PACKAGES).  A value <= 0 yields an empty list.

    Returns:
        De-duplicated, lower-cased third-party package names in first-seen
        order, e.g. ["requests", "flask", "pymysql"].
    """
    if not imports:
        logger.info("[PKG_EX] No imports provided, returning empty list")
        return []
    # The cap used to be checked only after appending, so a non-positive limit
    # still produced one package.  Honour it up front instead.
    if max_packages is not None and max_packages <= 0:
        logger.info("[PKG_EX] max_packages=%d, returning empty list", max_packages)
        return []

    seen: set[str] = set()
    packages: list[str] = []
    for imp in imports:
        try:
            if not isinstance(imp, dict):
                continue
            module_raw: str = imp.get("module", "") or ""
            module_stripped = module_raw.strip()
            # rstrip(".*") removes any trailing "." and "*" characters, which
            # turns wildcard imports such as "java.util.*" into "java.util".
            module_clean = module_stripped.rstrip(".*")

            # Relative import (level > 0 or empty module): skip.
            level = imp.get("level", 0)
            if level and level > 0:
                continue
            if not module_stripped:
                continue

            top_level = _normalize_package_name(module_raw)
            if top_level is None:
                continue

            # Python standard library.
            if top_level in STDLIB_BLACKLIST:
                logger.debug("[PKG_EX] Filtered Python stdlib: %s", top_level)
                continue

            # Node.js built-in modules.
            if top_level in NODEJS_BUILTIN_BLACKLIST:
                logger.debug("[PKG_EX] Filtered Node.js builtin: %s", top_level)
                continue

            # Go standard library (full import path or top-level name).
            if module_stripped in GO_STDLIB_BLACKLIST or top_level in GO_STDLIB_BLACKLIST:
                logger.debug("[PKG_EX] Filtered Go stdlib: %s (raw: %s)", top_level, module_raw)
                continue

            # A Go source import path is not a package/version coordinate that
            # OSV/NVD can be queried with precisely.  Without go.mod version
            # evidence, querying e.g. "github" would pollute the results with
            # CVEs matched on a source token.
            if "/" in module_clean and module_clean.startswith((
                "github.com/", "gitlab.com/", "bitbucket.org/", "gopkg.in/",
                "golang.org/", "google.golang.org/",
            )):
                logger.debug("[PKG_EX] Filtered unversioned Go module import: %s", module_clean)
                continue

            # Java JDK standard library (java.io, java.sql, java.lang, ...).
            if (
                top_level in JAVA_STDLIB_BLACKLIST
                or module_clean in JAVA_STDLIB_BLACKLIST
                or module_clean.startswith(JVM_STDLIB_PREFIXES)
            ):
                logger.debug("[PKG_EX] Filtered Java stdlib: %s (raw: %s)", top_level, module_raw)
                continue

            # Dotted names rooted at a generic namespace ("com", "org", ...)
            # are namespaces rather than package names.
            if "." in module_clean and top_level in GENERIC_NAMESPACE_ROOTS:
                logger.debug("[PKG_EX] Filtered generic JVM namespace root: %s (raw: %s)", top_level, module_raw)
                continue

            # Implausible names.
            if not _is_valid_package_name(top_level):
                logger.debug("[PKG_EX] Filtered invalid name: %s", top_level)
                continue

            # De-duplicate while preserving first-seen order.
            if top_level in seen:
                continue
            seen.add(top_level)
            packages.append(top_level)

            if max_packages is not None and len(packages) >= max_packages:
                logger.info("[PKG_EX] Reached max_packages=%d, truncating", max_packages)
                break
        except Exception as exc:
            # Deliberate best-effort boundary: one unparsable entry must not
            # abort the whole extraction.
            logger.warning("[PKG_EX] Failed to parse import entry %r: %s", imp, exc)
            continue

    logger.info(
        "[PKG_EX] Extracted %d third-party packages from %d imports: %s",
        len(packages), len(imports), packages,
    )
    return packages
def packages_from_security_guard(sg_result: dict[str, Any]) -> list[str]:
    """
    Convenience wrapper: pull the package list straight out of a complete
    Security Guard result.

    Args:
        sg_result: The value documented as returned by run_security_guard();
            its "imports" entry is forwarded to extract_third_party_packages().

    Returns:
        The list of third-party package names, or an empty list when
        ``sg_result`` is empty / not a dict or its "imports" entry is not a list.
    """
    if sg_result and isinstance(sg_result, dict):
        import_entries = sg_result.get("imports", [])
        if isinstance(import_entries, list):
            return extract_third_party_packages(import_entries)
        logger.warning("[PKG_EX] sg_result.imports is not a list: %s", type(import_entries))
        return []

    logger.warning("[PKG_EX] Invalid sg_result type: %s", type(sg_result))
    return []
def format_packages_for_intel_fusion(packages: list[str]) -> str:
    """
    Render a package list as the comma-separated string consumed by Intel Fusion.

    Example: ["requests", "flask"] -> "requests, flask"

    Args:
        packages: Package names.

    Returns:
        The names joined with ", ", or an empty string when ``packages`` is
        empty (or otherwise falsy).
    """
    if not packages:
        return ""
    separator = ", "
    return separator.join(packages)
# ══════════════════════════════════════════════════════════════
# Version-aware extraction (added in v5.3)
# ══════════════════════════════════════════════════════════════
def extract_packages_with_versions(source_text: str, filename: str = "") -> list[dict]:
"""
ๅพžไพ่ณดๆ–‡ไปถ๏ผˆrequirements.txt / package.json / pom.xml / Pipfile๏ผ‰
ๆๅ–ๅฅ—ไปถๅ็จฑ + ็‰ˆๆœฌ่™Ÿใ€‚
่‹ฅ็‰ˆๆœฌๆœช็Ÿฅ๏ผˆไพ‹ๅฆ‚็›ดๆŽฅๅพž import ๆๅ–๏ผ‰๏ผŒ
ๅ›žๅ‚ณ version=None, version_known=Falseใ€‚
Args:
source_text: ๆ–‡ไปถๅ…งๅฎน
filename: ๆ–‡ไปถๅ็จฑ๏ผˆ็”จๆ–ผๅˆคๆ–ทๆ ผๅผ๏ผ‰
Returns:
list[dict]: [{"package": "requests", "version": "2.28.0", "version_known": True}, ...]
"""
results = []
fname = filename.lower()
# โ”€โ”€ requirements.txt โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
if "requirements" in fname or fname.endswith(".txt"):
for line in source_text.splitlines():
line = line.strip()
if not line or line.startswith("#"):
continue
# requests==2.28.0 / requests>=2.28.0 / requests~=2.28.0
m = re.match(r"^([a-zA-Z0-9_.-]+)\s*(?:==|>=|<=|~=|!=|>|<)\s*([^\s;]+)", line)
if m:
pkg, ver = m.group(1), m.group(2)
results.append({"package": pkg.lower(), "version": ver, "version_known": True})
else:
# ็„ก็‰ˆๆœฌ่™Ÿ็š„่กŒ๏ผˆๅฆ‚ requests๏ผ‰
m2 = re.match(r"^([a-zA-Z0-9_.-]+)\s*$", line)
if m2:
results.append({"package": m2.group(1).lower(), "version": None, "version_known": False})
# โ”€โ”€ package.json โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
elif fname.endswith("package.json"):
import json as _json
try:
data = _json.loads(source_text)
for section in ["dependencies", "devDependencies"]:
for pkg, ver in data.get(section, {}).items():
# ๆธ…้™ค ^, ~, >= ๅ‰็ถด
clean_ver = re.sub(r"^[^0-9]*", "", ver) if ver else None
known = bool(clean_ver and re.match(r"^\d", clean_ver))
results.append({"package": pkg.lower(), "version": clean_ver if known else ver, "version_known": known})
except Exception:
pass
# โ”€โ”€ pom.xml๏ผˆMaven๏ผ‰โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
elif fname.endswith("pom.xml"):
# ็ฐกๅ–ฎๆๅ– <artifactId> + ๅฐๆ‡‰ <version>
deps = re.findall(
r"<dependency>.*?<artifactId>([^<]+)</artifactId>.*?(?:<version>([^<]+)</version>)?.*?</dependency>",
source_text,
re.DOTALL,
)
for art, ver in deps:
if art.strip() and not art.strip().startswith("$"):
results.append({
"package": art.strip().lower(),
"version": ver.strip() if ver and not ver.strip().startswith("$") else None,
"version_known": bool(ver and not ver.strip().startswith("$")),
})
# โ”€โ”€ Pipfile โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
elif fname == "pipfile" or fname.endswith("pipfile"):
for line in source_text.splitlines():
m = re.match(r'''(?x)^([a-zA-Z0-9_.\-]+)\s*=\s*["\']?([^"\' \t]+)["\']?''', line.strip())
if m:
pkg, ver = m.group(1), m.group(2)
clean = re.sub(r"^[^0-9]*", "", ver)
known = bool(clean and re.match(r"^\d", clean))
results.append({"package": pkg.lower(), "version": clean if known else ver, "version_known": known})
return results
def build_version_disclaimer(package: str, version: str | None) -> str:
"""
็‚บ Intel Fusion ็š„ CVE ่ผธๅ‡บ็”Ÿๆˆ็‰ˆๆœฌๅ…่ฒฌ่ฒๆ˜Žใ€‚
Args:
package: ๅฅ—ไปถๅ็จฑ
version: ็‰ˆๆœฌ่™Ÿ๏ผˆNone ่กจ็คบๆœช็Ÿฅ๏ผ‰
Returns:
ๅ…่ฒฌ่ฒๆ˜Žๅญ—ไธฒ๏ผˆ่‹ฅ็‰ˆๆœฌๅทฒ็Ÿฅๅ‰‡็‚บ็ฉบๅญ—ไธฒ๏ผ‰
"""
if version:
return "" # ็‰ˆๆœฌๅทฒ็Ÿฅ๏ผŒ็„ก้œ€ๅ…่ฒฌ่ฒๆ˜Ž
return (
f"[็‰ˆๆœฌๆœช็Ÿฅ] ็„กๆณ•็ขบ่ช {package} ็š„็ขบๅˆ‡็‰ˆๆœฌใ€‚"
f"ไปฅไธ‹ CVE ็‚บ่ฉฒๅฅ—ไปถ็š„ๆ‰€ๆœ‰ๅทฒ็Ÿฅๆผๆดž๏ผŒ่ซ‹็ขบ่ชไฝ ็š„็‰ˆๆœฌๆ˜ฏๅฆ่ฝๅœจๅ—ๅฝฑ้Ÿฟ็ฏ„ๅœๅ…งๅ†ๆŽกๅ–่กŒๅ‹•ใ€‚"
)