| """ |
| ThreatHunter 配置模組 |
| ==================== |
| |
| 三模式 LLM 切換引擎 + API Key 管理 + 降級瀑布 |
| |
| 遵循文件: |
| - FINAL_PLAN.md §六(LLM 策略:OpenRouter 同模型開發) |
| - FINAL_PLAN.md §支柱 4(Graceful Degradation:五層降級瀑布) |
| - leader_plan.md(組長交付清單:config.py) |
| """ |

import os
import sys
import time
import logging
import threading
from pathlib import Path
from datetime import datetime, timezone

from dotenv import load_dotenv

# Load .env before any configuration value below is read.
load_dotenv()

# Project directory layout.
PROJECT_ROOT = Path(__file__).parent.resolve()
MEMORY_DIR = PROJECT_ROOT / "memory"
DATA_DIR = PROJECT_ROOT / "data"
SKILLS_DIR = PROJECT_ROOT / "skills"
DOCS_DIR = PROJECT_ROOT / "docs"
HARNESS_DIR = PROJECT_ROOT / "harness"
TESTS_DIR = PROJECT_ROOT / "tests"
CREWAI_STORAGE_DIR = PROJECT_ROOT / ".crewai_storage"

# Create the writable directories up front so downstream code can assume they exist.
MEMORY_DIR.mkdir(exist_ok=True)
(MEMORY_DIR / "vector_store").mkdir(exist_ok=True)
DATA_DIR.mkdir(exist_ok=True)
CREWAI_STORAGE_DIR.mkdir(exist_ok=True)

# Point CrewAI storage at the project-local directory unless already overridden.
os.environ.setdefault("CREWAI_STORAGE_DIR", str(CREWAI_STORAGE_DIR))

LOG_FORMAT = "[%(asctime)s] %(levelname)-8s %(name)s: %(message)s"
LOG_DATE_FORMAT = "%Y-%m-%dT%H:%M:%S"


class SafeStreamHandler(logging.StreamHandler):
    """Unicode-safe StreamHandler that prevents cp950 encoding errors on Windows."""

    def emit(self, record: logging.LogRecord) -> None:
        try:
            msg = self.format(record)
            stream = self.stream
            # Fall back to UTF-8 when the stream does not advertise an encoding.
            encoding = getattr(stream, 'encoding', None) or 'utf-8'
            try:
                msg.encode(encoding)
            except (UnicodeEncodeError, LookupError):
                # Round-trip with errors='replace' to drop unencodable characters.
                msg = msg.encode(encoding, errors='replace').decode(encoding, errors='replace')
            stream.write(msg + self.terminator)
            self.flush()
        except Exception:
            self.handleError(record)


# On Windows, rewrap stdout/stderr as UTF-8 so CJK or emoji log output cannot
# crash the console; pytest manages its own capture streams, so skip it there.
if sys.platform == 'win32' and 'pytest' not in sys.modules:
    import io
    if hasattr(sys.stdout, 'buffer'):
        sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8', errors='replace')
    if hasattr(sys.stderr, 'buffer'):
        sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding='utf-8', errors='replace')

logging.basicConfig(
    level=logging.INFO,
    format=LOG_FORMAT,
    datefmt=LOG_DATE_FORMAT,
    handlers=[SafeStreamHandler(sys.stdout)],
)
logger = logging.getLogger("threathunter")


# LLM provider selection plus per-provider credentials, all sourced from .env.
LLM_PROVIDER = os.getenv("LLM_PROVIDER", "amd").strip().lower()
OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY", "")
OPENROUTER_BASE_URL = "https://openrouter.ai/api/v1"
OPENROUTER_PRIMARY_MODEL = os.getenv("OPENROUTER_PRIMARY_MODEL", "").strip()
VLLM_BASE_URL = os.getenv("VLLM_BASE_URL", "").strip().rstrip("/")
VLLM_MODEL = os.getenv("VLLM_MODEL", "").strip()
VLLM_API_KEY = os.getenv("VLLM_API_KEY", "").strip()
AMD_LLM_BASE_URL = os.getenv("AMD_LLM_BASE_URL", VLLM_BASE_URL).strip().rstrip("/")
AMD_LLM_MODEL = os.getenv(
    "AMD_LLM_MODEL",
    VLLM_MODEL or "meta-llama/Llama-3.3-70B-Instruct",
).strip()
AMD_LLM_API_KEY = os.getenv("AMD_LLM_API_KEY", VLLM_API_KEY or "dummy").strip() or "dummy"
AMD_LLM_MAX_TOKENS = int(os.getenv("AMD_LLM_MAX_TOKENS", "8192"))
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")
OPENAI_MODEL = os.getenv("OPENAI_MODEL", "gpt-4o-mini").strip()
BACKUP_LLM_MAX_TOKENS = int(os.getenv("BACKUP_LLM_MAX_TOKENS", "8192"))
MINIMAX_API_KEY = os.getenv("MINIMAX_API_KEY", "").strip()
MINIMAX_BASE_URL = os.getenv("MINIMAX_BASE_URL", "").strip().rstrip("/")
MINIMAX_MODEL = os.getenv("MINIMAX_MODEL", "").strip()
MINIMAX_MAX_TOKENS = int(os.getenv("MINIMAX_MAX_TOKENS", str(BACKUP_LLM_MAX_TOKENS)))
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY", "").strip()
GEMINI_BASE_URL = os.getenv("GEMINI_BASE_URL", "").strip().rstrip("/")
GEMINI_MODEL = os.getenv("GEMINI_MODEL", "").strip()
GEMINI_MAX_TOKENS = int(os.getenv("GEMINI_MAX_TOKENS", str(BACKUP_LLM_MAX_TOKENS)))
CLAUDE_API_KEY = os.getenv("CLAUDE_API_KEY", "").strip()
CLAUDE_BASE_URL = os.getenv("CLAUDE_BASE_URL", "").strip().rstrip("/")
CLAUDE_MODEL = os.getenv("CLAUDE_MODEL", "").strip()
CLAUDE_MAX_TOKENS = int(os.getenv("CLAUDE_MAX_TOKENS", str(BACKUP_LLM_MAX_TOKENS)))
QWEN_API_KEY = os.getenv("QWEN_API_KEY", "").strip()
QWEN_BASE_URL = os.getenv("QWEN_BASE_URL", "").strip().rstrip("/")
QWEN_MODEL = os.getenv("QWEN_MODEL", "").strip()
QWEN_MAX_TOKENS = int(os.getenv("QWEN_MAX_TOKENS", str(BACKUP_LLM_MAX_TOKENS)))

# Threat-intel data source credentials.
NVD_API_KEY = os.getenv("NVD_API_KEY", "")
OTX_API_KEY = os.getenv("OTX_API_KEY", "")
GITHUB_TOKEN = os.getenv("GITHUB_TOKEN", "")

# Feature flags.
ENABLE_CRITIC = os.getenv("ENABLE_CRITIC", "true").lower() == "true"
ENABLE_MEMORY_RAG = os.getenv("ENABLE_MEMORY_RAG", "false").lower() == "true"

# Pipeline tuning knobs.
MAX_DEBATE_ROUNDS = int(os.getenv("MAX_DEBATE_ROUNDS", "2"))
SIMILARITY_THRESHOLD = float(os.getenv("SIMILARITY_THRESHOLD", "0.75"))
CONSTRAINT_THRESHOLD = float(os.getenv("CONSTRAINT_THRESHOLD", "0.75"))
LLM_RPM = int(os.getenv("LLM_RPM", "15"))
# Minimum spacing between consecutive LLM calls (seconds).
LLM_MIN_INTERVAL_SEC = float(os.getenv("LLM_MIN_INTERVAL_SEC", "8.0"))
# Per-request timeout passed into every provider config (seconds).
LLM_TIMEOUT_SEC = int(os.getenv("LLM_TIMEOUT_SEC", "90"))
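
# A minimal .env sketch for the default AMD-first setup (illustrative
# placeholder values, not real endpoints or credentials):
#
#     LLM_PROVIDER=amd
#     AMD_LLM_BASE_URL=https://example.amd-endpoint.test/v1
#     AMD_LLM_MODEL=meta-llama/Llama-3.3-70B-Instruct
#     OPENROUTER_API_KEY=sk-or-xxxx
#     LLM_MIN_INTERVAL_SEC=8.0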


# OpenRouter free-tier models in preference order; the fallback chain walks
# this list top to bottom.
OPENROUTER_FREE_MODELS: list[tuple[str, str]] = [
    ("OpenRouter (Qwen3 Coder 480B A35B Free)", "openrouter/qwen/qwen3-coder:free"),
    ("OpenRouter (Qwen3 Next 80B A3B Instruct Free)", "openrouter/qwen/qwen3-next-80b-a3b-instruct:free"),
    ("OpenRouter (Tencent Hy3 Preview Free)", "openrouter/tencent/hy3-preview:free"),
    ("OpenRouter (NVIDIA Nemotron 3 Super 120B A12B Free)", "openrouter/nvidia/nemotron-3-super-120b-a12b:free"),
    ("OpenRouter (inclusionAI Ling-2.6-1T Free)", "openrouter/inclusionai/ling-2.6-1t:free"),
    ("OpenRouter (OpenAI gpt-oss-120b Free)", "openrouter/openai/gpt-oss-120b:free"),
    ("OpenRouter (MiniMax M2.5 Free)", "openrouter/minimax/minimax-m2.5:free"),
    ("OpenRouter (Poolside Laguna M.1 Free)", "openrouter/poolside/laguna-m.1:free"),
    ("OpenRouter (Z.ai GLM-4.5-Air Free)", "openrouter/z-ai/glm-4.5-air:free"),
    ("OpenRouter (Nous Hermes 3 405B Instruct Free)", "openrouter/nousresearch/hermes-3-llama-3.1-405b:free"),
    ("OpenRouter (Meta Llama 3.3 70B Instruct Free)", "openrouter/meta-llama/llama-3.3-70b-instruct:free"),
    ("OpenRouter (OpenAI gpt-oss-20b Free)", "openrouter/openai/gpt-oss-20b:free"),
    ("OpenRouter (Google Gemma 4 31B Free)", "openrouter/google/gemma-4-31b-it:free"),
    ("OpenRouter (Google Gemma 3 27B Free)", "openrouter/google/gemma-3-27b-it:free"),
    ("OpenRouter (NVIDIA Nemotron 3 Nano 30B A3B Free)", "openrouter/nvidia/nemotron-3-nano-30b-a3b:free"),
]


def _openrouter_free_provider_chain(label_suffix: str = "") -> list[tuple[str, dict]]:
    """Build the OpenRouter free-model provider chain shared by every LLM_PROVIDER fallback."""
    if not OPENROUTER_API_KEY:
        return []
    models = OPENROUTER_FREE_MODELS
    if OPENROUTER_PRIMARY_MODEL:
        # A primary override goes first; drop its duplicate from the default list.
        models = [("OpenRouter (Primary Override Free)", OPENROUTER_PRIMARY_MODEL)] + [
            item for item in OPENROUTER_FREE_MODELS
            if item[1] != OPENROUTER_PRIMARY_MODEL
        ]
    suffix = f" {label_suffix}" if label_suffix else ""
    return [
        (
            f"{provider_name}{suffix}",
            {
                "model": model,
                "api_key": OPENROUTER_API_KEY,
                "base_url": OPENROUTER_BASE_URL,
                "max_tokens": 8192,
                "timeout": LLM_TIMEOUT_SEC,
            },
        )
        for provider_name, model in models
    ]
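
# Shape sketch: each chain entry is (display_label, kwargs for crewai.LLM),
# e.g. the first entry when OPENROUTER_API_KEY is set:
#
#     ("OpenRouter (Qwen3 Coder 480B A35B Free)",
#      {"model": "openrouter/qwen/qwen3-coder:free",
#       "api_key": OPENROUTER_API_KEY,
#       "base_url": "https://openrouter.ai/api/v1",
#       "max_tokens": 8192,
#       "timeout": LLM_TIMEOUT_SEC})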


def _amd_llm_provider_chain(label_suffix: str = "") -> list[tuple[str, dict]]:
    """Build the AMD Developer Cloud vLLM/OpenAI-compatible primary endpoint."""
    base_url = AMD_LLM_BASE_URL or VLLM_BASE_URL
    model = AMD_LLM_MODEL or VLLM_MODEL
    if not base_url or not model:
        return []

    suffix = f" {label_suffix}" if label_suffix else ""
    # LiteLLM routes OpenAI-compatible endpoints via the "openai/" prefix.
    litellm_model = model if model.startswith("openai/") else f"openai/{model}"
    return [
        (
            f"AMD Developer Cloud vLLM (OpenAI-compatible){suffix}",
            {
                "model": litellm_model,
                "api_key": AMD_LLM_API_KEY,
                "base_url": base_url,
                "max_tokens": AMD_LLM_MAX_TOKENS,
                "timeout": LLM_TIMEOUT_SEC,
            },
        )
    ]


_PLACEHOLDER_MARKERS = ("your-", "your_", "xxxxxxxx", "placeholder", "dummy")


def _is_configured(value: str) -> bool:
    """Return True if an env value looks like a real setting rather than a .env.example placeholder."""
    text = str(value or "").strip()
    if not text:
        return False
    lowered = text.lower()
    return not any(marker in lowered for marker in _PLACEHOLDER_MARKERS)
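
# Behaviour sketch (sample values are made up):
#     _is_configured("sk-live-abc123")     -> True
#     _is_configured("your-api-key-here")  -> False  (matches "your-")
#     _is_configured("dummy")              -> False  (matches "dummy")
#     _is_configured("")                   -> False  (blank)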


def _optional_base_url(value: str) -> str:
    """BASE_URL is optional; never pass a placeholder through to the CrewAI LLM."""
    return value if _is_configured(value) else ""


def _direct_llm_provider_entry(
    provider_label: str,
    api_key: str,
    model: str,
    base_url: str = "",
    max_tokens: int = BACKUP_LLM_MAX_TOKENS,
    label_suffix: str = "",
) -> list[tuple[str, dict]]:
    """Build a MiniMax/Gemini/Claude/Qwen direct or OpenAI-compatible provider entry."""
    if not _is_configured(api_key) or not _is_configured(model):
        return []

    suffix = f" {label_suffix}" if label_suffix else ""
    provider_config = {
        "model": model,
        "api_key": api_key,
        "max_tokens": max_tokens,
        "timeout": LLM_TIMEOUT_SEC,
    }
    clean_base_url = _optional_base_url(base_url)
    if clean_base_url:
        provider_config["base_url"] = clean_base_url

    return [(f"{provider_label}{suffix}", provider_config)]


# (label, api_key, model, base_url, max_tokens) per direct backup provider.
_DIRECT_BACKUP_PROVIDER_SPECS = {
    "minimax": ("MiniMax Direct API", MINIMAX_API_KEY, MINIMAX_MODEL, MINIMAX_BASE_URL, MINIMAX_MAX_TOKENS),
    "gemini": ("Google Gemini Direct API", GEMINI_API_KEY, GEMINI_MODEL, GEMINI_BASE_URL, GEMINI_MAX_TOKENS),
    "claude": ("Anthropic Claude Direct API", CLAUDE_API_KEY, CLAUDE_MODEL, CLAUDE_BASE_URL, CLAUDE_MAX_TOKENS),
    "qwen": ("Qwen Direct API", QWEN_API_KEY, QWEN_MODEL, QWEN_BASE_URL, QWEN_MAX_TOKENS),
}


def _direct_backup_provider_chain(
    label_suffix: str = "",
    priority_provider: str = "",
) -> list[tuple[str, dict]]:
    """Build every configured direct-connection backup LLM, optionally promoting one to the front."""
    provider_names = list(_DIRECT_BACKUP_PROVIDER_SPECS)
    priority = str(priority_provider or "").strip().lower()
    if priority in _DIRECT_BACKUP_PROVIDER_SPECS:
        provider_names = [priority] + [name for name in provider_names if name != priority]

    chain: list[tuple[str, dict]] = []
    for name in provider_names:
        label, api_key, model, base_url, max_tokens = _DIRECT_BACKUP_PROVIDER_SPECS[name]
        chain.extend(
            _direct_llm_provider_entry(
                provider_label=label,
                api_key=api_key,
                model=model,
                base_url=base_url,
                max_tokens=max_tokens,
                label_suffix=label_suffix,
            )
        )
    return chain


def _direct_provider_is_configured(provider_name: str) -> bool:
    """Check whether the named direct provider has both its API_KEY and MODEL set."""
    spec = _DIRECT_BACKUP_PROVIDER_SPECS.get(str(provider_name or "").strip().lower())
    if not spec:
        return False
    _, api_key, model, _, _ = spec
    return _is_configured(api_key) and _is_configured(model)


def _openai_provider_chain(label_suffix: str = "") -> list[tuple[str, dict]]:
    """Build the OpenAI backup provider."""
    if not _is_configured(OPENAI_API_KEY) or not _is_configured(OPENAI_MODEL):
        return []
    suffix = f" {label_suffix}" if label_suffix else ""
    return [
        (
            f"OpenAI ({OPENAI_MODEL}){suffix}",
            {
                "model": OPENAI_MODEL,
                "api_key": OPENAI_API_KEY,
                "max_tokens": BACKUP_LLM_MAX_TOKENS,
                "timeout": LLM_TIMEOUT_SEC,
            },
        )
    ]


SYSTEM_CONSTITUTION = """=== ThreatHunter Constitution ===
1. All CVE IDs must come from Tool-returned data. Fabrication is prohibited.
2. You must use the provided Tools for queries. Skipping them is not allowed.
3. Output must conform to the specified JSON schema.
4. Uncertain reasoning must be tagged with confidence: HIGH / MEDIUM / NEEDS_VERIFICATION.
5. Each judgment must include a reasoning field.
6. Reports use English; technical terms are not translated.
7. Do not call the same Tool twice for the same data."""
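
# Injection sketch: the constitution is intended to be prepended to every
# Agent's prompt. A minimal CrewAI example (role/goal here are hypothetical):
#
#     from crewai import Agent
#     scout = Agent(
#         role="CVE Scout",
#         goal="Collect fresh CVEs from Tool output only",
#         backstory=SYSTEM_CONSTITUTION,
#         llm=get_llm(),
#     )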


class DegradationStatus:
    """
    Degradation status tracker.

    Tracks the system's current degradation level for the UI and the logs.

    Level definitions (FINAL_PLAN.md, Pillar 4):
        Level 1: ⚡ full-speed operation
        Level 2: ⚠️ LLM degraded (vLLM → OpenRouter → OpenAI)
        Level 3: ⚠️ API degraded (live APIs → offline cache)
        Level 4: 🔶 Agent degraded (failed Agents are skipped)
        Level 5: 🔶 minimal survival mode (offline summary)
    """

    LEVEL_LABELS = {
        1: "[FULL] full-speed operation",
        2: "[WARN] LLM degraded",
        3: "[WARN] API degraded",
        4: "[DEGRADE] Agent degraded",
        5: "[DEGRADE] minimal survival mode",
    }

    def __init__(self):
        self.current_level: int = 1
        self.degraded_components: list[str] = []
        self.timestamp: str = datetime.now(timezone.utc).isoformat()

    def degrade(self, component: str, reason: str) -> None:
        """Record one component degradation and raise the level accordingly."""
        self.degraded_components.append(f"{component}: {reason}")
        if "LLM" in component:
            self.current_level = max(self.current_level, 2)
        elif "API" in component:
            self.current_level = max(self.current_level, 3)
        elif "Agent" in component:
            self.current_level = max(self.current_level, 4)
        self.timestamp = datetime.now(timezone.utc).isoformat()
        logger.warning("[DEGRADE] %s -- %s (level: %d)", component, reason, self.current_level)

    def get_display(self) -> str:
        """Return the degradation status text for UI display."""
        return self.LEVEL_LABELS.get(self.current_level, "[?] unknown")

    def to_dict(self) -> dict:
        """Serialize to a dict."""
        return {
            "level": self.current_level,
            "label": self.get_display(),
            "degraded_components": self.degraded_components,
            "timestamp": self.timestamp,
        }

    def reset(self) -> None:
        """Reset the degradation state."""
        self.current_level = 1
        self.degraded_components = []
        self.timestamp = datetime.now(timezone.utc).isoformat()


# Shared singleton; every module reports degradations through this instance.
degradation_status = DegradationStatus()
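
# Usage sketch (component names are illustrative):
#
#     degradation_status.degrade("API:NVD", "timeout after 3 retries")  # level -> 3
#     degradation_status.degrade("LLM:OpenRouter", "429 rate limited")  # stays 3 (max wins)
#     degradation_status.get_display()  # "[WARN] API degraded"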


class LLMRateLimiter:
    """
    Global LLM request rate limiter (singleton).

    Goal: stop the 429 cascade caused by the OpenRouter free tier (8 req/min).

    Every Agent shares the same instance and calls wait_if_needed() before each
    LLM call, guaranteeing at least LLM_MIN_INTERVAL_SEC seconds between any
    two consecutive LLM requests.

    Thread safety: a threading.Lock() keeps the bookkeeping atomic.
    """

    def __init__(self, min_interval: float = 10.0):
        self._min_interval = min_interval
        self._last_call_time: float = 0.0
        self._lock = threading.Lock()
        self._total_waited: float = 0.0
        self._call_count: int = 0

    def wait_if_needed(self, caller: str = "") -> float:
        """
        Wait before an LLM call so the rate limit is never exceeded.

        Args:
            caller: caller name for log attribution, e.g. "scout", "intel_fusion"

        Returns:
            Seconds actually waited (0.0 means no wait was needed).
        """
        with self._lock:
            now = time.time()
            elapsed = now - self._last_call_time
            wait_sec = max(0.0, self._min_interval - elapsed)
            if wait_sec > 0.1:
                logger.info(
                    "[RATE_LIMITER] %s waiting %.1fs (interval=%.0fs, elapsed=%.1fs)",
                    caller or "unknown", wait_sec, self._min_interval, elapsed
                )
                time.sleep(wait_sec)
                self._total_waited += wait_sec
            self._last_call_time = time.time()
            self._call_count += 1
            return wait_sec

    def reset(self) -> None:
        """Reset the limiter state (for tests)."""
        with self._lock:
            self._last_call_time = 0.0
            self._total_waited = 0.0
            self._call_count = 0

    def on_429(self, retry_after: float = 0.0, caller: str = "") -> float:
        """
        Call on receiving a 429: force-wait retry_after seconds (minimum 30).
        Resets last_call_time so the next call starts a fresh interval.

        Args:
            retry_after: Retry-After seconds reported by the API (0 if absent)
            caller: caller name for log attribution

        Returns:
            Seconds actually waited.
        """
        with self._lock:
            wait_sec = max(retry_after, 30.0)
            logger.warning(
                "[RATE_LIMITER] 429 received! %s force-waiting %.0fs before retry",
                caller or "unknown", wait_sec
            )
            time.sleep(wait_sec)
            self._last_call_time = time.time()
            self._total_waited += wait_sec
            return wait_sec

    @property
    def total_waited(self) -> float:
        """Cumulative seconds spent waiting (for monitoring)."""
        return self._total_waited

    @property
    def call_count(self) -> int:
        """Cumulative number of LLM calls."""
        return self._call_count


# Shared singleton, spaced by LLM_MIN_INTERVAL_SEC.
rate_limiter = LLMRateLimiter(LLM_MIN_INTERVAL_SEC)
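
# Usage sketch inside an Agent pipeline (`crew` is hypothetical):
#
#     rate_limiter.wait_if_needed("scout")  # blocks until the interval has elapsed
#     result = crew.kickoff()               # the actual LLM-backed call
#     # ...and if that call still hits a 429:
#     rate_limiter.on_429(retry_after=30.0, caller="scout")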


def _build_provider_chain() -> list[tuple[str, dict]]:
    """
    Build the degradation chain according to LLM_PROVIDER.

    Default: AMD/vLLM -> direct backup APIs -> OpenRouter waterfall -> OpenAI.
    """
    chain = []
    direct_backup_names = set(_DIRECT_BACKUP_PROVIDER_SPECS)

    if LLM_PROVIDER in {"amd", "vllm"}:
        chain.extend(_amd_llm_provider_chain())
        chain.extend(_direct_backup_provider_chain(f"[{LLM_PROVIDER}-fallback]"))
        chain.extend(_openrouter_free_provider_chain(f"[{LLM_PROVIDER}-fallback]"))
        chain.extend(_openai_provider_chain(f"[{LLM_PROVIDER}-fallback]"))

    elif LLM_PROVIDER == "openrouter":
        chain.extend(_openrouter_free_provider_chain())
        chain.extend(_direct_backup_provider_chain("[openrouter-fallback]"))
        chain.extend(_openai_provider_chain("[openrouter-fallback]"))

    elif LLM_PROVIDER == "google":
        logger.warning(
            "[WARN] LLM_PROVIDER=google is deprecated; use LLM_PROVIDER=gemini for direct Gemini API"
        )
        chain.extend(_direct_backup_provider_chain("[legacy-google-provider]", priority_provider="gemini"))
        chain.extend(_openrouter_free_provider_chain("[legacy-google-provider]"))

    elif LLM_PROVIDER == "openai":
        chain.extend(_openai_provider_chain())
        chain.extend(_direct_backup_provider_chain("[openai-fallback]"))

    elif LLM_PROVIDER in direct_backup_names:
        chain.extend(_direct_backup_provider_chain(priority_provider=LLM_PROVIDER))
        chain.extend(_openrouter_free_provider_chain(f"[{LLM_PROVIDER}-fallback]"))
        chain.extend(_openai_provider_chain(f"[{LLM_PROVIDER}-fallback]"))

    return chain
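
# Inspection sketch: with LLM_PROVIDER=amd and only OPENROUTER_API_KEY set, the
# chain reduces to the OpenRouter waterfall; adding AMD_LLM_BASE_URL puts the
# AMD endpoint first. To see the resolved order:
#
#     for name, cfg in _build_provider_chain():
#         print(name, "->", cfg["model"])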


# Model name -> epoch timestamp of its most recent failure; entries expire
# after MODEL_COOLDOWN seconds.
_model_health: dict[str, float] = {}
MODEL_COOLDOWN = 300  # seconds


def mark_model_failed(model_name: str) -> None:
    """
    Mark a model as temporarily unavailable (cooling down).

    Called by each Agent's run_*_pipeline() when it catches a 429 error.
    After MODEL_COOLDOWN seconds the model becomes eligible for get_llm() again.

    Args:
        model_name: the failed model name (e.g. 'openrouter/qwen/qwen3.6-plus:free')
    """
    _model_health[model_name] = time.time()
    logger.warning("[COOLDOWN] Model marked as rate-limited: %s (cooldown %ds)", model_name, MODEL_COOLDOWN)


def _is_model_in_cooldown(model_name: str) -> bool:
    """Check whether a model is still inside its cooldown window."""
    if model_name not in _model_health:
        return False
    elapsed = time.time() - _model_health[model_name]
    if elapsed >= MODEL_COOLDOWN:
        # Cooldown expired; forget the failure.
        del _model_health[model_name]
        return False
    return True


def get_llm(exclude_models: list[str] | None = None):
    """
    Return an LLM instance, applying the degradation waterfall plus
    model-health filtering.

    Providers are tried in order:
        1. Skip models in exclude_models (models 429'd at runtime)
        2. Skip models still in cooldown (failed within MODEL_COOLDOWN seconds)
        3. Return the first one that connects

    Args:
        exclude_models: model names to exclude explicitly (passed by Agents on 429 retry)

    Returns:
        A crewai.LLM instance.

    Raises:
        RuntimeError: no provider is available.
    """
    # Imported here (not at module top) so that importing config stays lightweight.
    from crewai import LLM

    providers = _build_provider_chain()
    exclude = set(exclude_models or [])

    if not providers:
        raise RuntimeError(
            "No LLM provider is configured.\n"
            "Set at least one of the following in .env:\n"
            "  AMD_LLM_BASE_URL (AMD Developer Cloud primary endpoint)\n"
            "  VLLM_BASE_URL (AMD/vLLM legacy alias)\n"
            "  MINIMAX_API_KEY + MINIMAX_MODEL (MiniMax backup)\n"
            "  GEMINI_API_KEY + GEMINI_MODEL (Gemini backup)\n"
            "  CLAUDE_API_KEY + CLAUDE_MODEL (Claude backup)\n"
            "  QWEN_API_KEY + QWEN_MODEL (Qwen backup)\n"
            "  OPENROUTER_API_KEY (fallback)\n"
            "  OPENAI_API_KEY (backup)"
        )

    # Reorder by observed performance before walking the chain.
    providers = model_stats.get_priority_order(providers)

    for provider_name, provider_config in providers:
        model = provider_config["model"]

        if model in exclude:
            logger.info("[SKIP] %s excluded by caller", provider_name)
            continue

        if _is_model_in_cooldown(model):
            remaining = MODEL_COOLDOWN - (time.time() - _model_health.get(model, 0))
            logger.info("[SKIP] %s in cooldown (%.0fs remaining)", provider_name, remaining)
            continue

        try:
            llm = LLM(**provider_config)
            logger.info("[OK] LLM connected: %s", provider_name)
            return llm
        except Exception as e:
            degradation_status.degrade(f"LLM:{provider_name}", str(e))
            logger.warning("[FAIL] LLM %s connection failed: %s", provider_name, e)
            continue

    # Last resort: if every model is cooling down, clear the cooldowns and retry once.
    if _model_health:
        logger.warning("[WARN] All models in cooldown, clearing cooldown for last-resort retry")
        _model_health.clear()
        return get_llm(exclude_models=list(exclude))

    raise RuntimeError(
        "Every LLM provider failed to connect.\n"
        f"Tried: {[name for name, _ in providers]}\n"
        f"Degradation details: {degradation_status.to_dict()}"
    )


def retry_on_429(
    fn,
    *args,
    max_retries: int = 4,
    base_delay: float = 15.0,
    caller: str = "unknown",
    **kwargs,
):
    """
    Wrap any LLM-calling function with 429 exponential-backoff retry protection.

    Strategy:
        1st 429 -> wait 15s
        2nd 429 -> wait 30s
        3rd 429 -> wait 60s
        4th 429 -> wait 120s, then raise

    Usage:
        result = retry_on_429(crew.kickoff, max_retries=3, caller="scout")

    Args:
        fn: the function to call (callable)
        *args: positional arguments for fn
        max_retries: maximum number of retries (default 4)
        base_delay: base backoff seconds for the first retry (default 15)
        caller: caller name for log attribution
        **kwargs: keyword arguments for fn

    Returns:
        fn's return value.

    Raises:
        The last exception, if every retry fails.
    """
    last_exc = None
    for attempt in range(max_retries + 1):
        try:
            if attempt > 0:
                rate_limiter.wait_if_needed(caller)
            return fn(*args, **kwargs)
        except Exception as e:
            err_str = str(e)
            is_429 = (
                "429" in err_str
                or "rate limit" in err_str.lower()
                or "quota" in err_str.lower()
                or "resource_exhausted" in err_str.lower()
            )
            if not is_429:
                raise

            last_exc = e

            # Honour an explicit Retry-After hint if the error message carries one.
            retry_after = 0.0
            import re as _re
            m = _re.search(r'retry.{1,10}(\d+\.?\d*)s', err_str, _re.IGNORECASE)
            if m:
                retry_after = float(m.group(1))

            if attempt >= max_retries:
                logger.error(
                    "[RETRY] %s: 429 after %d attempts, giving up",
                    caller, max_retries
                )
                break

            # Exponential backoff, floored by the API hint and a 15s minimum.
            backoff = base_delay * (2 ** attempt)
            wait_sec = max(backoff, retry_after, 15.0)
            logger.warning(
                "[RETRY] %s: 429 (attempt %d/%d), backoff=%.0fs (api_retry_after=%.0fs)",
                caller, attempt + 1, max_retries, wait_sec, retry_after
            )

            # If the error message names the offending model, put it into cooldown.
            mdl_m = _re.search(r'model["\s:]+([\w/.\-:]+)', err_str)
            if mdl_m:
                mark_model_failed(mdl_m.group(1))

            rate_limiter.on_429(retry_after=wait_sec, caller=caller)

    raise last_exc


def get_current_model_name(llm) -> str:
    """
    Extract the model name from a CrewAI LLM object.
    Used by Agents to mark the failed model when they catch a 429.
    """
    return getattr(llm, 'model', getattr(llm, 'model_name', 'unknown'))
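
# End-to-end 429 recovery sketch (`crew` is hypothetical; the helpers are the
# ones defined above):
#
#     llm = get_llm()
#     try:
#         result = retry_on_429(crew.kickoff, caller="scout")
#     except Exception:
#         # Retries exhausted: cool the model down and rebuild with the next one.
#         failed = get_current_model_name(llm)
#         mark_model_failed(failed)
#         llm = get_llm(exclude_models=[failed])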


def validate_api_keys() -> dict[str, bool]:
    """
    Verify that every API key is set (presence only; validity is not checked).

    Returns:
        Mapping of key name -> whether it is set.
    """
    status = {
        "AMD_LLM_BASE_URL": bool(AMD_LLM_BASE_URL or VLLM_BASE_URL),
        "OPENROUTER_API_KEY": bool(OPENROUTER_API_KEY),
        "VLLM_BASE_URL": bool(VLLM_BASE_URL),
        "OPENAI_API_KEY": bool(OPENAI_API_KEY),
        "MINIMAX_API_KEY": _is_configured(MINIMAX_API_KEY),
        "GEMINI_API_KEY": _is_configured(GEMINI_API_KEY),
        "CLAUDE_API_KEY": _is_configured(CLAUDE_API_KEY),
        "QWEN_API_KEY": _is_configured(QWEN_API_KEY),
        "NVD_API_KEY": bool(NVD_API_KEY),
        "OTX_API_KEY": bool(OTX_API_KEY),
        "GITHUB_TOKEN": bool(GITHUB_TOKEN),
    }
    if LLM_PROVIDER in {"amd", "vllm"} and not (AMD_LLM_BASE_URL or VLLM_BASE_URL):
        logger.warning("[WARN] AMD LLM endpoint not configured; fallback providers will be used if available")
    if LLM_PROVIDER == "openrouter" and not OPENROUTER_API_KEY:
        logger.error("[ERROR] OPENROUTER_API_KEY is not set, but OpenRouter is the active primary LLM provider!")
    if LLM_PROVIDER in _DIRECT_BACKUP_PROVIDER_SPECS and not _direct_provider_is_configured(LLM_PROVIDER):
        logger.error("[ERROR] LLM_PROVIDER=%s but its API_KEY/MODEL is not configured", LLM_PROVIDER)

    # Direct backup LLM keys are optional; only warn about the rest.
    optional_llm_keys = {"MINIMAX_API_KEY", "GEMINI_API_KEY", "CLAUDE_API_KEY", "QWEN_API_KEY"}
    missing = [k for k, v in status.items() if not v and k not in optional_llm_keys]
    if missing:
        logger.warning("[WARN] Missing API Keys: %s", ', '.join(missing))
    else:
        logger.info("[OK] All API Keys configured (provider=%s)", LLM_PROVIDER)
    return status
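
# Startup sketch: call once at boot so missing keys surface in the log before
# the first pipeline run (the follow-up warning is illustrative):
#
#     key_status = validate_api_keys()
#     if not key_status["OPENROUTER_API_KEY"]:
#         logger.warning("OpenRouter fallback tier unavailable")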


import json as _json


class ModelStats:
    """
    Model performance tracker with JSON persistence.

    Records each model's call count, success rate, and average latency, and
    uses those figures to reorder model priority dynamically (highest score
    first).

    Persistence path: data/model_stats.json
    """

    STATS_FILE = DATA_DIR / "model_stats.json"

    def __init__(self):
        self._stats: dict[str, dict] = self._load()

    def _load(self) -> dict:
        """Load historical stats from JSON."""
        if self.STATS_FILE.exists():
            try:
                with open(self.STATS_FILE, encoding="utf-8") as f:
                    return _json.load(f)
            except (ValueError, OSError) as e:
                logger.warning("[WARN] ModelStats load failed: %s, starting fresh", e)
        return {}

    def _save(self) -> None:
        """Persist the stats to JSON."""
        try:
            with open(self.STATS_FILE, "w", encoding="utf-8") as f:
                _json.dump(self._stats, f, ensure_ascii=False, indent=2)
        except OSError as e:
            logger.warning("[WARN] ModelStats save failed: %s", e)

    def _ensure_entry(self, model_name: str) -> dict:
        """Ensure the model has an entry in the stats table."""
        if model_name not in self._stats:
            self._stats[model_name] = {
                "total_calls": 0,
                "success_count": 0,
                "fail_count": 0,
                "total_latency_ms": 0.0,
                "avg_latency_ms": 0.0,
                "success_rate": 0.0,
                "last_success": None,
                "last_failure": None,
                "last_error": None,
            }
        return self._stats[model_name]

    def record_success(self, model_name: str, latency_ms: float) -> None:
        """Record one successful call."""
        entry = self._ensure_entry(model_name)
        entry["total_calls"] += 1
        entry["success_count"] += 1
        entry["total_latency_ms"] += latency_ms
        entry["avg_latency_ms"] = entry["total_latency_ms"] / entry["success_count"]
        entry["success_rate"] = entry["success_count"] / entry["total_calls"]
        entry["last_success"] = time.time()
        logger.info(
            "[STATS] %s success | latency=%.0fms | avg=%.0fms | rate=%.0f%%",
            model_name, latency_ms, entry["avg_latency_ms"], entry["success_rate"] * 100,
        )
        self._save()

    def record_failure(self, model_name: str, error: str) -> None:
        """Record one failed call."""
        entry = self._ensure_entry(model_name)
        entry["total_calls"] += 1
        entry["fail_count"] += 1
        entry["success_rate"] = entry["success_count"] / entry["total_calls"]
        entry["last_failure"] = time.time()
        entry["last_error"] = error[:200]
        logger.info(
            "[STATS] %s failure | error=%s | rate=%.0f%%",
            model_name, error[:80], entry["success_rate"] * 100,
        )
        self._save()

    def get_priority_order(self, providers: list[tuple[str, dict]]) -> list[tuple[str, dict]]:
        """
        Reorder model priority using the recorded performance statistics.

        Scoring formula: score = success_rate * 100 - avg_latency_ms * 0.001
        Higher scores go first. Models with fewer than two recorded calls keep
        their original relative order but are placed after the scored ones.
        """
        scored = []
        unscored = []

        for provider_name, config in providers:
            model = config["model"]
            if model in self._stats and self._stats[model]["total_calls"] >= 2:
                entry = self._stats[model]
                score = entry["success_rate"] * 100 - entry["avg_latency_ms"] * 0.001
                scored.append((score, provider_name, config))
            else:
                unscored.append((provider_name, config))

        scored.sort(key=lambda x: x[0], reverse=True)
        result = [(name, cfg) for _, name, cfg in scored] + unscored
        return result
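
    # Worked example of the scoring formula (made-up stats): a model with a
    # 0.90 success rate and 2000 ms average latency scores
    # 0.90 * 100 - 2000 * 0.001 = 88.0, while a flawless but slower model at
    # 8000 ms scores 1.00 * 100 - 8000 * 0.001 = 92.0 and is tried first.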

    def get_report(self) -> dict:
        """Return a performance report (for UI display)."""
        return {
            model: {
                "calls": s["total_calls"],
                "success_rate": f"{s['success_rate']:.0%}",
                "avg_latency": f"{s['avg_latency_ms']:.0f}ms",
                "fails": s["fail_count"],
            }
            for model, s in self._stats.items()
            if s["total_calls"] > 0
        }


# Shared singleton consumed by get_llm() for performance-based reordering.
model_stats = ModelStats()
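

# Self-check sketch: running `python config.py` prints the resolved key status
# and provider chain; it makes no LLM calls, so it is safe to run offline.
if __name__ == "__main__":
    validate_api_keys()
    print("Provider chain (fallback order):")
    for _name, _cfg in _build_provider_chain():
        print(f"  - {_name}: {_cfg['model']}")
    print("Degradation status:", degradation_status.get_display())
    print("Model stats:", _json.dumps(model_stats.get_report(), ensure_ascii=False, indent=2))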