| """ |
| Services d'API pour la recherche web. |
| Intègre les APIs Tavily et Serper pour la recherche d'informations. |
| """ |
|
|
| from abc import ABC, abstractmethod |
| from typing import List, Dict, Any, Optional |
| import requests |
| import asyncio |
| import aiohttp |
| from datetime import datetime |
| import json |
|
|
| from src.core.logging import setup_logger |
| from src.models.research_models import SearchResult |
|
|
| |
# Optional configuration: start from the "no configuration" state and only
# replace it when the settings module imports cleanly. Code that reads
# ``api_config`` must therefore cope with it being None.
api_config = None
try:
    from config.settings import api_config
except Exception as import_error:
    print(f"Erreur lors de l'import de la configuration: {import_error}")
|
|
|
|
class SearchAPIError(Exception):
    """Error raised for search API failures."""
|
|
|
|
class BaseSearchAPI(ABC):
    """Abstract base interface that every search API client implements."""

    @abstractmethod
    async def search(
        self,
        query: str,
        max_results: int = 5,
        **kwargs
    ) -> List[SearchResult]:
        """
        Run a search.

        Args:
            query: Search query.
            max_results: Maximum number of results to return.
            **kwargs: API-specific parameters.

        Returns:
            List of search results.
        """
        # Abstract placeholder: concrete clients supply the implementation.
        ...
|
|
|
|
class TavilySearchAPI(BaseSearchAPI):
    """
    Client for the Tavily search API.

    Documentation: https://docs.tavily.com/
    """

    def __init__(self, api_key: Optional[str] = None):
        """
        Resolve the API key and set up the base URL and logger.

        Args:
            api_key: Explicit API key. When omitted or empty, the key is read
                from ``api_config.TAVILY_API_KEY`` if a configuration object
                was loaded.

        Raises:
            SearchAPIError: If no non-empty API key could be resolved.
        """
        configured_key = getattr(api_config, 'TAVILY_API_KEY', '') if api_config else ''
        self.api_key = api_key or configured_key
        self.base_url = "https://api.tavily.com"
        self.logger = setup_logger("tavily_api")

        if not self.api_key:
            raise SearchAPIError("Clé API Tavily manquante")

    async def search(
        self,
        query: str,
        max_results: int = 5,
        search_depth: str = "basic",
        include_images: bool = False,
        include_answer: bool = True,
        **kwargs
    ) -> List[SearchResult]:
        """
        Search with the Tavily API.

        Args:
            query: Search query.
            max_results: Number of results requested (capped at 20).
            search_depth: "basic" or "advanced".
            include_images: Whether to ask for images.
            include_answer: Whether to ask for an AI-generated answer.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            List of parsed results.

        Raises:
            SearchAPIError: On a non-200 response, a timeout, or an aiohttp
                client error.
        """
        self.logger.info(f"Recherche Tavily: '{query}' (max: {max_results})")

        payload = {
            "api_key": self.api_key,
            "query": query,
            "search_depth": search_depth,
            "max_results": min(max_results, 20),
            "include_images": include_images,
            "include_answer": include_answer,
            "include_raw_content": False,
        }

        async with aiohttp.ClientSession() as session:
            try:
                async with session.post(
                    f"{self.base_url}/search",
                    json=payload,
                    timeout=aiohttp.ClientTimeout(total=30),
                ) as response:

                    if response.status != 200:
                        error_text = await response.text()
                        raise SearchAPIError(f"Erreur Tavily {response.status}: {error_text}")

                    data = await response.json()
                    return self._parse_tavily_results(data)

            # aiohttp.ClientTimeout is a configuration object, not an exception;
            # timeouts surface as asyncio.TimeoutError. This clause must come
            # first because aiohttp's timeout errors can also be ClientError.
            except asyncio.TimeoutError as e:
                raise SearchAPIError("Timeout lors de la requête Tavily") from e
            except aiohttp.ClientError as e:
                raise SearchAPIError(f"Erreur de connexion Tavily: {str(e)}") from e

    @staticmethod
    def _parse_published_date(raw: Any) -> Optional[datetime]:
        """Best-effort ISO-8601 parse; returns None for missing or unparseable values."""
        if not raw:
            return None
        try:
            # A trailing "Z" is rewritten as an explicit UTC offset before parsing.
            return datetime.fromisoformat(raw.replace('Z', '+00:00'))
        except (AttributeError, TypeError, ValueError):
            return None

    def _parse_tavily_results(self, data: Dict[str, Any]) -> List[SearchResult]:
        """
        Convert a Tavily response payload into SearchResult objects.

        Items that cannot be converted are logged and skipped.

        Args:
            data: Decoded JSON response; items are read from its "results" key.

        Returns:
            List of successfully parsed results.
        """
        results = []

        for item in data.get("results", []):
            try:
                result = SearchResult(
                    title=item.get("title", ""),
                    url=item.get("url", ""),
                    snippet=item.get("content", ""),
                    published_date=self._parse_published_date(item.get("published_date")),
                    source=item.get("source", ""),
                    score=item.get("score", 0.0),
                )
                results.append(result)

            except Exception as e:
                # Deliberate best-effort: one malformed item must not discard the rest.
                self.logger.warning(f"Erreur parsing résultat Tavily: {e}")
                continue

        self.logger.info(f"Tavily: {len(results)} résultats parsés")
        return results
|
|
|
|
class SerperSearchAPI(BaseSearchAPI):
    """
    Client for the Serper API (Google Search).

    Documentation: https://serper.dev/
    """

    # Response key that holds the items for a given search type; any other
    # search type is read from "images".
    _RESULT_KEYS = {"search": "organic", "news": "news"}

    def __init__(self, api_key: Optional[str] = None):
        """
        Resolve the API key and set up the base URL and logger.

        Args:
            api_key: Explicit API key. When omitted or empty, the key is read
                from ``api_config.SERPER_API_KEY`` if a configuration object
                was loaded.

        Raises:
            SearchAPIError: If no non-empty API key could be resolved.
        """
        configured_key = getattr(api_config, 'SERPER_API_KEY', '') if api_config else ''
        self.api_key = api_key or configured_key
        self.base_url = "https://google.serper.dev"
        self.logger = setup_logger("serper_api")

        if not self.api_key:
            raise SearchAPIError("Clé API Serper manquante")

    async def search(
        self,
        query: str,
        max_results: int = 5,
        country: str = "fr",
        language: str = "fr",
        search_type: str = "search",
        **kwargs
    ) -> List[SearchResult]:
        """
        Search with the Serper API.

        Args:
            query: Search query.
            max_results: Number of results requested (capped at 100).
            country: Country code (e.g. "fr", "us").
            language: Language code (e.g. "fr", "en").
            search_type: Kind of search ("search", "news", "images"); also
                used as the endpoint path.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            List of parsed results.

        Raises:
            SearchAPIError: On a non-200 response, a timeout, or an aiohttp
                client error.
        """
        self.logger.info(f"Recherche Serper: '{query}' (max: {max_results})")

        payload = {
            "q": query,
            "num": min(max_results, 100),
            "gl": country,
            "hl": language,
        }

        headers = {
            "X-API-KEY": self.api_key,
            "Content-Type": "application/json",
        }

        endpoint = f"{self.base_url}/{search_type}"

        async with aiohttp.ClientSession() as session:
            try:
                async with session.post(
                    endpoint,
                    json=payload,
                    headers=headers,
                    timeout=aiohttp.ClientTimeout(total=30),
                ) as response:

                    if response.status != 200:
                        error_text = await response.text()
                        raise SearchAPIError(f"Erreur Serper {response.status}: {error_text}")

                    data = await response.json()
                    return self._parse_serper_results(data, search_type)

            # aiohttp.ClientTimeout is a configuration object, not an exception;
            # timeouts surface as asyncio.TimeoutError. This clause must come
            # first because aiohttp's timeout errors can also be ClientError.
            except asyncio.TimeoutError as e:
                raise SearchAPIError("Timeout lors de la requête Serper") from e
            except aiohttp.ClientError as e:
                raise SearchAPIError(f"Erreur de connexion Serper: {str(e)}") from e

    def _parse_serper_results(self, data: Dict[str, Any], search_type: str) -> List[SearchResult]:
        """
        Convert a Serper response payload into SearchResult objects.

        Items that cannot be converted are logged and skipped.

        Args:
            data: Decoded JSON response.
            search_type: Search type used for the request; selects which
                response key the items are read from.

        Returns:
            List of successfully parsed results.
        """
        results = []

        items_key = self._RESULT_KEYS.get(search_type, "images")
        items = data.get(items_key, [])

        for item in items:
            try:
                published_date = None
                if "date" in item:
                    try:
                        published_date = datetime.fromisoformat(item["date"])
                    except (TypeError, ValueError):
                        # Values that are not ISO-8601 strings are left as None.
                        pass

                result = SearchResult(
                    title=item.get("title", ""),
                    url=item.get("link", ""),
                    snippet=item.get("snippet", ""),
                    published_date=published_date,
                    source=item.get("source", ""),
                    # NOTE(review): position / 100 gives larger scores to larger
                    # positions — confirm this direction is intended.
                    score=item.get("position", 0) / 100.0,
                )
                results.append(result)

            except Exception as e:
                # Deliberate best-effort: one malformed item must not discard the rest.
                self.logger.warning(f"Erreur parsing résultat Serper: {e}")
                continue

        self.logger.info(f"Serper: {len(results)} résultats parsés")
        return results
|
|
|
|
class SearchAPIManager:
    """
    Registry of the available search API clients.

    Lets callers pick a preferred API and falls back to the others when it
    fails or returns nothing.
    """

    def __init__(self):
        """
        Register every client whose API key is present in the configuration.

        Raises:
            SearchAPIError: If no client could be registered.
        """
        self.logger = setup_logger("search_manager")
        self.apis = {}

        self._try_register("tavily", 'TAVILY_API_KEY', "Tavily", lambda: TavilySearchAPI())
        self._try_register("serper", 'SERPER_API_KEY', "Serper", lambda: SerperSearchAPI())

        if len(self.apis) == 0:
            raise SearchAPIError("Aucune API de recherche disponible")

    def _try_register(self, name, key_attr, label, factory):
        """Register one client under ``name`` when its key is configured; log and go on if that fails."""
        try:
            if api_config and getattr(api_config, key_attr, ''):
                self.apis[name] = factory()
                self.logger.info(f"API {label} initialisée")
        except Exception as init_error:
            self.logger.warning(f"Impossible d'initialiser {label}: {init_error}")

    async def search(
        self,
        query: str,
        max_results: int = 5,
        preferred_api: str = "tavily",
        **kwargs
    ) -> List[SearchResult]:
        """
        Run a search, falling back from one API to the next.

        Args:
            query: Search query.
            max_results: Number of results.
            preferred_api: API tried first ("tavily" or "serper").
            **kwargs: Forwarded unchanged to each client's ``search``.

        Returns:
            Results from the first API that returns a non-empty list.

        Raises:
            SearchAPIError: If every API failed or returned nothing.
        """
        # Preferred API first, then the remaining ones in registration order.
        candidates = [preferred_api]
        candidates.extend(name for name in self.apis if name != preferred_api)

        for candidate in candidates:
            if candidate not in self.apis:
                continue

            try:
                self.logger.info(f"Tentative de recherche avec {candidate}")
                found = await self.apis[candidate].search(query, max_results, **kwargs)
            except Exception as search_error:
                self.logger.warning(f"Erreur avec {candidate}: {search_error}")
                continue

            if not found:
                self.logger.warning(f"Aucun résultat avec {candidate}")
                continue

            self.logger.info(f"Recherche réussie avec {candidate}: {len(found)} résultats")
            return found

        raise SearchAPIError(f"Échec de recherche avec toutes les APIs pour: {query}")

    def get_available_apis(self) -> List[str]:
        """Return the names of the registered APIs."""
        return [name for name in self.apis]

    def is_api_available(self, api_name: str) -> bool:
        """Tell whether an API is registered under ``api_name``."""
        return api_name in self.apis