import json
from datetime import datetime
from typing import List, Optional

import httpx

from app.schemas.tender import Tender


def _parse_amount(value) -> Optional[float]:
    """Convert the API's ``estimatedAmount`` value to a float, or ``None`` if unusable."""
    # Falsy values (missing key, null, 0, "") mean "no amount".
    if not value:
        return None
    try:
        return float(value)
    except (TypeError, ValueError):
        # One malformed amount must not discard every other real result.
        return None


async def scrape_compra_agil(keywords: str) -> List[Tender]:
    """
    Search Mercado Público Compra Ágil and map the results to ``Tender`` objects.

    Sends a GET request to the search endpoint with browser-like headers and
    converts each entry of the response's ``data`` list into a ``Tender``.

    Falls back to ``generate_synthetic_tenders(keywords)`` when:
      * the response status is not 200,
      * the response contains no items, or
      * any exception is raised while fetching or mapping.

    Args:
        keywords: Search terms forwarded as the ``keywords`` query parameter.

    Returns:
        The mapped tenders, the synthetic fallback result, or an empty list
        if the fallback itself fails.
    """
    # Imported at call time, as in the original code.
    from app.services.llm import generate_synthetic_tenders

    # Internal API endpoint
    url = "https://api.buscador.mercadopublico.cl/compra-agil"

    # Critical headers to mimic a real browser session
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36",
        "Accept": "application/json, text/plain, */*",
        "Origin": "https://buscador.mercadopublico.cl",
        "Referer": "https://buscador.mercadopublico.cl/",
        "Accept-Language": "es-ES,es;q=0.9,en;q=0.8",
        "X-Requested-With": "XMLHttpRequest",
        "Sec-Fetch-Dest": "empty",
        "Sec-Fetch-Mode": "cors",
        "Sec-Fetch-Site": "same-site",
    }

    # API parameters
    params = {
        "keywords": keywords,
        "status": "2",  # Published
        "order_by": "recent",
        "page_number": "1",
    }

    try:
        async with httpx.AsyncClient(timeout=15.0, follow_redirects=True) as client:
            print(f"[Scraper] 📡 Fetching real-time data for: {keywords}")
            response = await client.get(url, headers=headers, params=params)

            if response.status_code != 200:
                print(f"⚠️ API blocked (Status {response.status_code}). Activating Synthetic Fallback...")
                return generate_synthetic_tenders(keywords)

            raw_data = response.json()
            items = raw_data.get("data", [])

            if not items:
                print(f"ℹ️ No real results found for '{keywords}'. Using Synthetic Intelligence to find potential leads.")
                return generate_synthetic_tenders(keywords)

            tenders = []
            for item in items:
                # `or` (rather than a .get default) also covers keys that are
                # present but null in the JSON payload.
                code = item.get("externalCode") or str(item.get("id", ""))
                name = item.get("name") or "Licitación Compra Ágil"

                # Buyer may be a nested object or a flat `buyerName` field.
                buyer_info = item.get("buyer") or {}
                buyer_name = buyer_info.get("name") or item.get("buyerName") or "Organismo Público"

                # The closing date is passed through as received; today's date
                # is substituted when the API gives none.
                closing_date = item.get("endingDate") or datetime.now().strftime("%Y-%m-%d")

                tenders.append(Tender(
                    code=code,
                    name=name,
                    description=item.get("description") or name,
                    buyer=buyer_name,
                    status=item.get("statusName") or "Publicada",
                    closing_date=closing_date,
                    estimated_amount=_parse_amount(item.get("estimatedAmount")),
                    source="ChileCompra Real-Time",
                    region=item.get("regionName") or "Nacional",
                    sector="Compra Ágil",
                    items=[],
                    attachments=[],
                ))

            print(f"[Scraper] ✅ Success. Found {len(tenders)} real opportunities.")
            return tenders

    except Exception as e:
        # Deliberate best-effort boundary: any failure degrades to synthetic data.
        print(f"❌ Scraper failure: {e}. Activating emergency fallback.")
        try:
            return generate_synthetic_tenders(keywords)
        except Exception as fallback_error:
            # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
            # propagate; the failure is reported instead of silently dropped.
            print(f"❌ Emergency fallback failed: {fallback_error}")
            return []