Álvaro Valenzuela Valdes committed on
Commit
7259e84
·
1 Parent(s): 4ceb9d2

feat: Live Compra Ágil web scraper integration for real-time discovery

Browse files
backend/app/routers/tenders.py CHANGED
@@ -59,3 +59,8 @@ def get_tenders_count(db: Session = Depends(get_db)):
59
  @router.post("/tenders/sync")
60
  async def manual_sync(keyword: Optional[str] = None, db: Session = Depends(get_db)):
61
  return await sync_tenders_to_db(db, keyword=keyword)
 
 
 
 
 
 
59
  @router.post("/tenders/sync")
60
  async def manual_sync(keyword: Optional[str] = None, db: Session = Depends(get_db)):
61
  return await sync_tenders_to_db(db, keyword=keyword)
62
+
63
@router.get("/tenders/scrape", response_model=List[Tender])
async def live_scrape(keyword: str):
    """Scrape Compra Ágil live for ``keyword`` and return the tenders found.

    The scraper is imported at call time rather than at module import time.
    """
    from app.services.scraper import scrape_compra_agil

    scraped_tenders = await scrape_compra_agil(keyword)
    return scraped_tenders
backend/app/services/scraper.py ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import httpx
2
+ from bs4 import BeautifulSoup
3
+ from typing import List
4
+ from app.schemas.tender import Tender
5
+ from datetime import datetime
6
+ import re
7
+
8
# Endpoint of the Compra Ágil search page.
_COMPRA_AGIL_SEARCH_URL = "https://buscador.mercadopublico.cl/compra-agil"

# Quotation codes look like "1234-56-COT26". The two trailing digits are
# presumably the year (TODO confirm), so any two digits are accepted instead
# of hard-coding "26".
_COT_CODE_RE = re.compile(r"[0-9]+-[0-9]+-COT[0-9]{2}")


def _find_code_element(card):
    """Return the element inside *card* that holds the tender code, or None.

    Class-based matches are preferred; otherwise the first ``<span>`` whose
    text contains a COT code is used. This replaces the deprecated,
    non-standard ``:contains()`` CSS pseudo-class with a plain Python filter.
    """
    code_elem = card.select_one('.code-tender, .code')
    if code_elem is not None:
        return code_elem
    return card.find(
        lambda tag: tag.name == "span" and bool(_COT_CODE_RE.search(tag.get_text()))
    )


async def scrape_compra_agil(keywords: str) -> List[Tender]:
    """
    Scrapes the Mercado Público Compra Ágil search results page.

    Args:
        keywords: Free-text search terms. They are URL-encoded before being
            placed in the query string, so spaces, accents and characters
            such as ``&`` or ``#`` cannot corrupt or inject query parameters.

    Returns:
        Up to 20 ``Tender`` objects built from the result cards. An empty
        list is returned when no cards are found or when any error occurs
        (errors are printed, never raised).
    """
    # Local import keeps this block self-contained (stdlib only).
    from urllib.parse import quote, urlencode

    query = urlencode(
        {"keywords": keywords, "status": "2", "order_by": "recent"},
        quote_via=quote,
    )
    url = f"{_COMPRA_AGIL_SEARCH_URL}?{query}"

    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
    }

    try:
        async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client:
            print(f"[Scraper] Navigating to: {url}")
            response = await client.get(url, headers=headers)
            response.raise_for_status()

        soup = BeautifulSoup(response.text, 'html.parser')

        # The selectors below are guesses at the page's markup and may need
        # updating if the site changes its class names.
        tenders = []
        cards = soup.select('.card-tender, .item-busqueda, .search-result-item')

        if not cards:
            # Fallback: only report how many COT codes appear in the page
            # text; a bare code is not enough to build a full Tender.
            codes = _COT_CODE_RE.findall(soup.get_text())
            print(f"[Scraper] Found {len(codes)} codes via regex fallback.")

        for card in cards[:20]:  # Limit for performance
            try:
                name_elem = card.select_one('.title-tender, h3, .name')
                code_elem = _find_code_element(card)
                buyer_elem = card.select_one('.buyer-name, .organismo')

                # A card without both a name and a code cannot be identified.
                if not name_elem or not code_elem:
                    continue

                name = name_elem.get_text(strip=True)
                code = code_elem.get_text(strip=True)
                buyer = buyer_elem.get_text(strip=True) if buyer_elem else "Unknown"

                tenders.append(Tender(
                    code=code,
                    name=name,
                    description=name,
                    buyer=buyer,
                    status="Publicada",
                    # NOTE(review): placeholder. The real closing date is not
                    # parsed from the page, so today's date is used.
                    closing_date=datetime.now().strftime("%Y-%m-%d"),
                    estimated_amount=0,
                    source="Mercado Público (Scraped)",
                    region="Nacional",
                    sector="Compra Ágil",
                    items=[],
                    attachments=[],
                ))
            except Exception as e:
                # One malformed card must not abort the whole scrape.
                print(f"Error parsing card: {e}")

        print(f"[Scraper] Successfully scraped {len(tenders)} tenders.")
        return tenders

    except Exception as e:
        # Best-effort boundary: network, HTTP or parsing failures yield an
        # empty result instead of failing the request.
        print(f"❌ Scraper error: {e}")
        return []
frontend/app/page.tsx CHANGED
@@ -114,9 +114,19 @@ export default function HomePage() {
114
  window.history.pushState({}, '', `?tab=tender_search&q=${encodeURIComponent(value)}`);
115
  };
116
 
117
- const handleSearch = async (params: { keyword?: string; buyer?: string; provider_code?: string; date?: string; skip?: number; limit?: number }) => {
118
- const results = await searchTenders(params);
119
- setTenders(results);
 
 
 
 
 
 
 
 
 
 
120
  };
121
 
122
  const handleProfileSave = async (profile: CompanyProfileType) => {
 
114
  window.history.pushState({}, '', `?tab=tender_search&q=${encodeURIComponent(value)}`);
115
  };
116
 
117
const handleSearch = async (params: { keyword?: string; buyer?: string; provider_code?: string; date?: string; skip?: number; limit?: number; isAgile?: boolean }) => {
  // Agile mode with a keyword goes through the live scraper; every other
  // case uses the regular tender search.
  const fetchResults = async (): Promise<Tender[]> => {
    if (!params.isAgile || !params.keyword) {
      return searchTenders(params);
    }
    const api = await import("../lib/api");
    return api.scrapeTenders(params.keyword);
  };

  try {
    const found: Tender[] = await fetchResults();
    setTenders(found);
  } catch (error) {
    // Failures are logged only; the previously shown tenders stay in place.
    console.error("Search error:", error);
  }
};
131
 
132
  const handleProfileSave = async (profile: CompanyProfileType) => {
frontend/components/TenderSearch.tsx CHANGED
@@ -7,7 +7,7 @@ import { Language, translations } from "../lib/translations";
7
 
8
  type Props = {
9
  tenders: Tender[];
10
- onSearch: (params: { keyword?: string; buyer?: string; provider_code?: string; date?: string; skip?: number; limit?: number }) => void;
11
  onAnalyze: (tender: Tender) => void;
12
  forceShowFollowed?: boolean;
13
  initialKeyword?: string;
@@ -83,7 +83,8 @@ export default function TenderSearch({ tenders, onSearch, onAnalyze, forceShowFo
83
  buyer: buyerCode,
84
  date,
85
  skip: 0,
86
- limit: itemsPerPage
 
87
  });
88
  } catch (error) {
89
  console.error(error);
 
7
 
8
  type Props = {
9
  tenders: Tender[];
10
+ onSearch: (params: { keyword?: string; buyer?: string; provider_code?: string; date?: string; skip?: number; limit?: number; isAgile?: boolean }) => void;
11
  onAnalyze: (tender: Tender) => void;
12
  forceShowFollowed?: boolean;
13
  initialKeyword?: string;
 
83
  buyer: buyerCode,
84
  date,
85
  skip: 0,
86
+ limit: itemsPerPage,
87
+ isAgile: isAgileMode
88
  });
89
  } catch (error) {
90
  console.error(error);
frontend/lib/api.ts CHANGED
@@ -114,3 +114,11 @@ export async function syncDatabase() {
114
  }
115
  return res.json();
116
  }
 
 
 
 
 
 
 
 
 
114
  }
115
  return res.json();
116
  }
117
+
118
/**
 * Ask the backend to run a live scrape for `keyword`.
 *
 * @param keyword Search term; it is URI-encoded before being sent.
 * @returns The JSON body of the response, typed as a list of tenders.
 * @throws Error when the response status is not OK.
 */
export async function scrapeTenders(keyword: string): Promise<Tender[]> {
  const encodedKeyword = encodeURIComponent(keyword);
  const endpoint = `${API_BASE}/api/tenders/scrape?keyword=${encodedKeyword}`;

  const response = await fetch(endpoint);
  if (response.ok) {
    return response.json();
  }
  throw new Error("Error scraping tenders");
}