"""
Economic Calendar Scraper - Investing.com
Scrapes upcoming economic events, indicators, and releases
No API key required - web scraping approach
"""

import logging
import re
import zlib
from datetime import datetime, timedelta
from typing import List, Dict, Optional

import requests
from bs4 import BeautifulSoup

# Configure root logging at import time (INFO and above) and expose the
# module-level logger used by everything below.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


class EconomicCalendarService:
    """Scrape economic calendar data from Investing.com.

    Focus: high and medium importance events. When scraping fails or yields
    nothing, the public methods fall back to built-in mock events so callers
    always receive a list in the same standardized dict format.

    NOTE(review): all datetimes are naive and built from ``datetime.now()``;
    the scraped clock times are presumably in whatever timezone the site
    renders for an anonymous visitor - confirm before relying on them.
    """

    CALENDAR_URL = 'https://www.investing.com/economic-calendar/'
    REQUEST_TIMEOUT = 10  # seconds, passed to requests
    MAX_ROWS = 50  # upper bound on table rows parsed per scrape
    IMPORTANCE_LEVELS = ('low', 'medium', 'high')  # ascending order
    DEFAULT_MIN_IMPORTANCE = 'medium'

    # Cell-id patterns, compiled once instead of once per row.
    _ACTUAL_ID = re.compile('eventActual_')
    _FORECAST_ID = re.compile('eventForecast_')
    _PREVIOUS_ID = re.compile('eventPrevious_')

    # Characters dropped from a value cell before float conversion.
    _VALUE_NOISE = str.maketrans('', '', '%KMBT,')

    # (category, keywords) pairs, checked in order; first match wins.
    _CATEGORY_KEYWORDS = (
        ('inflation', ('cpi', 'inflation', 'pce', 'price')),
        ('employment', ('employment', 'jobs', 'unemployment', 'nfp', 'payroll')),
        ('gdp', ('gdp', 'growth')),
        ('central_bank', ('fed', 'fomc', 'ecb', 'rate', 'boe', 'boj')),
        ('pmi', ('pmi', 'manufacturing', 'services')),
    )

    def __init__(self):
        """Create the HTTP session with browser-like default headers."""
        self.session = requests.Session()
        self.session.headers.update({
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8',
            'Accept-Language': 'en-US,en;q=0.9',
            # 'br' is deliberately not advertised: brotli bodies are only
            # decoded when an optional brotli package is installed, and an
            # undecoded body would make the calendar table unfindable.
            'Accept-Encoding': 'gzip, deflate',
            'Referer': 'https://www.google.com/',
            'DNT': '1',
            'Connection': 'keep-alive',
            'Upgrade-Insecure-Requests': '1',
            'Sec-Fetch-Dest': 'document',
            'Sec-Fetch-Mode': 'navigate',
            'Sec-Fetch-Site': 'none',
            'Cache-Control': 'max-age=0'
        })

    def get_upcoming_events(self, days_ahead: int = 7, min_importance: str = 'medium') -> List[Dict]:
        """Return upcoming economic events in the standardized dict format.

        Args:
            days_ahead: Only events at most this many days in the future
                are returned.
            min_importance: Lowest importance to include ('low', 'medium'
                or 'high', case-insensitive). Unknown values fall back to
                'medium' with a warning.

        Returns:
            Scraped events when any were found; otherwise the mock events,
            filtered by the same ``days_ahead`` / ``min_importance`` rules.
        """
        min_importance = self._normalize_importance(min_importance)
        try:
            events = self._scrape_investing_com(days_ahead, min_importance)
        except Exception as e:  # top-level boundary: never raise to callers
            logger.error("Error fetching economic calendar: %s", e)
            return self._fallback_events(days_ahead, min_importance)

        if events:
            logger.info("Scraped %d economic events from Investing.com", len(events))
            return events

        logger.warning("No events scraped - using mock data")
        return self._fallback_events(days_ahead, min_importance)

    def _normalize_importance(self, level) -> str:
        """Lower-case/strip an importance level; unknown values become the default."""
        normalized = str(level).strip().lower()
        if normalized in self.IMPORTANCE_LEVELS:
            return normalized
        logger.warning("Unknown importance level %r - using %r", level, self.DEFAULT_MIN_IMPORTANCE)
        return self.DEFAULT_MIN_IMPORTANCE

    def _fallback_events(self, days_ahead: int, min_importance: str) -> List[Dict]:
        """Return mock events that satisfy the caller's filters."""
        mock_events = self._get_mock_events()
        try:
            return [
                event for event in mock_events
                if self._should_include_event(event, days_ahead, min_importance)
            ]
        except (TypeError, ValueError):
            # Unusable filter arguments: stay best-effort and return everything.
            return mock_events

    def _scrape_investing_com(self, days_ahead: int, min_importance: str) -> List[Dict]:
        """Scrape the economic calendar table from Investing.com.

        Note: this is fragile and breaks if the site changes its HTML
        structure. Any failure is logged and reported as an empty list.
        """
        try:
            response = self.session.get(self.CALENDAR_URL, timeout=self.REQUEST_TIMEOUT)
            response.raise_for_status()

            soup = BeautifulSoup(response.content, 'html.parser')
            calendar_table = soup.find('table', {'id': 'economicCalendarData'})
            if calendar_table is None:
                logger.warning("Could not find economic calendar table on Investing.com")
                return []

            events = []
            rows = calendar_table.find_all('tr', {'class': 'js-event-item'})
            for row in rows[:self.MAX_ROWS]:
                try:
                    event_data = self._parse_event_row(row)
                    if event_data and self._should_include_event(event_data, days_ahead, min_importance):
                        events.append(event_data)
                except Exception as e:
                    # One malformed row must not abort the whole scrape.
                    logger.debug("Error parsing event row: %s", e)
            return events

        except Exception as e:
            logger.error("Error scraping Investing.com: %s", e)
            return []

    @staticmethod
    def _find_cell(row, *classes):
        """Return the first <td> in ``row`` carrying all ``classes``, else None.

        Matching on a subset of the class list tolerates extra classes on
        the cell, unlike an exact class-string comparison.
        """
        wanted = set(classes)
        for cell in row.find_all('td'):
            cell_classes = cell.get('class') or []
            if isinstance(cell_classes, str):
                cell_classes = cell_classes.split()
            if wanted.issubset(cell_classes):
                return cell
        return None

    @staticmethod
    def _cell_text(elem) -> str:
        """Return the stripped text of ``elem``, or '' when it is missing."""
        return elem.get_text(strip=True) if elem is not None else ''

    @staticmethod
    def _extract_country(country_elem) -> str:
        """Read the country from a flag cell's ``title``; default to 'US'."""
        if country_elem is None:
            return 'US'
        title = country_elem.get('title')
        if not title:
            # NOTE(review): the title may sit on a nested flag element rather
            # than on the cell itself - confirm against the live markup.
            flag = country_elem.find(attrs={'title': True})
            title = flag.get('title') if flag is not None else None
        return title or 'US'

    def _parse_event_row(self, row) -> Optional[Dict]:
        """Parse a single event row from the Investing.com table.

        Returns:
            The standardized event dict, or None when the row cannot be
            parsed (the error is logged at debug level).
        """
        try:
            time_str = self._cell_text(self._find_cell(row, 'first', 'left', 'time'))
            country = self._extract_country(self._find_cell(row, 'flagCur'))
            importance = self._parse_importance(self._find_cell(row, 'sentiment'))
            event_name = self._cell_text(self._find_cell(row, 'left', 'event'))

            actual = self._parse_value(self._cell_text(row.find('td', {'id': self._ACTUAL_ID})))
            forecast = self._parse_value(self._cell_text(row.find('td', {'id': self._FORECAST_ID})))
            previous = self._parse_value(self._cell_text(row.find('td', {'id': self._PREVIOUS_ID})))

            event_date = self._parse_event_time(time_str)

            # crc32 gives the same integer id in every process; the built-in
            # hash() of a str is salted per interpreter run.
            event_key = f"{event_name}_{event_date}_{country}"

            return {
                'id': zlib.crc32(event_key.encode('utf-8')),
                'title': f"{country} - {event_name}",
                'event_name': event_name,
                'event_date': event_date,
                'country': country,
                'category': self._categorize_event(event_name),
                'importance': importance,
                'forecast': forecast,
                'previous': previous,
                'actual': actual,
                'time_to_event': self._calculate_time_to_event(event_date),
                'timestamp': datetime.now(),
                'source': 'Investing.com',
                'url': self.CALENDAR_URL,
                'impact': importance,
                'sentiment': self._determine_sentiment(actual, forecast, previous)
            }

        except Exception as e:
            logger.debug("Error parsing event row: %s", e)
            return None

    def _parse_importance(self, importance_elem) -> str:
        """Map the number of filled bull icons to 'low', 'medium' or 'high'."""
        if not importance_elem:
            return 'low'

        num_bulls = len(importance_elem.find_all('i', {'class': 'grayFullBullishIcon'}))
        if num_bulls >= 3:
            return 'high'
        if num_bulls == 2:
            return 'medium'
        return 'low'

    def _parse_value(self, value_str: str) -> Optional[float]:
        """Parse a numeric cell such as '1.5%', '1,234' or '180K'.

        Percent signs, thousands separators and K/M/B/T suffixes are removed
        before conversion.

        NOTE(review): the magnitude suffix is dropped, not applied ('180K'
        parses as 180.0), so values with different suffixes are not directly
        comparable - confirm whether callers expect scaling.

        Returns:
            The parsed float, or None for empty, '-' or unparsable input.
        """
        if not value_str:
            return None
        try:
            cleaned = value_str.strip().translate(self._VALUE_NOISE)
            if cleaned in ('', '-'):
                return None
            return float(cleaned)
        except (AttributeError, ValueError):
            return None

    def _parse_event_time(self, time_str: str) -> datetime:
        """Convert a calendar time cell ('HH:MM' or 'All Day') to a datetime.

        * Empty or 'All Day' -> today at 12:00.
        * 'HH:MM' -> today at that time; if that moment has already passed
          it is rolled forward to tomorrow.
        * Anything unparsable -> two hours from now.
        """
        now = datetime.now()
        try:
            if not time_str or 'All Day' in time_str:
                return now.replace(hour=12, minute=0, second=0, microsecond=0)

            time_parts = time_str.split(':')
            hour = int(time_parts[0])
            minute = int(time_parts[1]) if len(time_parts) > 1 else 0

            event_time = now.replace(hour=hour, minute=minute, second=0, microsecond=0)
            # NOTE(review): a time already passed today is assumed to mean
            # tomorrow, so already-released events are dated a day late.
            if event_time < now:
                event_time += timedelta(days=1)
            return event_time

        except (AttributeError, TypeError, ValueError) as e:
            logger.debug("Error parsing time: %s", e)
            return now + timedelta(hours=2)

    def _calculate_time_to_event(self, event_date: datetime) -> str:
        """Return a human-readable countdown such as 'in 2d 3h' or 'in 45m'.

        Events whose time has already passed are reported as "In progress".
        """
        remaining = event_date - datetime.now()
        if remaining.total_seconds() < 0:
            return "In progress"

        hours, leftover_seconds = divmod(remaining.seconds, 3600)
        minutes = leftover_seconds // 60

        if remaining.days > 0:
            return f"in {remaining.days}d {hours}h"
        if hours > 0:
            return f"in {hours}h {minutes}m"
        return f"in {minutes}m"

    def _categorize_event(self, event_name: str) -> str:
        """Classify an event by case-insensitive keyword (substring) match.

        Categories are tried in the order of ``_CATEGORY_KEYWORDS``; the
        first one with a matching keyword wins, otherwise 'other'.
        """
        event_lower = event_name.lower()
        for category, keywords in self._CATEGORY_KEYWORDS:
            if any(kw in event_lower for kw in keywords):
                return category
        return 'other'

    def _determine_sentiment(self, actual: Optional[float], forecast: Optional[float], previous: Optional[float]) -> str:
        """Compare actual against forecast.

        Returns 'positive' when actual beats forecast, 'negative' when it
        falls short, and 'neutral' when they are equal or either is missing.
        ``previous`` is accepted for interface compatibility but not used.
        """
        if actual is None or forecast is None:
            return 'neutral'
        if actual > forecast:
            return 'positive'
        if actual < forecast:
            return 'negative'
        return 'neutral'

    def _should_include_event(self, event: Dict, days_ahead: int, min_importance: str) -> bool:
        """Decide whether an event passes the importance and horizon filters.

        Args:
            event: Event dict with 'importance' and 'event_date' keys.
            days_ahead: Maximum distance into the future, in days.
            min_importance: Minimum importance level (case-insensitive).

        Raises:
            ValueError: If either importance value is not a known level.
        """
        levels = self.IMPORTANCE_LEVELS
        min_level = levels.index(str(min_importance).strip().lower())
        event_level = levels.index(str(event['importance']).strip().lower())
        if event_level < min_level:
            return False

        # Compare the full timedelta: timedelta.days is floored and would
        # admit events up to almost a whole day beyond the horizon.
        time_until = event['event_date'] - datetime.now()
        return time_until <= timedelta(days=days_ahead)

    def _get_mock_events(self) -> List[Dict]:
        """Return fixed mock events for development/testing and as fallback.

        Event dates are offsets from the current time; the 'time_to_event'
        strings are fixed labels matching those offsets.
        """
        now = datetime.now()

        # (id, title, event_name, offset, country, category, importance,
        #  forecast, previous, time_to_event)
        specs = (
            (1, 'US - Consumer Price Index (CPI)', 'Consumer Price Index',
             timedelta(hours=2), 'US', 'inflation', 'high',
             2.5, 2.3, 'in 2h 0m'),
            (2, 'US - Non-Farm Payrolls (NFP)', 'Non-Farm Payrolls',
             timedelta(days=2, hours=8, minutes=30), 'US', 'employment', 'high',
             180.0, 175.0, 'in 2d 8h'),
            (3, 'EU - ECB Interest Rate Decision', 'ECB Interest Rate Decision',
             timedelta(days=3, hours=12), 'EU', 'central_bank', 'high',
             3.75, 4.00, 'in 3d 12h'),
            (4, 'US - GDP Growth Rate', 'GDP Growth Rate',
             timedelta(days=5, hours=8, minutes=30), 'US', 'gdp', 'high',
             2.8, 2.5, 'in 5d 8h'),
            (5, 'US - Manufacturing PMI', 'Manufacturing PMI',
             timedelta(days=1, hours=10), 'US', 'pmi', 'medium',
             51.5, 50.8, 'in 1d 10h'),
        )

        return [
            {
                'id': event_id,
                'title': title,
                'event_name': event_name,
                'event_date': now + offset,
                'country': country,
                'category': category,
                'importance': importance,
                'forecast': forecast,
                'previous': previous,
                'actual': None,
                'time_to_event': time_to_event,
                'timestamp': now,
                'source': 'Economic Calendar',
                'url': 'https://www.investing.com/economic-calendar/',
                'impact': importance,
                'sentiment': 'neutral'
            }
            for (event_id, title, event_name, offset, country, category,
                 importance, forecast, previous, time_to_event) in specs
        ]

    def get_todays_events(self) -> List[Dict]:
        """Return the upcoming events whose date is today's date."""
        today = datetime.now().date()
        return [
            event for event in self.get_upcoming_events(days_ahead=1)
            if event['event_date'].date() == today
        ]
