"""
Economic Calendar Scraper - Investing.com

Scrapes upcoming economic events, indicators, and releases.
No API key required - web scraping approach.
"""

import hashlib
import logging
import re
from datetime import datetime, timedelta
from typing import List, Dict, Optional

import requests
from bs4 import BeautifulSoup

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


class EconomicCalendarService:
    """
    Scrapes economic calendar data from Investing.com.

    Focus: high and medium importance events. Whenever scraping fails or
    yields nothing, ``get_upcoming_events`` falls back to a fixed set of
    mock events so callers always receive a list in the same format.
    """

    CALENDAR_URL = 'https://www.investing.com/economic-calendar/'

    # Ordered from least to most important; the position is the rank used
    # when filtering by ``min_importance``.
    _IMPORTANCE_LEVELS = ('low', 'medium', 'high')

    # Compiled once instead of once per table row.
    _ACTUAL_ID_RE = re.compile(r'eventActual_')
    _FORECAST_ID_RE = re.compile(r'eventForecast_')
    _PREVIOUS_ID_RE = re.compile(r'eventPrevious_')

    # Deletes '%', the K/M/B suffix letters and thousands separators in a
    # single pass before float conversion.
    _VALUE_STRIP_TABLE = str.maketrans('', '', '%KMB,')

    def __init__(self):
        """Initialize scraper with a session carrying browser-like headers."""
        self.session = requests.Session()
        self.session.headers.update({
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8',
            'Accept-Language': 'en-US,en;q=0.9',
            # 'br' is deliberately not advertised: Brotli responses are only
            # decoded when an optional Brotli package is installed, and an
            # undecoded body cannot be parsed as HTML.
            'Accept-Encoding': 'gzip, deflate',
            'Referer': 'https://www.google.com/',
            'DNT': '1',
            'Connection': 'keep-alive',
            'Upgrade-Insecure-Requests': '1',
            'Sec-Fetch-Dest': 'document',
            'Sec-Fetch-Mode': 'navigate',
            'Sec-Fetch-Site': 'none',
            'Cache-Control': 'max-age=0'
        })

    def get_upcoming_events(self, days_ahead: int = 7, min_importance: str = 'medium') -> List[Dict]:
        """
        Get upcoming economic events.

        Args:
            days_ahead: Events further away than this many days are dropped.
            min_importance: Lowest importance to keep ('low', 'medium' or
                'high').

        Returns:
            List of events in standardized format. Mock events are returned
            when scraping raises or produces no events.
        """
        try:
            # Try to scrape from Investing.com
            events = self._scrape_investing_com(days_ahead, min_importance)
        except Exception as e:
            logger.error("Error fetching economic calendar: %s", e)
            return self._get_mock_events()

        if events:
            logger.info("Scraped %d economic events from Investing.com", len(events))
            return events

        logger.warning("No events scraped - using mock data")
        return self._get_mock_events()

    def _scrape_investing_com(self, days_ahead: int, min_importance: str) -> List[Dict]:
        """
        Scrape economic calendar from Investing.com.

        Note: This may be fragile and break if they change their HTML structure.

        Returns:
            Parsed events that pass the importance/date filter, or an empty
            list if the request fails or the calendar table is not found.
        """
        try:
            response = self.session.get(self.CALENDAR_URL, timeout=10)
            response.raise_for_status()

            soup = BeautifulSoup(response.content, 'html.parser')

            # Investing.com uses a table structure for the calendar
            calendar_table = soup.find('table', {'id': 'economicCalendarData'})
            if not calendar_table:
                logger.warning("Could not find economic calendar table on Investing.com")
                return []

            events = []
            rows = calendar_table.find_all('tr', {'class': 'js-event-item'})
            for row in rows[:50]:  # Limit to 50 events
                try:
                    event_data = self._parse_event_row(row)
                    if event_data and self._should_include_event(event_data, days_ahead, min_importance):
                        events.append(event_data)
                except Exception as e:
                    # One malformed row must not abort the whole scrape.
                    logger.debug("Error parsing event row: %s", e)
                    continue

            return events

        except Exception as e:
            logger.error("Error scraping Investing.com: %s", e)
            return []

    @staticmethod
    def _stable_event_id(event_name: str, event_date: datetime, country: str) -> int:
        """
        Derive a deterministic integer id for an event.

        The built-in ``hash()`` of a string is salted per process, so it
        yields a different id for the same event on every run. A SHA-256
        digest is stable; it is truncated to a non-negative 63-bit integer.
        """
        key = f"{event_name}_{event_date}_{country}".encode('utf-8')
        digest = hashlib.sha256(key).digest()
        return int.from_bytes(digest[:8], 'big') >> 1

    def _parse_event_row(self, row) -> Optional[Dict]:
        """
        Parse a single event row from the Investing.com table.

        Returns:
            The event dictionary, or None if anything in the row could not
            be parsed.
        """
        try:
            # Extract timestamp
            timestamp_elem = row.find('td', {'class': 'first left time'})
            time_str = timestamp_elem.get_text(strip=True) if timestamp_elem else ''

            # Extract country
            country_elem = row.find('td', {'class': 'flagCur'})
            country = country_elem.get('title', 'US') if country_elem else 'US'

            # Extract importance (bull icons)
            importance_elem = row.find('td', {'class': 'sentiment'})
            importance = self._parse_importance(importance_elem) if importance_elem else 'low'

            # Extract event name
            event_elem = row.find('td', {'class': 'left event'})
            event_name = event_elem.get_text(strip=True) if event_elem else ''

            # Extract actual, forecast, previous values
            actual_elem = row.find('td', {'id': self._ACTUAL_ID_RE})
            forecast_elem = row.find('td', {'id': self._FORECAST_ID_RE})
            previous_elem = row.find('td', {'id': self._PREVIOUS_ID_RE})

            actual = self._parse_value(actual_elem.get_text(strip=True) if actual_elem else '')
            forecast = self._parse_value(forecast_elem.get_text(strip=True) if forecast_elem else '')
            previous = self._parse_value(previous_elem.get_text(strip=True) if previous_elem else '')

            # Create event dictionary
            event_date = self._parse_event_time(time_str)
            time_to_event = self._calculate_time_to_event(event_date)

            return {
                'id': self._stable_event_id(event_name, event_date, country),
                'title': f"{country} - {event_name}",
                'event_name': event_name,
                'event_date': event_date,
                'country': country,
                'category': self._categorize_event(event_name),
                'importance': importance,
                'forecast': forecast,
                'previous': previous,
                'actual': actual,
                'time_to_event': time_to_event,
                'timestamp': datetime.now(),
                'source': 'Investing.com',
                'url': 'https://www.investing.com/economic-calendar/',
                'impact': importance,  # Map importance to impact
                'sentiment': self._determine_sentiment(actual, forecast, previous)
            }

        except Exception as e:
            logger.debug("Error parsing event row: %s", e)
            return None

    def _parse_importance(self, importance_elem) -> str:
        """
        Parse importance from bull icons.

        Counts the ``<i class="grayFullBullishIcon">`` children: three or
        more is 'high', exactly two is 'medium', anything else is 'low'.
        """
        if not importance_elem:
            return 'low'

        # Investing.com uses bull icons (1-3 bulls)
        num_bulls = len(importance_elem.find_all('i', {'class': 'grayFullBullishIcon'}))

        if num_bulls >= 3:
            return 'high'
        if num_bulls == 2:
            return 'medium'
        return 'low'

    def _parse_value(self, value_str: str) -> Optional[float]:
        """
        Parse numeric value from string.

        '%' signs, the K/M/B suffix letters and commas are removed before
        conversion; the suffix is dropped, not applied as a multiplier
        (e.g. '180K' becomes 180.0).

        Returns:
            The parsed float, or None for empty input, '-' or anything that
            cannot be converted.
        """
        if not value_str or value_str == '-':
            return None

        try:
            return float(value_str.translate(self._VALUE_STRIP_TABLE))
        except (AttributeError, TypeError, ValueError):
            # Unparseable cell text (or a non-string input) means "no value".
            return None

    def _parse_event_time(self, time_str: str) -> datetime:
        """
        Parse event time string to datetime.

        Investing.com uses formats like "10:00" or "All Day". The date is
        assumed to be today (a real implementation would need date context);
        a time that has already passed today is moved to tomorrow.

        Returns:
            The event datetime. Empty input or "All Day" maps to noon today;
            an unparseable string maps to two hours from now.
        """
        # Single snapshot so the event time is built from, and compared
        # against, the same instant.
        now = datetime.now()
        try:
            if not time_str or 'All Day' in time_str:
                # Default to noon today
                return now.replace(hour=12, minute=0, second=0, microsecond=0)

            time_parts = time_str.split(':')
            hour = int(time_parts[0])
            minute = int(time_parts[1]) if len(time_parts) > 1 else 0

            event_time = now.replace(hour=hour, minute=minute, second=0, microsecond=0)

            # If time has passed today, assume it's tomorrow
            if event_time < now:
                event_time += timedelta(days=1)

            return event_time

        except (AttributeError, TypeError, ValueError) as e:
            logger.debug("Error parsing time: %s", e)
            return now + timedelta(hours=2)

    def _calculate_time_to_event(self, event_date: datetime) -> str:
        """
        Calculate human-readable time until event.

        Returns:
            "In progress" when the event time is already in the past,
            otherwise "in Xd Yh", "in Xh Ym" or "in Xm".
        """
        delta = event_date - datetime.now()

        if delta.total_seconds() < 0:
            return "In progress"

        days = delta.days
        hours, remainder = divmod(delta.seconds, 3600)
        minutes = remainder // 60

        if days > 0:
            return f"in {days}d {hours}h"
        if hours > 0:
            return f"in {hours}h {minutes}m"
        return f"in {minutes}m"

    def _categorize_event(self, event_name: str) -> str:
        """
        Categorize economic event by keyword.

        Keywords are matched as case-insensitive substrings and the groups
        are checked in order, so the first matching category wins.
        """
        event_lower = event_name.lower()

        categories = (
            ('inflation', ('cpi', 'inflation', 'pce', 'price')),
            ('employment', ('employment', 'jobs', 'unemployment', 'nfp', 'payroll')),
            ('gdp', ('gdp', 'growth')),
            ('central_bank', ('fed', 'fomc', 'ecb', 'rate', 'boe', 'boj')),
            ('pmi', ('pmi', 'manufacturing', 'services')),
        )
        for category, keywords in categories:
            if any(kw in event_lower for kw in keywords):
                return category
        return 'other'

    def _determine_sentiment(self, actual: Optional[float], forecast: Optional[float], previous: Optional[float]) -> str:
        """
        Determine sentiment based on actual vs forecast.

        ``previous`` is accepted but not used in the comparison.

        Returns:
            'positive' if actual beat forecast, 'negative' if it missed,
            'neutral' if they are equal or either value is missing.
        """
        if actual is None or forecast is None:
            return 'neutral'

        if actual > forecast:
            return 'positive'  # Beat forecast
        if actual < forecast:
            return 'negative'  # Missed forecast
        return 'neutral'

    def _should_include_event(self, event: Dict, days_ahead: int, min_importance: str) -> bool:
        """
        Determine if event should be included.

        Raises:
            ValueError: If ``min_importance`` or the event's importance is
                not one of 'low', 'medium', 'high'.
        """
        # Filter by importance
        levels = self._IMPORTANCE_LEVELS
        if levels.index(event['importance']) < levels.index(min_importance):
            return False

        # Filter by date range. Compare the full timedelta rather than its
        # truncated ``.days`` so events more than ``days_ahead`` days away
        # (by any number of hours) are excluded. Past events still pass.
        time_until = event['event_date'] - datetime.now()
        return time_until <= timedelta(days=days_ahead)

    def _get_mock_events(self) -> List[Dict]:
        """Mock economic events for development/testing"""
        now = datetime.now()

        return [
            {
                'id': 1,
                'title': 'US - Consumer Price Index (CPI)',
                'event_name': 'Consumer Price Index',
                'event_date': now + timedelta(hours=2),
                'country': 'US',
                'category': 'inflation',
                'importance': 'high',
                'forecast': 2.5,
                'previous': 2.3,
                'actual': None,
                'time_to_event': 'in 2h 0m',
                'timestamp': now,
                'source': 'Economic Calendar',
                'url': 'https://www.investing.com/economic-calendar/',
                'impact': 'high',
                'sentiment': 'neutral'
            },
            {
                'id': 2,
                'title': 'US - Non-Farm Payrolls (NFP)',
                'event_name': 'Non-Farm Payrolls',
                'event_date': now + timedelta(days=2, hours=8, minutes=30),
                'country': 'US',
                'category': 'employment',
                'importance': 'high',
                'forecast': 180.0,
                'previous': 175.0,
                'actual': None,
                'time_to_event': 'in 2d 8h',
                'timestamp': now,
                'source': 'Economic Calendar',
                'url': 'https://www.investing.com/economic-calendar/',
                'impact': 'high',
                'sentiment': 'neutral'
            },
            {
                'id': 3,
                'title': 'EU - ECB Interest Rate Decision',
                'event_name': 'ECB Interest Rate Decision',
                'event_date': now + timedelta(days=3, hours=12),
                'country': 'EU',
                'category': 'central_bank',
                'importance': 'high',
                'forecast': 3.75,
                'previous': 4.00,
                'actual': None,
                'time_to_event': 'in 3d 12h',
                'timestamp': now,
                'source': 'Economic Calendar',
                'url': 'https://www.investing.com/economic-calendar/',
                'impact': 'high',
                'sentiment': 'neutral'
            },
            {
                'id': 4,
                'title': 'US - GDP Growth Rate',
                'event_name': 'GDP Growth Rate',
                'event_date': now + timedelta(days=5, hours=8, minutes=30),
                'country': 'US',
                'category': 'gdp',
                'importance': 'high',
                'forecast': 2.8,
                'previous': 2.5,
                'actual': None,
                'time_to_event': 'in 5d 8h',
                'timestamp': now,
                'source': 'Economic Calendar',
                'url': 'https://www.investing.com/economic-calendar/',
                'impact': 'high',
                'sentiment': 'neutral'
            },
            {
                'id': 5,
                'title': 'US - Manufacturing PMI',
                'event_name': 'Manufacturing PMI',
                'event_date': now + timedelta(days=1, hours=10),
                'country': 'US',
                'category': 'pmi',
                'importance': 'medium',
                'forecast': 51.5,
                'previous': 50.8,
                'actual': None,
                'time_to_event': 'in 1d 10h',
                'timestamp': now,
                'source': 'Economic Calendar',
                'url': 'https://www.investing.com/economic-calendar/',
                'impact': 'medium',
                'sentiment': 'neutral'
            }
        ]

    def get_todays_events(self) -> List[Dict]:
        """
        Get events happening today.

        Fetches events with ``days_ahead=1`` and keeps those whose
        ``event_date`` falls on today's calendar date.
        """
        today = datetime.now().date()
        return [
            event
            for event in self.get_upcoming_events(days_ahead=1)
            if event['event_date'].date() == today
        ]