# FinancialPlatform/app/services/economic_calendar.py
# Dmitry Beresnev
# fix prediction market section, etc
# 650204f
"""
Economic Calendar Scraper - Investing.com
Scrapes upcoming economic events, indicators, and releases
No API key required - web scraping approach
"""
import hashlib
import logging
import re
from datetime import datetime, timedelta
from typing import List, Dict, Optional

import requests
from bs4 import BeautifulSoup
# Configure logging: basicConfig sets up the root logger at INFO level when this
# module is imported (it does nothing if the root logger already has handlers).
logging.basicConfig(level=logging.INFO)
# Module-level logger, named after this module, used by the service below.
logger = logging.getLogger(__name__)
class EconomicCalendarService:
    """
    Scrapes economic calendar data from Investing.com
    Focus: High and medium importance events

    Events are returned as dictionaries with the keys ``id``, ``title``,
    ``event_name``, ``event_date``, ``country``, ``category``,
    ``importance``, ``forecast``, ``previous``, ``actual``,
    ``time_to_event``, ``timestamp``, ``source``, ``url``, ``impact`` and
    ``sentiment``. When scraping fails or yields nothing, a fixed set of
    mock events with the same keys is returned instead.
    """

    CALENDAR_URL = 'https://www.investing.com/economic-calendar/'
    REQUEST_TIMEOUT = 10  # seconds
    MAX_ROWS = 50  # upper bound on table rows inspected per scrape

    # Ordered from least to most important; the position is the rank.
    IMPORTANCE_LEVELS = ('low', 'medium', 'high')

    # Checked in order, first match wins (so "Unemployment Rate" is
    # 'employment' even though it also contains 'rate').
    _CATEGORY_KEYWORDS = (
        ('inflation', ('cpi', 'inflation', 'pce', 'price')),
        ('employment', ('employment', 'jobs', 'unemployment', 'nfp', 'payroll')),
        ('gdp', ('gdp', 'growth')),
        ('central_bank', ('fed', 'fomc', 'ecb', 'rate', 'boe', 'boj')),
        ('pmi', ('pmi', 'manufacturing', 'services')),
    )

    # Characters dropped before converting a cell to float. Unit suffixes
    # are removed, not applied: "180K" parses as 180.0.
    _VALUE_NOISE = str.maketrans('', '', '%KMB,')

    # Format of the per-row ``data-event-datetime`` attribute.
    # NOTE(review): based on Investing.com's historical markup - confirm.
    _ROW_DATETIME_FORMAT = '%Y/%m/%d %H:%M:%S'

    def __init__(self):
        """Initialize scraper with a browser-like HTTP session."""
        self.session = requests.Session()
        # 'Accept-Encoding' is deliberately NOT overridden: requests only
        # advertises encodings it can decode. Forcing 'br' without a brotli
        # decoder installed can yield an undecoded body that cannot be parsed.
        self.session.headers.update({
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8',
            'Accept-Language': 'en-US,en;q=0.9',
            'Referer': 'https://www.google.com/',
            'DNT': '1',
            'Connection': 'keep-alive',
            'Upgrade-Insecure-Requests': '1',
            'Sec-Fetch-Dest': 'document',
            'Sec-Fetch-Mode': 'navigate',
            'Sec-Fetch-Site': 'none',
            'Cache-Control': 'max-age=0',
        })

    def get_upcoming_events(self, days_ahead: int = 7, min_importance: str = 'medium') -> List[Dict]:
        """
        Get upcoming economic events

        Args:
            days_ahead: Events further away than this many days are dropped.
            min_importance: Lowest importance to keep ('low', 'medium' or 'high').

        Returns:
            List of events in standardized format. Falls back to mock events
            when scraping raises or produces no events.
        """
        try:
            events = self._scrape_investing_com(days_ahead, min_importance)
        except Exception as e:
            logger.error("Error fetching economic calendar: %s", e)
            return self._get_mock_events()

        if events:
            logger.info("Scraped %d economic events from Investing.com", len(events))
            return events

        logger.warning("No events scraped - using mock data")
        return self._get_mock_events()

    def _scrape_investing_com(self, days_ahead: int, min_importance: str) -> List[Dict]:
        """
        Scrape economic calendar from Investing.com
        Note: This may be fragile and break if they change their HTML structure

        Returns:
            The parsed events that pass the importance/date filter, or an
            empty list when the request fails or the table is not found.
        """
        try:
            response = self.session.get(self.CALENDAR_URL, timeout=self.REQUEST_TIMEOUT)
            response.raise_for_status()
            soup = BeautifulSoup(response.content, 'html.parser')

            # Investing.com uses a table structure for the calendar
            calendar_table = soup.find('table', {'id': 'economicCalendarData'})
            if calendar_table is None:
                logger.warning("Could not find economic calendar table on Investing.com")
                return []

            events = []
            rows = calendar_table.find_all('tr', {'class': 'js-event-item'})
            for row in rows[:self.MAX_ROWS]:
                try:
                    event_data = self._parse_event_row(row)
                    if event_data and self._should_include_event(event_data, days_ahead, min_importance):
                        events.append(event_data)
                except Exception as e:
                    # One malformed row must not abort the whole scrape.
                    logger.debug("Error parsing event row: %s", e)
            return events
        except Exception as e:
            logger.error("Error scraping Investing.com: %s", e)
            return []

    def _parse_event_row(self, row) -> Optional[Dict]:
        """
        Parse a single event row from Investing.com table

        Returns:
            The event dictionary, or None when the row cannot be parsed.
        """
        try:
            time_elem = row.find('td', {'class': 'first left time'})
            time_str = time_elem.get_text(strip=True) if time_elem else ''

            country = self._extract_country(row.find('td', {'class': 'flagCur'}))

            # Importance is rendered as bull icons
            importance_elem = row.find('td', {'class': 'sentiment'})
            importance = self._parse_importance(importance_elem) if importance_elem else 'low'

            event_elem = row.find('td', {'class': 'left event'})
            event_name = event_elem.get_text(strip=True) if event_elem else ''

            actual = self._cell_value(row, 'eventActual_')
            forecast = self._cell_value(row, 'eventForecast_')
            previous = self._cell_value(row, 'eventPrevious_')

            # Prefer the row's full date-time attribute when present: the
            # time-only cell cannot tell an already released event from one
            # scheduled for tomorrow. Fall back to the time-cell heuristic.
            event_date = self._parse_row_datetime(row.get('data-event-datetime'))
            if event_date is None:
                event_date = self._parse_event_time(time_str)

            return {
                'id': self._stable_event_id(event_name, event_date, country),
                'title': f"{country} - {event_name}",
                'event_name': event_name,
                'event_date': event_date,
                'country': country,
                'category': self._categorize_event(event_name),
                'importance': importance,
                'forecast': forecast,
                'previous': previous,
                'actual': actual,
                'time_to_event': self._calculate_time_to_event(event_date),
                'timestamp': datetime.now(),
                'source': 'Investing.com',
                'url': self.CALENDAR_URL,
                'impact': importance,  # Map importance to impact
                'sentiment': self._determine_sentiment(actual, forecast, previous),
            }
        except Exception as e:
            logger.debug("Error parsing event row: %s", e)
            return None

    def _cell_value(self, row, id_prefix: str) -> Optional[float]:
        """Parse the numeric value of the cell whose id contains ``id_prefix``."""
        cell = row.find('td', {'id': re.compile(id_prefix)})
        return self._parse_value(cell.get_text(strip=True) if cell else '')

    @staticmethod
    def _extract_country(country_elem) -> str:
        """
        Read the country from the flag cell, defaulting to 'US'.

        The cell's own ``title`` is used when set; otherwise the first
        descendant carrying a ``title`` is used.
        NOTE(review): presumably the title sits on a nested flag element in
        the live markup - verify against the current page.
        """
        if country_elem is None:
            return 'US'
        own_title = country_elem.get('title')
        if own_title:
            return own_title
        titled_child = country_elem.find(attrs={'title': True})
        if titled_child is not None and titled_child.get('title'):
            return titled_child.get('title')
        return 'US'

    @staticmethod
    def _stable_event_id(event_name: str, event_date: datetime, country: str) -> int:
        """
        Derive a non-negative integer id (below 2**60) from the event identity.

        The built-in ``hash()`` of a string is salted per process, so it
        cannot serve as an id that must match across runs; a SHA-256 digest
        prefix is used instead.
        """
        key = f"{event_name}_{event_date}_{country}"
        digest = hashlib.sha256(key.encode('utf-8')).hexdigest()
        return int(digest[:15], 16)

    @classmethod
    def _parse_row_datetime(cls, raw) -> Optional[datetime]:
        """Parse a 'YYYY/MM/DD HH:MM:SS' attribute value; None if absent or malformed."""
        if not isinstance(raw, str) or not raw.strip():
            return None
        try:
            return datetime.strptime(raw.strip(), cls._ROW_DATETIME_FORMAT)
        except ValueError:
            return None

    def _parse_importance(self, importance_elem) -> str:
        """Parse importance from bull icons: 3+ is 'high', 2 is 'medium', else 'low'."""
        if not importance_elem:
            return 'low'
        num_bulls = len(importance_elem.find_all('i', {'class': 'grayFullBullishIcon'}))
        if num_bulls >= 3:
            return 'high'
        if num_bulls == 2:
            return 'medium'
        return 'low'

    def _parse_value(self, value_str: str) -> Optional[float]:
        """
        Parse numeric value from string

        '%', ',' and the K/M/B suffixes are removed (not scaled) before
        conversion. Returns None for empty, '-' or non-numeric input.
        """
        if not value_str or value_str == '-':
            return None
        try:
            return float(value_str.translate(self._VALUE_NOISE))
        except ValueError:
            return None

    def _parse_event_time(self, time_str: str) -> datetime:
        """
        Parse event time string to datetime

        Handles "HH:MM" and "All Day" (or blank, mapped to noon today).
        Only a time of day is available here, so a time that has already
        passed today is assumed to mean tomorrow. Unparseable input yields
        "two hours from now".
        """
        now = datetime.now()
        if not time_str or 'All Day' in time_str:
            return now.replace(hour=12, minute=0, second=0, microsecond=0)
        try:
            time_parts = time_str.split(':')
            hour = int(time_parts[0])
            minute = int(time_parts[1]) if len(time_parts) > 1 else 0
            event_time = now.replace(hour=hour, minute=minute, second=0, microsecond=0)
        except (ValueError, TypeError) as e:
            logger.debug("Error parsing time: %s", e)
            return now + timedelta(hours=2)
        if event_time < now:
            event_time += timedelta(days=1)
        return event_time

    def _calculate_time_to_event(self, event_date: datetime) -> str:
        """Calculate human-readable time until event ("In progress" once it is past)."""
        delta = event_date - datetime.now()
        if delta.total_seconds() < 0:
            return "In progress"
        hours, remainder = divmod(delta.seconds, 3600)
        minutes = remainder // 60
        if delta.days > 0:
            return f"in {delta.days}d {hours}h"
        if hours > 0:
            return f"in {hours}h {minutes}m"
        return f"in {minutes}m"

    def _categorize_event(self, event_name: str) -> str:
        """Categorize economic event by case-insensitive keyword match; 'other' if none."""
        event_lower = event_name.lower()
        for category, keywords in self._CATEGORY_KEYWORDS:
            if any(kw in event_lower for kw in keywords):
                return category
        return 'other'

    def _determine_sentiment(self, actual: Optional[float], forecast: Optional[float], previous: Optional[float]) -> str:
        """
        Determine sentiment based on actual vs forecast

        ``previous`` is accepted for interface compatibility but not used.
        """
        if actual is None or forecast is None:
            return 'neutral'
        if actual > forecast:
            return 'positive'  # Beat forecast
        if actual < forecast:
            return 'negative'  # Missed forecast
        return 'neutral'

    def _should_include_event(self, event: Dict, days_ahead: int, min_importance: str) -> bool:
        """
        Determine if event should be included

        Raises:
            ValueError: if ``min_importance`` or the event's importance is
                not one of 'low', 'medium', 'high'.
        """
        min_level = self.IMPORTANCE_LEVELS.index(min_importance)
        event_level = self.IMPORTANCE_LEVELS.index(event['importance'])
        if event_level < min_level:
            return False
        # Whole days until the event; past events give a negative count and
        # are therefore kept.
        days_until = (event['event_date'] - datetime.now()).days
        return days_until <= days_ahead

    def _get_mock_events(self) -> List[Dict]:
        """Mock economic events for development/testing"""
        now = datetime.now()
        # (title, event_name, offset from now, country, category,
        #  importance, forecast, previous, time_to_event)
        specs = [
            ('US - Consumer Price Index (CPI)', 'Consumer Price Index',
             timedelta(hours=2), 'US', 'inflation', 'high', 2.5, 2.3, 'in 2h 0m'),
            ('US - Non-Farm Payrolls (NFP)', 'Non-Farm Payrolls',
             timedelta(days=2, hours=8, minutes=30), 'US', 'employment', 'high', 180.0, 175.0, 'in 2d 8h'),
            ('EU - ECB Interest Rate Decision', 'ECB Interest Rate Decision',
             timedelta(days=3, hours=12), 'EU', 'central_bank', 'high', 3.75, 4.00, 'in 3d 12h'),
            ('US - GDP Growth Rate', 'GDP Growth Rate',
             timedelta(days=5, hours=8, minutes=30), 'US', 'gdp', 'high', 2.8, 2.5, 'in 5d 8h'),
            ('US - Manufacturing PMI', 'Manufacturing PMI',
             timedelta(days=1, hours=10), 'US', 'pmi', 'medium', 51.5, 50.8, 'in 1d 10h'),
        ]
        return [
            {
                'id': index,
                'title': title,
                'event_name': event_name,
                'event_date': now + offset,
                'country': country,
                'category': category,
                'importance': importance,
                'forecast': forecast,
                'previous': previous,
                'actual': None,
                'time_to_event': time_to_event,
                'timestamp': now,
                'source': 'Economic Calendar',
                'url': 'https://www.investing.com/economic-calendar/',
                'impact': importance,
                'sentiment': 'neutral',
            }
            for index, (title, event_name, offset, country, category,
                        importance, forecast, previous, time_to_event)
            in enumerate(specs, start=1)
        ]

    def get_todays_events(self) -> List[Dict]:
        """Get events happening today (those whose event date is today's date)."""
        today = datetime.now().date()
        return [
            event for event in self.get_upcoming_events(days_ahead=1)
            if event['event_date'].date() == today
        ]