"""
Market Events Scraper - Earnings, Economic Indicators & Central Bank Events
Aggregates upcoming and recent market-moving events
Web scraping approach - no API keys required
"""

import calendar
import hashlib
import logging
import re
from concurrent.futures import ThreadPoolExecutor
from datetime import datetime, timedelta
from typing import List, Dict, Optional

import requests
import feedparser
from bs4 import BeautifulSoup

# Configure logging
# NOTE: basicConfig runs at import time and sets up the root logger at INFO
# level; it does nothing if the root logger already has handlers configured.
logging.basicConfig(level=logging.INFO)
# Module-level logger named after this module; used by the scraper methods below.
logger = logging.getLogger(__name__)


class MarketEventsScraper:
    """
    Scrapes market events from multiple sources
    Focus: Earnings, economic indicators, central bank announcements
    """

    # Central bank RSS feeds (already in use for news)
    # Keyed by a short bank id, which only appears in log messages.
    # Each entry provides:
    #   'name'   - display name; used as the event 'source' and as the title prefix
    #   'rss'    - feed URL read by _fetch_central_bank_events
    #   'weight' - copied verbatim into each event's 'source_weight'
    CENTRAL_BANKS = {
        'fed': {
            'name': 'Federal Reserve',
            'rss': 'https://www.federalreserve.gov/feeds/press_all.xml',
            'weight': 2.0
        },
        'ecb': {
            'name': 'European Central Bank',
            'rss': 'https://www.ecb.europa.eu/rss/press.xml',
            'weight': 2.0
        }
    }

    def __init__(self):
        """Create the shared HTTP session and give it browser-like default headers."""
        # Sent with every request made through self.session.
        browser_headers = {
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Accept-Language': 'en-US,en;q=0.9',
        }

        http_session = requests.Session()
        http_session.headers.update(browser_headers)
        self.session = http_session

    def scrape_market_events(self, max_items: int = 50, days_ahead: int = 14) -> List[Dict]:
        """
        Scrape market events from all sources.

        Runs the earnings, economic-indicator and central-bank fetchers in
        parallel, merges their results, removes duplicates, drops events
        scheduled beyond the look-ahead window and returns the remainder
        ordered by event date (high-impact events first when dates tie).

        Args:
            max_items: Maximum number of events to return.
            days_ahead: Look-ahead window in days. Events whose date is later
                than now + ``days_ahead`` are dropped; past events are kept.

        Returns:
            List of event dicts sorted by date and impact. When no source
            yields anything, the combined mock events are used instead and
            go through the same filtering, sorting and truncation.
        """
        fetchers = (
            (self._fetch_earnings, 'earnings'),
            (self._fetch_economic_indicators, 'indicators'),
            (self._fetch_central_bank_events, 'central_banks'),
        )

        all_events = []
        seen_keys = set()

        # Parallel fetching: one worker per source
        with ThreadPoolExecutor(max_workers=len(fetchers)) as executor:
            futures = [
                (executor.submit(fetch), source_type)
                for fetch, source_type in fetchers
            ]

            for future, source_type in futures:
                try:
                    events = future.result(timeout=35)

                    for event in events:
                        # Deduplicate by URL. Events without a URL are keyed
                        # by id instead, so that several URL-less events do
                        # not all collapse into a single entry.
                        url = event.get('url')
                        dedup_key = url if url else ('id', event.get('id'))
                        if dedup_key in seen_keys:
                            continue
                        seen_keys.add(dedup_key)
                        all_events.append(event)

                    logger.info(f"Fetched {len(events)} events from {source_type}")

                except Exception as e:
                    # Best effort: one failing source must not sink the others.
                    logger.error(f"Error fetching {source_type}: {e}")

        # If no events fetched, use mock data
        if not all_events:
            logger.warning("No market events fetched - using mock data")
            all_events = self._get_mock_events()

        def effective_date(event):
            # Prefer the scheduled date, fall back to the fetch timestamp;
            # undated events sort first rather than raising.
            return event.get('event_date') or event.get('timestamp') or datetime.min

        # Honour the look-ahead window (past/recent events are retained)
        cutoff = datetime.now() + timedelta(days=days_ahead)
        all_events = [event for event in all_events if effective_date(event) <= cutoff]

        # Sort by event date; on equal dates high-impact events come first
        all_events.sort(
            key=lambda event: (effective_date(event), event.get('impact') != 'high'),
        )

        return all_events[:max_items]

    def _fetch_earnings(self) -> List[Dict]:
        """
        Fetch the earnings calendar from Yahoo Finance by scraping its HTML.

        Looks for a <table> whose class matches "earnings" and turns up to 20
        data rows into event dicts. The first five cells of a row are read as
        ticker, company, EPS estimate, reported EPS and report time
        (assumed column order - verify against the live page layout).

        Returns:
            One event dict per parsed row. Falls back to
            ``_get_mock_earnings()`` when the request fails, the table is
            missing, or no row could be parsed.
        """
        try:
            url = 'https://finance.yahoo.com/calendar/earnings'
            response = self.session.get(url, timeout=10)
            response.raise_for_status()

            soup = BeautifulSoup(response.content, 'html.parser')

            # Yahoo Finance uses a table for earnings
            table = soup.find('table', {'class': re.compile('earnings')})

            if not table:
                logger.warning("Could not find earnings table on Yahoo Finance")
                return self._get_mock_earnings()

            events = []
            fetched_at = datetime.now()

            # Skip the header row, then take up to 20 data rows
            for row in table.find_all('tr')[1:21]:
                try:
                    cells = [cell.get_text(strip=True) for cell in row.find_all('td')]
                    if len(cells) < 4:
                        continue

                    ticker, company, eps_estimate, reported_eps = cells[:4]
                    if not ticker:
                        # Without a ticker neither the title nor the quote URL is usable
                        continue
                    company = company or ticker
                    event_time = cells[4] if len(cells) > 4 else 'N/A'

                    event_date = self._parse_earnings_date(event_time)
                    expected_value = self._parse_float(eps_estimate)
                    actual_value = self._parse_float(reported_eps)

                    # Only mention a reported figure when it is an actual number,
                    # not a placeholder such as '-' or 'N/A'
                    summary = f"Expected EPS: {eps_estimate}"
                    if actual_value is not None:
                        summary += f", Reported: {reported_eps}"

                    # Built-in hash() of a str is salted per process, so derive
                    # the id from a digest to keep it stable across runs
                    # (15 hex digits = 60 bits, fits a signed 64-bit integer).
                    id_source = f"earnings_{ticker}_{event_date}"
                    event_id = int(
                        hashlib.sha256(id_source.encode('utf-8')).hexdigest()[:15], 16
                    )

                    events.append({
                        'id': event_id,
                        'title': f"{company} ({ticker}) Earnings Report",
                        'summary': summary,
                        'source': 'Yahoo Finance',
                        'category': 'earnings',
                        'timestamp': fetched_at,
                        'event_date': event_date,
                        'url': f"https://finance.yahoo.com/quote/{ticker}",
                        'event_type': 'earnings',
                        'ticker': ticker,
                        'expected_value': expected_value,
                        'actual_value': actual_value,
                        'previous_value': None,
                        'impact': 'medium',  # Earnings are generally medium impact
                        'sentiment': self._determine_earnings_sentiment(eps_estimate, reported_eps),
                        'is_breaking': False,
                        'source_weight': 1.3,
                        'likes': 0,
                        'retweets': 0
                    })

                except Exception as e:
                    # A malformed row is skipped; the remaining rows are still parsed
                    logger.debug(f"Error parsing earnings row: {e}")
                    continue

            return events if events else self._get_mock_earnings()

        except Exception as e:
            logger.error(f"Error fetching earnings: {e}")
            return self._get_mock_earnings()

    def _fetch_economic_indicators(self) -> List[Dict]:
        """
        Return economic indicator events.

        No live source is wired in yet: the result is a fresh list holding
        the mock indicator events.
        """
        try:
            # FRED Economic Data releases (via RSS - if available)
            # For now, use mock data as FRED RSS is primarily historical data
            # Real implementation would scrape FRED release calendar
            return list(self._get_mock_indicators())

        except Exception as e:
            logger.error(f"Error fetching economic indicators: {e}")
            return self._get_mock_indicators()

    def _fetch_central_bank_events(self) -> List[Dict]:
        """
        Fetch central bank announcements from the RSS feeds in CENTRAL_BANKS.

        For each bank the first 10 feed entries are inspected; entries older
        than 7 days are skipped. Failures are logged and never raised: a
        broken feed or entry simply contributes no events.

        Returns:
            List of high-impact 'central_bank' event dicts (possibly empty).
        """
        events = []
        now = datetime.now()

        for bank_id, bank_info in self.CENTRAL_BANKS.items():
            try:
                # Download through the shared session so the request is bounded
                # by a timeout (feedparser.parse() offers no timeout argument),
                # then hand the raw document to feedparser.
                response = self.session.get(bank_info['rss'], timeout=10)
                response.raise_for_status()
                feed = feedparser.parse(response.content)

                for entry in feed.entries[:10]:
                    try:
                        # Parse timestamp. feedparser reports parsed dates in
                        # UTC; convert to naive local time so the value is
                        # comparable with datetime.now() used everywhere else.
                        published = getattr(entry, 'published_parsed', None)
                        if published:
                            timestamp = datetime.fromtimestamp(calendar.timegm(published))
                        else:
                            timestamp = now

                        age = now - timestamp

                        # Skip old events (>7 days)
                        if age.days > 7:
                            continue

                        title = entry.get('title', '')
                        summary = entry.get('summary', '') or title
                        url = entry.get('link', '')

                        # Clean HTML from summary
                        if summary:
                            summary = BeautifulSoup(summary, 'html.parser').get_text()
                            summary = summary[:200] + '...' if len(summary) > 200 else summary

                        # Built-in hash() of a str is salted per process, so
                        # derive the id from a digest to keep it stable across
                        # runs; fall back to bank + title when there is no link.
                        id_source = url or f"{bank_id}:{title}"
                        event_id = int(
                            hashlib.sha256(id_source.encode('utf-8')).hexdigest()[:15], 16
                        )

                        events.append({
                            'id': event_id,
                            'title': f"{bank_info['name']}: {title}",
                            'summary': summary,
                            'source': bank_info['name'],
                            'category': 'central_bank',
                            'timestamp': timestamp,
                            'event_date': timestamp,
                            'url': url,
                            'event_type': 'central_bank_announcement',
                            'ticker': None,
                            'expected_value': None,
                            'actual_value': None,
                            'previous_value': None,
                            'impact': 'high',  # Central bank events are high impact
                            'sentiment': 'neutral',
                            'is_breaking': age.days < 1,
                            'source_weight': bank_info['weight'],
                            'likes': 0,
                            'retweets': 0
                        })

                    except Exception as e:
                        logger.debug(f"Error parsing {bank_id} entry: {e}")
                        continue

            except Exception as e:
                logger.error(f"Error fetching {bank_id} RSS: {e}")

        return events

    def _parse_earnings_date(self, time_str: str) -> datetime:
        """
        Map an earnings time label to a concrete datetime.

        Labels containing 'Before Market' / 'BMO' resolve to today 07:00,
        labels containing 'After Market' / 'AMC' to today 16:00, and anything
        else to tomorrow 07:00.
        """
        current = datetime.now()

        # Yahoo Finance uses "Before Market Open", "After Market Close", or specific dates
        if 'Before Market' in time_str or 'BMO' in time_str:
            day_offset, report_hour = 0, 7
        elif 'After Market' in time_str or 'AMC' in time_str:
            day_offset, report_hour = 0, 16
        else:
            # Unrecognised label: default to tomorrow morning
            day_offset, report_hour = 1, 7

        target_day = current + timedelta(days=day_offset)
        return target_day.replace(hour=report_hour, minute=0, second=0, microsecond=0)

    def _parse_float(self, value_str: Optional[str]) -> Optional[float]:
        """
        Parse a float out of a display string such as '$2.10' or '1,234.5'.

        Args:
            value_str: Raw cell text; may be None or empty.

        Returns:
            The parsed number, or None when the input is empty, a placeholder
            ('N/A' or '-'), not a string, or not numeric once cleaned.
        """
        if not value_str or value_str in ('N/A', '-'):
            return None

        try:
            # Normalise the Unicode minus sign (U+2212) first; otherwise the
            # regex below would strip it and a negative value would silently
            # turn positive.
            normalized = value_str.replace('\u2212', '-')
            # Remove $ and other non-numeric characters except . and -
            cleaned = re.sub(r'[^\d.-]', '', normalized)
            return float(cleaned)
        except (AttributeError, TypeError, ValueError):
            # Non-string input, or nothing numeric left after cleaning
            return None

    def _determine_earnings_sentiment(self, expected: str, actual: Optional[str]) -> str:
        """
        Classify an earnings result as a beat, a miss or in-line.

        Returns 'positive' when the reported figure is above the estimate,
        'negative' when it is below, and 'neutral' when they are equal or
        either figure is missing or not numeric.
        """
        if actual and actual != 'N/A':
            estimate = self._parse_float(expected)
            reported = self._parse_float(actual)

            if estimate is not None and reported is not None:
                if reported > estimate:
                    return 'positive'  # Beat
                if reported < estimate:
                    return 'negative'  # Miss

        # Nothing reported, unparseable figures, or in-line
        return 'neutral'

    def _get_mock_earnings(self) -> List[Dict]:
        """Return two placeholder earnings events (AAPL and MSFT) dated relative to now."""
        generated_at = datetime.now()

        def upcoming_report(event_id, title, summary, days_out, url, ticker,
                            expected_eps, previous_eps):
            # The placeholders differ only in the per-company fields passed in;
            # each report is scheduled days_out days plus 16 hours from now.
            return {
                'id': event_id,
                'title': title,
                'summary': summary,
                'source': 'Yahoo Finance',
                'category': 'earnings',
                'timestamp': generated_at,
                'event_date': generated_at + timedelta(days=days_out, hours=16),
                'url': url,
                'event_type': 'earnings',
                'ticker': ticker,
                'expected_value': expected_eps,
                'actual_value': None,
                'previous_value': previous_eps,
                'impact': 'high',
                'sentiment': 'neutral',
                'is_breaking': False,
                'source_weight': 1.5,
                'likes': 0,
                'retweets': 0,
            }

        apple = upcoming_report(
            event_id=1,
            title='Apple Inc. (AAPL) Earnings Report',
            summary='Expected EPS: $2.10',
            days_out=2,
            url='https://finance.yahoo.com/quote/AAPL',
            ticker='AAPL',
            expected_eps=2.10,
            previous_eps=1.95,
        )
        microsoft = upcoming_report(
            event_id=2,
            title='Microsoft Corporation (MSFT) Earnings Report',
            summary='Expected EPS: $2.75',
            days_out=3,
            url='https://finance.yahoo.com/quote/MSFT',
            ticker='MSFT',
            expected_eps=2.75,
            previous_eps=2.50,
        )

        return [apple, microsoft]

    def _get_mock_indicators(self) -> List[Dict]:
        """Return a single placeholder economic-indicator event (US retail sales)."""
        generated_at = datetime.now()

        # The event is assembled from thematic groups; merge order fixes key order.
        description = {
            'id': 3,
            'title': 'US Retail Sales Data Release',
            'summary': 'Monthly retail sales figures',
            'source': 'US Census Bureau',
            'category': 'economic_indicator',
        }
        schedule = {
            'timestamp': generated_at,
            'event_date': generated_at + timedelta(days=1, hours=8, minutes=30),
        }
        reference = {
            'url': 'https://www.census.gov/retail/',
            'event_type': 'retail_sales',
            'ticker': None,
        }
        figures = {
            'expected_value': 0.5,
            'actual_value': None,
            'previous_value': 0.3,
        }
        ranking = {
            'impact': 'medium',
            'sentiment': 'neutral',
            'is_breaking': False,
            'source_weight': 1.6,
            'likes': 0,
            'retweets': 0,
        }

        return [{**description, **schedule, **reference, **figures, **ranking}]

    def _get_mock_events(self) -> List[Dict]:
        """
        Combined mock data.

        Returns the mock earnings events, then the mock indicator events,
        then one placeholder Federal Reserve announcement dated two hours ago.
        """
        # Capture the release time once so 'timestamp' and 'event_date' are
        # exactly equal instead of differing by the gap between two now() calls.
        released_at = datetime.now() - timedelta(hours=2)

        fomc_minutes = {
            'id': 4,
            'title': 'Federal Reserve: FOMC Meeting Minutes Released',
            'summary': 'Minutes from the latest Federal Open Market Committee meeting',
            'source': 'Federal Reserve',
            'category': 'central_bank',
            'timestamp': released_at,
            'event_date': released_at,
            'url': 'https://www.federalreserve.gov/',
            'event_type': 'central_bank_announcement',
            'ticker': None,
            'expected_value': None,
            'actual_value': None,
            'previous_value': None,
            'impact': 'high',
            'sentiment': 'neutral',
            'is_breaking': True,
            'source_weight': 2.0,
            'likes': 0,
            'retweets': 0
        }

        return self._get_mock_earnings() + self._get_mock_indicators() + [fomc_minutes]