""" Trends Client - PyTrends Wrapper Provides Google Trends and YouTube Trends data """ import logging from typing import Optional, List, Dict from pytrends.request import TrendReq import pandas as pd logger = logging.getLogger(__name__) class TrendsClient: """ Client for Google Trends data using pytrends. Features: - Trending Now (daily trends) - Related Topics - Related Queries - YouTube Trends """ # Category codes CATEGORIES = { "all": 0, "arts_entertainment": 3, "autos_vehicles": 47, "beauty_fitness": 44, "books_literature": 22, "business_industrial": 12, "computers_electronics": 5, "finance": 7, "food_drink": 71, "games": 8, "health": 45, "hobbies_leisure": 65, "home_garden": 11, "internet_telecom": 13, "jobs_education": 958, "law_government": 19, "news": 16, "online_communities": 299, "people_society": 14, "pets_animals": 66, "real_estate": 29, "reference": 533, "science": 174, "shopping": 18, "sports": 20, "travel": 67, } # Timeframe options TIMEFRAMES = { "now_1h": "now 1-H", "now_4h": "now 4-H", "now_1d": "now 1-d", "now_7d": "now 7-d", "today_1m": "today 1-m", "today_3m": "today 3-m", "today_12m": "today 12-m", "today_5y": "today 5-y", } def __init__(self, hl: str = "en-US", tz: int = 360): """ Initialize TrendsClient. Args: hl: Host language tz: Timezone offset """ self.hl = hl self.tz = tz self.pytrends = TrendReq(hl=hl, tz=tz) def get_trending_now( self, country: str = "united_states", limit: int = 20 ) -> List[Dict]: """ Get currently trending searches using Google Trends RSS feed. More reliable than pytrends API for daily trends. Args: country: Country code (e.g., 'united_states', 'bangladesh', 'india') limit: Number of results (default 20) Returns: List of trending topics with rank """ import requests import xml.etree.ElementTree as ET # Country code mapping for RSS country_codes = { "united_states": "US", "united_kingdom": "GB", "india": "IN", "bangladesh": "BD", "japan": "JP", "germany": "DE", "france": "FR", "brazil": "BR", "canada": "CA", "australia": "AU", } geo = country_codes.get(country, "US") try: # Google Trends RSS Feed URL rss_url = f"https://trends.google.com/trending/rss?geo={geo}" response = requests.get(rss_url, timeout=10) response.raise_for_status() # Parse XML root = ET.fromstring(response.content) results = [] items = root.findall('.//item') for i, item in enumerate(items[:limit]): title = item.find('title') traffic = item.find('{https://trends.google.com/trending/rss}approx_traffic') if title is not None: results.append({ "rank": i + 1, "topic": title.text, "country": country, "traffic": traffic.text if traffic is not None else "N/A" }) logger.info(f"Got {len(results)} trending topics for {country} via RSS") return results except Exception as e: logger.error(f"Error getting trending searches via RSS: {e}") # Fallback to pytrends try: df = self.pytrends.trending_searches(pn=country) results = [] for i, topic in enumerate(df[0].head(limit).tolist()): results.append({ "rank": i + 1, "topic": topic, "country": country, "traffic": "N/A" }) logger.info(f"Got {len(results)} trending topics via pytrends fallback") return results except Exception as e2: logger.error(f"Fallback also failed: {e2}") return [] def get_realtime_trends( self, country: str = "US", category: str = "all", limit: int = 20 ) -> List[Dict]: """ Get realtime trending stories. Args: country: Country code (US, BD, IN, etc.) category: Category name limit: Number of results Returns: List of trending stories """ try: cat_code = self.CATEGORIES.get(category, 0) df = self.pytrends.realtime_trending_searches(pn=country) results = [] if not df.empty: for i, row in df.head(limit).iterrows(): results.append({ "rank": i + 1, "title": row.get('title', ''), "entity_names": row.get('entityNames', []), "articles": row.get('articles', []) }) logger.info(f"Got {len(results)} realtime trends") return results except Exception as e: logger.error(f"Error getting realtime trends: {e}") return [] def get_related_topics( self, keyword: str, region: str = "", timeframe: str = "today 12-m", category: int = 0, search_type: str = "" ) -> Dict: """ Get related topics for a keyword. Args: keyword: Search keyword region: Region code (empty for worldwide) timeframe: Time range category: Category code search_type: 'youtube', 'news', 'images', 'froogle' or '' for web Returns: Dict with 'top' and 'rising' topics """ try: self.pytrends.build_payload( kw_list=[keyword], cat=category, timeframe=timeframe, geo=region, gprop=search_type ) data = self.pytrends.related_topics() result = { "keyword": keyword, "top": [], "rising": [] } if keyword in data: topic_data = data[keyword] # Top topics if 'top' in topic_data and topic_data['top'] is not None: top_df = topic_data['top'] for _, row in top_df.iterrows(): result["top"].append({ "topic": row.get('topic_title', ''), "type": row.get('topic_type', ''), "value": int(row.get('value', 0)) }) # Rising topics if 'rising' in topic_data and topic_data['rising'] is not None: rising_df = topic_data['rising'] for _, row in rising_df.iterrows(): result["rising"].append({ "topic": row.get('topic_title', ''), "type": row.get('topic_type', ''), "value": str(row.get('value', '')) }) # Sort by value result["top"] = sorted(result["top"], key=lambda x: x["value"], reverse=True) logger.info(f"Got {len(result['top'])} top and {len(result['rising'])} rising topics") return result except Exception as e: logger.error(f"Error getting related topics: {e}") return {"keyword": keyword, "top": [], "rising": [], "error": str(e)} def get_related_queries( self, keyword: str, region: str = "", timeframe: str = "today 12-m", category: int = 0, search_type: str = "" ) -> Dict: """ Get related queries for a keyword. Args: keyword: Search keyword region: Region code (empty for worldwide) timeframe: Time range category: Category code search_type: 'youtube', 'news', 'images', 'froogle' or '' for web Returns: Dict with 'top' and 'rising' queries """ try: self.pytrends.build_payload( kw_list=[keyword], cat=category, timeframe=timeframe, geo=region, gprop=search_type ) data = self.pytrends.related_queries() result = { "keyword": keyword, "top": [], "rising": [] } if keyword in data: query_data = data[keyword] # Top queries if 'top' in query_data and query_data['top'] is not None: top_df = query_data['top'] for _, row in top_df.iterrows(): result["top"].append({ "query": row.get('query', ''), "value": int(row.get('value', 0)) }) # Rising queries if 'rising' in query_data and query_data['rising'] is not None: rising_df = query_data['rising'] for _, row in rising_df.iterrows(): result["rising"].append({ "query": row.get('query', ''), "value": str(row.get('value', '')) }) # Sort by value result["top"] = sorted(result["top"], key=lambda x: x["value"], reverse=True) logger.info(f"Got {len(result['top'])} top and {len(result['rising'])} rising queries") return result except Exception as e: logger.error(f"Error getting related queries: {e}") return {"keyword": keyword, "top": [], "rising": [], "error": str(e)} def keyword_research( self, keyword: str, region: str = "", timeframe: str = "today 12-m", category: str = "all", search_type: str = "web" ) -> Dict: """ Complete keyword research - combines related topics and queries. Args: keyword: Search keyword region: Region code (empty for worldwide) timeframe: Time range key category: Category name search_type: 'web', 'youtube', 'news', 'images', 'shopping' Returns: Combined dict with topics and queries """ # Convert params cat_code = self.CATEGORIES.get(category, 0) tf = self.TIMEFRAMES.get(timeframe, "today 12-m") gprop = "" if search_type == "web" else search_type # Get both topics = self.get_related_topics(keyword, region, tf, cat_code, gprop) queries = self.get_related_queries(keyword, region, tf, cat_code, gprop) return { "keyword": keyword, "region": region if region else "worldwide", "timeframe": timeframe, "category": category, "search_type": search_type, "related_topics": topics, "related_queries": queries } def get_youtube_trends( self, keyword: str, region: str = "", timeframe: str = "today 12-m" ) -> Dict: """ Get YouTube-specific trends for a keyword. """ return self.keyword_research( keyword=keyword, region=region, timeframe=timeframe, category="all", search_type="youtube" )