import logging
from typing import Iterable, List, Optional

import requests
from bs4 import BeautifulSoup

_LOGGER = logging.getLogger(__name__)

# Browser-like User-Agent sent with the request.
_REQUEST_HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
}
_REQUEST_TIMEOUT_SECONDS = 10

# Candidate CSS selectors, tried in order; the bare 'h1' is the last resort.
_TITLE_SELECTORS = (
    'h1[data-testid="product-title"]',  # Amazon
    '.product-title',
    '.product-name',
    'h1.product_title',
    '.pdp-product-name',  # Flipkart
    '[data-automation-id="product-title"]',
    'h1',
)
_DESCRIPTION_SELECTORS = (
    '.product-description',
    '.product-details',
    '[data-testid="product-description"]',
    '.product-summary',
    '.pdp-product-description-content',
    '.feature-bullets ul',
    '.a-unordered-list.a-vertical',
)
_SPEC_SELECTORS = (
    '.product-specifications',
    '.tech-specs',
    '.product-details-table',
    '.specification-table',
    '[data-testid="specifications"]',
)

# Description fragments must be strictly longer than this to be kept.
_MIN_DESCRIPTION_CHARS = 20
_MAX_DESCRIPTION_PARTS = 3
_MAX_SPEC_PARTS = 2
_MAX_OUTPUT_CHARS = 2000

_FALLBACK_MESSAGE = (
    "Unable to extract product information from the provided URL. "
    "Please enter product description manually."
)


def _element_text(element) -> str:
    """Return the element's text with adjacent text nodes separated by a space."""
    # Without an explicit separator BeautifulSoup concatenates neighbouring
    # text nodes directly, fusing list items / table cells into one word.
    return element.get_text(" ", strip=True)


def _first_match_text(soup, selectors: Iterable[str]) -> Optional[str]:
    """Return the text of the first element matched by any selector, else None."""
    for selector in selectors:
        element = soup.select_one(selector)
        if element is not None:
            return _element_text(element)
    return None


def _collect_texts(soup, selectors: Iterable[str], min_chars: int = 0) -> List[str]:
    """Return texts of all matching elements that are longer than ``min_chars``."""
    return [
        text
        for selector in selectors
        for text in map(_element_text, soup.select(selector))
        if len(text) > min_chars
    ]


def _extract_title(soup) -> str:
    """Return the product title, falling back to <title> and then "Product"."""
    title = _first_match_text(soup, _TITLE_SELECTORS)
    if title:
        return title
    # No selector matched (or the match was empty): use the page title.
    title_tag = soup.find('title')
    return title_tag.get_text(strip=True) if title_tag else "Product"


def scrape_product_info(url: str) -> str:
    """
    Scrape product information from a given URL.

    Fetches the page, then looks for a title, description fragments and
    specification fragments using a fixed list of CSS selectors.

    Args:
        url: Address of the product page to fetch.

    Returns:
        A text summary of the form ``"Product: ...\\n\\nDescription: ...\\n\\n
        Specifications: ..."`` (description and specifications only when
        found), truncated to 2000 characters. If anything goes wrong while
        fetching or parsing, a fixed message asking the user to enter the
        description manually is returned instead; no exception propagates.
    """
    try:
        response = requests.get(
            url, headers=_REQUEST_HEADERS, timeout=_REQUEST_TIMEOUT_SECONDS
        )
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')

        title = _extract_title(soup)
        description_parts = _collect_texts(
            soup, _DESCRIPTION_SELECTORS, min_chars=_MIN_DESCRIPTION_CHARS
        )
        specs = _collect_texts(soup, _SPEC_SELECTORS)

        sections = [f"Product: {title}\n\n"]
        if description_parts:
            sections.append(
                "Description: "
                + " ".join(description_parts[:_MAX_DESCRIPTION_PARTS])
                + "\n\n"
            )
        if specs:
            sections.append(
                "Specifications: " + " ".join(specs[:_MAX_SPEC_PARTS])
            )
        return "".join(sections)[:_MAX_OUTPUT_CHARS]
    except Exception:
        # Deliberate best-effort boundary: callers always get a string back,
        # but the cause is logged rather than silently discarded.
        _LOGGER.warning(
            "Failed to scrape product information from %s", url, exc_info=True
        )
        return _FALLBACK_MESSAGE