import asyncio
import os
import sys

# Make the `app` package importable when this script is run directly
# from inside the project tree.
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from app.services.scraper import WebScraper


async def run_test(urls):
    """Manually exercise the scraper against each URL.

    For every URL: report the detected website type, run a full scrape,
    and print the first two extracted content chunks as a sample.
    Errors are caught per-URL so one failure does not stop the run.
    """
    async with WebScraper() as scraper:
        for url in urls:
            print(f"\n{'='*20} Testing: {url} {'='*20}")
            try:
                # Classify the site before scraping it.
                site_type = await scraper.detect_website_type(url)
                print(f"Detected Type: {site_type}")

                chunks = await scraper.scrape_page(url)
                if not chunks:
                    print(f"āœ— Failed to extract content from {url}")
                    continue

                print(f"āœ“ Successfully extracted {len(chunks)} content chunks.")
                # Preview only the first two chunks, truncated to 300 chars.
                for idx, chunk in enumerate(chunks[:2]):
                    body = chunk['content']
                    if len(body) > 300:
                        snippet = body[:300] + "..."
                    else:
                        snippet = body
                    print(f"\n[Chunk {idx+1}] Title: {chunk.get('title')}")
                    print(f"Content: {snippet}")
            except Exception as e:
                print(f"āŒ Error during scrape of {url}: {e}")


if __name__ == "__main__":
    # Test URLs:
    # 1. React/JS based site
    # 2. Complex Medical Fact Sheet (Highly unstructured but detailed)
    test_urls = [
        "https://react.dev",
        "https://www.who.int/news-room/fact-sheets/detail/diabetes",
    ]
    print("šŸš€ Starting Advanced Scraping Test (JS + Complex HTML)...")
    asyncio.run(run_test(test_urls))
    print("\nāœ… Test Complete.")