# Spaces: Runtime error
| import asyncio | |
| from app.services.scraper import WebScraper | |
| from app.core.database import SessionLocal | |
| from app.models.website import Website, WebsiteContent | |
async def test_scrape(website_id: int = 8) -> None:
    """Scrape one stored website and replace its saved page content.

    Looks up the ``Website`` row whose id is ``website_id``, runs a
    ``WebScraper`` against its URL, prints a short summary of every scraped
    page, then replaces that website's ``WebsiteContent`` rows with the
    fresh results.

    Args:
        website_id: Id of the ``Website`` row to scrape. Defaults to 8, the
            value this script previously hard-coded.

    Returns:
        None. Progress and results are reported on stdout.
    """
    db = SessionLocal()
    try:
        website = db.query(Website).filter(Website.id == website_id).first()
        if not website:
            print(f"Website {website_id} not found")
            return

        print(f"Scraping website: {website.url}")
        scraper = WebScraper()
        # Only the scrape itself needs the scraper's context; it is released
        # before any database writes start.
        async with scraper:
            results = await scraper.scrape_website(website.url)
        print(f"\nScraped {len(results)} pages")

        # Save to database.
        # Clear existing content first. The delete and the inserts below are
        # all issued before the single commit at the end.
        db.query(WebsiteContent).filter(
            WebsiteContent.website_id == website_id
        ).delete()

        for result in results:
            print(f"\n--- {result['url']} ---")
            print(f"Title: {result['title']}")
            print(f"Content length: {len(result['content'])}")
            print(f"Preview: {result['content'][:200]}...")

            page_row = WebsiteContent(
                website_id=website_id,
                page_url=result['url'],
                content=result['content'],
            )
            db.add(page_row)

        db.commit()
        print("\nContent saved to database")
    finally:
        # Always release the session, including on the early "not found"
        # return and when scraping or committing raises.
        # NOTE(review): assumes SessionLocal() yields a SQLAlchemy-style
        # session exposing close() -- confirm against app.core.database.
        db.close()
if __name__ == "__main__":
    # Script entry point: build the coroutine, then run it to completion on
    # a fresh event loop.
    scrape_job = test_scrape()
    asyncio.run(scrape_job)