# customeragent-api / server / tests / test_scrape.py
# Author: anasraza526 — commit ac90985 ("Clean deploy to Hugging Face")
import asyncio
from app.services.scraper import WebScraper
from app.core.database import SessionLocal
from app.models.website import Website, WebsiteContent
async def test_scrape(website_id: int = 8):
    """Scrape all pages of one stored website and persist the results.

    Looks up the ``Website`` row by primary key, scrapes every page via
    ``WebScraper``, prints a short report per page, then replaces any
    previously stored ``WebsiteContent`` rows for that site with the
    freshly scraped content.

    Args:
        website_id: Primary key of the ``Website`` row to scrape
            (defaults to 8, the id this manual test was written against).
    """
    db = SessionLocal()
    try:
        website = db.query(Website).filter(Website.id == website_id).first()
        if not website:
            print(f"Website {website_id} not found")
            return
        print(f"Scraping website: {website.url}")
        # WebScraper is an async context manager — presumably it owns an
        # HTTP client session opened on __aenter__ (TODO confirm).
        scraper = WebScraper()
        async with scraper:
            results = await scraper.scrape_website(website.url)
        print(f"\nScraped {len(results)} pages")
        # Save to database: clear existing content first so re-running the
        # test does not accumulate duplicate rows for the same site.
        db.query(WebsiteContent).filter(WebsiteContent.website_id == website_id).delete()
        for result in results:
            print(f"\n--- {result['url']} ---")
            print(f"Title: {result['title']}")
            print(f"Content length: {len(result['content'])}")
            print(f"Preview: {result['content'][:200]}...")
            content = WebsiteContent(
                website_id=website_id,
                page_url=result['url'],
                content=result['content']
            )
            db.add(content)
        # Single commit covers both the delete and all inserts, so the
        # replacement of the site's content is atomic.
        db.commit()
        print("\nContent saved to database")
    finally:
        # Always release the session — the original leaked it on every path.
        db.close()
# Script entry point: drive the async test to completion with asyncio.run
# when this file is executed directly (not on import).
if __name__ == "__main__":
    asyncio.run(test_scrape())