Spaces:
Runtime error
Runtime error
| import sys | |
| import os | |
| # Add parent directory to path | |
| current_dir = os.path.dirname(os.path.abspath(__file__)) | |
| parent_dir = os.path.dirname(current_dir) | |
| sys.path.append(parent_dir) | |
| from app.services.content_processor import SemanticChunker | |
def test_chunker():
    """Chunk a small inline HTML page and print a summary of every chunk.

    Builds a ``SemanticChunker``, calls ``chunk_document`` on the sample
    markup with the URL ``http://example.com``, then writes to stdout the
    number of chunks followed by each chunk's type, content hash, parent id,
    metadata and the first 50 characters of its text.

    This is a manual smoke check: it performs no assertions and returns
    nothing.
    """
    # The literal deliberately starts at column 0 so the markup handed to the
    # chunker carries no leading indentation.
    sample_html = """
<html>
<head>
<title>Test Page</title>
<meta name="description" content="This is a test description.">
</head>
<body>
<h1>Main Header</h1>
<p>Intro paragraph.</p>
<h2>Section 1: Details</h2>
<p>This is the first section content.</p>
<p>More details here.</p>
<h2>Section 2: Conclusion</h2>
<p>Final thoughts.</p>
</body>
</html>
"""
    results = SemanticChunker().chunk_document(sample_html, "http://example.com")

    print(f"Generated {len(results)} chunks:")
    for index, chunk in enumerate(results):
        print(f"--- Chunk {index} ({chunk.chunk_type}) ---")
        print(f"Hash: {chunk.content_hash}")
        print(f"Parent: {chunk.parent_id}")
        print(f"Metadata: {chunk.metadata}")
        # Only the leading 50 characters are shown; the ellipsis is always
        # appended, even when the text is shorter than that.
        preview = chunk.text[:50]
        print(f"Text Preview: {preview}...")
        print("")
if __name__ == "__main__":
    # Run the smoke check only when this file is executed as a script,
    # not when it is imported as a module.
    test_chunker()