| from recap.ingestion.pdf import load_pdf | |
def test_extracts_pages_with_text_and_metadata():
    """Check the single page loaded from the tiny lab fixture.

    Asserts that exactly one page is returned, that it is numbered 1, and
    that its text contains the expected analyte name and value.
    """
    pages = load_pdf("tests/fixtures/tiny_lab.pdf")

    # Assert the count before indexing so an empty result fails on the
    # length check rather than with an IndexError.
    assert len(pages) == 1

    first_page = pages[0]
    assert first_page.page_number == 1
    assert "Creatinine" in first_page.text
    assert "1.4 mg/dL" in first_page.text
def test_pages_have_source_id():
    """Check that an explicit ``source_id`` appears on the first page."""
    pages = load_pdf(
        "tests/fixtures/tiny_lab.pdf",
        source_id="lab_2022-03-14.pdf",
    )

    # Guard the index so an empty result fails with a readable message.
    assert pages, "load_pdf returned no pages"
    assert pages[0].source_id == "lab_2022-03-14.pdf"
def test_default_source_id_is_filename():
    """Check that ``source_id`` is the file's base name when not supplied."""
    pages = load_pdf("tests/fixtures/tiny_lab.pdf")

    # Guard the index so an empty result fails with a readable message.
    assert pages, "load_pdf returned no pages"
    assert pages[0].source_id == "tiny_lab.pdf"