DoclingAIO / requirements.txt
thethinkmachine's picture
Upload 3 files
7df3afe verified
# ── Core ─────────────────────────────────────────────────────────────────────
docling>=2.5.0
docling-core>=2.0.0
# ── Streamlit ─────────────────────────────────────────────────────────────────
streamlit>=1.35.0
# ── OCR backends ─────────────────────────────────────────────────────────────
easyocr>=1.7.0 # pip-installable OCR (no system packages needed)
# pytesseract # Tesseract wrapper — uncomment if packages.txt has tesseract
# ── Document format support ───────────────────────────────────────────────────
python-docx>=1.1.0 # DOCX reading/writing
python-pptx>=0.6.23 # PPTX support
openpyxl>=3.1.2 # XLSX support
pandas>=2.0.0 # CSV / tabular
beautifulsoup4>=4.12.0 # HTML parsing
lxml>=5.0.0 # XML/HTML backend
# ── Image processing ──────────────────────────────────────────────────────────
Pillow>=10.0.0
opencv-python-headless>=4.9.0 # headless for server environments
# ── PDF ───────────────────────────────────────────────────────────────────────
pypdfium2>=4.0.0 # Fast PDF rendering backend used by Docling
pdfminer.six>=20221105
# ── ML / model support ────────────────────────────────────────────────────────
torch>=2.1.0
torchvision>=0.16.0
transformers>=4.40.0
huggingface-hub>=0.20.0
# ── Misc utilities ────────────────────────────────────────────────────────────
requests>=2.31.0
tqdm>=4.66.0