# Document Text Extraction using Small Language Model (SLM)
#
# Install with:  pip install -r requirements.txt
# Each requirement must sit on its own line; pip treats everything after a
# leading "#" as a comment, so section headers are kept on separate lines.

# Core ML and NLP libraries
torch>=2.0.0
transformers>=4.30.0
tokenizers>=0.13.0
datasets>=2.14.0

# OCR and image processing
# NOTE: pytesseract is only a Python wrapper; the Tesseract OCR binary must be
# installed separately on the system.
pytesseract>=0.3.10
easyocr>=1.7.0
opencv-python>=4.8.0
Pillow>=10.0.0

# PDF and document processing
PyMuPDF>=1.23.0
python-docx>=0.8.11

# Data processing and analysis
pandas>=2.0.0
numpy>=1.24.0
scikit-learn>=1.3.0

# NER evaluation metrics
seqeval>=1.2.2

# Visualization
matplotlib>=3.7.0
seaborn>=0.12.0

# Web API
fastapi>=0.100.0
uvicorn>=0.22.0
python-multipart>=0.0.6

# Utility libraries
# NOTE(review): pathlib2 is a backport of the standard-library pathlib; it is
# probably unnecessary on Python 3 -- confirm nothing imports it before removing.
pathlib2>=2.3.7
tqdm>=4.65.0
python-dotenv>=1.0.0

# Development and testing (optional)
pytest>=7.4.0
black>=23.0.0
flake8>=6.0.0
jupyter>=1.0.0
ipykernel>=6.25.0

# Optional: For GPU support (uncomment if you have CUDA)
# A local version label such as "+cu118" is not valid together with ">="
# (PEP 440), so select the CUDA build via the PyTorch wheel index instead, e.g.:
#   pip install -r requirements.txt --extra-index-url https://download.pytorch.org/whl/cu118
# torch>=2.0.0
# torchvision>=0.15.0
# torchaudio>=2.0.0