| # Document Text Extraction using Small Language Model (SLM) | |
| # Core ML and NLP libraries | |
| torch>=2.0.0 | |
| transformers>=4.30.0 | |
| tokenizers>=0.13.0 | |
| datasets>=2.14.0 | |
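| # OCR and image processing (pytesseract is only a wrapper: the Tesseract OCR binary must be installed separately on the system) | |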
| pytesseract>=0.3.10 | |
| easyocr>=1.7.0 | |
| opencv-python>=4.8.0 | |
| Pillow>=10.0.0 | |
| # PDF and document processing | |
| PyMuPDF>=1.23.0 | |
| python-docx>=0.8.11 | |
| # Data processing and analysis | |
| pandas>=2.0.0 | |
| numpy>=1.24.0 | |
| scikit-learn>=1.3.0 | |
| # NER evaluation metrics | |
| seqeval>=1.2.2 | |
| # Visualization | |
| matplotlib>=3.7.0 | |
| seaborn>=0.12.0 | |
| # Web API | |
| fastapi>=0.100.0 | |
| uvicorn>=0.22.0 | |
| python-multipart>=0.0.6 | |
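| # Utility libraries (NOTE(review): pathlib2 is a backport of the standard-library pathlib and looks redundant on Python 3 -- confirm before removing) | |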
| pathlib2>=2.3.7 | |
| tqdm>=4.65.0 | |
| python-dotenv>=1.0.0 | |
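| # Development and testing (not needed at runtime; installed unconditionally while listed here -- consider moving to a separate requirements-dev.txt) | |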
| pytest>=7.4.0 | |
| black>=23.0.0 | |
| flake8>=6.0.0 | |
| jupyter>=1.0.0 | |
| ipykernel>=6.25.0 | |
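| # Optional: For GPU support (CUDA 11.8), install the CUDA builds from the PyTorch wheel index before installing this file: | |
| #   pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118 | |
| # (Specifiers such as torch>=2.0.0+cu118 are invalid: PEP 440 permits local version labels like +cu118 only with == or !=.) |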