Changed tesseract download path
Browse files
- Dockerfile +1 -5
- app/services/ocr_service.py +9 -4
Dockerfile
CHANGED
|
@@ -3,11 +3,7 @@ FROM python:3.11-slim
|
|
| 3 |
WORKDIR /app
|
| 4 |
|
| 5 |
# Install system dependencies
|
| 6 |
-
RUN apt-get update && apt-get install -y
|
| 7 |
-
tesseract-ocr \
|
| 8 |
-
libgl1 \
|
| 9 |
-
libglib2.0-0 \
|
| 10 |
-
&& rm -rf /var/lib/apt/lists/*
|
| 11 |
|
| 12 |
# Install Python dependencies
|
| 13 |
COPY requirements.txt .
|
|
|
|
| 3 |
WORKDIR /app
|
| 4 |
|
| 5 |
# Install system dependencies
|
| 6 |
+
RUN apt-get update && apt-get install -y tesseract-ocr
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
|
| 8 |
# Install Python dependencies
|
| 9 |
COPY requirements.txt .
|
app/services/ocr_service.py
CHANGED
|
@@ -1,15 +1,20 @@
|
|
| 1 |
import pytesseract
|
| 2 |
import re
|
| 3 |
from PIL import Image
|
|
|
|
|
|
|
| 4 |
|
| 5 |
-
from app.config import settings
|
| 6 |
-
|
| 7 |
-
pytesseract.pytesseract.tesseract_cmd = settings.TESSERACT_PATH
|
| 8 |
|
| 9 |
class OCRService:
|
| 10 |
|
| 11 |
def __init__(self):
|
| 12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
|
| 14 |
def extract(self, image: Image.Image) -> dict:
|
| 15 |
w, h = image.size
|
|
|
|
| 1 |
import pytesseract
|
| 2 |
import re
|
| 3 |
from PIL import Image
|
| 4 |
+
import sys
|
| 5 |
+
import os
|
| 6 |
|
|
|
|
|
|
|
|
|
|
| 7 |
|
| 8 |
class OCRService:
|
| 9 |
|
| 10 |
def __init__(self):
|
| 11 |
+
# Auto-detect tesseract path
|
| 12 |
+
if sys.platform.startswith("win"):
|
| 13 |
+
pytesseract.pytesseract.tesseract_cmd = os.getenv("TESSERACT_PATH", "C:/Program Files/Tesseract-OCR/tesseract.exe")
|
| 14 |
+
else:
|
| 15 |
+
# Linux / Hugging Face Spaces
|
| 16 |
+
pytesseract.pytesseract.tesseract_cmd = "/usr/bin/tesseract"
|
| 17 |
+
print(f"OCR service initialized. Using Tesseract at {pytesseract.pytesseract.tesseract_cmd}")
|
| 18 |
|
| 19 |
def extract(self, image: Image.Image) -> dict:
|
| 20 |
w, h = image.size
|