Spaces:
Sleeping
Sleeping
| """Celery tasks for OCR-only worker queue (`ocr`).""" | |
| from __future__ import annotations | |
| import asyncio | |
| import logging | |
| from worker.celery_app import celery_app | |
| logger = logging.getLogger(__name__) | |
def run_ocr_from_url(image_url: str) -> str:
    """
    Download image from public URL and run OCR models only (YOLO / PaddleOCR / Pix2Tex).

    LLM post-processing runs on the API via ``OCRAgent.refine_with_llm`` after the
    result is returned.

    Args:
        image_url: URL of the image, passed unchanged to
            ``OcrVisionPipeline.process_url``.

    Returns:
        The text produced by the pipeline, or ``""`` when the pipeline returns ``None``.

    Raises:
        Any exception raised by ``OcrVisionPipeline.process_url`` propagates to the
        caller; nothing is caught here. ``asyncio.run`` itself raises ``RuntimeError``
        if invoked while another event loop is already running in this thread.
    """
    from urllib.parse import urlsplit

    # Function-scope import kept from the original (presumably to defer loading the
    # OCR stack until the task actually runs -- confirm before hoisting).
    from vision_ocr.pipeline import OcrVisionPipeline

    # Derive the host for logging only. Indexing ``image_url.split("/")[2]`` raised
    # IndexError for inputs such as "example.com/a.png" and logged a path segment
    # for other scheme-less inputs. ``hostname`` also drops any ``user:password@``
    # prefix so credentials never reach the logs.
    try:
        host = urlsplit(image_url).hostname or "?"
    except ValueError:
        # urlsplit rejects some malformed URLs (e.g. unbalanced IPv6 brackets);
        # a bad URL must not make the log statement itself fail.
        host = "?"

    pipeline = OcrVisionPipeline()
    logger.info("[run_ocr_from_url] starting OCR for url host=%s", host)

    # process_url is a coroutine function; drive it to completion on a fresh loop.
    text = asyncio.run(pipeline.process_url(image_url))

    logger.info("[run_ocr_from_url] done, text_len=%s", len(text or ""))
    # Normalise a missing result to an empty string so callers always get ``str``.
    return text if text is not None else ""