"""Celery tasks for OCR-only worker queue (`ocr`).""" from __future__ import annotations import asyncio import logging from worker.celery_app import celery_app logger = logging.getLogger(__name__) @celery_app.task(name="worker.ocr_tasks.run_ocr_from_url") def run_ocr_from_url(image_url: str) -> str: """ Download image from public URL and run OCR models only (YOLO / PaddleOCR / Pix2Tex). LLM post-processing runs on the API via ``OCRAgent.refine_with_llm`` after the result is returned. """ from vision_ocr.pipeline import OcrVisionPipeline pipeline = OcrVisionPipeline() logger.info("[run_ocr_from_url] starting OCR for url host=%s", image_url.split("/")[2] if "/" in image_url else "?") text = asyncio.run(pipeline.process_url(image_url)) logger.info("[run_ocr_from_url] done, text_len=%s", len(text or "")) return text if text is not None else ""