| """ |
| File: module_ocr.py |
| Description: Use a vision language model for Optical Character Recognition (OCR) tasks. |
| Author: Didier Guillevic |
| Date: 2025-04-06 |
| """ |
|
|
| import gradio as gr |
| import ocr |
| import pdf2image |
| import tempfile |
| import os |
|
|
| |
| |
| |
def process(input_file: str):
    """Run OCR on the given file and return the result.

    Args:
        input_file: Path of the file to process, as handed over by the
            ``gr.File`` input component. May be ``None`` (or empty) when
            no file has been selected.

    Returns:
        Whatever ``ocr.process_file`` returns for the file, or an empty
        string when no file was provided.
    """
    # The OCR button can be clicked before anything has been uploaded;
    # skip the OCR call instead of handing an empty value to the backend.
    if not input_file:
        return ""
    return ocr.process_file(input_file)
|
|
| |
| |
| |
# Extensions (lower-case, without the dot) that can be shown as-is.
_IMAGE_EXTENSIONS = frozenset({"jpg", "jpeg", "png", "gif", "bmp"})


def preview_file(file):
    """Build a preview for an uploaded file.

    Args:
        file: The uploaded file, either an object exposing the path as
            ``.name`` or a plain path string. ``None`` when nothing is
            selected.

    Returns:
        A ``(image_path, status_html)`` pair:

        * ``(None, None)`` when ``file`` is ``None``;
        * ``(file_path, None)`` for image files;
        * ``(png_path, status)`` for PDFs, where ``png_path`` is a
          temporary PNG rendering of the first page;
        * ``(None, message)`` when no preview can be produced.
    """
    # Local import: only this function needs HTML escaping.
    import html

    if file is None:
        return None, None

    # Accept both objects carrying the path in ``.name`` and plain strings.
    file_path = getattr(file, "name", file)

    # splitext only inspects the last path component, so a dot in a directory
    # name or a file without any extension is not mistaken for an extension.
    file_extension = os.path.splitext(file_path)[1].lower().lstrip(".")

    if file_extension in _IMAGE_EXTENSIONS:
        return file_path, None

    if file_extension != "pdf":
        # The extension comes from a user-supplied file name and ends up in
        # an HTML component, so escape it.
        shown_extension = html.escape(file_extension or "unknown")
        return None, f"<p>Preview not available for {shown_extension} files</p>"

    try:
        # Render only the first page; that is all the preview shows.
        pages = pdf2image.convert_from_path(
            file_path,
            first_page=1,
            last_page=1,
            dpi=150,
        )
        if not pages:
            return None, "<p>Could not convert PDF to image</p>"

        # delete=False: the PNG must outlive this function because only its
        # path is returned to the caller.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as tmp_file:
            pages[0].save(tmp_file.name, "PNG")

        display_name = html.escape(os.path.basename(file_path))
        return tmp_file.name, f"PDF Preview: {display_name}"
    except Exception as e:
        # UI boundary: report the failure in the status area instead of
        # raising. The message may contain file names, so escape it as well.
        return None, f"<p>Error previewing PDF: {html.escape(str(e))}</p>"
|
|
|
|
| |
| |
| |
# File types accepted by the upload widget.
_ACCEPTED_FILE_TYPES = [".pdf", ".jpg", ".jpeg", ".png", ".gif", ".bmp"]

# Sample documents listed in the "Examples" accordion (relative paths).
_EXAMPLE_FILES = [["./scanned_doc.pdf"], ["./passport_jp.png"]]


with gr.Blocks() as demo:

    # Upload widget with its preview and status, next to the OCR result.
    # NOTE(review): the nesting below was reconstructed from a source whose
    # indentation was lost - confirm the Textbox sits beside the Column.
    with gr.Row():
        with gr.Column():
            input_file = gr.File(
                file_types=_ACCEPTED_FILE_TYPES,
                label="Upload a PDF or image file",
                scale=1,
            )
            preview_image = gr.Image(show_label=True, label="Preview")
            preview_text = gr.HTML(label="Status")
        output_text = gr.Textbox(scale=2, label="OCR output")

    # Action buttons.
    with gr.Row():
        ocr_btn = gr.Button("OCR", variant="primary")
        clear_btn = gr.Button(value="Clear", variant="secondary")

    # Ready-made sample inputs; results are not cached.
    with gr.Accordion("Examples", open=False):
        examples = gr.Examples(
            examples=_EXAMPLE_FILES,
            inputs=[input_file],
            outputs=[output_text],
            fn=process,
            cache_examples=False,
            label="Examples",
        )

    # Refresh the preview image and status whenever the file input changes.
    input_file.change(
        preview_file,
        [input_file],
        [preview_image, preview_text],
    )

    # "OCR" runs the recognition on the current file; "Clear" empties the
    # file input and the OCR output.
    ocr_btn.click(process, [input_file], [output_text])
    clear_btn.click(lambda: (None, ''), [], [input_file, output_text])


if __name__ == '__main__':
    demo.launch()
|
|