| import gradio as gr |
| from helpers import OCRD |
|
|
|
|
def run_ocrd_pipeline(
    img_path,
    font_size='small',
    binarize_mode='detailed',
    min_pixel_sum=30,
    median_bounds=(None, None),
    status=gr.Progress(),
):
    """
    Run the complete OCRD pipeline on one image, from file loading to text overlay creation.

    The function chains the OCRD class methods that binarize the image, segment it into
    text lines, extract/filter/deskew those lines, run OCR on them and finally render the
    recognized text onto a generated overlay image.

    Parameters:
        img_path (str): Path to the image file; handed to the OCRD constructor.
        font_size (str or int, optional): Font size for the text overlay. The names 'small',
            'medium', 'large' and 'adjusted' are translated to 30, 50, 70 and -1 respectively.
            With 'adjusted', the font size is dynamically adjusted to fit the text within its
            bounding box width. Any other value is forwarded unchanged, except the string
            'default', which omits the argument so that create_text_overlay_image falls back
            to its own default.
        binarize_mode (str): Mode to be used for image binarization. Can be 'detailed', 'fast', or 'no'.
        min_pixel_sum (int, optional): Minimum sum of pixels to consider a text line segmentation
            for extraction. Pass the string 'default' to omit the argument so that
            extract_filter_and_deskew_textlines uses its own default. Set to None for no filtering.
        median_bounds (tuple, optional): Bounds to filter text line segmentations based on size
            relative to the median. Pass the string 'default' to omit the argument so that
            extract_filter_and_deskew_textlines uses its own default. Set to None for no filtering.
        status (gr.Progress, optional): Gradio progress tracker. It is called before each of the
            five steps with the fraction of steps already completed, and with 1 at the end.

    Returns:
        Image: The result of OCRD.create_text_overlay_image, i.e. an image with overlay text,
        where the text is extracted and recognized from the original image.

    This function handles:
        - Image binarization.
        - Text line segmentation.
        - Text line extraction and deskewing.
        - Optical character recognition on text lines.
        - Creating an image overlay with recognized text.
    """
    # Translate the named sizes offered in the UI into the numeric values the overlay
    # renderer receives; anything else (e.g. an explicit number or 'default') passes through.
    named_font_sizes = {'small': 30, 'medium': 50, 'large': 70, 'adjusted': -1}
    if isinstance(font_size, str):
        font_size = named_font_sizes.get(font_size, font_size)

    # Keyword arguments for extract_filter_and_deskew_textlines; the 'default' sentinel
    # means "do not pass the argument at all".
    efadt_kwargs = {}
    if min_pixel_sum != 'default':
        efadt_kwargs['min_pixel_sum'] = min_pixel_sum
    if median_bounds != 'default':
        efadt_kwargs['median_bounds'] = median_bounds

    # Keyword arguments for create_text_overlay_image, using the same sentinel convention.
    ctoi_kwargs = {}
    if font_size != 'default':
        ctoi_kwargs['font_size'] = font_size

    ocrd = OCRD(img_path)

    # Each step reports the share of the five steps that has already finished, so the
    # progress bar advances instead of staying at 0 until the job completes.
    status(0, desc='\nStep 1/5: Binarizing image...\n')
    binarized = ocrd.binarize_image(ocrd.image, binarize_mode)

    status(0.2, desc='\nStep 2/5: Segmenting textlines...\n')
    textline_segments = ocrd.segment_textlines(binarized)

    status(0.4, desc='\nStep 3/5: Extracting, filtering and de-skewing textlines...\n')
    image_scaled = ocrd.scale_image(ocrd.image)
    # Only index 0 of the last axis of the segmentation output is passed on.
    textline_images, _ = ocrd.extract_filter_and_deskew_textlines(
        image_scaled, textline_segments[..., 0], **efadt_kwargs
    )

    status(0.6, desc='\nStep 4/5: OCR on textlines...\n')
    textline_preds = ocrd.ocr_on_textlines(textline_images)

    status(0.8, desc='\nStep 5/5: Creating output overlay image...')
    # The overlay is created with the first two dimensions of the scaled image.
    img_gen = ocrd.create_text_overlay_image(
        textline_images,
        textline_preds,
        (image_scaled.shape[0], image_scaled.shape[1]),
        **ctoi_kwargs,
    )

    status(1, desc='\nJOB COMPLETED\n')
    return img_gen
|
|
|
|
# Example rows for the demo: each row pairs a bundled image path with None
# as the second value.
demo_data = [
    [image_path, None]
    for image_path in (
        './demo_data/act_image.jpg',
        './demo_data/newjersey2_image.jpg',
        './demo_data/washington_image.jpg',
    )
]
|
|
|
|
# HTML shown below the title of the demo page: what the pipeline does, its
# steps, and usage hints. Assembled line by line; the joined text is the same
# markup as a single multi-line string.
description = "\n".join((
    "<ul>",
    "<li>This interactive demo showcases an 'Optical Character Recognition Digitization' pipeline that processes images to recognize text.</li>",
    "<li>Steps include:",
    "<ol>",
    "<li>Image binarization</li>",
    "<li>Text line segmentation</li>",
    "<li>Text line extraction, filtering and deskewing</li>",
    "<li>OCR on textlines</li>",
    "<li>Printing recognized text on generated image for visualization</li>",
    "</ol>",
    "</li>",
    "<li>Optimized for <b>English</b>; other languages (e.g., German) may require OCR model fine-tuning.</li>",
    "<li>Uses free CPU-based compute, which is rather slow. Depending on the input image, a pipeline run can take over 10 minutes.</li>",
    "<li>For lengthy waits, look at these <b>pre-computed examples</b>: <a href='https://github.com/pluniak/ocrd/tree/main/data/demo_data'>https://github.com/pluniak/ocrd/tree/main/data/demo_data</a></li>",
    "<li>The demo is based on code from my GitHub repository: <a href='https://github.com/pluniak/ocrd'>https://github.com/pluniak/ocrd</a></li>",
    "<li>The demo is just a <b>first prototype</b>! OCR performance and computation speed should be optimized.</li>",
    "<li>Please <b>keep this page untouched</b> during the pipeline run to prevent errors.</li>",
    "</ul>",
))
|
|
# Build the web UI around run_ocrd_pipeline. Only two inputs are exposed (the
# image and the overlay font size), so the function's remaining parameters keep
# their defaults.
iface = gr.Interface(
    fn=run_ocrd_pipeline,
    title="OCRD Pipeline",
    description=description,
    inputs=[
        # The uploaded image reaches the pipeline as a file path.
        gr.Image(label='Input image', type='filepath'),
        gr.Dropdown(
            label='Output image font size',
            choices=['small', 'medium', 'large', 'adjusted'],
            value='small',
            info='"adjusted" will try to mimic font sizes from the input image',
        ),
    ],
    outputs=gr.Image(
        label='Output image: overlay with recognized text',
        type='pil',
        format='jpeg',
    ),
)

# Start serving the interface.
iface.launch()