| from transformers import ( |
| TrOCRConfig, |
| TrOCRProcessor, |
| TrOCRForCausalLM, |
| ViTConfig, |
| ViTModel, |
| VisionEncoderDecoderModel, |
| ) |
| import gradio as gr |
|
|
# The processor and the model are loaded from the same pretrained checkpoint,
# once, when this module is first executed.
_CHECKPOINT = "microsoft/trocr-base-handwritten"
processor = TrOCRProcessor.from_pretrained(_CHECKPOINT)
model = VisionEncoderDecoderModel.from_pretrained(_CHECKPOINT)
def ocr(image) -> str:
    """Transcribe the text in a single image with the module-level TrOCR model.

    Args:
        image: The image to read, in any form the module-level ``processor``
            accepts. NOTE(review): Gradio's ``"image"`` input presumably
            delivers a NumPy array, or ``None`` when nothing was uploaded;
            confirm against the Gradio version in use.

    Returns:
        The decoded text with special tokens stripped, or an empty string
        when ``image`` is ``None``.
    """
    # Nothing was supplied, so there is nothing to transcribe; skip the
    # processor and the model entirely instead of handing them ``None``.
    if image is None:
        return ""

    # Preprocess the image into PyTorch tensors (``return_tensors="pt"``).
    pixel_values = processor(image, return_tensors="pt").pixel_values
    generated_ids = model.generate(pixel_values)
    # ``batch_decode`` returns one string per generated sequence; a single
    # image was passed in, so the first entry is the result.
    return processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
| |
|
|
# Gradio UI: a single image input wired to ``ocr``, with a single text output.
demo = gr.Interface(
    fn=ocr,
    inputs="image",
    outputs=["text"],
)
# Start serving the interface.
demo.launch()