| """Alias entrypoint for Streamlit on Hugging Face Spaces. |
| This is a copy of app.py to match Spaces' default file naming. |
| """ |
|
|
| import streamlit as st |
| from PIL import Image |
| import fitz |
| import numpy as np |
| import tempfile |
| import os |
| import time |
| import io |
| import json |
| import torch |
| import cv2 |
|
|
| |
| import ocr_engines |
|
|
| |
# The LLM integration is optional: when `llm_processor` (or anything it
# imports) cannot be imported, the app keeps running in OCR-only mode and
# `llm_available` records that the LLM features must stay disabled.
try:
    import llm_processor
except ImportError:
    llm_available = False
else:
    llm_available = True
|
|
| |
# Make sure the output directory exists. `exist_ok=True` makes this a single
# idempotent call instead of a separate exists-check followed by a create,
# which could fail if the directory appears between the two steps.
os.makedirs("results", exist_ok=True)
|
|
| |
# Title displayed in the main area of the app.
st.title("OCRInsight")

# Heading for the sidebar, which hosts the settings widgets created further
# down in this script via `st.sidebar`.
st.sidebar.header("Settings")
|
|
|
|
| |
def save_text_to_file(attributes_of_output, all_ocr_text, filename):
    """Append one delimited result record to ``filename`` and report success.

    The record is framed by a 75-dash separator line and contains an
    "Attributes of Output" section followed by an "OCR Result" section.
    The file is opened in append mode with UTF-8 encoding, so records
    written earlier are kept.

    Args:
        attributes_of_output: Text written under the attributes heading.
        all_ocr_text: Text written under the OCR result heading.
        filename: Path of the file to append to; also shown in the
            Streamlit success message.
    """
    separator = "\n" + "-" * 75 + "\n"
    record_parts = (
        separator,
        "Attributes of Output:\n",
        attributes_of_output,
        "\nOCR Result:\n",
        all_ocr_text,
        separator,
    )
    with open(filename, mode="a", encoding="utf-8") as out_file:
        out_file.writelines(record_parts)
    st.success(f"{filename} saved successfully!")
|
|
|
|
| |
# --- Sidebar: processing options -------------------------------------------
device = st.sidebar.radio("Select Device", ["CPU", "GPU (CUDA)"])
save_output = st.sidebar.checkbox("Save Outputs")

# Display name -> language code handed to the OCR engines. The selectbox
# options are taken from the keys, so the menu follows the mapping's order.
language_codes = {
    "Türkçe": "tr",
    "English": "en",
    "Français": "fr",
    "Deutsch": "de",
    "Español": "es",
}
language = st.sidebar.selectbox("Select Language", list(language_codes))

# OCR engines to run; EasyOCR is preselected.
ocr_models = st.sidebar.multiselect(
    "Select OCR Models",
    ["EasyOCR", "DocTR", "Tesseract", "PaddleOCR"],
    ["EasyOCR"],
)

# "Only OCR Mode" means no LLM post-processing.
llm_model = st.sidebar.selectbox(
    "Select LLM Model", ["Only OCR Mode", "llama3.1", "llama3", "gemma2"]
)

if llm_model != "Only OCR Mode":
    if llm_available:
        # Extra inputs that only make sense when an LLM will be used.
        user_command = st.sidebar.text_input("Enter command:", "")
        task_type = st.sidebar.radio("Select task type:", ["Summarize", "Generate"])
    else:
        # An LLM was requested but cannot be used: warn and fall back.
        st.sidebar.warning(
            "LLM features are not available. Please install 'ollama' to enable LLM processing."
        )
        llm_model = "Only OCR Mode"

# Fall back to the CPU when CUDA was requested but torch cannot see a GPU.
if device == "GPU (CUDA)":
    if not torch.cuda.is_available():
        st.sidebar.warning("GPU (CUDA) not available. Switching to CPU.")
        device = "CPU"

# Build the reader objects for the selected engines, language and device.
ocr_readers = ocr_engines.initialize_ocr_models(
    ocr_models, language_codes[language], device
)
|
|
| |
def _load_page_images(uploaded):
    """Return one PIL image per page of the uploaded PDF or image file.

    A PDF is rendered page by page to PNG bytes and each page is opened as a
    PIL image; any other upload is opened directly as a single image.
    """
    if uploaded.type != "application/pdf":
        return [Image.open(uploaded)]

    # getvalue() returns the whole buffer regardless of the current read
    # position, so a stream that was already consumed still yields the PDF.
    # The `with` block closes the document even if rendering a page fails.
    with fitz.open(stream=uploaded.getvalue(), filetype="pdf") as pdf_document:
        return [
            Image.open(io.BytesIO(page.get_pixmap().tobytes("png")))
            for page in pdf_document
        ]


uploaded_file = st.file_uploader(
    "Upload File (PDF, Image)", type=["pdf", "png", "jpg", "jpeg"]
)

# Idempotent creation of the output directory used when saving OCR results.
os.makedirs("results", exist_ok=True)

if uploaded_file is not None:
    # perf_counter() is monotonic, so the elapsed time cannot be skewed by
    # system clock adjustments.
    start_time = time.perf_counter()

    images = _load_page_images(uploaded_file)
    total_pages = len(images)

    # Accumulated OCR text per selected engine, across all pages.
    all_ocr_texts = dict.fromkeys(ocr_models, "")
    language_code = language_codes[language]

    for page_num, image in enumerate(images, start=1):
        st.image(image, caption=f"Page {page_num}/{total_pages}", use_column_width=True)

        # Run every selected engine on this page and show its result.
        for model_name in ocr_models:
            text = ocr_engines.perform_ocr(
                model_name, ocr_readers, image, language_code
            )
            all_ocr_texts[model_name] += (
                f"--- Page {page_num} ({model_name}) ---\n{text}\n\n"
            )

            st.subheader(f"OCR Result ({model_name}) - Page {page_num}/{total_pages}:")
            st.text(text)

    process_time = time.perf_counter() - start_time
    st.info(f"Processing time: {process_time:.2f} seconds")

    if save_output:
        # The run attributes are identical for every engine: serialise once.
        attributes_json = json.dumps(
            {
                "Model Names": ocr_models,
                "Language": language,
                "Device": device,
                "Process Time": process_time,
            },
            ensure_ascii=False,
        )
        for model_name, ocr_text in all_ocr_texts.items():
            filename = os.path.join("results", f"ocr_output_{model_name}.txt")
            save_text_to_file(attributes_json, ocr_text, filename)

    # The button is only rendered when an LLM is selected and available
    # (short-circuit evaluation). No separate "LLM unavailable" branch is
    # needed here: the sidebar setup earlier in the script already warns and
    # resets `llm_model` to "Only OCR Mode" in that case.
    if (
        llm_model != "Only OCR Mode"
        and llm_available
        and st.sidebar.button("Start LLM Processing")
    ):
        st.subheader("LLM Processing Result:")

        # Feed the text of every engine to the LLM as one document.
        combined_ocr_text = "\n".join(all_ocr_texts.values())

        if task_type == "Summarize":
            instruction = "Please summarize the following text."
        else:
            instruction = "Please generate new text based on the following text."
        prompt = f"{instruction} Command: {user_command}\n\nText: {combined_ocr_text}"

        llm_output = llm_processor.process_with_llm(llm_model, prompt)

        st.write(f"Processing completed using '{llm_model}' model.")
        st.text_area("LLM Output:", value=llm_output, height=300)

        if save_output:
            # NOTE(review): this file is written to the working directory, not
            # to results/, and the LLM text lands under the "Attributes of
            # Output" heading of the record -- confirm both are intended.
            filename = "llm_output.txt"
            save_text_to_file(llm_output, "", filename)

st.sidebar.info(f"Selected device: {device}")
|
|