Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import torch | |
| import cv2 | |
| import numpy as np | |
| import easyocr | |
| import os | |
| import io | |
| import time | |
| from gtts import gTTS | |
| from PIL import Image, ImageOps | |
| from transformers import TrOCRProcessor, VisionEncoderDecoderModel | |
# ───────────────────────────────────────────────────────────────
# UI CONFIGURATION & ATOMIC CSS OVERRIDES
# ───────────────────────────────────────────────────────────────
# Page-level settings: wide layout, sidebar collapsed on first load.
st.set_page_config(page_title="Handwriting Engine", layout="wide", initial_sidebar_state="collapsed")
# Inject a single <style> block that re-themes Streamlit's built-in widgets:
#   * dark base colours and web fonts for the whole app,
#   * hidden header-anchor links, header, footer, decoration and toolbar,
#   * dark styling for status widgets, toasts, progress bar, selectbox and popover,
#   * the ".ingest-card" placeholder, with the real file uploader made fully
#     transparent (opacity 0) and pulled up over the card via a negative top
#     margin so that clicking the card opens the file picker,
#   * the stat cards and the scrollable ".output-box" used for OCR results.
# The selectors target Streamlit's data-testid / BaseWeb attributes.
# NOTE(review): those attributes are presumably version-specific — confirm after upgrading Streamlit.
st.markdown("""
<style>
@import url('https://fonts.googleapis.com/css2?family=Space+Grotesk:wght@300;500;700&family=Manrope:wght@300;400;600&display=swap');
@import url('https://fonts.googleapis.com/css2?family=Material+Symbols+Outlined:wght,FILL@100..700,0..1&display=swap');
/* Global Dark Base & NUKED PADDING */
.stApp { background-color: #0c0e12 !important; color: #f6f6fc !important; font-family: 'Manrope', sans-serif; overflow: hidden; }
.block-container { padding-top: 0rem !important; padding-bottom: 0rem !important; max-width: 95% !important; }
/* Subtle Title */
.hero-title { font-family: 'Space Grotesk'; font-size: 38px; font-weight: 300; margin-top: 5px; margin-bottom: 15px; text-align: center; }
.hero-accent { color: #8ff5ff; font-weight: 700; font-style: italic; text-shadow: 0 0 20px rgba(143, 245, 255, 0.5); }
.strike { text-decoration: line-through; color: #46484d; font-size: 18px; opacity: 0.5; margin-right: 12px; vertical-align: middle; }
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
NUKE ALL ANCHOR LINKS & HEADER HOVERS
βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
a.header-anchor, a[href^="#"] { display: none !important; pointer-events: none !important; }
h1 a, h2 a, h3 a, h4 a, h5 a, h6 a { display: none !important; pointer-events: none !important; }
.stMarkdown a { text-decoration: none !important; pointer-events: none !important; }
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
STATUS, SPINNERS, TOASTS & PROGRESS BARS (100% DARK MODE FIX)
βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
/* Target absolutely every single popup, toast, and cache notification container */
[data-testid="stStatusWidget"], [data-testid="stToast"], [data-testid="stToastContainer"],
[data-testid="stNotification"], [data-testid="stNotificationContainer"],
div[role="status"], div[role="alert"], div[role="dialog"],
div[data-baseweb="toast"], div[data-baseweb="snackbar"], div[data-baseweb="notification"] {
background-color: #171a1f !important; border: 1px solid #8ff5ff !important; border-radius: 4px !important; box-shadow: 0 0 15px rgba(143, 245, 255, 0.05) !important;
}
/* Force nested white boxes to turn transparent so the dark background shows */
[data-testid="stStatusWidget"] *, [data-testid="stToast"] *, [data-testid="stNotification"] *,
div[role="status"] *, div[role="alert"] *, div[role="dialog"] *,
div[data-baseweb="toast"] *, div[data-baseweb="snackbar"] *, div[data-baseweb="notification"] * {
background-color: transparent !important; color: #8ff5ff !important; font-family: 'Space Grotesk', sans-serif !important;
}
[data-testid="stStatusWidget"] label { color: #f6f6fc !important; }
/* Dynamic Progress Bar Override */
[data-testid="stProgress"] > div > div > div > div { background-color: #8ff5ff !important; }
[data-testid="stProgress"] p { color: #8ff5ff !important; font-family: 'Space Grotesk' !important; font-size: 14px !important; font-weight: 500 !important; letter-spacing: 1px; text-transform: uppercase;}
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
SELECT MODEL BOX
βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
div[data-testid="stSelectbox"] label { color: #8ff5ff !important; font-family: 'Space Grotesk' !important; text-transform: uppercase; letter-spacing: 1.5px; font-size: 11px; }
div[data-testid="stSelectbox"] div[data-baseweb="select"] { background-color: #0c0e12 !important; border: 1px solid #8ff5ff !important; border-radius: 4px !important; }
div[data-testid="stSelectbox"] div[data-baseweb="select"] * { background-color: #0c0e12 !important; color: #f6f6fc !important; }
/* Dropdown Menu Portal */
div[data-baseweb="popover"], div[data-baseweb="menu"], ul[role="listbox"] { background-color: #0c0e12 !important; border: 1px solid #8ff5ff !important; }
li[role="option"] { background-color: #0c0e12 !important; color: #f6f6fc !important; }
li[role="option"]:hover, li[role="option"]:hover * { background-color: #171a1f !important; color: #8ff5ff !important; }
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
INFO POPOVER BOX
βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
div[data-testid="stPopover"] button { background-color: #171a1f !important; border: 1px solid rgba(143, 245, 255, 0.3) !important; color: #8ff5ff !important; min-width: 80px !important; height: 38px !important; }
div[data-testid="stPopover"] span[data-testid="stBaseButton-label"] div { display: none !important; }
div[data-testid="stPopoverBody"] { background-color: #0c0e12 !important; border: 1px solid #8ff5ff !important; padding: 40px !important; min-width: 850px !important; max-height: none !important; overflow: visible !important; }
div[data-testid="stPopoverBody"] * { color: #f6f6fc !important; background-color: transparent !important; font-size: 15px; }
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
THE ABSOLUTE CLICKABLE FIX (Nuclear 100% Stretch)
βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
.ingest-card {
height: 280px; max-width: 500px; margin: 0 auto;
background: linear-gradient(145deg, #13161b, #0c0e12);
border: 1px solid rgba(143, 245, 255, 0.15); border-radius: 8px;
display: flex; flex-direction: column; align-items: center; justify-content: center;
pointer-events: none; z-index: 1;
}
.camera-box {
border: 2px dashed rgba(143, 245, 255, 0.4); border-radius: 4px;
width: 80px; height: 80px; display: flex; align-items: center; justify-content: center; margin-bottom: 20px;
}
.ingest-title { font-family: 'Space Grotesk'; font-size: 22px; font-weight: 600; color: #f6f6fc; }
.browse-btn { background-color: #8ff5ff; color: #000; padding: 10px 30px; font-family: 'Space Grotesk'; font-weight: 700; border-radius: 2px; margin-top: 15px; }
/* The invisible uploader wrapper pulled precisely over the card */
div[data-testid="stFileUploader"] {
margin-top: -296px !important;
height: 280px !important;
max-width: 500px !important;
margin-left: auto !important;
margin-right: auto !important;
z-index: 999 !important;
position: relative !important;
opacity: 0.0 !important;
}
/* THE TRUE FIX: Force every single internal element to stretch 100% over the box */
div[data-testid="stFileUploader"] * {
position: absolute !important;
top: 0 !important;
left: 0 !important;
right: 0 !important;
bottom: 0 !important;
width: 100% !important;
height: 100% !important;
cursor: pointer !important;
}
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
/* Stats & DYNAMIC Output Box */
.stat-card { background: #000; padding: 15px; border-radius: 4px; text-align: center; border: 1px solid rgba(143, 245, 255, 0.1); margin-bottom: 10px; }
.stat-val { color: #8ff5ff; font-size: 24px; font-weight: 700; font-family: 'Space Grotesk'; }
.stat-lbl { font-size: 9px; color: #46484d; text-transform: uppercase; letter-spacing: 2px; }
.output-box {
border-left: 3px solid #8ff5ff;
background: #171a1f;
padding: 25px;
font-family: 'Space Grotesk';
font-size: 18px;
line-height: 1.8;
height: calc(100vh - 320px); /* Dynamically scales to viewport */
min-height: 400px; /* Safe fallback */
overflow-y: auto;
white-space: pre-wrap;
border-radius: 0 4px 4px 0;
}
.stButton>button { background-color: rgba(143, 245, 255, 0.05) !important; border: 1px solid #8ff5ff !important; color: #8ff5ff !important; width: 100%; padding: 12px; }
.stButton>button:hover { background-color: #8ff5ff !important; color: #000 !important; }
/* Hide default streamlit items completely */
[data-testid="stHeader"], footer, [data-testid="stDecoration"], [data-testid="stToolbar"] { visibility: hidden; display: none !important; }
</style>
""", unsafe_allow_html=True)
| # ─────────────────────────────────────────────────────────────── | |
| # MODELS & OCR LOGIC | |
| # ─────────────────────────────────────────────────────────────── | |
# Cache the reader as a shared resource so it is built once per server process
# instead of on every Streamlit rerun. show_spinner=False suppresses Streamlit's
# default cache-miss spinner box, which the custom dark theme cannot restyle.
@st.cache_resource(show_spinner=False)
def load_vision_engine():
    """Create the EasyOCR reader used for text detection.

    Returns:
        An ``easyocr.Reader`` configured for English, running on the GPU when
        ``torch.cuda.is_available()`` reports one and on the CPU otherwise.
    """
    import logging

    # Silence EasyOCR's informational log output; only errors are reported.
    logging.getLogger("easyocr").setLevel(logging.ERROR)
    return easyocr.Reader(['en'], gpu=torch.cuda.is_available())
# Cached per model_path: loading the processor and weights is the slowest step
# of a scan, so it should happen once per model rather than on every button
# press. show_spinner=False hides Streamlit's default cache spinner box.
@st.cache_resource(show_spinner=False)
def load_trocr_model(model_path):
    """Load a TrOCR processor/model pair and prepare it for inference.

    Args:
        model_path: Identifier passed to ``from_pretrained`` for both the
            ``TrOCRProcessor`` and the ``VisionEncoderDecoderModel``.

    Returns:
        A ``(processor, model, device)`` tuple. The model is moved to CUDA and
        cast to half precision when a GPU is available, otherwise it stays on
        the CPU, and it is switched to eval mode before being returned.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    proc = TrOCRProcessor.from_pretrained(model_path)
    model = VisionEncoderDecoderModel.from_pretrained(model_path)
    model.to(device)
    if device.type == "cuda":
        model = model.half()

    # Rebuild every sinusoidal positional-embedding table from scratch using
    # the module's own get_embedding(), then place it on the model's device and
    # dtype. NOTE(review): presumably the tables that come out of
    # from_pretrained()/half() are not trustworthy for this checkpoint —
    # confirm the exact failure this works around.
    for module in model.modules():
        if "TrOCRSinusoidalPositionalEmbedding" in module.__class__.__name__:
            num_positions, embedding_dim = module.weights.shape
            new_weights = module.__class__.get_embedding(
                num_positions,
                embedding_dim,
                padding_idx=getattr(module, "padding_idx", None),
            )
            module.weights = new_weights.to(device=device, dtype=model.dtype)

    model.eval()
    return proc, model, device
def extract_lines(pil_img, reader):
    """Split a page image into one padded crop per detected text line.

    The reader's detections are reduced to axis-aligned boxes, boxes whose
    vertical centres lie within 0.6 x the median box height of an existing
    line are merged into that line, and each merged line is cropped from
    ``pil_img`` (20 px horizontal / 15 px vertical margin, clamped to the
    image) and surrounded by a 40 px white border.

    Args:
        pil_img: Image to segment; converted with ``np.array`` and treated as
            RGB for the detector, and cropped directly for the output.
        reader: Object exposing ``readtext(image, paragraph=False)`` that
            yields ``(bbox, text, confidence)`` triples.

    Returns:
        A list of line images ordered top to bottom, or ``[]`` when the reader
        finds nothing.
    """
    bgr = cv2.cvtColor(np.array(pil_img), cv2.COLOR_RGB2BGR)
    detections = reader.readtext(bgr, paragraph=False)

    # Collapse each detected quadrilateral into its bounding rectangle.
    word_boxes = []
    for quad, _, _ in detections:
        xs = [pt[0] for pt in quad]
        ys = [pt[1] for pt in quad]
        word_boxes.append({
            'x_min': min(xs),
            'x_max': max(xs),
            'y_min': min(ys),
            'y_max': max(ys),
        })
    if not word_boxes:
        return []

    word_boxes.sort(key=lambda b: b['y_min'])
    heights = [b['y_max'] - b['y_min'] for b in word_boxes]
    y_tol = np.median(heights) * 0.6

    # Greedy grouping: a box joins the first line whose current vertical
    # centre is within y_tol of its own; otherwise it starts a new line.
    lines = []
    for wb in word_boxes:
        centre = (wb['y_min'] + wb['y_max']) / 2.0
        for ln in lines:
            if abs(centre - (ln['y_min'] + ln['y_max']) / 2.0) < y_tol:
                ln['x_min'] = min(ln['x_min'], wb['x_min'])
                ln['x_max'] = max(ln['x_max'], wb['x_max'])
                ln['y_min'] = min(ln['y_min'], wb['y_min'])
                ln['y_max'] = max(ln['y_max'], wb['y_max'])
                break
        else:
            lines.append(wb.copy())

    crops = []
    for ln in sorted(lines, key=lambda b: b['y_min']):
        left = max(0, int(ln['x_min']) - 20)
        top = max(0, int(ln['y_min']) - 15)
        right = min(pil_img.width, int(ln['x_max']) + 20)
        bottom = min(pil_img.height, int(ln['y_max']) + 15)
        line_img = pil_img.crop((left, top, right, bottom))
        crops.append(ImageOps.expand(line_img, border=40, fill=(255, 255, 255)))
    return crops
def main():
    """Render the single-page OCR app and drive one scan per button press.

    Layout: a title row with an INFO popover, a left column holding the model
    selector and either the upload card or the uploaded image with its
    REMOVE / RUN buttons, and a right column that shows scan progress, the
    stored results (latency, word count, confidence, text, audio) or an idle
    placeholder.

    State is kept in ``st.session_state`` under ``image_data`` (the uploaded
    image converted to RGB, or None) and ``ocr_results`` (a dict with keys
    ``text``, ``time``, ``words``, ``conf``, or None).
    """
    # Function-scope imports keep this block self-contained.
    import html
    import logging

    _, col_t2, col_t3 = st.columns([1, 8, 1])
    with col_t2:
        st.markdown('<h1 class="hero-title"><span class="strike">Handwronging</span><span class="hero-accent">Handwriting</span> OCR</h1>', unsafe_allow_html=True)
    with col_t3:
        with st.popover("INFO"):
            st.markdown("### 🧠 Forensic Neural Architecture")
            st.write("This engine operates in a two-stage forensic sequence designed to maximize character fidelity. First, **EasyOCR** maps the image using mathematical line fusion, isolating text rows. Second, a **TrOCR Transformer** synthesizes the features into text. It may take a long time if ran online.")
            st.markdown("---")
            st.markdown("### ⚙️ The Neural Engines")
            st.write("**Model V13 (Specialist):** I trained this specific model myself using the **IAM Handwriting Database** (over 65,000 instances). It is highly optimized for cursive loops and manual pen-strokes. It is excellent for handwritten manuscripts but might struggle with standard modern print.")
            st.write("**Microsoft Large (1.3B Fallback):** A massive generalist model trained on millions of varied script and print examples. It is better for general use cases, complex historical documents, or heavily degraded text where V13 might struggle.")

    if "image_data" not in st.session_state:
        st.session_state.update({"image_data": None, "ocr_results": None})

    reader = load_vision_engine()
    c_left, c_right = st.columns([1, 2], gap="large")
    run_scan_trigger = False

    with c_left:
        model_choice = st.selectbox("SELECT MODEL", ["V13 Specialist", "Microsoft Large"])
        st.markdown("<div style='height: 15px;'></div>", unsafe_allow_html=True)
        # Maps the selectbox label to the model identifier that gets loaded.
        m_map = {
            "V13 Specialist": "Hypernova823/ReadAI",
            "Microsoft Large": "microsoft/trocr-large-handwritten",
        }
        if st.session_state.image_data is None:
            # Decorative card; the real (transparent) uploader is stacked on
            # top of it by the page CSS, so clicking the card opens the picker.
            st.markdown("""
<div class="ingest-card">
<div class="camera-box"><span class="material-symbols-outlined" style="font-size:42px; color:#8ff5ff;">add_a_photo</span></div>
<div class="ingest-title">Initialize Data Input</div>
<div class="browse-btn">BROWSE LOCAL STORAGE</div>
</div>
""", unsafe_allow_html=True)
            uploaded = st.file_uploader("Upload", type=['png', 'jpg', 'jpeg'], label_visibility="hidden")
            if uploaded:
                st.session_state.image_data = Image.open(uploaded).convert("RGB")
                st.rerun()
        else:
            st.image(st.session_state.image_data, width=350)
            btn_col1, btn_col2 = st.columns(2)
            with btn_col1:
                if st.button("REMOVE IMAGE"):
                    st.session_state.update({"image_data": None, "ocr_results": None})
                    st.rerun()
            with btn_col2:
                if st.button("RUN NEURAL SCAN"):
                    run_scan_trigger = True

    with c_right:
        if run_scan_trigger:
            start = time.time()
            with st.spinner("Allocating Neural Resources & Loading Weights..."):
                proc, model, device = load_trocr_model(m_map[model_choice])
                crops = extract_lines(st.session_state.image_data, reader)
            decoded, scores = [], []
            total_crops = len(crops)
            if total_crops > 0:
                progress_bar = st.progress(0, text="Initializing Neural Matrix...")
                for idx, crop in enumerate(crops):
                    pct = int((idx / total_crops) * 100)
                    progress_bar.progress(pct, text=f"Synthesizing segment {idx + 1} out of {total_crops} | {pct}% Complete...")
                    pixel_values = proc(crop, return_tensors="pt").pixel_values.to(device)
                    if device.type == "cuda":
                        # The model runs in half precision on CUDA, so the input must match.
                        pixel_values = pixel_values.half()
                    with torch.no_grad():
                        out = model.generate(pixel_values, max_new_tokens=64, return_dict_in_generate=True, output_scores=True)
                    decoded.append(proc.batch_decode(out.sequences, skip_special_tokens=True)[0].strip())
                    # Confidence is a best-effort statistic: a failure here must
                    # not abort the scan, but it is logged instead of being
                    # silently discarded.
                    try:
                        transition = model.compute_transition_scores(out.sequences, out.scores, normalize_logits=True)
                        scores.extend(np.exp(transition[0].cpu().numpy()))
                    except Exception:
                        logging.getLogger(__name__).warning(
                            "Could not compute confidence for segment %d", idx + 1, exc_info=True
                        )
                progress_bar.progress(100, text="Sequence Complete. Compiling output...")
                # Brief pause so the completed bar is visible before the rerun.
                time.sleep(0.3)
            text = "\n".join(decoded)
            st.session_state.ocr_results = {
                "text": text,
                "time": time.time() - start,
                "words": len(text.split()),
                "conf": np.mean(scores) * 100 if scores else 0,
            }
            st.rerun()
        elif st.session_state.ocr_results:
            res = st.session_state.ocr_results
            s1, s2, s3 = st.columns(3)
            s1.markdown(f'<div class="stat-card"><div class="stat-val">{res["time"]:.1f}s</div><div class="stat-lbl">Latency</div></div>', unsafe_allow_html=True)
            s2.markdown(f'<div class="stat-card"><div class="stat-val">{res["words"]}</div><div class="stat-lbl">Words</div></div>', unsafe_allow_html=True)
            s3.markdown(f'<div class="stat-card"><div class="stat-val">{res["conf"]:.1f}%</div><div class="stat-lbl">Confidence</div></div>', unsafe_allow_html=True)
            # The recognised text comes from a user-supplied image, so escape it
            # before embedding it in raw HTML.
            st.markdown(f'<div class="output-box">{html.escape(res["text"])}</div>', unsafe_allow_html=True)
            # gTTS rejects empty text and needs network access; neither case
            # should take the results page down.
            if res["text"].strip():
                try:
                    fp = io.BytesIO()
                    gTTS(text=res["text"], lang='en').write_to_fp(fp)
                    fp.seek(0)
                except Exception as exc:
                    st.warning(f"Audio playback unavailable: {exc}")
                else:
                    st.audio(fp, format='audio/mp3')
        else:
            st.markdown('<div style="height: 100%; display: flex; align-items: center; justify-content: center; opacity: 0.3; margin-top: 150px;"><h3 style="font-family:Space Grotesk; font-weight:300;">AWAITING SCAN SEQUENCE...</h3></div>', unsafe_allow_html=True)


if __name__ == "__main__":
    main()