Spaces:

Hypernova823
/

Handwronging

Sleeping

App Files Files Community

Hypernova823 committed on 18 days ago

Commit

24ca153

verified ·

1 Parent(s): d33ec2c

Upload streamlit_app.py

Browse files

Files changed (1) hide show

streamlit_app.py +300 -0

streamlit_app.py ADDED Viewed

	@@ -0,0 +1,300 @@

+import streamlit as st
+import torch
+import cv2
+import numpy as np
+import easyocr
+import os
+import io
+import time
+from gtts import gTTS
+from PIL import Image, ImageOps
+from transformers import TrOCRProcessor, VisionEncoderDecoderModel, VisionEncoderDecoderConfig
+# ═══════════════════════════════════════════════════════════════
+# UI CONFIGURATION & ATOMIC CSS OVERRIDES
+# ═══════════════════════════════════════════════════════════════
+st.set_page_config(page_title="Handwriting Engine", layout="wide", initial_sidebar_state="collapsed")
+st.markdown("""
+<style>
+@import url('https://fonts.googleapis.com/css2?family=Space+Grotesk:wght@300;500;700&family=Manrope:wght@300;400;600&display=swap');
+@import url('https://fonts.googleapis.com/css2?family=Material+Symbols+Outlined:wght,FILL@100..700,0..1&display=swap');
+/* Global Dark Base & NUKED PADDING */
+.stApp { background-color: #0c0e12 !important; color: #f6f6fc !important; font-family: 'Manrope', sans-serif; overflow: hidden; }
+.block-container { padding-top: 0rem !important; padding-bottom: 0rem !important; max-width: 95% !important; }
+/* Subtle Title */
+.hero-title { font-family: 'Space Grotesk'; font-size: 38px; font-weight: 300; margin-top: 5px; margin-bottom: 15px; text-align: center; }
+.hero-accent { color: #8ff5ff; font-weight: 700; font-style: italic; text-shadow: 0 0 20px rgba(143, 245, 255, 0.5); }
+.strike { text-decoration: line-through; color: #46484d; font-size: 18px; opacity: 0.5; margin-right: 12px; vertical-align: middle; }
+/* ═══════════════════════════════════════════════════════════════
+   NUKE ALL ANCHOR LINKS & HEADER HOVERS
+   ═══════════════════════════════════════════════════════════════ */
+a.header-anchor, a[href^="#"] { display: none !important; pointer-events: none !important; }
+h1 a, h2 a, h3 a, h4 a, h5 a, h6 a { display: none !important; pointer-events: none !important; }
+.stMarkdown a { text-decoration: none !important; pointer-events: none !important; }
+/* ═══════════════════════════════════════════════════════════════
+   STATUS, SPINNERS & TOASTS
+   ═══════════════════════════════════════════════════════════════ */
+[data-testid="stStatusWidget"], [data-testid="stToast"], div[role="status"], div[data-baseweb="toast"] {
+    background-color: #171a1f !important; border: 1px solid #8ff5ff !important; border-radius: 4px !important;
+}
+[data-testid="stStatusWidget"] *, [data-testid="stToast"] *, div[role="status"] * {
+    color: #8ff5ff !important; font-family: 'Space Grotesk', sans-serif !important;
+}
+[data-testid="stStatusWidget"] label { color: #f6f6fc !important; }
+/* ═══════════════════════════════════════════════════════════════
+   SELECT MODEL BOX
+   ═══════════════════════════════════════════════════════════════ */
+div[data-testid="stSelectbox"] label { color: #8ff5ff !important; font-family: 'Space Grotesk' !important; text-transform: uppercase; letter-spacing: 1.5px; font-size: 11px; }
+div[data-testid="stSelectbox"] div[data-baseweb="select"] { background-color: #0c0e12 !important; border: 1px solid #8ff5ff !important; border-radius: 4px !important; }
+div[data-testid="stSelectbox"] div[data-baseweb="select"] * { background-color: #0c0e12 !important; color: #f6f6fc !important; }
+/* Dropdown Menu Portal */
+div[data-baseweb="popover"], div[data-baseweb="menu"], ul[role="listbox"] { background-color: #0c0e12 !important; border: 1px solid #8ff5ff !important; }
+li[role="option"] { background-color: #0c0e12 !important; color: #f6f6fc !important; }
+li[role="option"]:hover, li[role="option"]:hover * { background-color: #171a1f !important; color: #8ff5ff !important; }
+/* ═══════════════════════════════════════════════════════════════
+   INFO POPOVER BOX
+   ══════════════════════════════════════════════���════════════════ */
+div[data-testid="stPopover"] button { background-color: #171a1f !important; border: 1px solid rgba(143, 245, 255, 0.3) !important; color: #8ff5ff !important; min-width: 80px !important; height: 38px !important; }
+div[data-testid="stPopover"] span[data-testid="stBaseButton-label"] div { display: none !important; }
+div[data-testid="stPopoverBody"] { background-color: #0c0e12 !important; border: 1px solid #8ff5ff !important; padding: 40px !important; min-width: 850px !important; max-height: none !important; overflow: visible !important; }
+div[data-testid="stPopoverBody"] * { color: #f6f6fc !important; background-color: transparent !important; font-size: 15px; }
+/* ═══════════════════════════════════════════════════════════════
+   THE ABSOLUTE CLICKABLE FIX (Nuclear 100% Stretch)
+   ═══════════════════════════════════════════════════════════════ */
+.ingest-card {
+    height: 280px; max-width: 500px; margin: 0 auto;
+    background: linear-gradient(145deg, #13161b, #0c0e12);
+    border: 1px solid rgba(143, 245, 255, 0.15); border-radius: 8px;
+    display: flex; flex-direction: column; align-items: center; justify-content: center;
+    pointer-events: none; z-index: 1;
+}
+.camera-box {
+    border: 2px dashed rgba(143, 245, 255, 0.4); border-radius: 4px;
+    width: 80px; height: 80px; display: flex; align-items: center; justify-content: center; margin-bottom: 20px;
+}
+.ingest-title { font-family: 'Space Grotesk'; font-size: 22px; font-weight: 600; color: #f6f6fc; }
+.browse-btn { background-color: #8ff5ff; color: #000; padding: 10px 30px; font-family: 'Space Grotesk'; font-weight: 700; border-radius: 2px; margin-top: 15px; }
+/* The invisible uploader wrapper pulled precisely over the card */
+div[data-testid="stFileUploader"] {
+    margin-top: -296px !important;
+    height: 280px !important;
+    max-width: 500px !important;
+    margin-left: auto !important;
+    margin-right: auto !important;
+    z-index: 999 !important;
+    position: relative !important;
+    opacity: 0.0 !important;
+}
+/* THE TRUE FIX: Force every single internal element to stretch 100% over the box */
+div[data-testid="stFileUploader"] * {
+    position: absolute !important;
+    top: 0 !important;
+    left: 0 !important;
+    right: 0 !important;
+    bottom: 0 !important;
+    width: 100% !important;
+    height: 100% !important;
+    cursor: pointer !important;
+}
+/* ═══════════════════════════════════════════════════════════════ */
+/* Stats & DYNAMIC Output Box */
+.stat-card { background: #000; padding: 15px; border-radius: 4px; text-align: center; border: 1px solid rgba(143, 245, 255, 0.1); margin-bottom: 10px; }
+.stat-val { color: #8ff5ff; font-size: 24px; font-weight: 700; font-family: 'Space Grotesk'; }
+.stat-lbl { font-size: 9px; color: #46484d; text-transform: uppercase; letter-spacing: 2px; }
+.output-box {
+    border-left: 3px solid #8ff5ff;
+    background: #171a1f;
+    padding: 25px;
+    font-family: 'Space Grotesk';
+    font-size: 18px;
+    line-height: 1.8;
+    height: calc(100vh - 320px); /* Dynamically scales to viewport */
+    min-height: 400px; /* Safe fallback */
+    overflow-y: auto;
+    white-space: pre-wrap;
+    border-radius: 0 4px 4px 0;
+}
+.stButton>button { background-color: rgba(143, 245, 255, 0.05) !important; border: 1px solid #8ff5ff !important; color: #8ff5ff !important; width: 100%; padding: 12px; }
+.stButton>button:hover { background-color: #8ff5ff !important; color: #000 !important; }
+/* Hide default streamlit items completely */
+[data-testid="stHeader"], footer, [data-testid="stDecoration"], [data-testid="stToolbar"] { visibility: hidden; display: none !important; }
+</style>
+""", unsafe_allow_html=True)
+# ═══════════════════════════════════════════════════════════════
+# MODELS & OCR LOGIC
+# ═══════════════════════════════════════════════════════════════
+@st.cache_resource(show_spinner=False)
+def load_vision_engine():
+    import logging
+    logging.getLogger("easyocr").setLevel(logging.ERROR)
+    return easyocr.Reader(['en'], gpu=torch.cuda.is_available())
+@st.cache_resource(show_spinner=False)
+def load_trocr_model(model_path):
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    proc = TrOCRProcessor.from_pretrained(model_path)
+    if os.path.exists(model_path):
+        config = VisionEncoderDecoderConfig.from_pretrained(model_path)
+        model = VisionEncoderDecoderModel(config)
+        safe_path = os.path.join(model_path, "model.safetensors")
+        bin_path = os.path.join(model_path, "pytorch_model.bin")
+        if os.path.exists(safe_path):
+            from safetensors.torch import load_file
+            model.load_state_dict(load_file(safe_path), strict=False)
+        else:
+            model.load_state_dict(torch.load(bin_path, map_location="cpu", weights_only=True), strict=False)
+    else:
+        model = VisionEncoderDecoderModel.from_pretrained(model_path)
+    # Push standard registered parameters/buffers to device
+    model.to(device)
+    # ─── AGGRESSIVE ROGUE TENSOR MIGRATION ───
+    # Snapshot dict to avoid runtime size change errors while finding unregistered weights
+    for module in model.modules():
+        # 1. Double check parameters
+        for name, param in list(module._parameters.items()):
+            if param is not None:
+                module._parameters[name] = torch.nn.Parameter(param.to(device))
+        # 2. Double check buffers
+        for name, buf in list(module._buffers.items()):
+            if buf is not None:
+                module._buffers[name] = buf.to(device)
+        # 3. Hunt down unregistered raw tensors (Fixes the TrOCR positional weights crash)
+        for name, attr in list(module.__dict__.items()):
+            if isinstance(attr, torch.Tensor):
+                setattr(module, name, attr.to(device))
+    # If on GPU, push the entire model to Half precision
+    if device.type == "cuda":
+        model = model.half()
+        # Ensure those unregistered raw tensors are ALSO converted to half precision
+        for module in model.modules():
+            for name, attr in list(module.__dict__.items()):
+                if isinstance(attr, torch.Tensor) and attr.is_floating_point():
+                    setattr(module, name, attr.half())
+    model.eval()
+    return proc, model, device
+def extract_lines(pil_img, reader):
+    img_cv = cv2.cvtColor(np.array(pil_img), cv2.COLOR_RGB2BGR)
+    results = reader.readtext(img_cv, paragraph=False)
+    raw_boxes = []
+    for bbox, _, _ in results:
+        x_c, y_c = [pt[0] for pt in bbox], [pt[1] for pt in bbox]
+        raw_boxes.append({'x_min': min(x_c), 'x_max': max(x_c), 'y_min': min(y_c), 'y_max': max(y_c)})
+    if not raw_boxes: return []
+    raw_boxes.sort(key=lambda b: b['y_min'])
+    median_h = np.median([b['y_max'] - b['y_min'] for b in raw_boxes])
+    y_tol = median_h * 0.6
+    fused = []
+    for box in raw_boxes:
+        cy, placed = (box['y_min'] + box['y_max']) / 2.0, False
+        for line in fused:
+            if abs(cy - (line['y_min'] + line['y_max']) / 2.0) < y_tol:
+                line.update({'x_min': min(line['x_min'], box['x_min']), 'x_max': max(line['x_max'], box['x_max']), 'y_min': min(line['y_min'], box['y_min']), 'y_max': max(line['y_max'], box['y_max'])})
+                placed = True; break
+        if not placed: fused.append(box.copy())
+    crops = []
+    for line in sorted(fused, key=lambda b: b['y_min']):
+        crop = pil_img.crop((max(0, int(line['x_min']) - 20), max(0, int(line['y_min']) - 15), min(pil_img.width, int(line['x_max']) + 20), min(pil_img.height, int(line['y_max']) + 15)))
+        crops.append(ImageOps.expand(crop, border=40, fill=(255, 255, 255)))
+    return crops
+def main():
+    col_t1, col_t2, col_t3 = st.columns([1, 8, 1])
+    with col_t2: st.markdown('<h1 class="hero-title"><span class="strike">Handwronging</span><span class="hero-accent">Handwriting</span> OCR</h1>', unsafe_allow_html=True)
+    with col_t3:
+        with st.popover("INFO"):
+            st.markdown("### 🧠 Forensic Neural Architecture")
+            st.write("This engine operates in a two-stage forensic sequence designed to maximize character fidelity. First, **EasyOCR** maps the image using mathematical line fusion, isolating text rows. Second, a **TrOCR Transformer** synthesizes the features into text.")
+            st.markdown("---")
+            st.markdown("### ⚙️ The Neural Engines")
+            st.write("**Model V13 (Specialist):** I trained this specific model myself using the **IAM Handwriting Database** (over 65,000 instances). It is highly optimized for cursive loops and manual pen-strokes. It is excellent for handwritten manuscripts but might struggle with standard modern print.")
+            st.write("**Microsoft Large (1.3B Fallback):** A massive generalist model trained on millions of varied script and print examples. It is better for general use cases, complex historical documents, or heavily degraded text where V13 might struggle.")
+    if "image_data" not in st.session_state: st.session_state.update({"image_data": None, "ocr_results": None})
+    reader = load_vision_engine()
+    c_left, c_right = st.columns([1, 2], gap="large")
+    run_scan_trigger = False
+    with c_left:
+        model_choice = st.selectbox("SELECT MODEL", ["V13 Specialist", "Microsoft Large"])
+        st.markdown("<div style='height: 15px;'></div>", unsafe_allow_html=True)
+        m_map = {"V13 Specialist": "./final_handwriting_model_v13", "Microsoft Large": "microsoft/trocr-large-handwritten"}
+        if st.session_state.image_data is None:
+            st.markdown("""
+                <div class="ingest-card">
+                    <div class="camera-box"><span class="material-symbols-outlined" style="font-size:42px; color:#8ff5ff;">add_a_photo</span></div>
+                    <div class="ingest-title">Initialize Data Input</div>
+                    <div class="browse-btn">BROWSE LOCAL STORAGE</div>
+                </div>
+            """, unsafe_allow_html=True)
+            uploaded = st.file_uploader("Upload", type=['png', 'jpg', 'jpeg'], label_visibility="hidden")
+            if uploaded: st.session_state.image_data = Image.open(uploaded).convert("RGB"); st.rerun()
+        else:
+            st.image(st.session_state.image_data, width=350)
+            btn_col1, btn_col2 = st.columns(2)
+            with btn_col1:
+                if st.button("REMOVE IMAGE"):
+                    st.session_state.update({"image_data": None, "ocr_results": None})
+                    st.rerun()
+            with btn_col2:
+                if st.button("RUN NEURAL SCAN"):
+                    run_scan_trigger = True
+    with c_right:
+        if run_scan_trigger:
+            with st.spinner("Extracting parameters and running neural synthesis..."):
+                start = time.time()
+                crops = extract_lines(st.session_state.image_data, reader)
+                proc, model, device = load_trocr_model(m_map[model_choice])
+                decoded, scores = [], []
+                for crop in crops:
+                    pixel_values = proc(crop, return_tensors="pt").pixel_values.to(device)
+                    if device.type == "cuda": pixel_values = pixel_values.half()
+                    with torch.no_grad():
+                        out = model.generate(pixel_values, max_new_tokens=64, return_dict_in_generate=True, output_scores=True)
+                    decoded.append(proc.batch_decode(out.sequences, skip_special_tokens=True)[0].strip())
+                    try: scores.extend(np.exp(model.compute_transition_scores(out.sequences, out.scores, normalize_logits=True)[0].cpu().numpy()))
+                    except: pass
+                st.session_state.ocr_results = {"text": "\n".join(decoded), "time": time.time() - start, "words": len("\n".join(decoded).split()), "conf": np.mean(scores)*100 if scores else 0}
+            st.rerun()
+        elif st.session_state.ocr_results:
+            res = st.session_state.ocr_results
+            s1, s2, s3 = st.columns(3)
+            s1.markdown(f'<div class="stat-card"><div class="stat-val">{res["time"]:.1f}s</div><div class="stat-lbl">Latency</div></div>', unsafe_allow_html=True)
+            s2.markdown(f'<div class="stat-card"><div class="stat-val">{res["words"]}</div><div class="stat-lbl">Words</div></div>', unsafe_allow_html=True)
+            s3.markdown(f'<div class="stat-card"><div class="stat-val">{res["conf"]:.1f}%</div><div class="stat-lbl">Confidence</div></div>', unsafe_allow_html=True)
+            st.markdown(f'<div class="output-box">{res["text"]}</div>', unsafe_allow_html=True)
+            tts = gTTS(text=res["text"], lang='en'); fp = io.BytesIO(); tts.write_to_fp(fp); fp.seek(0)
+            st.audio(fp, format='audio/mp3')
+        else:
+            st.markdown('<div style="height: 100%; display: flex; align-items: center; justify-content: center; opacity: 0.3; margin-top: 150px;"><h3 style="font-family:Space Grotesk; font-weight:300;">AWAITING SCAN SEQUENCE...</h3></div>', unsafe_allow_html=True)
+if __name__ == "__main__": main()