# Handwronging / src / streamlit_app.py
# Uploaded by Hypernova823 — "Upload streamlit_app.py" (commit cbdfce9, verified)
import streamlit as st
import torch
import cv2
import numpy as np
import easyocr
import os
import io
import time
from gtts import gTTS
from PIL import Image, ImageOps
from transformers import TrOCRProcessor, VisionEncoderDecoderModel
# ═══════════════════════════════════════════════════════════════
# UI CONFIGURATION & ATOMIC CSS OVERRIDES
# ═══════════════════════════════════════════════════════════════
# Page-level settings: browser tab title, wide layout, sidebar collapsed on load.
st.set_page_config(page_title="Handwriting Engine", layout="wide", initial_sidebar_state="collapsed")
# Inject one global <style> block (rendered as raw HTML via unsafe_allow_html=True).
# The stylesheet below:
#   * applies the dark colour palette and the Space Grotesk / Manrope fonts,
#   * hides header anchor links and the default header/footer/toolbar elements,
#   * restyles status widgets, toasts, progress bars, the select box and the popover,
#   * makes the file-uploader widget fully transparent (opacity 0) and pulls it up
#     with a negative top margin so it sits on top of the `.ingest-card` markup,
#   * defines the `.stat-card` and `.output-box` classes used for the results.
# NOTE(review): the selectors rely on data-testid / data-baseweb attribute names;
# presumably these must match the installed Streamlit version — confirm on upgrade.
st.markdown("""
<style>
@import url('https://fonts.googleapis.com/css2?family=Space+Grotesk:wght@300;500;700&family=Manrope:wght@300;400;600&display=swap');
@import url('https://fonts.googleapis.com/css2?family=Material+Symbols+Outlined:wght,FILL@100..700,0..1&display=swap');
/* Global Dark Base & NUKED PADDING */
.stApp { background-color: #0c0e12 !important; color: #f6f6fc !important; font-family: 'Manrope', sans-serif; overflow: hidden; }
.block-container { padding-top: 0rem !important; padding-bottom: 0rem !important; max-width: 95% !important; }
/* Subtle Title */
.hero-title { font-family: 'Space Grotesk'; font-size: 38px; font-weight: 300; margin-top: 5px; margin-bottom: 15px; text-align: center; }
.hero-accent { color: #8ff5ff; font-weight: 700; font-style: italic; text-shadow: 0 0 20px rgba(143, 245, 255, 0.5); }
.strike { text-decoration: line-through; color: #46484d; font-size: 18px; opacity: 0.5; margin-right: 12px; vertical-align: middle; }
/* ═══════════════════════════════════════════════════════════════
NUKE ALL ANCHOR LINKS & HEADER HOVERS
═══════════════════════════════════════════════════════════════ */
a.header-anchor, a[href^="#"] { display: none !important; pointer-events: none !important; }
h1 a, h2 a, h3 a, h4 a, h5 a, h6 a { display: none !important; pointer-events: none !important; }
.stMarkdown a { text-decoration: none !important; pointer-events: none !important; }
/* ═══════════════════════════════════════════════════════════════
STATUS, SPINNERS, TOASTS & PROGRESS BARS (100% DARK MODE FIX)
═══════════════════════════════════════════════════════════════ */
/* Target absolutely every single popup, toast, and cache notification container */
[data-testid="stStatusWidget"], [data-testid="stToast"], [data-testid="stToastContainer"],
[data-testid="stNotification"], [data-testid="stNotificationContainer"],
div[role="status"], div[role="alert"], div[role="dialog"],
div[data-baseweb="toast"], div[data-baseweb="snackbar"], div[data-baseweb="notification"] {
background-color: #171a1f !important; border: 1px solid #8ff5ff !important; border-radius: 4px !important; box-shadow: 0 0 15px rgba(143, 245, 255, 0.05) !important;
}
/* Force nested white boxes to turn transparent so the dark background shows */
[data-testid="stStatusWidget"] *, [data-testid="stToast"] *, [data-testid="stNotification"] *,
div[role="status"] *, div[role="alert"] *, div[role="dialog"] *,
div[data-baseweb="toast"] *, div[data-baseweb="snackbar"] *, div[data-baseweb="notification"] * {
background-color: transparent !important; color: #8ff5ff !important; font-family: 'Space Grotesk', sans-serif !important;
}
[data-testid="stStatusWidget"] label { color: #f6f6fc !important; }
/* Dynamic Progress Bar Override */
[data-testid="stProgress"] > div > div > div > div { background-color: #8ff5ff !important; }
[data-testid="stProgress"] p { color: #8ff5ff !important; font-family: 'Space Grotesk' !important; font-size: 14px !important; font-weight: 500 !important; letter-spacing: 1px; text-transform: uppercase;}
/* ═══════════════════════════════════════════════════════════════
SELECT MODEL BOX
═══════════════════════════════════════════════════════════════ */
div[data-testid="stSelectbox"] label { color: #8ff5ff !important; font-family: 'Space Grotesk' !important; text-transform: uppercase; letter-spacing: 1.5px; font-size: 11px; }
div[data-testid="stSelectbox"] div[data-baseweb="select"] { background-color: #0c0e12 !important; border: 1px solid #8ff5ff !important; border-radius: 4px !important; }
div[data-testid="stSelectbox"] div[data-baseweb="select"] * { background-color: #0c0e12 !important; color: #f6f6fc !important; }
/* Dropdown Menu Portal */
div[data-baseweb="popover"], div[data-baseweb="menu"], ul[role="listbox"] { background-color: #0c0e12 !important; border: 1px solid #8ff5ff !important; }
li[role="option"] { background-color: #0c0e12 !important; color: #f6f6fc !important; }
li[role="option"]:hover, li[role="option"]:hover * { background-color: #171a1f !important; color: #8ff5ff !important; }
/* ═══════════════════════════════════════════════════════════════
INFO POPOVER BOX
═══════════════════════════════════════════════════════════════ */
div[data-testid="stPopover"] button { background-color: #171a1f !important; border: 1px solid rgba(143, 245, 255, 0.3) !important; color: #8ff5ff !important; min-width: 80px !important; height: 38px !important; }
div[data-testid="stPopover"] span[data-testid="stBaseButton-label"] div { display: none !important; }
div[data-testid="stPopoverBody"] { background-color: #0c0e12 !important; border: 1px solid #8ff5ff !important; padding: 40px !important; min-width: 850px !important; max-height: none !important; overflow: visible !important; }
div[data-testid="stPopoverBody"] * { color: #f6f6fc !important; background-color: transparent !important; font-size: 15px; }
/* ═══════════════════════════════════════════════════════════════
THE ABSOLUTE CLICKABLE FIX (Nuclear 100% Stretch)
═══════════════════════════════════════════════════════════════ */
.ingest-card {
height: 280px; max-width: 500px; margin: 0 auto;
background: linear-gradient(145deg, #13161b, #0c0e12);
border: 1px solid rgba(143, 245, 255, 0.15); border-radius: 8px;
display: flex; flex-direction: column; align-items: center; justify-content: center;
pointer-events: none; z-index: 1;
}
.camera-box {
border: 2px dashed rgba(143, 245, 255, 0.4); border-radius: 4px;
width: 80px; height: 80px; display: flex; align-items: center; justify-content: center; margin-bottom: 20px;
}
.ingest-title { font-family: 'Space Grotesk'; font-size: 22px; font-weight: 600; color: #f6f6fc; }
.browse-btn { background-color: #8ff5ff; color: #000; padding: 10px 30px; font-family: 'Space Grotesk'; font-weight: 700; border-radius: 2px; margin-top: 15px; }
/* The invisible uploader wrapper pulled precisely over the card */
div[data-testid="stFileUploader"] {
margin-top: -296px !important;
height: 280px !important;
max-width: 500px !important;
margin-left: auto !important;
margin-right: auto !important;
z-index: 999 !important;
position: relative !important;
opacity: 0.0 !important;
}
/* THE TRUE FIX: Force every single internal element to stretch 100% over the box */
div[data-testid="stFileUploader"] * {
position: absolute !important;
top: 0 !important;
left: 0 !important;
right: 0 !important;
bottom: 0 !important;
width: 100% !important;
height: 100% !important;
cursor: pointer !important;
}
/* ═══════════════════════════════════════════════════════════════ */
/* Stats & DYNAMIC Output Box */
.stat-card { background: #000; padding: 15px; border-radius: 4px; text-align: center; border: 1px solid rgba(143, 245, 255, 0.1); margin-bottom: 10px; }
.stat-val { color: #8ff5ff; font-size: 24px; font-weight: 700; font-family: 'Space Grotesk'; }
.stat-lbl { font-size: 9px; color: #46484d; text-transform: uppercase; letter-spacing: 2px; }
.output-box {
border-left: 3px solid #8ff5ff;
background: #171a1f;
padding: 25px;
font-family: 'Space Grotesk';
font-size: 18px;
line-height: 1.8;
height: calc(100vh - 320px); /* Dynamically scales to viewport */
min-height: 400px; /* Safe fallback */
overflow-y: auto;
white-space: pre-wrap;
border-radius: 0 4px 4px 0;
}
.stButton>button { background-color: rgba(143, 245, 255, 0.05) !important; border: 1px solid #8ff5ff !important; color: #8ff5ff !important; width: 100%; padding: 12px; }
.stButton>button:hover { background-color: #8ff5ff !important; color: #000 !important; }
/* Hide default streamlit items completely */
[data-testid="stHeader"], footer, [data-testid="stDecoration"], [data-testid="stToolbar"] { visibility: hidden; display: none !important; }
</style>
""", unsafe_allow_html=True)
# ═══════════════════════════════════════════════════════════════
# MODELS & OCR LOGIC
# ═══════════════════════════════════════════════════════════════
# THE KILL-SWITCH: show_spinner=False completely deletes the un-styleable white cache boxes
@st.cache_resource(show_spinner=False)
def load_vision_engine():
    """Create the EasyOCR reader used to locate text regions.

    The reader is built for English only and runs on the GPU when CUDA is
    available. The "easyocr" logger is raised to ERROR level first so that
    lower-severity messages from the library are not emitted. The function is
    wrapped in ``st.cache_resource`` with the built-in spinner disabled.

    Returns:
        The ``easyocr.Reader`` instance.
    """
    from logging import ERROR, getLogger

    getLogger("easyocr").setLevel(ERROR)
    use_gpu = torch.cuda.is_available()
    return easyocr.Reader(['en'], gpu=use_gpu)
@st.cache_resource(show_spinner=False)
def load_trocr_model(model_path):
    """Load a TrOCR processor/model pair and prepare it for inference.

    The model is moved to CUDA when available (and converted to half
    precision there), otherwise it stays on the CPU. Afterwards the table of
    every sinusoidal positional-embedding module is recomputed and stored on
    the chosen device with the model's dtype, and the model is switched to
    evaluation mode.

    Args:
        model_path: Identifier or path accepted by ``from_pretrained``.

    Returns:
        A ``(processor, model, device)`` tuple.
    """
    target = "cuda" if torch.cuda.is_available() else "cpu"
    device = torch.device(target)

    proc = TrOCRProcessor.from_pretrained(model_path)
    model = VisionEncoderDecoderModel.from_pretrained(model_path)
    model.to(device)
    if device.type == "cuda":
        model = model.half()

    # Rebuild the sinusoidal position tables from scratch.
    # NOTE(review): the original author labels this "the actual root-cause fix";
    # presumably the tables come out wrong after loading / casting — confirm.
    for layer in model.modules():
        layer_cls = layer.__class__
        if "TrOCRSinusoidalPositionalEmbedding" not in layer_cls.__name__:
            continue
        num_positions, embedding_dim = layer.weights.shape
        rebuilt = layer_cls.get_embedding(
            num_positions,
            embedding_dim,
            padding_idx=getattr(layer, "padding_idx", None),
        )
        layer.weights = rebuilt.to(device=device, dtype=model.dtype)

    model.eval()
    return proc, model, device
def extract_lines(pil_img, reader):
    """Split an image into one padded crop per detected line of text.

    Text regions are detected with ``reader.readtext(..., paragraph=False)``,
    reduced to axis-aligned boxes, and merged into rows: a box joins the first
    existing row whose vertical centre lies within 0.6 x the median box height
    of its own centre, otherwise it starts a new row.

    Args:
        pil_img: RGB PIL image to segment.
        reader: Object exposing an EasyOCR-style ``readtext`` method.

    Returns:
        A list of PIL images ordered top to bottom. Each crop is the row's
        bounding box grown by 20 px horizontally and 15 px vertically (clamped
        to the image) and then framed with a 40 px white border. The list is
        empty when nothing is detected.
    """
    bgr = cv2.cvtColor(np.array(pil_img), cv2.COLOR_RGB2BGR)
    detections = reader.readtext(bgr, paragraph=False)

    # Collapse each detected quadrilateral to its axis-aligned extent.
    word_boxes = []
    for quad, _text, _conf in detections:
        xs = [pt[0] for pt in quad]
        ys = [pt[1] for pt in quad]
        word_boxes.append({
            'x_min': min(xs),
            'x_max': max(xs),
            'y_min': min(ys),
            'y_max': max(ys),
        })
    if not word_boxes:
        return []

    word_boxes.sort(key=lambda b: b['y_min'])
    heights = [b['y_max'] - b['y_min'] for b in word_boxes]
    y_tol = np.median(heights) * 0.6

    # Greedy row assignment, top to bottom.
    rows = []
    for box in word_boxes:
        box_mid = (box['y_min'] + box['y_max']) / 2.0
        for row in rows:
            row_mid = (row['y_min'] + row['y_max']) / 2.0
            if abs(box_mid - row_mid) < y_tol:
                row['x_min'] = min(row['x_min'], box['x_min'])
                row['x_max'] = max(row['x_max'], box['x_max'])
                row['y_min'] = min(row['y_min'], box['y_min'])
                row['y_max'] = max(row['y_max'], box['y_max'])
                break
        else:
            # No existing row was close enough: start a new one.
            rows.append(box.copy())

    rows.sort(key=lambda b: b['y_min'])
    crops = []
    for row in rows:
        left = max(0, int(row['x_min']) - 20)
        top = max(0, int(row['y_min']) - 15)
        right = min(pil_img.width, int(row['x_max']) + 20)
        bottom = min(pil_img.height, int(row['y_max']) + 15)
        line_img = pil_img.crop((left, top, right, bottom))
        crops.append(ImageOps.expand(line_img, border=40, fill=(255, 255, 255)))
    return crops
# Maps each entry of the model selector to the repository it is loaded from.
_MODEL_REPOS = {
    "V13 Specialist": "Hypernova823/ReadAI",
    "Microsoft Large": "microsoft/trocr-large-handwritten",
}

# Decorative upload card; the real (transparent) file uploader is laid over it by the CSS.
_INGEST_CARD_HTML = """
<div class="ingest-card">
<div class="camera-box"><span class="material-symbols-outlined" style="font-size:42px; color:#8ff5ff;">add_a_photo</span></div>
<div class="ingest-title">Initialize Data Input</div>
<div class="browse-btn">BROWSE LOCAL STORAGE</div>
</div>
"""


def _render_header():
    """Draw the page title and the INFO popover describing the pipeline."""
    _, title_col, info_col = st.columns([1, 8, 1])
    with title_col:
        st.markdown('<h1 class="hero-title"><span class="strike">Handwronging</span><span class="hero-accent">Handwriting</span> OCR</h1>', unsafe_allow_html=True)
    with info_col:
        with st.popover("INFO"):
            st.markdown("### 🧠 Forensic Neural Architecture")
            st.write("This engine operates in a two-stage forensic sequence designed to maximize character fidelity. First, **EasyOCR** maps the image using mathematical line fusion, isolating text rows. Second, a **TrOCR Transformer** synthesizes the features into text. It may take a long time if ran online.")
            st.markdown("---")
            # The emoji here was mojibake ("βš™οΈ") in the original source.
            st.markdown("### ⚙️ The Neural Engines")
            st.write("**Model V13 (Specialist):** I trained this specific model myself using the **IAM Handwriting Database** (over 65,000 instances). It is highly optimized for cursive loops and manual pen-strokes. It is excellent for handwritten manuscripts but might struggle with standard modern print.")
            st.write("**Microsoft Large (1.3B Fallback):** A massive generalist model trained on millions of varied script and print examples. It is better for general use cases, complex historical documents, or heavily degraded text where V13 might struggle.")


def _run_scan(image, reader, model_repo):
    """Run line detection and TrOCR recognition on *image*.

    Args:
        image: RGB PIL image to transcribe.
        reader: EasyOCR reader passed through to ``extract_lines``.
        model_repo: Model identifier passed to ``load_trocr_model``.

    Returns:
        A dict with keys ``text`` (lines joined by newlines), ``time``
        (elapsed seconds), ``words`` (whitespace-separated token count) and
        ``conf`` (mean token probability x 100, or 0 when unavailable).
    """
    start = time.time()
    with st.spinner("Allocating Neural Resources & Loading Weights..."):
        proc, model, device = load_trocr_model(model_repo)
        crops = extract_lines(image, reader)

    decoded, scores = [], []
    total_crops = len(crops)
    if total_crops > 0:
        progress_bar = st.progress(0, text="Initializing Neural Matrix...")
        for idx, crop in enumerate(crops):
            pct = int((idx / total_crops) * 100)
            progress_bar.progress(pct, text=f"Synthesizing segment {idx + 1} out of {total_crops} | {pct}% Complete...")
            pixel_values = proc(crop, return_tensors="pt").pixel_values.to(device)
            if device.type == "cuda":
                # The model was converted to half precision on CUDA; match it.
                pixel_values = pixel_values.half()
            with torch.no_grad():
                out = model.generate(pixel_values, max_new_tokens=64, return_dict_in_generate=True, output_scores=True)
            decoded.append(proc.batch_decode(out.sequences, skip_special_tokens=True)[0].strip())
            try:
                transition = model.compute_transition_scores(out.sequences, out.scores, normalize_logits=True)
                scores.extend(np.exp(transition[0].cpu().numpy()))
            except Exception:
                # Confidence is best-effort: a line whose scores cannot be
                # computed simply does not contribute to the average.
                pass
        # Snap to 100% just before closing out.
        progress_bar.progress(100, text="Sequence Complete. Compiling output...")
        time.sleep(0.3)

    text = "\n".join(decoded)
    return {
        "text": text,
        "time": time.time() - start,
        "words": len(text.split()),
        "conf": np.mean(scores) * 100 if scores else 0,
    }


def _render_results(res):
    """Show the stat cards, the transcription and (when possible) its audio."""
    import html

    latency_col, words_col, conf_col = st.columns(3)
    latency_col.markdown(f'<div class="stat-card"><div class="stat-val">{res["time"]:.1f}s</div><div class="stat-lbl">Latency</div></div>', unsafe_allow_html=True)
    words_col.markdown(f'<div class="stat-card"><div class="stat-val">{res["words"]}</div><div class="stat-lbl">Words</div></div>', unsafe_allow_html=True)
    conf_col.markdown(f'<div class="stat-card"><div class="stat-val">{res["conf"]:.1f}%</div><div class="stat-lbl">Confidence</div></div>', unsafe_allow_html=True)

    # The text comes from a user-supplied image and is rendered as raw HTML,
    # so it must be escaped before interpolation.
    safe_text = html.escape(res["text"])
    st.markdown(f'<div class="output-box">{safe_text}</div>', unsafe_allow_html=True)

    # gTTS rejects empty input, which happens whenever no text was detected.
    if res["text"].strip():
        tts = gTTS(text=res["text"], lang='en')
        fp = io.BytesIO()
        tts.write_to_fp(fp)
        fp.seek(0)
        st.audio(fp, format='audio/mp3')


def main():
    """Render the app: header, upload/controls column and results column.

    Session state holds ``image_data`` (the uploaded RGB image or ``None``)
    and ``ocr_results`` (the dict produced by a scan or ``None``). Pressing
    "RUN NEURAL SCAN" runs the pipeline, stores the results and reruns the
    script so the results branch renders them.
    """
    _render_header()

    if "image_data" not in st.session_state:
        st.session_state.update({"image_data": None, "ocr_results": None})

    reader = load_vision_engine()
    c_left, c_right = st.columns([1, 2], gap="large")
    run_scan_trigger = False

    with c_left:
        model_choice = st.selectbox("SELECT MODEL", list(_MODEL_REPOS))
        st.markdown("<div style='height: 15px;'></div>", unsafe_allow_html=True)
        if st.session_state.image_data is None:
            st.markdown(_INGEST_CARD_HTML, unsafe_allow_html=True)
            uploaded = st.file_uploader("Upload", type=['png', 'jpg', 'jpeg'], label_visibility="hidden")
            if uploaded:
                st.session_state.image_data = Image.open(uploaded).convert("RGB")
                st.rerun()
        else:
            st.image(st.session_state.image_data, width=350)
            btn_col1, btn_col2 = st.columns(2)
            with btn_col1:
                if st.button("REMOVE IMAGE"):
                    st.session_state.update({"image_data": None, "ocr_results": None})
                    st.rerun()
            with btn_col2:
                if st.button("RUN NEURAL SCAN"):
                    run_scan_trigger = True

    with c_right:
        if run_scan_trigger:
            st.session_state.ocr_results = _run_scan(
                st.session_state.image_data, reader, _MODEL_REPOS[model_choice]
            )
            st.rerun()
        elif st.session_state.ocr_results:
            _render_results(st.session_state.ocr_results)
        else:
            st.markdown('<div style="height: 100%; display: flex; align-items: center; justify-content: center; opacity: 0.3; margin-top: 150px;"><h3 style="font-family:Space Grotesk; font-weight:300;">AWAITING SCAN SEQUENCE...</h3></div>', unsafe_allow_html=True)
# Script entry point.
if __name__ == "__main__":
    main()