File size: 19,823 Bytes
24ca153
 
 
 
 
 
 
 
 
 
96f9fd9
24ca153
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6ef7609
24ca153
6ef7609
 
 
 
 
 
24ca153
6ef7609
 
 
 
 
 
24ca153
6ef7609
24ca153
 
6ef7609
 
 
 
24ca153
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2a5d903
24ca153
 
 
3faf505
 
 
 
 
 
 
 
 
 
 
 
 
 
24ca153
 
 
 
 
 
 
 
 
 
 
 
cbdfce9
 
24ca153
 
 
 
 
cbdfce9
24ca153
 
2a5d903
24ca153
96f9fd9
24ca153
 
 
 
96f9fd9
 
 
 
 
 
 
 
 
 
 
24ca153
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6ef7609
24ca153
 
 
 
 
 
 
 
 
 
 
 
96f9fd9
24ca153
 
eb33491
 
 
 
24ca153
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6ef7609
 
cbdfce9
 
 
 
6ef7609
 
 
 
 
 
 
 
 
 
 
 
 
24ca153
 
 
 
 
 
 
6ef7609
 
 
 
 
 
24ca153
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
import streamlit as st
import torch
import cv2
import numpy as np
import easyocr
import os
import io
import time
from gtts import gTTS
from PIL import Image, ImageOps
from transformers import TrOCRProcessor, VisionEncoderDecoderModel

# ═══════════════════════════════════════════════════════════════
# UI CONFIGURATION & ATOMIC CSS OVERRIDES
# ═══════════════════════════════════════════════════════════════
# Page-level settings: browser-tab title, wide layout, and the sidebar collapsed
# on first load. This is the first Streamlit command issued by this file.
st.set_page_config(page_title="Handwriting Engine", layout="wide", initial_sidebar_state="collapsed")

# Inject the app-wide stylesheet as a raw <style> element; unsafe_allow_html=True
# is passed so the markup is emitted as HTML instead of being escaped.
# The rules below restyle widgets through data-testid / data-baseweb / role
# selectors and hide the default header, footer, toolbar and decoration.
# The stFileUploader rules make the real uploader fully transparent and shift it
# upward with a negative margin so it overlays the decorative ".ingest-card"
# markup that main() renders just before the uploader.
# NOTE(review): these selectors target Streamlit-internal attributes; presumably
# they need re-checking after a Streamlit upgrade — confirm.
st.markdown("""

<style>

@import url('https://fonts.googleapis.com/css2?family=Space+Grotesk:wght@300;500;700&family=Manrope:wght@300;400;600&display=swap');

@import url('https://fonts.googleapis.com/css2?family=Material+Symbols+Outlined:wght,FILL@100..700,0..1&display=swap');



/* Global Dark Base & NUKED PADDING */

.stApp { background-color: #0c0e12 !important; color: #f6f6fc !important; font-family: 'Manrope', sans-serif; overflow: hidden; }

.block-container { padding-top: 0rem !important; padding-bottom: 0rem !important; max-width: 95% !important; }



/* Subtle Title */

.hero-title { font-family: 'Space Grotesk'; font-size: 38px; font-weight: 300; margin-top: 5px; margin-bottom: 15px; text-align: center; }

.hero-accent { color: #8ff5ff; font-weight: 700; font-style: italic; text-shadow: 0 0 20px rgba(143, 245, 255, 0.5); }

.strike { text-decoration: line-through; color: #46484d; font-size: 18px; opacity: 0.5; margin-right: 12px; vertical-align: middle; }



/* ═══════════════════════════════════════════════════════════════

   NUKE ALL ANCHOR LINKS & HEADER HOVERS

   ═══════════════════════════════════════════════════════════════ */

a.header-anchor, a[href^="#"] { display: none !important; pointer-events: none !important; }

h1 a, h2 a, h3 a, h4 a, h5 a, h6 a { display: none !important; pointer-events: none !important; }

.stMarkdown a { text-decoration: none !important; pointer-events: none !important; }



/* ═══════════════════════════════════════════════════════════════

   STATUS, SPINNERS, TOASTS & PROGRESS BARS (100% DARK MODE FIX)

   ═══════════════════════════════════════════════════════════════ */

/* Target absolutely every single popup, toast, and cache notification container */

[data-testid="stStatusWidget"], [data-testid="stToast"], [data-testid="stToastContainer"], 

[data-testid="stNotification"], [data-testid="stNotificationContainer"], 

div[role="status"], div[role="alert"], div[role="dialog"], 

div[data-baseweb="toast"], div[data-baseweb="snackbar"], div[data-baseweb="notification"] { 

    background-color: #171a1f !important; border: 1px solid #8ff5ff !important; border-radius: 4px !important; box-shadow: 0 0 15px rgba(143, 245, 255, 0.05) !important;

}



/* Force nested white boxes to turn transparent so the dark background shows */

[data-testid="stStatusWidget"] *, [data-testid="stToast"] *, [data-testid="stNotification"] *, 

div[role="status"] *, div[role="alert"] *, div[role="dialog"] *, 

div[data-baseweb="toast"] *, div[data-baseweb="snackbar"] *, div[data-baseweb="notification"] * { 

    background-color: transparent !important; color: #8ff5ff !important; font-family: 'Space Grotesk', sans-serif !important;

}



[data-testid="stStatusWidget"] label { color: #f6f6fc !important; }



/* Dynamic Progress Bar Override */

[data-testid="stProgress"] > div > div > div > div { background-color: #8ff5ff !important; }

[data-testid="stProgress"] p { color: #8ff5ff !important; font-family: 'Space Grotesk' !important; font-size: 14px !important; font-weight: 500 !important; letter-spacing: 1px; text-transform: uppercase;}



/* ═══════════════════════════════════════════════════════════════

   SELECT MODEL BOX

   ═══════════════════════════════════════════════════════════════ */

div[data-testid="stSelectbox"] label { color: #8ff5ff !important; font-family: 'Space Grotesk' !important; text-transform: uppercase; letter-spacing: 1.5px; font-size: 11px; }

div[data-testid="stSelectbox"] div[data-baseweb="select"] { background-color: #0c0e12 !important; border: 1px solid #8ff5ff !important; border-radius: 4px !important; }

div[data-testid="stSelectbox"] div[data-baseweb="select"] * { background-color: #0c0e12 !important; color: #f6f6fc !important; }



/* Dropdown Menu Portal */

div[data-baseweb="popover"], div[data-baseweb="menu"], ul[role="listbox"] { background-color: #0c0e12 !important; border: 1px solid #8ff5ff !important; }

li[role="option"] { background-color: #0c0e12 !important; color: #f6f6fc !important; }

li[role="option"]:hover, li[role="option"]:hover * { background-color: #171a1f !important; color: #8ff5ff !important; }



/* ═══════════════════════════════════════════════════════════════

   INFO POPOVER BOX

   ═══════════════════════════════════════════════════════════════ */

div[data-testid="stPopover"] button { background-color: #171a1f !important; border: 1px solid rgba(143, 245, 255, 0.3) !important; color: #8ff5ff !important; min-width: 80px !important; height: 38px !important; }

div[data-testid="stPopover"] span[data-testid="stBaseButton-label"] div { display: none !important; }

div[data-testid="stPopoverBody"] { background-color: #0c0e12 !important; border: 1px solid #8ff5ff !important; padding: 40px !important; min-width: 850px !important; max-height: none !important; overflow: visible !important; }

div[data-testid="stPopoverBody"] * { color: #f6f6fc !important; background-color: transparent !important; font-size: 15px; }



/* ═══════════════════════════════════════════════════════════════

   THE ABSOLUTE CLICKABLE FIX (Nuclear 100% Stretch)

   ═══════════════════════════════════════════════════════════════ */

.ingest-card { 

    height: 280px; max-width: 500px; margin: 0 auto;

    background: linear-gradient(145deg, #13161b, #0c0e12); 

    border: 1px solid rgba(143, 245, 255, 0.15); border-radius: 8px;

    display: flex; flex-direction: column; align-items: center; justify-content: center;

    pointer-events: none; z-index: 1;

}

.camera-box {

    border: 2px dashed rgba(143, 245, 255, 0.4); border-radius: 4px;

    width: 80px; height: 80px; display: flex; align-items: center; justify-content: center; margin-bottom: 20px;

}

.ingest-title { font-family: 'Space Grotesk'; font-size: 22px; font-weight: 600; color: #f6f6fc; }

.browse-btn { background-color: #8ff5ff; color: #000; padding: 10px 30px; font-family: 'Space Grotesk'; font-weight: 700; border-radius: 2px; margin-top: 15px; }



/* The invisible uploader wrapper pulled precisely over the card */

div[data-testid="stFileUploader"] { 

    margin-top: -296px !important; 

    height: 280px !important; 

    max-width: 500px !important;

    margin-left: auto !important; 

    margin-right: auto !important; 

    z-index: 999 !important; 

    position: relative !important; 

    opacity: 0.0 !important;

}



/* THE TRUE FIX: Force every single internal element to stretch 100% over the box */

div[data-testid="stFileUploader"] * {

    position: absolute !important;

    top: 0 !important;

    left: 0 !important;

    right: 0 !important;

    bottom: 0 !important;

    width: 100% !important;

    height: 100% !important;

    cursor: pointer !important;

}



/* ═══════════════════════════════════════════════════════════════ */



/* Stats & DYNAMIC Output Box */

.stat-card { background: #000; padding: 15px; border-radius: 4px; text-align: center; border: 1px solid rgba(143, 245, 255, 0.1); margin-bottom: 10px; }

.stat-val { color: #8ff5ff; font-size: 24px; font-weight: 700; font-family: 'Space Grotesk'; }

.stat-lbl { font-size: 9px; color: #46484d; text-transform: uppercase; letter-spacing: 2px; }



.output-box { 

    border-left: 3px solid #8ff5ff; 

    background: #171a1f; 

    padding: 25px; 

    font-family: 'Space Grotesk'; 

    font-size: 18px; 

    line-height: 1.8; 

    height: calc(100vh - 320px); /* Dynamically scales to viewport */

    min-height: 400px; /* Safe fallback */

    overflow-y: auto; 

    white-space: pre-wrap; 

    border-radius: 0 4px 4px 0;

}



.stButton>button { background-color: rgba(143, 245, 255, 0.05) !important; border: 1px solid #8ff5ff !important; color: #8ff5ff !important; width: 100%; padding: 12px; }

.stButton>button:hover { background-color: #8ff5ff !important; color: #000 !important; }



/* Hide default streamlit items completely */

[data-testid="stHeader"], footer, [data-testid="stDecoration"], [data-testid="stToolbar"] { visibility: hidden; display: none !important; }

</style>

""", unsafe_allow_html=True)

# ═══════════════════════════════════════════════════════════════
# MODELS & OCR LOGIC
# ═══════════════════════════════════════════════════════════════
# THE KILL-SWITCH: show_spinner=False completely deletes the un-styleable white cache boxes
@st.cache_resource(show_spinner=False)
def load_vision_engine():
    """Build the English EasyOCR reader used for text detection.

    The reader is created with GPU support when CUDA is available and is
    cached by ``st.cache_resource`` (spinner disabled), so the construction
    cost is paid once rather than on every script rerun.

    Returns:
        An ``easyocr.Reader`` configured for ``['en']``.
    """
    import logging

    # Silence everything below ERROR from the "easyocr" logger.
    easyocr_logger = logging.getLogger("easyocr")
    easyocr_logger.setLevel(logging.ERROR)

    use_gpu = torch.cuda.is_available()
    return easyocr.Reader(['en'], gpu=use_gpu)

@st.cache_resource(show_spinner=False)
def load_trocr_model(model_path):
    """Load a TrOCR processor/model pair and prepare the model for inference.

    The model is moved to CUDA when available (and cast to half precision
    there), otherwise it stays on the CPU. The result is cached per
    ``model_path`` by ``st.cache_resource``.

    Args:
        model_path: Identifier or path handed to ``from_pretrained`` for both
            the processor and the model.

    Returns:
        A ``(processor, model, device)`` tuple; the model is in eval mode.
    """
    target = "cuda" if torch.cuda.is_available() else "cpu"
    device = torch.device(target)

    proc = TrOCRProcessor.from_pretrained(model_path)
    model = VisionEncoderDecoderModel.from_pretrained(model_path)

    model.to(device)
    on_gpu = device.type == "cuda"
    if on_gpu:
        model = model.half()

    # Regenerate the sinusoidal position table of every matching layer and
    # place it on the model's device with the model's dtype.
    # NOTE(review): presumably the table that comes out of from_pretrained is
    # not usable as-is (the original author calls this the root-cause fix) —
    # confirm against the installed transformers version.
    for layer in model.modules():
        layer_cls = layer.__class__
        if "TrOCRSinusoidalPositionalEmbedding" not in layer_cls.__name__:
            continue
        num_positions, embedding_dim = layer.weights.shape
        rebuilt = layer_cls.get_embedding(
            num_positions,
            embedding_dim,
            padding_idx=getattr(layer, "padding_idx", None),
        )
        layer.weights = rebuilt.to(device=device, dtype=model.dtype)

    model.eval()
    return proc, model, device

def extract_lines(pil_img, reader):
    """Split an image into per-text-line crops using detector boxes.

    Detections from ``reader.readtext`` are reduced to axis-aligned boxes,
    grouped into lines by vertical-centre proximity, and each line's bounding
    box is cropped from ``pil_img`` with padding and a white border.

    Args:
        pil_img: PIL image to crop from; converted with ``COLOR_RGB2BGR``
            before detection, so it is assumed to be RGB — verify at caller.
        reader: Object whose ``readtext(image, paragraph=False)`` yields
            3-tuples with a sequence of (x, y) points as the first element.

    Returns:
        A list of PIL images ordered top to bottom; empty when nothing is
        detected.
    """
    bgr_image = cv2.cvtColor(np.array(pil_img), cv2.COLOR_RGB2BGR)
    detections = reader.readtext(bgr_image, paragraph=False)

    # Collapse each detected polygon to its axis-aligned extent.
    word_boxes = []
    for polygon, _, _ in detections:
        xs = [pt[0] for pt in polygon]
        ys = [pt[1] for pt in polygon]
        word_boxes.append({'x_min': min(xs), 'x_max': max(xs), 'y_min': min(ys), 'y_max': max(ys)})

    if not word_boxes:
        return []

    word_boxes.sort(key=lambda b: b['y_min'])

    # Two boxes share a line when their vertical centres differ by less than
    # 60% of the median box height.
    heights = [b['y_max'] - b['y_min'] for b in word_boxes]
    y_tol = np.median(heights) * 0.6

    text_lines = []
    for box in word_boxes:
        box_centre = (box['y_min'] + box['y_max']) / 2.0
        for line in text_lines:
            line_centre = (line['y_min'] + line['y_max']) / 2.0
            if abs(box_centre - line_centre) < y_tol:
                # Grow the first matching line to the union of both extents.
                line['x_min'] = min(line['x_min'], box['x_min'])
                line['x_max'] = max(line['x_max'], box['x_max'])
                line['y_min'] = min(line['y_min'], box['y_min'])
                line['y_max'] = max(line['y_max'], box['y_max'])
                break
        else:
            # No existing line was close enough: this box starts a new one.
            text_lines.append(box.copy())

    crops = []
    for line in sorted(text_lines, key=lambda b: b['y_min']):
        # Pad 20px horizontally / 15px vertically, clamped to the image.
        left = max(0, int(line['x_min']) - 20)
        top = max(0, int(line['y_min']) - 15)
        right = min(pil_img.width, int(line['x_max']) + 20)
        bottom = min(pil_img.height, int(line['y_max']) + 15)
        region = pil_img.crop((left, top, right, bottom))
        crops.append(ImageOps.expand(region, border=40, fill=(255, 255, 255)))
    return crops

def _synthesize_speech(text):
    """Return an in-memory MP3 of *text* read aloud in English, or None.

    None is returned when *text* is blank or when gTTS fails, so the results
    view can simply omit the audio player instead of crashing.
    """
    import logging

    if not text.strip():
        return None
    buffer = io.BytesIO()
    try:
        gTTS(text=text, lang='en').write_to_fp(buffer)
    except Exception:
        # Audio is an optional extra; record the failure and carry on.
        logging.getLogger(__name__).warning("Text-to-speech synthesis failed", exc_info=True)
        return None
    buffer.seek(0)
    return buffer


def main():
    """Render the OCR page and run a scan when the user requests one.

    Layout: a title row with an INFO popover, a left column holding the model
    selector plus either the upload card or the uploaded image with its
    REMOVE / RUN buttons, and a right column that shows scan progress, the
    stored results, or a placeholder.

    State is kept in ``st.session_state`` under ``image_data`` (the uploaded
    image converted to RGB, or None) and ``ocr_results`` (a dict with
    ``text``, ``time``, ``words`` and ``conf``, or None). A finished scan
    stores its results and triggers a rerun so the results branch renders.
    """
    # Function-scope import: used to escape OCR text before it is embedded in
    # markup rendered with unsafe_allow_html=True.
    import html

    _, col_t2, col_t3 = st.columns([1, 8, 1])
    with col_t2:
        st.markdown('<h1 class="hero-title"><span class="strike">Handwronging</span><span class="hero-accent">Handwriting</span> OCR</h1>', unsafe_allow_html=True)
    with col_t3:
        with st.popover("INFO"):
            st.markdown("### 🧠 Forensic Neural Architecture")
            st.write("This engine operates in a two-stage forensic sequence designed to maximize character fidelity. First, **EasyOCR** maps the image using mathematical line fusion, isolating text rows. Second, a **TrOCR Transformer** synthesizes the features into text. It may take a long time if ran online.")
            st.markdown("---")
            st.markdown("### ⚙️ The Neural Engines")
            st.write("**Model V13 (Specialist):** I trained this specific model myself using the **IAM Handwriting Database** (over 65,000 instances). It is highly optimized for cursive loops and manual pen-strokes. It is excellent for handwritten manuscripts but might struggle with standard modern print.")
            st.write("**Microsoft Large (1.3B Fallback):** A massive generalist model trained on millions of varied script and print examples. It is better for general use cases, complex historical documents, or heavily degraded text where V13 might struggle.")

    if "image_data" not in st.session_state:
        st.session_state.update({"image_data": None, "ocr_results": None})
    reader = load_vision_engine()

    c_left, c_right = st.columns([1, 2], gap="large")
    run_scan_trigger = False

    with c_left:
        model_choice = st.selectbox("SELECT MODEL", ["V13 Specialist", "Microsoft Large"])
        st.markdown("<div style='height: 15px;'></div>", unsafe_allow_html=True)
        # Display name -> model identifier passed to load_trocr_model.
        m_map = {
            "V13 Specialist": "Hypernova823/ReadAI",
            "Microsoft Large": "microsoft/trocr-large-handwritten",
        }

        if st.session_state.image_data is None:
            # Decorative card; the real (transparent) uploader is rendered
            # right after it.
            st.markdown("""

                <div class="ingest-card">

                    <div class="camera-box"><span class="material-symbols-outlined" style="font-size:42px; color:#8ff5ff;">add_a_photo</span></div>

                    <div class="ingest-title">Initialize Data Input</div>

                    <div class="browse-btn">BROWSE LOCAL STORAGE</div>

                </div>

            """, unsafe_allow_html=True)
            uploaded = st.file_uploader("Upload", type=['png', 'jpg', 'jpeg'], label_visibility="hidden")
            if uploaded:
                st.session_state.image_data = Image.open(uploaded).convert("RGB")
                st.rerun()
        else:
            st.image(st.session_state.image_data, width=350)

            btn_col1, btn_col2 = st.columns(2)
            with btn_col1:
                if st.button("REMOVE IMAGE"):
                    st.session_state.update({"image_data": None, "ocr_results": None})
                    st.rerun()
            with btn_col2:
                if st.button("RUN NEURAL SCAN"):
                    run_scan_trigger = True

    with c_right:
        if run_scan_trigger:
            start = time.time()

            with st.spinner("Allocating Neural Resources & Loading Weights..."):
                proc, model, device = load_trocr_model(m_map[model_choice])
                crops = extract_lines(st.session_state.image_data, reader)

            decoded, scores = [], []
            total_crops = len(crops)

            if total_crops > 0:
                progress_bar = st.progress(0, text="Initializing Neural Matrix...")

                for idx, crop in enumerate(crops):
                    pct = int((idx / total_crops) * 100)
                    progress_bar.progress(pct, text=f"Synthesizing segment {idx + 1} out of {total_crops} | {pct}% Complete...")

                    pixel_values = proc(crop, return_tensors="pt").pixel_values.to(device)
                    if device.type == "cuda":
                        # The model is cast to half precision on CUDA; match it.
                        pixel_values = pixel_values.half()
                    with torch.no_grad():
                        out = model.generate(pixel_values, max_new_tokens=64, return_dict_in_generate=True, output_scores=True)
                    decoded.append(proc.batch_decode(out.sequences, skip_special_tokens=True)[0].strip())

                    # Confidence is best-effort: a scoring failure must not
                    # discard the decoded text, so the segment is just skipped.
                    try:
                        transition = model.compute_transition_scores(out.sequences, out.scores, normalize_logits=True)
                        scores.extend(np.exp(transition[0].cpu().numpy()))
                    except Exception:
                        pass

                # Show 100% briefly before the rerun replaces the progress bar.
                progress_bar.progress(100, text="Sequence Complete. Compiling output...")
                time.sleep(0.3)

            full_text = "\n".join(decoded)
            st.session_state.ocr_results = {
                "text": full_text,
                "time": time.time() - start,
                "words": len(full_text.split()),
                "conf": np.mean(scores) * 100 if scores else 0,
            }
            st.rerun()

        elif st.session_state.ocr_results:
            res = st.session_state.ocr_results
            s1, s2, s3 = st.columns(3)
            s1.markdown(f'<div class="stat-card"><div class="stat-val">{res["time"]:.1f}s</div><div class="stat-lbl">Latency</div></div>', unsafe_allow_html=True)
            s2.markdown(f'<div class="stat-card"><div class="stat-val">{res["words"]}</div><div class="stat-lbl">Words</div></div>', unsafe_allow_html=True)
            s3.markdown(f'<div class="stat-card"><div class="stat-val">{res["conf"]:.1f}%</div><div class="stat-lbl">Confidence</div></div>', unsafe_allow_html=True)
            # The text comes from an arbitrary uploaded image, so escape it
            # before embedding it in raw HTML.
            st.markdown(f'<div class="output-box">{html.escape(res["text"])}</div>', unsafe_allow_html=True)
            audio = _synthesize_speech(res["text"])
            if audio is not None:
                st.audio(audio, format='audio/mp3')

        else:
            st.markdown('<div style="height: 100%; display: flex; align-items: center; justify-content: center; opacity: 0.3; margin-top: 150px;"><h3 style="font-family:Space Grotesk; font-weight:300;">AWAITING SCAN SEQUENCE...</h3></div>', unsafe_allow_html=True)

# Build the page only when this file is executed as the main script.
if __name__ == "__main__":
    main()