Hypernova823 committed on
Commit
24ca153
·
verified ·
1 Parent(s): d33ec2c

Upload streamlit_app.py

Browse files
Files changed (1) hide show
  1. streamlit_app.py +300 -0
streamlit_app.py ADDED
@@ -0,0 +1,300 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import torch
3
+ import cv2
4
+ import numpy as np
5
+ import easyocr
6
+ import os
7
+ import io
8
+ import time
9
+ from gtts import gTTS
10
+ from PIL import Image, ImageOps
11
+ from transformers import TrOCRProcessor, VisionEncoderDecoderModel, VisionEncoderDecoderConfig
12
+
13
+ # ═══════════════════════════════════════════════════════════════
14
+ # UI CONFIGURATION & ATOMIC CSS OVERRIDES
15
+ # ═══════════════════════════════════════════════════════════════
16
# Page chrome: tab title, full-width layout, sidebar collapsed on first load.
st.set_page_config(
    page_title="Handwriting Engine",
    layout="wide",
    initial_sidebar_state="collapsed",
)
17
+
18
+ st.markdown("""
19
+ <style>
20
+ @import url('https://fonts.googleapis.com/css2?family=Space+Grotesk:wght@300;500;700&family=Manrope:wght@300;400;600&display=swap');
21
+ @import url('https://fonts.googleapis.com/css2?family=Material+Symbols+Outlined:wght,FILL@100..700,0..1&display=swap');
22
+
23
+ /* Global Dark Base & NUKED PADDING */
24
+ .stApp { background-color: #0c0e12 !important; color: #f6f6fc !important; font-family: 'Manrope', sans-serif; overflow: hidden; }
25
+ .block-container { padding-top: 0rem !important; padding-bottom: 0rem !important; max-width: 95% !important; }
26
+
27
+ /* Subtle Title */
28
+ .hero-title { font-family: 'Space Grotesk'; font-size: 38px; font-weight: 300; margin-top: 5px; margin-bottom: 15px; text-align: center; }
29
+ .hero-accent { color: #8ff5ff; font-weight: 700; font-style: italic; text-shadow: 0 0 20px rgba(143, 245, 255, 0.5); }
30
+ .strike { text-decoration: line-through; color: #46484d; font-size: 18px; opacity: 0.5; margin-right: 12px; vertical-align: middle; }
31
+
32
+ /* ═══════════════════════════════════════════════════════════════
33
+ NUKE ALL ANCHOR LINKS & HEADER HOVERS
34
+ ═══════════════════════════════════════════════════════════════ */
35
+ a.header-anchor, a[href^="#"] { display: none !important; pointer-events: none !important; }
36
+ h1 a, h2 a, h3 a, h4 a, h5 a, h6 a { display: none !important; pointer-events: none !important; }
37
+ .stMarkdown a { text-decoration: none !important; pointer-events: none !important; }
38
+
39
+ /* ═══════════════════════════════════════════════════════════════
40
+ STATUS, SPINNERS & TOASTS
41
+ ═══════════════════════════════════════════════════════════════ */
42
+ [data-testid="stStatusWidget"], [data-testid="stToast"], div[role="status"], div[data-baseweb="toast"] {
43
+ background-color: #171a1f !important; border: 1px solid #8ff5ff !important; border-radius: 4px !important;
44
+ }
45
+ [data-testid="stStatusWidget"] *, [data-testid="stToast"] *, div[role="status"] * {
46
+ color: #8ff5ff !important; font-family: 'Space Grotesk', sans-serif !important;
47
+ }
48
+ [data-testid="stStatusWidget"] label { color: #f6f6fc !important; }
49
+
50
+ /* ═══════════════════════════════════════════════════════════════
51
+ SELECT MODEL BOX
52
+ ═══════════════════════════════════════════════════════════════ */
53
+ div[data-testid="stSelectbox"] label { color: #8ff5ff !important; font-family: 'Space Grotesk' !important; text-transform: uppercase; letter-spacing: 1.5px; font-size: 11px; }
54
+ div[data-testid="stSelectbox"] div[data-baseweb="select"] { background-color: #0c0e12 !important; border: 1px solid #8ff5ff !important; border-radius: 4px !important; }
55
+ div[data-testid="stSelectbox"] div[data-baseweb="select"] * { background-color: #0c0e12 !important; color: #f6f6fc !important; }
56
+
57
+ /* Dropdown Menu Portal */
58
+ div[data-baseweb="popover"], div[data-baseweb="menu"], ul[role="listbox"] { background-color: #0c0e12 !important; border: 1px solid #8ff5ff !important; }
59
+ li[role="option"] { background-color: #0c0e12 !important; color: #f6f6fc !important; }
60
+ li[role="option"]:hover, li[role="option"]:hover * { background-color: #171a1f !important; color: #8ff5ff !important; }
61
+
62
+ /* ═══════════════════════════════════════════════════════════════
63
+ INFO POPOVER BOX
64
+ ═══════════════════════════════════════════════════════════════ */
65
+ div[data-testid="stPopover"] button { background-color: #171a1f !important; border: 1px solid rgba(143, 245, 255, 0.3) !important; color: #8ff5ff !important; min-width: 80px !important; height: 38px !important; }
66
+ div[data-testid="stPopover"] span[data-testid="stBaseButton-label"] div { display: none !important; }
67
+ div[data-testid="stPopoverBody"] { background-color: #0c0e12 !important; border: 1px solid #8ff5ff !important; padding: 40px !important; min-width: 850px !important; max-height: none !important; overflow: visible !important; }
68
+ div[data-testid="stPopoverBody"] * { color: #f6f6fc !important; background-color: transparent !important; font-size: 15px; }
69
+
70
+ /* ═══════════════════════════════════════════════════════════════
71
+ THE ABSOLUTE CLICKABLE FIX (Nuclear 100% Stretch)
72
+ ═══════════════════════════════════════════════════════════════ */
73
+ .ingest-card {
74
+ height: 280px; max-width: 500px; margin: 0 auto;
75
+ background: linear-gradient(145deg, #13161b, #0c0e12);
76
+ border: 1px solid rgba(143, 245, 255, 0.15); border-radius: 8px;
77
+ display: flex; flex-direction: column; align-items: center; justify-content: center;
78
+ pointer-events: none; z-index: 1;
79
+ }
80
+ .camera-box {
81
+ border: 2px dashed rgba(143, 245, 255, 0.4); border-radius: 4px;
82
+ width: 80px; height: 80px; display: flex; align-items: center; justify-content: center; margin-bottom: 20px;
83
+ }
84
+ .ingest-title { font-family: 'Space Grotesk'; font-size: 22px; font-weight: 600; color: #f6f6fc; }
85
+ .browse-btn { background-color: #8ff5ff; color: #000; padding: 10px 30px; font-family: 'Space Grotesk'; font-weight: 700; border-radius: 2px; margin-top: 15px; }
86
+
87
+ /* The invisible uploader wrapper pulled precisely over the card */
88
+ div[data-testid="stFileUploader"] {
89
+ margin-top: -296px !important;
90
+ height: 280px !important;
91
+ max-width: 500px !important;
92
+ margin-left: auto !important;
93
+ margin-right: auto !important;
94
+ z-index: 999 !important;
95
+ position: relative !important;
96
+ opacity: 0.0 !important;
97
+ }
98
+
99
+ /* THE TRUE FIX: Force every single internal element to stretch 100% over the box */
100
+ div[data-testid="stFileUploader"] * {
101
+ position: absolute !important;
102
+ top: 0 !important;
103
+ left: 0 !important;
104
+ right: 0 !important;
105
+ bottom: 0 !important;
106
+ width: 100% !important;
107
+ height: 100% !important;
108
+ cursor: pointer !important;
109
+ }
110
+
111
+ /* ═══════════════════════════════════════════════════════════════ */
112
+
113
+ /* Stats & DYNAMIC Output Box */
114
+ .stat-card { background: #000; padding: 15px; border-radius: 4px; text-align: center; border: 1px solid rgba(143, 245, 255, 0.1); margin-bottom: 10px; }
115
+ .stat-val { color: #8ff5ff; font-size: 24px; font-weight: 700; font-family: 'Space Grotesk'; }
116
+ .stat-lbl { font-size: 9px; color: #46484d; text-transform: uppercase; letter-spacing: 2px; }
117
+
118
+ .output-box {
119
+ border-left: 3px solid #8ff5ff;
120
+ background: #171a1f;
121
+ padding: 25px;
122
+ font-family: 'Space Grotesk';
123
+ font-size: 18px;
124
+ line-height: 1.8;
125
+ height: calc(100vh - 320px); /* Dynamically scales to viewport */
126
+ min-height: 400px; /* Safe fallback */
127
+ overflow-y: auto;
128
+ white-space: pre-wrap;
129
+ border-radius: 0 4px 4px 0;
130
+ }
131
+
132
+ .stButton>button { background-color: rgba(143, 245, 255, 0.05) !important; border: 1px solid #8ff5ff !important; color: #8ff5ff !important; width: 100%; padding: 12px; }
133
+ .stButton>button:hover { background-color: #8ff5ff !important; color: #000 !important; }
134
+
135
+ /* Hide default streamlit items completely */
136
+ [data-testid="stHeader"], footer, [data-testid="stDecoration"], [data-testid="stToolbar"] { visibility: hidden; display: none !important; }
137
+ </style>
138
+ """, unsafe_allow_html=True)
139
+
140
+ # ═══════════════════════════════════════════════════════════════
141
+ # MODELS & OCR LOGIC
142
+ # ═══════════════════════════════════════════════════════════════
143
@st.cache_resource(show_spinner=False)
def load_vision_engine():
    """Create the English EasyOCR reader used to locate text on the page.

    The reader is wrapped in ``st.cache_resource`` so Streamlit reuses one
    instance instead of rebuilding it on every script rerun. The GPU is used
    whenever CUDA is available.

    Returns:
        An ``easyocr.Reader`` configured for English.
    """
    import logging

    use_gpu = torch.cuda.is_available()

    # Only surface real errors from EasyOCR; its routine messages are noise here.
    easyocr_logger = logging.getLogger("easyocr")
    easyocr_logger.setLevel(logging.ERROR)

    return easyocr.Reader(['en'], gpu=use_gpu)
148
+
149
def _move_unregistered_tensors(model, device, to_half=False):
    """Move plain tensor attributes of every submodule onto *device*.

    ``nn.Module.to`` / ``.half`` only touch registered parameters and buffers.
    Tensors stored as ordinary attributes (the original code notes this is
    needed for the TrOCR positional weights) are left behind, so they are
    migrated here by hand.

    Args:
        model: The module tree to walk.
        device: Target ``torch.device``.
        to_half: When true, floating-point tensors are also cast to fp16.
    """
    for module in model.modules():
        # Snapshot the items: setattr below mutates module.__dict__.
        for name, attr in list(module.__dict__.items()):
            if not isinstance(attr, torch.Tensor):
                continue
            moved = attr.to(device)
            if to_half and moved.is_floating_point():
                moved = moved.half()
            setattr(module, name, moved)


@st.cache_resource(show_spinner=False)
def load_trocr_model(model_path):
    """Load a TrOCR processor and model, ready for inference.

    If *model_path* exists on disk, the model is built from its config and the
    weights are read from ``model.safetensors`` (preferred) or
    ``pytorch_model.bin``. Otherwise *model_path* is handed to
    ``from_pretrained`` as-is. On CUDA the model is converted to half
    precision. The result is cached by ``st.cache_resource`` per *model_path*.

    Args:
        model_path: Local checkpoint directory or a model identifier.

    Returns:
        A ``(processor, model, device)`` tuple; the model is in eval mode.

    Raises:
        FileNotFoundError: *model_path* exists locally but contains neither
            ``model.safetensors`` nor ``pytorch_model.bin``.
    """
    import logging

    log = logging.getLogger(__name__)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    proc = TrOCRProcessor.from_pretrained(model_path)

    if os.path.exists(model_path):
        config = VisionEncoderDecoderConfig.from_pretrained(model_path)
        model = VisionEncoderDecoderModel(config)
        safe_path = os.path.join(model_path, "model.safetensors")
        bin_path = os.path.join(model_path, "pytorch_model.bin")

        if os.path.exists(safe_path):
            from safetensors.torch import load_file
            state_dict = load_file(safe_path)
        elif os.path.exists(bin_path):
            state_dict = torch.load(bin_path, map_location="cpu", weights_only=True)
        else:
            # Fail with a clear message instead of an opaque error from torch.load.
            raise FileNotFoundError(
                f"No weights found in {model_path!r}: expected "
                "'model.safetensors' or 'pytorch_model.bin'."
            )

        # strict=False is kept (checkpoints may legitimately omit some keys),
        # but mismatches are logged so a wrong checkpoint is not silently
        # loaded as random weights.
        load_result = model.load_state_dict(state_dict, strict=False)
        if load_result.missing_keys or load_result.unexpected_keys:
            log.warning(
                "Checkpoint %s loaded with %d missing and %d unexpected keys "
                "(missing: %s; unexpected: %s)",
                model_path,
                len(load_result.missing_keys),
                len(load_result.unexpected_keys),
                load_result.missing_keys[:5],
                load_result.unexpected_keys[:5],
            )
    else:
        model = VisionEncoderDecoderModel.from_pretrained(model_path)

    # .to() already moves every registered parameter and buffer. They are not
    # re-wrapped in fresh nn.Parameter objects, because that would break
    # weight sharing between modules and duplicate the weights on .half().
    model.to(device)

    use_half = device.type == "cuda"
    if use_half:
        model = model.half()

    # Unregistered raw tensors are skipped by .to()/.half(); fix them up once.
    _move_unregistered_tensors(model, device, to_half=use_half)

    model.eval()
    return proc, model, device
198
+
199
def extract_lines(pil_img, reader):
    """Split a page image into one padded crop per detected text line.

    EasyOCR detections are reduced to axis-aligned boxes. Boxes whose vertical
    centres lie within 0.6 x the median box height of an existing line are
    merged into that line. Each line is then cropped from the page with a
    small margin and surrounded by a white border.

    Args:
        pil_img: RGB ``PIL.Image`` of the page.
        reader: Object exposing ``readtext(image, paragraph=False)`` that
            returns ``(bbox, text, confidence)`` triples.

    Returns:
        A list of ``PIL.Image`` crops ordered top to bottom; empty when
        nothing was detected.
    """
    bgr = cv2.cvtColor(np.array(pil_img), cv2.COLOR_RGB2BGR)
    detections = reader.readtext(bgr, paragraph=False)

    # Collapse each detection polygon to its bounding rectangle.
    word_boxes = []
    for polygon, _, _ in detections:
        xs = [point[0] for point in polygon]
        ys = [point[1] for point in polygon]
        word_boxes.append({'x_min': min(xs), 'x_max': max(xs), 'y_min': min(ys), 'y_max': max(ys)})

    if not word_boxes:
        return []

    word_boxes.sort(key=lambda b: b['y_min'])
    heights = [b['y_max'] - b['y_min'] for b in word_boxes]
    tolerance = np.median(heights) * 0.6

    # Greedy grouping: join the first line whose centre is close enough,
    # otherwise start a new line.
    rows = []
    for word in word_boxes:
        centre = (word['y_min'] + word['y_max']) / 2.0
        target = None
        for row in rows:
            row_centre = (row['y_min'] + row['y_max']) / 2.0
            if abs(centre - row_centre) < tolerance:
                target = row
                break

        if target is None:
            rows.append(word.copy())
            continue

        target['x_min'] = min(target['x_min'], word['x_min'])
        target['x_max'] = max(target['x_max'], word['x_max'])
        target['y_min'] = min(target['y_min'], word['y_min'])
        target['y_max'] = max(target['y_max'], word['y_max'])

    rows = sorted(rows, key=lambda b: b['y_min'])

    crops = []
    for row in rows:
        # 20 px horizontal / 15 px vertical margin, clamped to the page.
        left = max(0, int(row['x_min']) - 20)
        top = max(0, int(row['y_min']) - 15)
        right = min(pil_img.width, int(row['x_max']) + 20)
        bottom = min(pil_img.height, int(row['y_max']) + 15)
        line_img = pil_img.crop((left, top, right, bottom))
        crops.append(ImageOps.expand(line_img, border=40, fill=(255, 255, 255)))
    return crops
223
+
224
def _transcribe_crops(crops, proc, model, device):
    """Run TrOCR on each line crop; return (decoded lines, token probabilities)."""
    import logging

    decoded, scores = [], []
    for crop in crops:
        pixel_values = proc(crop, return_tensors="pt").pixel_values.to(device)
        if device.type == "cuda":
            pixel_values = pixel_values.half()
        with torch.no_grad():
            out = model.generate(pixel_values, max_new_tokens=64, return_dict_in_generate=True, output_scores=True)
        decoded.append(proc.batch_decode(out.sequences, skip_special_tokens=True)[0].strip())
        try:
            transition = model.compute_transition_scores(out.sequences, out.scores, normalize_logits=True)
            scores.extend(np.exp(transition[0].cpu().numpy()))
        except Exception:
            # Confidence is best-effort: keep the transcription, but leave a
            # trace rather than swallowing everything with a bare except.
            logging.getLogger(__name__).debug("Could not compute token confidences", exc_info=True)
    return decoded, scores


def _synthesize_speech(text):
    """Return MP3 bytes for *text* via gTTS, or None if there is nothing to say or it fails."""
    import logging

    if not text.strip():
        # gTTS refuses empty input; an empty scan simply has no audio.
        return None
    buffer = io.BytesIO()
    try:
        gTTS(text=text, lang='en').write_to_fp(buffer)
    except Exception:
        # Text-to-speech needs network access; never let it break the results view.
        logging.getLogger(__name__).warning("Text-to-speech failed", exc_info=True)
        return None
    return buffer.getvalue()


def main():
    """Render the Streamlit page: header, image upload, scan trigger and results.

    State kept in ``st.session_state``:
        image_data: the uploaded page as an RGB PIL image, or None.
        ocr_results: dict with ``text``, ``time``, ``words``, ``conf`` (and a
            cached ``audio`` entry once speech has been generated), or None.
    """
    import html

    _, col_t2, col_t3 = st.columns([1, 8, 1])
    with col_t2:
        st.markdown('<h1 class="hero-title"><span class="strike">Handwronging</span><span class="hero-accent">Handwriting</span> OCR</h1>', unsafe_allow_html=True)
    with col_t3:
        with st.popover("INFO"):
            st.markdown("### 🧠 Forensic Neural Architecture")
            st.write("This engine operates in a two-stage forensic sequence designed to maximize character fidelity. First, **EasyOCR** maps the image using mathematical line fusion, isolating text rows. Second, a **TrOCR Transformer** synthesizes the features into text.")
            st.markdown("---")
            st.markdown("### ⚙️ The Neural Engines")
            st.write("**Model V13 (Specialist):** I trained this specific model myself using the **IAM Handwriting Database** (over 65,000 instances). It is highly optimized for cursive loops and manual pen-strokes. It is excellent for handwritten manuscripts but might struggle with standard modern print.")
            st.write("**Microsoft Large (1.3B Fallback):** A massive generalist model trained on millions of varied script and print examples. It is better for general use cases, complex historical documents, or heavily degraded text where V13 might struggle.")

    if "image_data" not in st.session_state:
        st.session_state.update({"image_data": None, "ocr_results": None})
    reader = load_vision_engine()

    c_left, c_right = st.columns([1, 2], gap="large")
    run_scan_trigger = False

    with c_left:
        model_choice = st.selectbox("SELECT MODEL", ["V13 Specialist", "Microsoft Large"])
        st.markdown("<div style='height: 15px;'></div>", unsafe_allow_html=True)
        m_map = {"V13 Specialist": "./final_handwriting_model_v13", "Microsoft Large": "microsoft/trocr-large-handwritten"}

        if st.session_state.image_data is None:
            # Decorative card; the page CSS stretches the invisible uploader over it.
            st.markdown("""
            <div class="ingest-card">
                <div class="camera-box"><span class="material-symbols-outlined" style="font-size:42px; color:#8ff5ff;">add_a_photo</span></div>
                <div class="ingest-title">Initialize Data Input</div>
                <div class="browse-btn">BROWSE LOCAL STORAGE</div>
            </div>
            """, unsafe_allow_html=True)
            uploaded = st.file_uploader("Upload", type=['png', 'jpg', 'jpeg'], label_visibility="hidden")
            if uploaded:
                # Apply the EXIF orientation first so phone photos are upright
                # before line detection.
                image = ImageOps.exif_transpose(Image.open(uploaded))
                st.session_state.image_data = image.convert("RGB")
                st.rerun()
        else:
            st.image(st.session_state.image_data, width=350)

            btn_col1, btn_col2 = st.columns(2)
            with btn_col1:
                if st.button("REMOVE IMAGE"):
                    st.session_state.update({"image_data": None, "ocr_results": None})
                    st.rerun()
            with btn_col2:
                if st.button("RUN NEURAL SCAN"):
                    run_scan_trigger = True

    with c_right:
        if run_scan_trigger:
            with st.spinner("Extracting parameters and running neural synthesis..."):
                start = time.time()
                crops = extract_lines(st.session_state.image_data, reader)
                proc, model, device = load_trocr_model(m_map[model_choice])
                decoded, scores = _transcribe_crops(crops, proc, model, device)
                text = "\n".join(decoded)
                st.session_state.ocr_results = {
                    "text": text,
                    "time": time.time() - start,
                    "words": len(text.split()),
                    "conf": float(np.mean(scores) * 100) if scores else 0,
                }
                st.rerun()

        elif st.session_state.ocr_results:
            res = st.session_state.ocr_results
            s1, s2, s3 = st.columns(3)
            s1.markdown(f'<div class="stat-card"><div class="stat-val">{res["time"]:.1f}s</div><div class="stat-lbl">Latency</div></div>', unsafe_allow_html=True)
            s2.markdown(f'<div class="stat-card"><div class="stat-val">{res["words"]}</div><div class="stat-lbl">Words</div></div>', unsafe_allow_html=True)
            s3.markdown(f'<div class="stat-card"><div class="stat-val">{res["conf"]:.1f}%</div><div class="stat-lbl">Confidence</div></div>', unsafe_allow_html=True)

            # The transcription is untrusted model output rendered as raw HTML:
            # escape it so '<', '>' and '&' show literally instead of being parsed.
            st.markdown(f'<div class="output-box">{html.escape(res["text"])}</div>', unsafe_allow_html=True)

            # Generate speech once per scan and cache it with the results, so
            # reruns do not repeat the text-to-speech request.
            audio = res.get("audio")
            if audio is None and res["text"].strip():
                audio = res["audio"] = _synthesize_speech(res["text"])
            if audio:
                st.audio(audio, format='audio/mp3')

        else:
            st.markdown('<div style="height: 100%; display: flex; align-items: center; justify-content: center; opacity: 0.3; margin-top: 150px;"><h3 style="font-family:Space Grotesk; font-weight:300;">AWAITING SCAN SEQUENCE...</h3></div>', unsafe_allow_html=True)
299
+
300
# Script entry point: build the page when the file is executed directly.
if __name__ == "__main__":
    main()