Hypernova823 committed on
Commit
6ef7609
Β·
verified Β·
1 Parent(s): 96f9fd9

Upload streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +45 -22
src/streamlit_app.py CHANGED
@@ -37,16 +37,29 @@ h1 a, h2 a, h3 a, h4 a, h5 a, h6 a { display: none !important; pointer-events: n
37
  .stMarkdown a { text-decoration: none !important; pointer-events: none !important; }
38
 
39
  /* ═══════════════════════════════════════════════════════════════
40
- STATUS, SPINNERS & TOASTS
41
  ═══════════════════════════════════════════════════════════════ */
42
- [data-testid="stStatusWidget"], [data-testid="stToast"], div[role="status"], div[data-baseweb="toast"] {
43
- background-color: #171a1f !important; border: 1px solid #8ff5ff !important; border-radius: 4px !important;
 
 
 
 
44
  }
45
- [data-testid="stStatusWidget"] *, [data-testid="stToast"] *, div[role="status"] * {
46
- color: #8ff5ff !important; font-family: 'Space Grotesk', sans-serif !important;
 
 
 
 
47
  }
 
48
  [data-testid="stStatusWidget"] label { color: #f6f6fc !important; }
49
 
 
 
 
 
50
  /* ═══════════════════════════════════════════════════════════════
51
  SELECT MODEL BOX
52
  ═══════════════════════════════════════════════════════════════ */
@@ -140,13 +153,13 @@ div[data-testid="stFileUploader"] * {
140
  # ═══════════════════════════════════════════════════════════════
141
  # MODELS & OCR LOGIC
142
  # ═══════════════════════════════════════════════════════════════
143
- @st.cache_resource(show_spinner=False)
144
  def load_vision_engine():
145
  import logging
146
  logging.getLogger("easyocr").setLevel(logging.ERROR)
147
  return easyocr.Reader(['en'], gpu=torch.cuda.is_available())
148
 
149
- @st.cache_resource(show_spinner=False)
150
  def load_trocr_model(model_path):
151
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
152
 
@@ -158,21 +171,14 @@ def load_trocr_model(model_path):
158
  model = model.half()
159
 
160
  # ─── THE ACTUAL ROOT-CAUSE FIX ───
161
- # Find the broken Hugging Face class, destroy its empty meta tensor,
162
- # and mathematically rebuild a brand new tensor natively on the GPU.
163
  for module in model.modules():
164
  if "TrOCRSinusoidalPositionalEmbedding" in module.__class__.__name__:
165
- # Extract dimensions from the broken tensor
166
  num_positions, embedding_dim = module.weights.shape
167
-
168
- # Use the class's own method to generate a brand new, physical tensor
169
  new_weights = module.__class__.get_embedding(
170
  num_positions,
171
  embedding_dim,
172
  padding_idx=getattr(module, "padding_idx", None)
173
  )
174
-
175
- # Assign the real tensor directly to the GPU matching the model's datatype
176
  module.weights = new_weights.to(device=device, dtype=model.dtype)
177
 
178
  model.eval()
@@ -209,7 +215,7 @@ def main():
209
  with col_t3:
210
  with st.popover("INFO"):
211
  st.markdown("### 🧠 Forensic Neural Architecture")
212
- st.write("This engine operates in a two-stage forensic sequence designed to maximize character fidelity. First, **EasyOCR** maps the image using mathematical line fusion, isolating text rows. Second, a **TrOCR Transformer** synthesizes the features into text.")
213
  st.markdown("---")
214
  st.markdown("### βš™οΈ The Neural Engines")
215
  st.write("**Model V13 (Specialist):** I trained this specific model myself using the **IAM Handwriting Database** (over 65,000 instances). It is highly optimized for cursive loops and manual pen-strokes. It is excellent for handwritten manuscripts but might struggle with standard modern print.")
@@ -254,12 +260,24 @@ def main():
254
 
255
  with c_right:
256
  if run_scan_trigger:
257
- with st.spinner("Extracting parameters and running neural synthesis..."):
258
- start = time.time()
259
- crops = extract_lines(st.session_state.image_data, reader)
260
- proc, model, device = load_trocr_model(m_map[model_choice])
261
- decoded, scores = [], []
262
- for crop in crops:
 
 
 
 
 
 
 
 
 
 
 
 
263
  pixel_values = proc(crop, return_tensors="pt").pixel_values.to(device)
264
  if device.type == "cuda": pixel_values = pixel_values.half()
265
  with torch.no_grad():
@@ -267,7 +285,12 @@ def main():
267
  decoded.append(proc.batch_decode(out.sequences, skip_special_tokens=True)[0].strip())
268
  try: scores.extend(np.exp(model.compute_transition_scores(out.sequences, out.scores, normalize_logits=True)[0].cpu().numpy()))
269
  except: pass
270
- st.session_state.ocr_results = {"text": "\n".join(decoded), "time": time.time() - start, "words": len("\n".join(decoded).split()), "conf": np.mean(scores)*100 if scores else 0}
 
 
 
 
 
271
  st.rerun()
272
 
273
  elif st.session_state.ocr_results:
 
37
  .stMarkdown a { text-decoration: none !important; pointer-events: none !important; }
38
 
39
  /* ═══════════════════════════════════════════════════════════════
40
+ STATUS, SPINNERS, TOASTS & PROGRESS BARS (100% DARK MODE FIX)
41
  ═══════════════════════════════════════════════════════════════ */
42
+ /* Target absolutely every single popup, toast, and cache notification container */
43
+ [data-testid="stStatusWidget"], [data-testid="stToast"], [data-testid="stToastContainer"],
44
+ [data-testid="stNotification"], [data-testid="stNotificationContainer"],
45
+ div[role="status"], div[role="alert"], div[role="dialog"],
46
+ div[data-baseweb="toast"], div[data-baseweb="snackbar"], div[data-baseweb="notification"] {
47
+ background-color: #171a1f !important; border: 1px solid #8ff5ff !important; border-radius: 4px !important; box-shadow: 0 0 15px rgba(143, 245, 255, 0.05) !important;
48
  }
49
+
50
+ /* Force nested white boxes to turn transparent so the dark background shows */
51
+ [data-testid="stStatusWidget"] *, [data-testid="stToast"] *, [data-testid="stNotification"] *,
52
+ div[role="status"] *, div[role="alert"] *, div[role="dialog"] *,
53
+ div[data-baseweb="toast"] *, div[data-baseweb="snackbar"] *, div[data-baseweb="notification"] * {
54
+ background-color: transparent !important; color: #8ff5ff !important; font-family: 'Space Grotesk', sans-serif !important;
55
  }
56
+
57
  [data-testid="stStatusWidget"] label { color: #f6f6fc !important; }
58
 
59
+ /* Dynamic Progress Bar Override */
60
+ [data-testid="stProgress"] > div > div > div > div { background-color: #8ff5ff !important; }
61
+ [data-testid="stProgress"] p { color: #8ff5ff !important; font-family: 'Space Grotesk' !important; font-size: 14px !important; font-weight: 500 !important; letter-spacing: 1px; text-transform: uppercase;}
62
+
63
  /* ═══════════════════════════════════════════════════════════════
64
  SELECT MODEL BOX
65
  ═══════════════════════════════════════════════════════════════ */
 
153
  # ═══════════════════════════════════════════════════════════════
154
  # MODELS & OCR LOGIC
155
  # ═══════════════════════════════════════════════════════════════
156
@st.cache_resource(show_spinner="Waking up Neural Architecture...")
def load_vision_engine():
    """Build and cache the EasyOCR reader, using the GPU when CUDA is available."""
    import logging

    # Quiet easyocr's internal logger so only errors surface in the app.
    logging.getLogger("easyocr").setLevel(logging.ERROR)
    use_gpu = torch.cuda.is_available()
    return easyocr.Reader(['en'], gpu=use_gpu)
161
 
162
+ @st.cache_resource(show_spinner="Loading TrOCR Weights into VRAM...")
163
  def load_trocr_model(model_path):
164
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
165
 
 
171
  model = model.half()
172
 
173
  # ─── THE ACTUAL ROOT-CAUSE FIX ───
 
 
174
  for module in model.modules():
175
  if "TrOCRSinusoidalPositionalEmbedding" in module.__class__.__name__:
 
176
  num_positions, embedding_dim = module.weights.shape
 
 
177
  new_weights = module.__class__.get_embedding(
178
  num_positions,
179
  embedding_dim,
180
  padding_idx=getattr(module, "padding_idx", None)
181
  )
 
 
182
  module.weights = new_weights.to(device=device, dtype=model.dtype)
183
 
184
  model.eval()
 
215
  with col_t3:
216
  with st.popover("INFO"):
217
  st.markdown("### 🧠 Forensic Neural Architecture")
218
+ st.write("This engine operates in a two-stage forensic sequence designed to maximize character fidelity. First, **EasyOCR** maps the image using mathematical line fusion, isolating text rows. Second, a **TrOCR Transformer** synthesizes the features into text. It may take a long time if ran online.")
219
  st.markdown("---")
220
  st.markdown("### βš™οΈ The Neural Engines")
221
  st.write("**Model V13 (Specialist):** I trained this specific model myself using the **IAM Handwriting Database** (over 65,000 instances). It is highly optimized for cursive loops and manual pen-strokes. It is excellent for handwritten manuscripts but might struggle with standard modern print.")
 
260
 
261
  with c_right:
262
  if run_scan_trigger:
263
+ start = time.time()
264
+
265
+ # This triggers the @st.cache_resource loaders
266
+ proc, model, device = load_trocr_model(m_map[model_choice])
267
+ crops = extract_lines(st.session_state.image_data, reader)
268
+
269
+ decoded, scores = [], []
270
+ total_crops = len(crops)
271
+
272
+ if total_crops > 0:
273
+ # ─── DYNAMIC PROGRESS BAR INJECTION ───
274
+ progress_bar = st.progress(0, text="Initializing Neural Matrix...")
275
+
276
+ for idx, crop in enumerate(crops):
277
+ # Update Progress Text & Percentage
278
+ pct = int((idx / total_crops) * 100)
279
+ progress_bar.progress(pct, text=f"Synthesizing segment {idx + 1} out of {total_crops} | {pct}% Complete...")
280
+
281
  pixel_values = proc(crop, return_tensors="pt").pixel_values.to(device)
282
  if device.type == "cuda": pixel_values = pixel_values.half()
283
  with torch.no_grad():
 
285
  decoded.append(proc.batch_decode(out.sequences, skip_special_tokens=True)[0].strip())
286
  try: scores.extend(np.exp(model.compute_transition_scores(out.sequences, out.scores, normalize_logits=True)[0].cpu().numpy()))
287
  except: pass
288
+
289
+ # Snap to 100% just before closing out
290
+ progress_bar.progress(100, text="Sequence Complete. Compiling output...")
291
+ time.sleep(0.3)
292
+
293
+ st.session_state.ocr_results = {"text": "\n".join(decoded), "time": time.time() - start, "words": len("\n".join(decoded).split()), "conf": np.mean(scores)*100 if scores else 0}
294
  st.rerun()
295
 
296
  elif st.session_state.ocr_results: