Spaces:
Sleeping
Sleeping
Upload streamlit_app.py
Browse files- src/streamlit_app.py +45 -22
src/streamlit_app.py
CHANGED
|
@@ -37,16 +37,29 @@ h1 a, h2 a, h3 a, h4 a, h5 a, h6 a { display: none !important; pointer-events: n
|
|
| 37 |
.stMarkdown a { text-decoration: none !important; pointer-events: none !important; }
|
| 38 |
|
| 39 |
/* ───────────────────────────────────────────────────────────────
|
| 40 |
-
STATUS, SPINNERS &
|
| 41 |
─────────────────────────────────────────────────────────────── */
|
| 42 |
-
|
| 43 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
}
|
| 45 |
-
|
| 46 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 47 |
}
|
|
|
|
| 48 |
[data-testid="stStatusWidget"] label { color: #f6f6fc !important; }
|
| 49 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 50 |
/* ───────────────────────────────────────────────────────────────
|
| 51 |
SELECT MODEL BOX
|
| 52 |
─────────────────────────────────────────────────────────────── */
|
|
@@ -140,13 +153,13 @@ div[data-testid="stFileUploader"] * {
|
|
| 140 |
# ───────────────────────────────────────────────────────────────
|
| 141 |
# MODELS & OCR LOGIC
|
| 142 |
# ───────────────────────────────────────────────────────────────
|
| 143 |
-
@st.cache_resource(show_spinner=
|
| 144 |
def load_vision_engine():
|
| 145 |
import logging
|
| 146 |
logging.getLogger("easyocr").setLevel(logging.ERROR)
|
| 147 |
return easyocr.Reader(['en'], gpu=torch.cuda.is_available())
|
| 148 |
|
| 149 |
-
@st.cache_resource(show_spinner=
|
| 150 |
def load_trocr_model(model_path):
|
| 151 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 152 |
|
|
@@ -158,21 +171,14 @@ def load_trocr_model(model_path):
|
|
| 158 |
model = model.half()
|
| 159 |
|
| 160 |
# ─── THE ACTUAL ROOT-CAUSE FIX ───
|
| 161 |
-
# Find the broken Hugging Face class, destroy its empty meta tensor,
|
| 162 |
-
# and mathematically rebuild a brand new tensor natively on the GPU.
|
| 163 |
for module in model.modules():
|
| 164 |
if "TrOCRSinusoidalPositionalEmbedding" in module.__class__.__name__:
|
| 165 |
-
# Extract dimensions from the broken tensor
|
| 166 |
num_positions, embedding_dim = module.weights.shape
|
| 167 |
-
|
| 168 |
-
# Use the class's own method to generate a brand new, physical tensor
|
| 169 |
new_weights = module.__class__.get_embedding(
|
| 170 |
num_positions,
|
| 171 |
embedding_dim,
|
| 172 |
padding_idx=getattr(module, "padding_idx", None)
|
| 173 |
)
|
| 174 |
-
|
| 175 |
-
# Assign the real tensor directly to the GPU matching the model's datatype
|
| 176 |
module.weights = new_weights.to(device=device, dtype=model.dtype)
|
| 177 |
|
| 178 |
model.eval()
|
|
@@ -209,7 +215,7 @@ def main():
|
|
| 209 |
with col_t3:
|
| 210 |
with st.popover("INFO"):
|
| 211 |
st.markdown("### 🧠 Forensic Neural Architecture")
|
| 212 |
-
st.write("This engine operates in a two-stage forensic sequence designed to maximize character fidelity. First, **EasyOCR** maps the image using mathematical line fusion, isolating text rows. Second, a **TrOCR Transformer** synthesizes the features into text.")
|
| 213 |
st.markdown("---")
|
| 214 |
st.markdown("### ⚙️ The Neural Engines")
|
| 215 |
st.write("**Model V13 (Specialist):** I trained this specific model myself using the **IAM Handwriting Database** (over 65,000 instances). It is highly optimized for cursive loops and manual pen-strokes. It is excellent for handwritten manuscripts but might struggle with standard modern print.")
|
|
@@ -254,12 +260,24 @@ def main():
|
|
| 254 |
|
| 255 |
with c_right:
|
| 256 |
if run_scan_trigger:
|
| 257 |
-
|
| 258 |
-
|
| 259 |
-
|
| 260 |
-
|
| 261 |
-
|
| 262 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 263 |
pixel_values = proc(crop, return_tensors="pt").pixel_values.to(device)
|
| 264 |
if device.type == "cuda": pixel_values = pixel_values.half()
|
| 265 |
with torch.no_grad():
|
|
@@ -267,7 +285,12 @@ def main():
|
|
| 267 |
decoded.append(proc.batch_decode(out.sequences, skip_special_tokens=True)[0].strip())
|
| 268 |
try: scores.extend(np.exp(model.compute_transition_scores(out.sequences, out.scores, normalize_logits=True)[0].cpu().numpy()))
|
| 269 |
except: pass
|
| 270 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 271 |
st.rerun()
|
| 272 |
|
| 273 |
elif st.session_state.ocr_results:
|
|
|
|
| 37 |
.stMarkdown a { text-decoration: none !important; pointer-events: none !important; }
|
| 38 |
|
| 39 |
/* ───────────────────────────────────────────────────────────────
|
| 40 |
+
STATUS, SPINNERS, TOASTS & PROGRESS BARS (100% DARK MODE FIX)
|
| 41 |
─────────────────────────────────────────────────────────────── */
|
| 42 |
+
/* Target absolutely every single popup, toast, and cache notification container */
|
| 43 |
+
[data-testid="stStatusWidget"], [data-testid="stToast"], [data-testid="stToastContainer"],
|
| 44 |
+
[data-testid="stNotification"], [data-testid="stNotificationContainer"],
|
| 45 |
+
div[role="status"], div[role="alert"], div[role="dialog"],
|
| 46 |
+
div[data-baseweb="toast"], div[data-baseweb="snackbar"], div[data-baseweb="notification"] {
|
| 47 |
+
background-color: #171a1f !important; border: 1px solid #8ff5ff !important; border-radius: 4px !important; box-shadow: 0 0 15px rgba(143, 245, 255, 0.05) !important;
|
| 48 |
}
|
| 49 |
+
|
| 50 |
+
/* Force nested white boxes to turn transparent so the dark background shows */
|
| 51 |
+
[data-testid="stStatusWidget"] *, [data-testid="stToast"] *, [data-testid="stNotification"] *,
|
| 52 |
+
div[role="status"] *, div[role="alert"] *, div[role="dialog"] *,
|
| 53 |
+
div[data-baseweb="toast"] *, div[data-baseweb="snackbar"] *, div[data-baseweb="notification"] * {
|
| 54 |
+
background-color: transparent !important; color: #8ff5ff !important; font-family: 'Space Grotesk', sans-serif !important;
|
| 55 |
}
|
| 56 |
+
|
| 57 |
[data-testid="stStatusWidget"] label { color: #f6f6fc !important; }
|
| 58 |
|
| 59 |
+
/* Dynamic Progress Bar Override */
|
| 60 |
+
[data-testid="stProgress"] > div > div > div > div { background-color: #8ff5ff !important; }
|
| 61 |
+
[data-testid="stProgress"] p { color: #8ff5ff !important; font-family: 'Space Grotesk' !important; font-size: 14px !important; font-weight: 500 !important; letter-spacing: 1px; text-transform: uppercase;}
|
| 62 |
+
|
| 63 |
/* ───────────────────────────────────────────────────────────────
|
| 64 |
SELECT MODEL BOX
|
| 65 |
─────────────────────────────────────────────────────────────── */
|
|
|
|
| 153 |
# ───────────────────────────────────────────────────────────────
|
| 154 |
# MODELS & OCR LOGIC
|
| 155 |
# ───────────────────────────────────────────────────────────────
|
| 156 |
+
@st.cache_resource(show_spinner="Waking up Neural Architecture...")
|
| 157 |
def load_vision_engine():
|
| 158 |
import logging
|
| 159 |
logging.getLogger("easyocr").setLevel(logging.ERROR)
|
| 160 |
return easyocr.Reader(['en'], gpu=torch.cuda.is_available())
|
| 161 |
|
| 162 |
+
@st.cache_resource(show_spinner="Loading TrOCR Weights into VRAM...")
|
| 163 |
def load_trocr_model(model_path):
|
| 164 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 165 |
|
|
|
|
| 171 |
model = model.half()
|
| 172 |
|
| 173 |
# ─── THE ACTUAL ROOT-CAUSE FIX ───
|
|
|
|
|
|
|
| 174 |
for module in model.modules():
|
| 175 |
if "TrOCRSinusoidalPositionalEmbedding" in module.__class__.__name__:
|
|
|
|
| 176 |
num_positions, embedding_dim = module.weights.shape
|
|
|
|
|
|
|
| 177 |
new_weights = module.__class__.get_embedding(
|
| 178 |
num_positions,
|
| 179 |
embedding_dim,
|
| 180 |
padding_idx=getattr(module, "padding_idx", None)
|
| 181 |
)
|
|
|
|
|
|
|
| 182 |
module.weights = new_weights.to(device=device, dtype=model.dtype)
|
| 183 |
|
| 184 |
model.eval()
|
|
|
|
| 215 |
with col_t3:
|
| 216 |
with st.popover("INFO"):
|
| 217 |
st.markdown("### 🧠 Forensic Neural Architecture")
|
| 218 |
+
st.write("This engine operates in a two-stage forensic sequence designed to maximize character fidelity. First, **EasyOCR** maps the image using mathematical line fusion, isolating text rows. Second, a **TrOCR Transformer** synthesizes the features into text. It may take a long time if ran online.")
|
| 219 |
st.markdown("---")
|
| 220 |
st.markdown("### ⚙️ The Neural Engines")
|
| 221 |
st.write("**Model V13 (Specialist):** I trained this specific model myself using the **IAM Handwriting Database** (over 65,000 instances). It is highly optimized for cursive loops and manual pen-strokes. It is excellent for handwritten manuscripts but might struggle with standard modern print.")
|
|
|
|
| 260 |
|
| 261 |
with c_right:
|
| 262 |
if run_scan_trigger:
|
| 263 |
+
start = time.time()
|
| 264 |
+
|
| 265 |
+
# This triggers the @st.cache_resource loaders
|
| 266 |
+
proc, model, device = load_trocr_model(m_map[model_choice])
|
| 267 |
+
crops = extract_lines(st.session_state.image_data, reader)
|
| 268 |
+
|
| 269 |
+
decoded, scores = [], []
|
| 270 |
+
total_crops = len(crops)
|
| 271 |
+
|
| 272 |
+
if total_crops > 0:
|
| 273 |
+
# ─── DYNAMIC PROGRESS BAR INJECTION ───
|
| 274 |
+
progress_bar = st.progress(0, text="Initializing Neural Matrix...")
|
| 275 |
+
|
| 276 |
+
for idx, crop in enumerate(crops):
|
| 277 |
+
# Update Progress Text & Percentage
|
| 278 |
+
pct = int((idx / total_crops) * 100)
|
| 279 |
+
progress_bar.progress(pct, text=f"Synthesizing segment {idx + 1} out of {total_crops} | {pct}% Complete...")
|
| 280 |
+
|
| 281 |
pixel_values = proc(crop, return_tensors="pt").pixel_values.to(device)
|
| 282 |
if device.type == "cuda": pixel_values = pixel_values.half()
|
| 283 |
with torch.no_grad():
|
|
|
|
| 285 |
decoded.append(proc.batch_decode(out.sequences, skip_special_tokens=True)[0].strip())
|
| 286 |
try: scores.extend(np.exp(model.compute_transition_scores(out.sequences, out.scores, normalize_logits=True)[0].cpu().numpy()))
|
| 287 |
except: pass
|
| 288 |
+
|
| 289 |
+
# Snap to 100% just before closing out
|
| 290 |
+
progress_bar.progress(100, text="Sequence Complete. Compiling output...")
|
| 291 |
+
time.sleep(0.3)
|
| 292 |
+
|
| 293 |
+
st.session_state.ocr_results = {"text": "\n".join(decoded), "time": time.time() - start, "words": len("\n".join(decoded).split()), "conf": np.mean(scores)*100 if scores else 0}
|
| 294 |
st.rerun()
|
| 295 |
|
| 296 |
elif st.session_state.ocr_results:
|