Spaces:
Sleeping
Sleeping
Upload streamlit_app.py
Browse files- src/streamlit_app.py +21 -48
src/streamlit_app.py
CHANGED
|
@@ -8,7 +8,7 @@ import io
|
|
| 8 |
import time
|
| 9 |
from gtts import gTTS
|
| 10 |
from PIL import Image, ImageOps
|
| 11 |
-
from transformers import TrOCRProcessor, VisionEncoderDecoderModel
|
| 12 |
|
| 13 |
# ───────────────────────────────────────────────────────────────
|
| 14 |
# UI CONFIGURATION & ATOMIC CSS OVERRIDES
|
|
@@ -150,57 +150,30 @@ def load_vision_engine():
|
|
| 150 |
def load_trocr_model(model_path):
|
| 151 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 152 |
|
| 153 |
-
# Hugging Face natively downloads the processor via the repo ID
|
| 154 |
proc = TrOCRProcessor.from_pretrained(model_path)
|
| 155 |
-
|
| 156 |
-
if os.path.exists(model_path):
|
| 157 |
-
# Local Loading Logic
|
| 158 |
-
config = VisionEncoderDecoderConfig.from_pretrained(model_path)
|
| 159 |
-
model = VisionEncoderDecoderModel(config)
|
| 160 |
-
safe_path = os.path.join(model_path, "model.safetensors")
|
| 161 |
-
bin_path = os.path.join(model_path, "pytorch_model.bin")
|
| 162 |
-
|
| 163 |
-
if os.path.exists(safe_path):
|
| 164 |
-
from safetensors.torch import load_file
|
| 165 |
-
model.load_state_dict(load_file(safe_path), strict=False)
|
| 166 |
-
else:
|
| 167 |
-
model.load_state_dict(torch.load(bin_path, map_location="cpu", weights_only=True), strict=False)
|
| 168 |
-
else:
|
| 169 |
-
# Cloud Loading Logic: Natively pulls your model from the Hugging Face Hub
|
| 170 |
-
model = VisionEncoderDecoderModel.from_pretrained(model_path)
|
| 171 |
|
| 172 |
-
# Push standard registered parameters/buffers to device
|
| 173 |
model.to(device)
|
| 174 |
-
|
| 175 |
-
# ─── BULLETPROOF TENSOR MIGRATION (WITH EXCEPTIONS CATCHER) ───
|
| 176 |
-
for module in model.modules():
|
| 177 |
-
# 1. Double check parameters safely
|
| 178 |
-
for name, param in list(module._parameters.items()):
|
| 179 |
-
if param is not None:
|
| 180 |
-
try: module._parameters[name] = torch.nn.Parameter(param.to(device))
|
| 181 |
-
except (NotImplementedError, RuntimeError): pass
|
| 182 |
-
|
| 183 |
-
# 2. Double check buffers safely
|
| 184 |
-
for name, buf in list(module._buffers.items()):
|
| 185 |
-
if buf is not None:
|
| 186 |
-
try: module._buffers[name] = buf.to(device)
|
| 187 |
-
except (NotImplementedError, RuntimeError): pass
|
| 188 |
-
|
| 189 |
-
# 3. Hunt down unregistered raw tensors safely
|
| 190 |
-
for name, attr in list(module.__dict__.items()):
|
| 191 |
-
if isinstance(attr, torch.Tensor):
|
| 192 |
-
try: setattr(module, name, attr.to(device))
|
| 193 |
-
except (NotImplementedError, RuntimeError): pass
|
| 194 |
-
|
| 195 |
-
# If on GPU, push the entire model to Half precision safely
|
| 196 |
if device.type == "cuda":
|
| 197 |
model = model.half()
|
| 198 |
-
|
| 199 |
-
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
|
| 203 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 204 |
|
| 205 |
model.eval()
|
| 206 |
return proc, model, device
|
|
@@ -249,7 +222,7 @@ def main():
|
|
| 249 |
run_scan_trigger = False
|
| 250 |
|
| 251 |
with c_left:
|
| 252 |
-
# βββ THE
|
| 253 |
model_choice = st.selectbox("SELECT MODEL", ["V13 Specialist", "Microsoft Large"])
|
| 254 |
st.markdown("<div style='height: 15px;'></div>", unsafe_allow_html=True)
|
| 255 |
m_map = {
|
|
|
|
| 8 |
import time
|
| 9 |
from gtts import gTTS
|
| 10 |
from PIL import Image, ImageOps
|
| 11 |
+
from transformers import TrOCRProcessor, VisionEncoderDecoderModel
|
| 12 |
|
| 13 |
# ───────────────────────────────────────────────────────────────
|
| 14 |
# UI CONFIGURATION & ATOMIC CSS OVERRIDES
|
|
|
|
| 150 |
def load_trocr_model(model_path):
    """Load a TrOCR processor/model pair and move it to the best device.

    Parameters
    ----------
    model_path : str
        Hugging Face Hub repo ID or local directory accepted by
        ``from_pretrained`` (same string is used for both the processor
        and the model).

    Returns
    -------
    tuple
        ``(proc, model, device)`` — the ``TrOCRProcessor``, the
        ``VisionEncoderDecoderModel`` in eval mode on ``device``, and the
        selected ``torch.device`` (CUDA when available, else CPU).
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # from_pretrained handles both Hub repo IDs and local checkpoints.
    proc = TrOCRProcessor.from_pretrained(model_path)
    model = VisionEncoderDecoderModel.from_pretrained(model_path)

    model.to(device)
    # On GPU, run in half precision to reduce memory use.
    if device.type == "cuda":
        model = model.half()

    # ─── THE ACTUAL ROOT-CAUSE FIX ───
    # TrOCRSinusoidalPositionalEmbedding keeps its table in a plain
    # `weights` attribute, which loading can leave as an empty/meta tensor.
    # Rebuild the table with the class's own `get_embedding` and place it
    # on the target device with the model's dtype.
    for module in model.modules():
        if "TrOCRSinusoidalPositionalEmbedding" in module.__class__.__name__:
            # Extract dimensions from the broken tensor.
            num_positions, embedding_dim = module.weights.shape

            # Use the class's own method to generate a fresh, physical tensor.
            new_weights = module.__class__.get_embedding(
                num_positions,
                embedding_dim,
                padding_idx=getattr(module, "padding_idx", None),
            )

            # Assign the real tensor directly, matching device and dtype.
            module.weights = new_weights.to(device=device, dtype=model.dtype)

    model.eval()
    return proc, model, device
|
|
|
|
| 222 |
run_scan_trigger = False
|
| 223 |
|
| 224 |
with c_left:
|
| 225 |
+
# ─── THE REPOSITORY MAP ───
|
| 226 |
model_choice = st.selectbox("SELECT MODEL", ["V13 Specialist", "Microsoft Large"])
|
| 227 |
st.markdown("<div style='height: 15px;'></div>", unsafe_allow_html=True)
|
| 228 |
m_map = {
|