Spaces:

Hypernova823
/

Handwronging

Sleeping

App Files Community

Hypernova823 committed 19 days ago

Commit

96f9fd9

verified ·

1 Parent(s): 2a5d903

Upload streamlit_app.py

Browse files

Files changed (1) hide show

src/streamlit_app.py +21 -48

src/streamlit_app.py CHANGED Viewed

@@ -8,7 +8,7 @@ import io
 import time
 from gtts import gTTS
 from PIL import Image, ImageOps
-from transformers import TrOCRProcessor, VisionEncoderDecoderModel, VisionEncoderDecoderConfig
 # ═══════════════════════════════════════════════════════════════
 # UI CONFIGURATION & ATOMIC CSS OVERRIDES
@@ -150,57 +150,30 @@ def load_vision_engine():
 def load_trocr_model(model_path):
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-    # Hugging Face natively downloads the processor via the repo ID
     proc = TrOCRProcessor.from_pretrained(model_path)
-    if os.path.exists(model_path):
-        # Local Loading Logic
-        config = VisionEncoderDecoderConfig.from_pretrained(model_path)
-        model = VisionEncoderDecoderModel(config)
-        safe_path = os.path.join(model_path, "model.safetensors")
-        bin_path = os.path.join(model_path, "pytorch_model.bin")
-        if os.path.exists(safe_path):
-            from safetensors.torch import load_file
-            model.load_state_dict(load_file(safe_path), strict=False)
-        else:
-            model.load_state_dict(torch.load(bin_path, map_location="cpu", weights_only=True), strict=False)
-    else:
-        # Cloud Loading Logic: Natively pulls your model from the Hugging Face Hub
-        model = VisionEncoderDecoderModel.from_pretrained(model_path)
-    # Push standard registered parameters/buffers to device
     model.to(device)
-    # ─── BULLETPROOF TENSOR MIGRATION (WITH EXCEPTIONS CATCHER) ───
-    for module in model.modules():
-        # 1. Double check parameters safely
-        for name, param in list(module._parameters.items()):
-            if param is not None:
-                try: module._parameters[name] = torch.nn.Parameter(param.to(device))
-                except (NotImplementedError, RuntimeError): pass
-        # 2. Double check buffers safely
-        for name, buf in list(module._buffers.items()):
-            if buf is not None:
-                try: module._buffers[name] = buf.to(device)
-                except (NotImplementedError, RuntimeError): pass
-        # 3. Hunt down unregistered raw tensors safely
-        for name, attr in list(module.__dict__.items()):
-            if isinstance(attr, torch.Tensor):
-                try: setattr(module, name, attr.to(device))
-                except (NotImplementedError, RuntimeError): pass
-    # If on GPU, push the entire model to Half precision safely
     if device.type == "cuda":
         model = model.half()
-        # Ensure those unregistered raw tensors are ALSO converted to half precision safely
-        for module in model.modules():
-            for name, attr in list(module.__dict__.items()):
-                if isinstance(attr, torch.Tensor) and attr.is_floating_point():
-                    try: setattr(module, name, attr.half())
-                    except (NotImplementedError, RuntimeError): pass
     model.eval()
     return proc, model, device
@@ -249,7 +222,7 @@ def main():
     run_scan_trigger = False
     with c_left:
-        # ─── THE CLOUD REPOSITORY MAP ───
         model_choice = st.selectbox("SELECT MODEL", ["V13 Specialist", "Microsoft Large"])
         st.markdown("<div style='height: 15px;'></div>", unsafe_allow_html=True)
         m_map = {

 import time
 from gtts import gTTS
 from PIL import Image, ImageOps
+from transformers import TrOCRProcessor, VisionEncoderDecoderModel
 # ═══════════════════════════════════════════════════════════════
 # UI CONFIGURATION & ATOMIC CSS OVERRIDES
 def load_trocr_model(model_path):
     """Load a TrOCR processor and model and place the model on the best device.

     Args:
         model_path: Identifier handed unchanged to both ``from_pretrained``
             calls (processor and model).

     Returns:
         A ``(processor, model, device)`` tuple. The model is in eval mode and,
         when CUDA is available, converted to half precision.
     """
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
     proc = TrOCRProcessor.from_pretrained(model_path)
     model = VisionEncoderDecoderModel.from_pretrained(model_path)
     model.to(device)
     if device.type == "cuda":
         model = model.half()
     # Must run after the device move and the optional half() conversion so the
     # rebuilt tables match the model's final device and dtype.
     _rebuild_sinusoidal_embeddings(model, device)
     model.eval()
     return proc, model, device

 def _rebuild_sinusoidal_embeddings(model, device):
     """Regenerate each TrOCR sinusoidal position table directly on ``device``.

     Per the original author's note, the ``weights`` tensor of these modules
     can be left as an empty meta tensor after loading. Only its shape is read
     here; the values are recomputed with the class's own ``get_embedding``.

     Args:
         model: Loaded model whose sub-modules are scanned.
         device: Target ``torch.device`` for the regenerated tables.
     """
     # Read once: the model dtype does not change while the tables are rebuilt.
     target_dtype = model.dtype
     for module in model.modules():
         # Matched by class name because the class is not imported in this file.
         if "TrOCRSinusoidalPositionalEmbedding" not in type(module).__name__:
             continue
         weights = getattr(module, "weights", None)
         if weights is None:
             # No table to take the dimensions from; skip rather than crash.
             continue
         # Reading .shape is safe even when the tensor holds no data.
         num_positions, embedding_dim = weights.shape
         new_weights = type(module).get_embedding(
             num_positions,
             embedding_dim,
             padding_idx=getattr(module, "padding_idx", None),
         )
         module.weights = new_weights.to(device=device, dtype=target_dtype)
     run_scan_trigger = False
     with c_left:
+        # ─── THE REPOSITORY MAP ───
         model_choice = st.selectbox("SELECT MODEL", ["V13 Specialist", "Microsoft Large"])
         st.markdown("<div style='height: 15px;'></div>", unsafe_allow_html=True)
         m_map = {