Hypernova823 committed on
Commit
96f9fd9
·
verified ·
1 Parent(s): 2a5d903

Upload streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +21 -48
src/streamlit_app.py CHANGED
@@ -8,7 +8,7 @@ import io
8
  import time
9
  from gtts import gTTS
10
  from PIL import Image, ImageOps
11
- from transformers import TrOCRProcessor, VisionEncoderDecoderModel, VisionEncoderDecoderConfig
12
 
13
  # ═══════════════════════════════════════════════════════════════
14
  # UI CONFIGURATION & ATOMIC CSS OVERRIDES
@@ -150,57 +150,30 @@ def load_vision_engine():
150
  def load_trocr_model(model_path):
151
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
152
 
153
- # Hugging Face natively downloads the processor via the repo ID
154
  proc = TrOCRProcessor.from_pretrained(model_path)
155
-
156
- if os.path.exists(model_path):
157
- # Local Loading Logic
158
- config = VisionEncoderDecoderConfig.from_pretrained(model_path)
159
- model = VisionEncoderDecoderModel(config)
160
- safe_path = os.path.join(model_path, "model.safetensors")
161
- bin_path = os.path.join(model_path, "pytorch_model.bin")
162
-
163
- if os.path.exists(safe_path):
164
- from safetensors.torch import load_file
165
- model.load_state_dict(load_file(safe_path), strict=False)
166
- else:
167
- model.load_state_dict(torch.load(bin_path, map_location="cpu", weights_only=True), strict=False)
168
- else:
169
- # Cloud Loading Logic: Natively pulls your model from the Hugging Face Hub
170
- model = VisionEncoderDecoderModel.from_pretrained(model_path)
171
 
172
- # Push standard registered parameters/buffers to device
173
  model.to(device)
174
-
175
- # ─── BULLETPROOF TENSOR MIGRATION (WITH EXCEPTIONS CATCHER) ───
176
- for module in model.modules():
177
- # 1. Double check parameters safely
178
- for name, param in list(module._parameters.items()):
179
- if param is not None:
180
- try: module._parameters[name] = torch.nn.Parameter(param.to(device))
181
- except (NotImplementedError, RuntimeError): pass
182
-
183
- # 2. Double check buffers safely
184
- for name, buf in list(module._buffers.items()):
185
- if buf is not None:
186
- try: module._buffers[name] = buf.to(device)
187
- except (NotImplementedError, RuntimeError): pass
188
-
189
- # 3. Hunt down unregistered raw tensors safely
190
- for name, attr in list(module.__dict__.items()):
191
- if isinstance(attr, torch.Tensor):
192
- try: setattr(module, name, attr.to(device))
193
- except (NotImplementedError, RuntimeError): pass
194
-
195
- # If on GPU, push the entire model to Half precision safely
196
  if device.type == "cuda":
197
  model = model.half()
198
- # Ensure those unregistered raw tensors are ALSO converted to half precision safely
199
- for module in model.modules():
200
- for name, attr in list(module.__dict__.items()):
201
- if isinstance(attr, torch.Tensor) and attr.is_floating_point():
202
- try: setattr(module, name, attr.half())
203
- except (NotImplementedError, RuntimeError): pass
 
 
 
 
 
 
 
 
 
 
 
 
204
 
205
  model.eval()
206
  return proc, model, device
@@ -249,7 +222,7 @@ def main():
249
  run_scan_trigger = False
250
 
251
  with c_left:
252
- # ─── THE CLOUD REPOSITORY MAP ───
253
  model_choice = st.selectbox("SELECT MODEL", ["V13 Specialist", "Microsoft Large"])
254
  st.markdown("<div style='height: 15px;'></div>", unsafe_allow_html=True)
255
  m_map = {
 
8
  import time
9
  from gtts import gTTS
10
  from PIL import Image, ImageOps
11
+ from transformers import TrOCRProcessor, VisionEncoderDecoderModel
12
 
13
  # ═══════════════════════════════════════════════════════════════
14
  # UI CONFIGURATION & ATOMIC CSS OVERRIDES
 
150
  def load_trocr_model(model_path):
151
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
152
 
 
153
  proc = TrOCRProcessor.from_pretrained(model_path)
154
+ model = VisionEncoderDecoderModel.from_pretrained(model_path)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
155
 
 
156
  model.to(device)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
157
  if device.type == "cuda":
158
  model = model.half()
159
+
160
+ # ─── THE ACTUAL ROOT-CAUSE FIX ───
161
+ # Find the broken Hugging Face class, destroy its empty meta tensor,
162
+ # and mathematically rebuild a brand new tensor natively on the GPU.
163
+ for module in model.modules():
164
+ if "TrOCRSinusoidalPositionalEmbedding" in module.__class__.__name__:
165
+ # Extract dimensions from the broken tensor
166
+ num_positions, embedding_dim = module.weights.shape
167
+
168
+ # Use the class's own method to generate a brand new, physical tensor
169
+ new_weights = module.__class__.get_embedding(
170
+ num_positions,
171
+ embedding_dim,
172
+ padding_idx=getattr(module, "padding_idx", None)
173
+ )
174
+
175
+ # Assign the real tensor directly to the GPU matching the model's datatype
176
+ module.weights = new_weights.to(device=device, dtype=model.dtype)
177
 
178
  model.eval()
179
  return proc, model, device
 
222
  run_scan_trigger = False
223
 
224
  with c_left:
225
+ # ─── THE REPOSITORY MAP ───
226
  model_choice = st.selectbox("SELECT MODEL", ["V13 Specialist", "Microsoft Large"])
227
  st.markdown("<div style='height: 15px;'></div>", unsafe_allow_html=True)
228
  m_map = {