Spaces:

prithivMLmods
/

Multimodal-OCR3

Running on Zero

App Files Files Community

prithivMLmods committed on Oct 20, 2025

Commit

ee1045a

verified ·

1 Parent(s): 4e1e2ef

Update app.py

Browse files

Files changed (1) hide show

app.py +16 -2

app.py CHANGED Viewed

@@ -138,6 +138,10 @@ model_m = Qwen2_5_VLForConditionalGeneration.from_pretrained(
     trust_remote_code=True,
     torch_dtype=torch.float16
 ).to(device).eval()
 # Load Nanonets-OCR2-1.5B-exp
 MODEL_ID_N = "strangervisionhf/excess_layer_pruned-nanonets-1.5b"
@@ -146,8 +150,11 @@ model_n = AutoModelForImageTextToText.from_pretrained(
     MODEL_ID_N,
     trust_remote_code=True,
     torch_dtype=torch.float16,
-    attn_implementation="eager" # else "flash_attention_2"
 ).to(device).eval()
 # Load Dots.OCR from the local, patched directory
@@ -155,11 +162,15 @@ MODEL_PATH_D = model_path_d_local
 processor_d = AutoProcessor.from_pretrained(MODEL_PATH_D, trust_remote_code=True)
 model_d = AutoModelForCausalLM.from_pretrained(
     MODEL_PATH_D,
-    attn_implementation="eager", # else "flash_attention_2"
     torch_dtype=torch.bfloat16,
     device_map="auto",
     trust_remote_code=True
 ).eval()
 # Load PaddleOCR
 MODEL_ID_P = "strangervisionhf/paddle"
@@ -169,6 +180,9 @@ model_p = AutoModelForCausalLM.from_pretrained(
     trust_remote_code=True,
     torch_dtype=torch.bfloat16
 ).to(device).eval()
 @spaces.GPU
 def generate_image(model_name: str, text: str, image: Image.Image,

     trust_remote_code=True,
     torch_dtype=torch.float16
 ).to(device).eval()
+# --- FIX: Set pad_token_id to silence the warning ---
+if model_m.config.pad_token_id is None:
+    model_m.config.pad_token_id = model_m.config.eos_token_id
 # Load Nanonets-OCR2-1.5B-exp
 MODEL_ID_N = "strangervisionhf/excess_layer_pruned-nanonets-1.5b"
     MODEL_ID_N,
     trust_remote_code=True,
     torch_dtype=torch.float16,
+    attn_implementation="flash_attention_2"
 ).to(device).eval()
+# --- FIX: Set pad_token_id to silence the warning ---
+if model_n.config.pad_token_id is None:
+    model_n.config.pad_token_id = model_n.config.eos_token_id
 # Load Dots.OCR from the local, patched directory
 processor_d = AutoProcessor.from_pretrained(MODEL_PATH_D, trust_remote_code=True)
 model_d = AutoModelForCausalLM.from_pretrained(
     MODEL_PATH_D,
+    attn_implementation="flash_attention_2",
     torch_dtype=torch.bfloat16,
     device_map="auto",
     trust_remote_code=True
 ).eval()
+# --- FIX: Set pad_token_id to silence the warning ---
+if model_d.config.pad_token_id is None:
+    model_d.config.pad_token_id = model_d.config.eos_token_id
 # Load PaddleOCR
 MODEL_ID_P = "strangervisionhf/paddle"
     trust_remote_code=True,
     torch_dtype=torch.bfloat16
 ).to(device).eval()
+# --- FIX: Set pad_token_id to silence the warning ---
+if model_p.config.pad_token_id is None:
+    model_p.config.pad_token_id = model_p.config.eos_token_id
 @spaces.GPU
 def generate_image(model_name: str, text: str, image: Image.Image,