prithivMLmods committed on
Commit
2174e9c
·
verified ·
1 Parent(s): d7c41ec

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -11
app.py CHANGED
@@ -18,7 +18,6 @@ from transformers import (
18
  from gradio.themes import Soft
19
  from gradio.themes.utils import colors, fonts, sizes
20
 
21
-
22
  colors.steel_blue = colors.Color(
23
  name="steel_blue",
24
  c50="#EBF3F8",
@@ -103,7 +102,6 @@ model_path_d_local = snapshot_download(
103
  local_dir_use_symlinks=False
104
  )
105
 
106
-
107
  config_file_path = os.path.join(model_path_d_local, "configuration_dots.py")
108
 
109
  if os.path.exists(config_file_path):
@@ -125,15 +123,12 @@ if os.path.exists(config_file_path):
125
 
126
  sys.path.append(model_path_d_local)
127
 
128
-
129
-
130
  MAX_MAX_NEW_TOKENS = 4096
131
  DEFAULT_MAX_NEW_TOKENS = 2048
132
  MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
133
 
134
  device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
135
 
136
-
137
  # Load Nanonets-OCR2-3B
138
  MODEL_ID_M = "nanonets/Nanonets-OCR2-3B"
139
  processor_m = AutoProcessor.from_pretrained(MODEL_ID_M, trust_remote_code=True)
@@ -174,7 +169,6 @@ model_p = AutoModelForCausalLM.from_pretrained(
174
  torch_dtype=torch.bfloat16
175
  ).to(device).eval()
176
 
177
-
178
  @spaces.GPU
179
  def generate_image(model_name: str, text: str, image: Image.Image,
180
  max_new_tokens: int = 1024,
@@ -235,14 +229,12 @@ def generate_image(model_name: str, text: str, image: Image.Image,
235
  buffer += new_text.replace("<|im_end|>", "").replace("<end_of_utterance>", "")
236
  yield buffer, buffer
237
 
238
-
239
  image_examples = [
240
- ["Reconstruct the doc [table] as it is.", "images/0.png"],
241
- ["Describe the image!", "images/8.png"],
242
- ["OCR the image", "images/2.jpg"],
243
  ]
244
 
245
-
246
  with gr.Blocks(css=css, theme=steel_blue_theme) as demo:
247
  gr.Markdown("# **Multimodal OCR3**", elem_id="main-title")
248
  with gr.Row():
 
18
  from gradio.themes import Soft
19
  from gradio.themes.utils import colors, fonts, sizes
20
 
 
21
  colors.steel_blue = colors.Color(
22
  name="steel_blue",
23
  c50="#EBF3F8",
 
102
  local_dir_use_symlinks=False
103
  )
104
 
 
105
  config_file_path = os.path.join(model_path_d_local, "configuration_dots.py")
106
 
107
  if os.path.exists(config_file_path):
 
123
 
124
  sys.path.append(model_path_d_local)
125
 
 
 
126
  MAX_MAX_NEW_TOKENS = 4096
127
  DEFAULT_MAX_NEW_TOKENS = 2048
128
  MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
129
 
130
  device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
131
 
 
132
  # Load Nanonets-OCR2-3B
133
  MODEL_ID_M = "nanonets/Nanonets-OCR2-3B"
134
  processor_m = AutoProcessor.from_pretrained(MODEL_ID_M, trust_remote_code=True)
 
169
  torch_dtype=torch.bfloat16
170
  ).to(device).eval()
171
 
 
172
  @spaces.GPU
173
  def generate_image(model_name: str, text: str, image: Image.Image,
174
  max_new_tokens: int = 1024,
 
229
  buffer += new_text.replace("<|im_end|>", "").replace("<end_of_utterance>", "")
230
  yield buffer, buffer
231
 
 
232
  image_examples = [
233
+ ["Perform OCR on the image.", "examples/1.jpg"],
234
+ ["Phrase the document [page].", "examples/2.jpg"],
235
+ ["OCR and reconstruct the table perfectly.", "examples/3.jpg"],
236
  ]
237
 
 
238
  with gr.Blocks(css=css, theme=steel_blue_theme) as demo:
239
  gr.Markdown("# **Multimodal OCR3**", elem_id="main-title")
240
  with gr.Row():