Spaces:
Running on Zero
Running on Zero
Update app.py
Browse files
app.py
CHANGED
|
@@ -18,7 +18,6 @@ from transformers import (
|
|
| 18 |
from gradio.themes import Soft
|
| 19 |
from gradio.themes.utils import colors, fonts, sizes
|
| 20 |
|
| 21 |
-
|
| 22 |
colors.steel_blue = colors.Color(
|
| 23 |
name="steel_blue",
|
| 24 |
c50="#EBF3F8",
|
|
@@ -103,7 +102,6 @@ model_path_d_local = snapshot_download(
|
|
| 103 |
local_dir_use_symlinks=False
|
| 104 |
)
|
| 105 |
|
| 106 |
-
|
| 107 |
config_file_path = os.path.join(model_path_d_local, "configuration_dots.py")
|
| 108 |
|
| 109 |
if os.path.exists(config_file_path):
|
|
@@ -125,15 +123,12 @@ if os.path.exists(config_file_path):
|
|
| 125 |
|
| 126 |
sys.path.append(model_path_d_local)
|
| 127 |
|
| 128 |
-
|
| 129 |
-
|
| 130 |
MAX_MAX_NEW_TOKENS = 4096
|
| 131 |
DEFAULT_MAX_NEW_TOKENS = 2048
|
| 132 |
MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
|
| 133 |
|
| 134 |
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
|
| 135 |
|
| 136 |
-
|
| 137 |
# Load Nanonets-OCR2-3B
|
| 138 |
MODEL_ID_M = "nanonets/Nanonets-OCR2-3B"
|
| 139 |
processor_m = AutoProcessor.from_pretrained(MODEL_ID_M, trust_remote_code=True)
|
|
@@ -174,7 +169,6 @@ model_p = AutoModelForCausalLM.from_pretrained(
|
|
| 174 |
torch_dtype=torch.bfloat16
|
| 175 |
).to(device).eval()
|
| 176 |
|
| 177 |
-
|
| 178 |
@spaces.GPU
|
| 179 |
def generate_image(model_name: str, text: str, image: Image.Image,
|
| 180 |
max_new_tokens: int = 1024,
|
|
@@ -235,14 +229,12 @@ def generate_image(model_name: str, text: str, image: Image.Image,
|
|
| 235 |
buffer += new_text.replace("<|im_end|>", "").replace("<end_of_utterance>", "")
|
| 236 |
yield buffer, buffer
|
| 237 |
|
| 238 |
-
|
| 239 |
image_examples = [
|
| 240 |
-
["
|
| 241 |
-
["
|
| 242 |
-
["OCR the
|
| 243 |
]
|
| 244 |
|
| 245 |
-
|
| 246 |
with gr.Blocks(css=css, theme=steel_blue_theme) as demo:
|
| 247 |
gr.Markdown("# **Multimodal OCR3**", elem_id="main-title")
|
| 248 |
with gr.Row():
|
|
|
|
| 18 |
from gradio.themes import Soft
|
| 19 |
from gradio.themes.utils import colors, fonts, sizes
|
| 20 |
|
|
|
|
| 21 |
colors.steel_blue = colors.Color(
|
| 22 |
name="steel_blue",
|
| 23 |
c50="#EBF3F8",
|
|
|
|
| 102 |
local_dir_use_symlinks=False
|
| 103 |
)
|
| 104 |
|
|
|
|
| 105 |
config_file_path = os.path.join(model_path_d_local, "configuration_dots.py")
|
| 106 |
|
| 107 |
if os.path.exists(config_file_path):
|
|
|
|
| 123 |
|
| 124 |
sys.path.append(model_path_d_local)
|
| 125 |
|
|
|
|
|
|
|
| 126 |
MAX_MAX_NEW_TOKENS = 4096
|
| 127 |
DEFAULT_MAX_NEW_TOKENS = 2048
|
| 128 |
MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
|
| 129 |
|
| 130 |
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
|
| 131 |
|
|
|
|
| 132 |
# Load Nanonets-OCR2-3B
|
| 133 |
MODEL_ID_M = "nanonets/Nanonets-OCR2-3B"
|
| 134 |
processor_m = AutoProcessor.from_pretrained(MODEL_ID_M, trust_remote_code=True)
|
|
|
|
| 169 |
torch_dtype=torch.bfloat16
|
| 170 |
).to(device).eval()
|
| 171 |
|
|
|
|
| 172 |
@spaces.GPU
|
| 173 |
def generate_image(model_name: str, text: str, image: Image.Image,
|
| 174 |
max_new_tokens: int = 1024,
|
|
|
|
| 229 |
buffer += new_text.replace("<|im_end|>", "").replace("<end_of_utterance>", "")
|
| 230 |
yield buffer, buffer
|
| 231 |
|
|
|
|
| 232 |
image_examples = [
|
| 233 |
+
["Perform OCR on the image.", "examples/1.jpg"],
|
| 234 |
+
["Phrase the document [page].", "examples/2.jpg"],
|
| 235 |
+
["OCR and reconstruct the table perfectly.", "examples/3.jpg"],
|
| 236 |
]
|
| 237 |
|
|
|
|
| 238 |
with gr.Blocks(css=css, theme=steel_blue_theme) as demo:
|
| 239 |
gr.Markdown("# **Multimodal OCR3**", elem_id="main-title")
|
| 240 |
with gr.Row():
|