| import os |
| import json |
| import re |
| import hashlib |
| import gc |
| from io import BytesIO |
| from collections import OrderedDict |
| from PIL import Image, UnidentifiedImageError |
| import torch |
| from transformers import AutoProcessor, BitsAndBytesConfig |
| from transformers.models.qwen2_5_vl import Qwen2_5_VLForConditionalGeneration |
| from pdf2image import convert_from_bytes |
| import gradio as gr |
| import fitz |
| import spaces |
|
|
| |
# --- Runtime configuration ---------------------------------------------------
# Single authoritative set of module constants. Each name is defined exactly
# once so the value that takes effect is the value a reader sees.

# Model repository id passed to ``from_pretrained`` for processor and model.
MODEL_ID = "prithivMLmods/Camel-Doc-OCR-062825"

# Target resolution (dots per inch) used when rasterizing PDF pages.
DPI = 200

# Fraction handed to ``torch.cuda.set_per_process_memory_fraction`` when the
# configured device is CUDA.
GPU_MEMORY_FRACTION = 0.8

# Intended worker-thread count for page-level parallelism.
THREAD_COUNT = 4

# The settings below are not referenced by the code visible in this file; they
# are kept so that anything importing them keeps working.
CACHE_MAX_SIZE = 128
IMAGE_MAX_DIM = None
JPEG_QUALITY = 80
PAD_TOKEN_ID = None
|
|
| |
# Global torch setup performed once at import time.
# cuDNN autotuning is switched on unconditionally; the device itself is pinned
# to CPU here, so the per-process CUDA memory cap below only takes effect if
# ``device`` is changed to a CUDA device.
torch.backends.cudnn.benchmark = True
device = torch.device("cpu")
if device.type == "cuda":
    torch.cuda.set_per_process_memory_fraction(GPU_MEMORY_FRACTION, device=0)
|
|
| |
| from transformers import AutoProcessor, BitsAndBytesConfig |
| from transformers.models.qwen2_5_vl import Qwen2_5_VLForConditionalGeneration |
|
|
# Quantization settings: 4-bit weights in NF4 format with double quantization,
# computing in float16.
bnb = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.float16,
)

# Processor (tokenizer + image preprocessing) for the configured checkpoint.
processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=True)

# Quantized model; device placement is delegated to ``device_map="auto"``.
model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
    MODEL_ID,
    trust_remote_code=True,
    device_map="auto",
    quantization_config=bnb,
)
model = model.eval()  # inference only

# Reuse the end-of-sequence token as the padding token.
processor.tokenizer.pad_token_id = processor.tokenizer.eos_token_id
|
|
| |
| import traceback |
| from concurrent.futures import ThreadPoolExecutor |
|
|
| |
| import traceback |
| from concurrent.futures import ThreadPoolExecutor |
|
|
# PDF user space is defined at 72 units per inch, so a zoom factor of 1.0
# renders at 72 dpi; the zoom for a target DPI is therefore DPI / 72.
_PDF_BASE_DPI = 72
# Longest edge (in pixels) allowed for a rendered PDF page before downscaling.
_MAX_PAGE_DIM = 3072


def _render_pdf_pages(file_path):
    """Rasterize every page of the PDF at ``file_path`` into RGB PIL images.

    Pages are rendered at ``DPI`` and shrunk in place so that neither edge
    exceeds ``_MAX_PAGE_DIM``. The PDF document is closed before returning.
    """
    with open(file_path, "rb") as f:
        pdf_bytes = f.read()
    print(f"[INFO] Read PDF bytes: {len(pdf_bytes)} bytes")

    zoom = DPI / _PDF_BASE_DPI
    mat = fitz.Matrix(zoom, zoom)
    pages = []
    # Context manager guarantees the document is released even on error.
    with fitz.open(stream=pdf_bytes, filetype="pdf") as doc:
        for page in doc:
            pix = page.get_pixmap(matrix=mat, colorspace=fitz.csRGB)
            img = Image.frombytes("RGB", (pix.width, pix.height), pix.samples)
            if max(img.size) > _MAX_PAGE_DIM:
                img.thumbnail((_MAX_PAGE_DIM, _MAX_PAGE_DIM), Image.Resampling.LANCZOS)
            pages.append(img)
    print(f"[INFO] Converted PDF → {len(pages)} pages")
    return pages


def handle_file(file, prompt, extra_prompt, max_new_tokens, progress=gr.Progress()):
    """Run OCR on an uploaded image or PDF and return ``(filename, text)``.

    Args:
        file: Uploaded file; either an object exposing ``.name`` or a path string.
        prompt: Free-form prompt text.
        extra_prompt: Additional prompt text appended after ``prompt``.
        max_new_tokens: Generation length limit forwarded to ``run_inference``.
        progress: Gradio progress tracker, updated once per finished PDF page.

    Returns:
        A ``(filename, result_text)`` tuple. For PDFs the per-page outputs are
        joined with a page-break marker. Failures never raise: they are
        reported as an ``[ERROR] ...`` string in the second element, and
        unexpected errors use ``"error"`` as the first element.
    """
    try:
        if file is None:
            return "error", "[ERROR] handle_file unexpected: no file provided"

        file_path = file.name if hasattr(file, "name") else file
        filename = os.path.basename(file_path)
        ext = filename.lower().split('.')[-1]
        # Tolerate None for either prompt field.
        full_prompt = ((prompt or "") + "\n" + (extra_prompt or "")).strip()

        print(f"[INFO] handle_file → {filename} (.{ext})")

        if ext != "pdf":
            try:
                img = Image.open(file_path)
                print(f"[INFO] Opened image: {img.mode}, {img.size}")
            except Exception as e:
                traceback.print_exc()
                return filename, f"[ERROR] Image open failed: {e}"
            return filename, run_inference(img, full_prompt, max_new_tokens)

        try:
            pages = _render_pdf_pages(file_path)
        except Exception as e:
            traceback.print_exc()
            return filename, f"[ERROR] PDF conversion failed: {e}"

        outputs = []
        with ThreadPoolExecutor(max_workers=THREAD_COUNT) as executor:
            futures = [
                executor.submit(run_inference, img, full_prompt, max_new_tokens)
                for img in pages
            ]
            # Collect in submission order so the output follows page order.
            for idx, future in enumerate(futures):
                try:
                    out = future.result()
                except Exception as e:
                    traceback.print_exc()
                    out = f"[ERROR] Inference page {idx+1} failed: {e}"
                outputs.append(out)
                # idx + 1 pages are done at this point, so the bar reaches 100%.
                progress((idx + 1) / len(pages), desc=f"Page {idx+1}/{len(pages)}")

        result = "\n\n--- Page Break ---\n\n".join(outputs)
        print("[INFO] handle_file done")
        return filename, result

    except Exception as e:
        traceback.print_exc()
        return "error", f"[ERROR] handle_file unexpected: {e}"
|
|
| |
@spaces.GPU
def run_inference(img, prompt="", max_new_tokens=512):
    """Generate text for a single image with the module-level model.

    Args:
        img: PIL image; converted to RGB when it is in another mode.
        prompt: Instruction text sent alongside the image (``None`` is treated
            as empty).
        max_new_tokens: Upper bound on the number of generated tokens.

    Returns:
        The decoded completion with the prompt tokens removed, special tokens
        skipped, and surrounding whitespace stripped.
    """
    # NOTE(review): the model is loaded 4-bit with device_map="auto"; confirm
    # that the installed transformers/bitsandbytes versions accept .to("cuda").
    model.to("cuda")

    if img.mode != "RGB":
        img = img.convert("RGB")
    prompt_text = (prompt or "").strip()

    messages = [{
        "role": "user",
        "content": [
            {"type": "image", "image": img},
            {"type": "text", "text": prompt_text},
        ],
    }]

    text_prompt = processor.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )

    inputs = processor(
        text=[text_prompt], images=[img], return_tensors="pt", padding=True
    ).to("cuda")

    # torch.autocast(device_type="cuda") is the supported spelling of the
    # deprecated torch.cuda.amp.autocast().
    with torch.inference_mode(), torch.autocast(device_type="cuda"):
        gen = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            do_sample=False,
            eos_token_id=processor.tokenizer.eos_token_id,
        )

    # generate() returns prompt + completion; drop the prompt prefix per sample.
    trimmed = [o[len(i):] for i, o in zip(inputs['input_ids'], gen)]
    result = processor.tokenizer.batch_decode(
        trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=True
    )[0].strip()

    return result
| |
# Instruction block shared verbatim by every prompt template. Rule 10 spells
# out the real XML entities (&amp; / &lt; / &gt;) so the escaping instruction is
# actionable for the model.
_COMMON_INSTRUCTIONS = """You must return the result as a valid XML block that strictly follows the structure below.
STRICT INSTRUCTIONS – read carefully and follow EXACTLY:
1. Return **ONLY** the XML block – nothing before or after it.
2. DO NOT add, remove, rename, or reorder any XML tags.
3. DO NOT include explanations, markdown, notes, comments, or extra spacing outside the XML block.
4. For every tag, fill in the exact value read from the image.
• NEVER copy or repeat the label/placeholder text.
• NEVER guess or invent values.
5. If a value is missing or unreadable, leave the tag EMPTY (e.g. <tag></tag>).
6. DO NOT include Vietnamese text or translations inside tag values.
7. The output MUST start with the root tag and end with its correct closing tag; all tags must be well-formed.
8. Dates must be in YYYY-MM-DD format.
9. Boolean tags must be exactly true or false (lower-case, no quotes).
✔ √ Yes Passed ⇒ true | ✘ X No Fail ⇒ false
10. **Inside each value**
• Replace every internal line-break with “, ” (comma + space).
• Trim leading/trailing whitespace.
• Escape XML special characters: & → &amp;, < → &lt;, > → &gt;.
11. **Phone / contact fields** – digits, “+”, “–”, spaces only; if multiple numbers, separate with “, ”.
12. **Signature fields** – fill ONLY if the signature appears as legible text; if it is handwritten, leave the tag empty.
13. Ignore any information not represented by the tags below."""


def _build_template(intro, schema):
    """Assemble one prompt: intro line, shared instructions, then XML schema.

    ``schema`` may be written with surrounding blank lines for readability;
    they are stripped, and the prompt always ends with a single newline.
    """
    return f"{intro}\n{_COMMON_INSTRUCTIONS}\n{schema.strip()}\n"


# Prompt templates keyed by the document-type name shown on the UI buttons.
prompt_templates = {
    "Electrolux": _build_template(
        "Extract all structured information from the delivery order document image.",
        """
<s_electrolux_form>
<document_number>Số lệnh giao nhận hàng</document_number>
<order_number>Số đơn hàng</order_number>
<customer_code>Mã số khách hàng</customer_code>
<customer_order_code>Mã đơn khách hàng</customer_order_code>
<customer_order_date>Ngày đặt hàng của khách</customer_order_date>
<delivery_date>Ngày giao hàng</delivery_date>
<requested_delivery_date>Ngày giao hàng yêu cầu</requested_delivery_date>
<invoice_number>Số hóa đơn</invoice_number>
<shipper_company_name>Tên công ty gửi hàng</shipper_company_name>
<shipper_address>Địa chỉ gửi hàng</shipper_address>
<shipper_phone>Số điện thoại</shipper_phone>
<shipper_fax>Số fax</shipper_fax>
<shipper_tax_code>Mã số thuế</shipper_tax_code>
<consignee_customer_code>Mã khách hàng</consignee_customer_code>
<consignee_company_name>Tên công ty nhận hàng</consignee_company_name>
<shipping_address>Địa chỉ nhận hàng chi tiết</shipping_address>
<city_province>Tỉnh/Thành phố</city_province>
<postal_code>Mã bưu chính</postal_code>
<preparer_name>Họ tên người lập phiếu</preparer_name>
<preparer_date>Ngày lập phiếu</preparer_date>
<s_is_signed>Đã ký hay chưa (true hoặc false)</s_is_signed>
</s_electrolux_form>
""",
    ),
    "Jotun": _build_template(
        "Extract all structured information from the delivery order document.",
        """
<s_jotun_form>
<document_number>Số lệnh giao hàng</document_number>
<delivery_order_code>Số lệnh giao hàng số</delivery_order_code>
<customer_code>Mã khách hàng</customer_code>
<customer_name>Tên khách hàng</customer_name>
<customer_address>Địa chỉ khách hàng</customer_address>
<customer_phone>Điện thoại khách hàng</customer_phone>
<invoice_receiver_name>Tên người nhận hóa đơn</invoice_receiver_name>
<invoice_receiver_address>Địa chỉ người nhận hóa đơn</invoice_receiver_address>
<order_code>Số đơn đặt hàng</order_code>
<order_date>Ngày đặt hàng</order_date>
<order_number>Số đơn hàng</order_number>
<delivery_date>Ngày giao hàng</delivery_date>
<s_is_signed>Đã ký hay chưa (true hoặc false)</s_is_signed>
</s_jotun_form>
""",
    ),
    "MAWB": _build_template(
        "Extract all structured information from the Master Air Waybill (MAWB) document.",
        """
<s_mawb_form>
<air_waybill_number>Số MAWB</air_waybill_number>
<shipper_name>Tên người gửi hàng</shipper_name>
<shipper_address>Địa chỉ người gửi hàng</shipper_address>
<shipper_account_number>Mã tài khoản người gửi</shipper_account_number>
<consignee_name>Tên người nhận hàng</consignee_name>
<consignee_address>Địa chỉ người nhận hàng</consignee_address>
<consignee_account_number>Mã tài khoản người nhận</consignee_account_number>
<dangerous_goods_note>Ghi chú hàng nguy hiểm (true or false)</dangerous_goods_note>
<shipper_signature>Chữ ký người gửi</shipper_signature>
</s_mawb_form>
""",
    ),
    "Phiếu Cân": _build_template(
        "Extract all structured information from the document 'PHIẾU CÂN / SHIPPER’S LETTER OF INSTRUCTIONS'.",
        """
<s_weight_ticket>
<awb_number>Số AWB</awb_number>
<shipper_name>Tên người gửi hàng</shipper_name>
<shipper_address>Địa chỉ người gửi hàng</shipper_address>
<shipper_contact>Số điện thoại người gửi</shipper_contact>
<consignee_name>Tên người nhận hàng</consignee_name>
<consignee_address>Địa chỉ người nhận hàng</consignee_address>
<cargo_description>Tên hàng hóa</cargo_description>
<security_check_complete>Đã kiểm tra an ninh (true/false)</security_check_complete>
<acceptance_staff_name>Tên nhân viên tiếp nhận</acceptance_staff_name>
<acceptance_staff_signature>Chữ ký nhân viên tiếp nhận</acceptance_staff_signature>
</s_weight_ticket>
""",
    ),
    "PC 3U": _build_template(
        "Extract all structured information from the PC 3U air cargo instruction document.",
        """
<s_pc3u_form>
<awb_number>Số AWB</awb_number>
<cargo_service_code>Mã dịch vụ</cargo_service_code>
<shipper_name>Tên người gửi</shipper_name>
<shipper_address>Địa chỉ người gửi</shipper_address>
<shipper_contact>Thông tin liên hệ người gửi</shipper_contact>
<payer_name>Người thanh toán</payer_name>
<payer_tax_code>Mã số thuế người thanh toán</payer_tax_code>
<consignee_name>Tên người nhận</consignee_name>
<consignee_address>Địa chỉ người nhận</consignee_address>
<consignee_contact>Thông tin liên hệ người nhận</consignee_contact>
<shipper_signature>Chữ ký người gửi</shipper_signature>
<acceptance_staff_signature>Chữ ký nhân viên tiếp nhận</acceptance_staff_signature>
</s_pc3u_form>
""",
    ),
    "SLIS-AVS DAD": _build_template(
        "Extract all structured information from the document 'TỜ KHAI GỬI HÀNG - SHIPPER’S LETTER OF INSTRUCTION'.",
        """
<s_avs_dad>
<air_waybill_number>Số AWB</air_waybill_number>
<form_code>Mã biểu mẫu</form_code>
<shipper_name>Tên người gửi</shipper_name>
<shipper_address>Địa chỉ người gửi</shipper_address>
<shipper_phone>Điện thoại người gửi</shipper_phone>
<shipper_email>Email người gửi</shipper_email>
<shipper_tax_code>Mã số thuế người gửi</shipper_tax_code>
<consignee_name>Tên người nhận</consignee_name>
<consignee_address>Địa chỉ người nhận</consignee_address>
<consignee_phone>Điện thoại người nhận</consignee_phone>
<consignee_email>Email người nhận</consignee_email>
<departure_airport>Nơi đi</departure_airport>
<destination_airport>Nơi đến</destination_airport>
<acceptance_staff_name>Tên nhân viên tiếp nhận</acceptance_staff_name>
<acceptance_signature>Chữ ký nhân viên tiếp nhận</acceptance_signature>
<acceptance_time>Thời điểm tiếp nhận</acceptance_time>
<shipper_signature>Chữ ký người gửi</shipper_signature>
<shipper_signature_date>Ngày ký người gửi</shipper_signature_date>
</s_avs_dad>
""",
    ),
}
|
|
def insert_template(name):
    """Return the prompt template stored under ``name``, or "" if none exists."""
    if name in prompt_templates:
        return prompt_templates[name]
    return ""
|
|
def sanitize_filename(name):
    """Replace every character outside ``[a-zA-Z0-9_.-]`` with an underscore."""
    disallowed = re.compile(r'[^a-zA-Z0-9_\-\.]')
    return disallowed.sub('_', name)
|
|
# A leaf element: opening tag, text with no nested markup, closing tag.
_LEAF_ELEMENT_RE = re.compile(r"(<[^<>/][^<>]*>)([^<>]*)(</[^<>]+>)")


def clean_text(text):
    """Trim whitespace around the text and inside each leaf XML element.

    ``<tag>  value  </tag>`` becomes ``<tag>value</tag>``; whitespace between
    elements (for example the newlines separating sibling tags) is preserved,
    and only the outer edges of the whole string are stripped.

    Args:
        text: Raw model output; ``None`` or "" yields "".

    Returns:
        The cleaned string.
    """
    if not text:
        return ""
    trimmed = _LEAF_ELEMENT_RE.sub(
        lambda m: f"{m.group(1)}{m.group(2).strip()}{m.group(3)}", text
    )
    return trimmed.strip()
|
|
def export_json(image_name, result_text):
    """Write the OCR result to a JSON file and return ``(path, json_string)``.

    The file is created in the system temporary directory and named after the
    sanitized ``image_name``; an empty or missing name falls back to
    ``result`` so the output is never a bare ``.json`` dotfile.

    Args:
        image_name: Source file name, stored under the ``"image"`` key.
        result_text: OCR output, cleaned and stored under ``"text_sequence"``.

    Returns:
        ``(path, payload)`` on success, where ``payload`` is exactly the text
        written to ``path``. On any failure returns
        ``("", "[Export JSON Failed]: <error>")`` instead of raising.
    """
    import tempfile  # local import: only this function needs it

    try:
        clean_name = sanitize_filename(image_name or "") or "result"
        content = {"image": image_name, "text_sequence": clean_text(result_text)}
        # Serialize once so the file and the returned string cannot diverge.
        payload = json.dumps(content, ensure_ascii=False, indent=2)
        path = os.path.join(tempfile.gettempdir(), f"{clean_name}.json")
        with open(path, "w", encoding="utf-8") as f:
            f.write(payload)
        return path, payload
    except Exception as e:
        return "", f"[Export JSON Failed]: {e}"
|
|
| |
| |
# Custom stylesheet handed to ``gr.Blocks(css=...)``. It overrides font size,
# padding and height for textboxes, buttons (primary buttons are taller),
# file inputs, markdown headings and code blocks.
css = """
.gradio-textbox textarea {
font-size: 13px !important;
line-height: 1.3 !important;
padding: 6px 8px !important;
}
.gradio-textbox label {
font-size: 13px !important;
font-weight: 600 !important;
margin-bottom: 4px !important;
}
.gradio-button {
font-size: 12px !important;
padding: 4px 8px !important;
height: 28px !important;
min-height: 28px !important;
margin: 2px !important;
}
.gradio-button[data-variant="primary"] {
height: 36px !important;
font-size: 13px !important;
padding: 8px 16px !important;
}
.gradio-file {
font-size: 13px !important;
}
.gradio-file .file-upload {
padding: 8px !important;
min-height: 80px !important;
}
.gradio-markdown h3 {
font-size: 14px !important;
margin: 8px 0 4px 0 !important;
}
.gradio-markdown h2 {
font-size: 18px !important;
margin: 8px 0 !important;
}
.gradio-code {
font-size: 12px !important;
}
"""
|
|
# Two-column Gradio UI: inputs and controls on the left, results and JSON
# export on the right. Event wiring lives inside the Blocks context.
with gr.Blocks(title="Camel-Doc-OCR", css=css) as demo:
    gr.Markdown("## 🧾 Camel-Doc-OCR (Qwen2.5-VL, 4-bit)")

    with gr.Row():
        # ---- Left column: upload, prompts, generation settings ----
        with gr.Column(scale=1):
            gr.Markdown("### 📥 INPUT")

            file_input = gr.File(
                label="📤 Tải ảnh hoặc PDF",
                file_types=[".jpg", ".jpeg", ".png", ".pdf"],
                height=100,
            )

            prompt_input = gr.Textbox(
                label="Prompt thuần",
                lines=2,
                placeholder="Nhập prompt tùy chỉnh...",
                max_lines=3,
            )

            config_input = gr.Textbox(
                label="JSON Prompt",
                lines=6,
                placeholder="Cấu hình JSON sẽ xuất hiện ở đây...",
                max_lines=8,
            )

            max_new_tokens_input = gr.Radio(
                choices=[128, 256, 512, 1024, 1536, 2048],
                value=512,
                label="🔢 Chọn max_new_tokens (giới hạn độ dài đầu ra)",
                info="Chọn độ dài tối đa cho đầu ra của mô hình",
            )

            # One button per template; clicking it fills the JSON prompt box.
            gr.Markdown("### 📑 Mẫu:")
            with gr.Row():
                for key in prompt_templates:
                    # ``k=key`` binds the current key at definition time
                    # (avoids the late-binding closure pitfall).
                    gr.Button(f"{key}", size="sm", scale=1).click(
                        fn=lambda *, k=key: insert_template(k),
                        inputs=[],
                        outputs=config_input,
                    )

            run_btn = gr.Button("🚀 Chạy OCR", variant="primary")

        # ---- Right column: OCR result and JSON export ----
        with gr.Column(scale=1):
            gr.Markdown("### 📤 OUTPUT")

            result_output = gr.Textbox(
                label="Kết quả trích xuất",
                lines=10,
                placeholder="Kết quả sẽ hiển thị ở đây sau khi chạy OCR...",
                max_lines=12,
            )

            with gr.Row():
                # Hidden until an OCR run has finished (revealed below).
                export_btn = gr.Button("📦 Xuất JSON", visible=False, variant="secondary", size="sm")

            json_text = gr.Code(
                label="JSON Output",
                language="json",
                lines=6,
                visible=False,
            )

            json_file = gr.File(
                label="File JSON để tải",
                visible=False,
                file_types=[".json"],
            )

            # Carries the processed file name from the OCR step to the export step.
            hidden_name = gr.Textbox(visible=False)

    # Run OCR, then reveal the export button so the result can be exported.
    run_btn.click(
        fn=handle_file,
        inputs=[file_input, prompt_input, config_input, max_new_tokens_input],
        outputs=[hidden_name, result_output],
    ).then(
        fn=lambda: gr.update(visible=True),
        outputs=[export_btn],
    )

    # Export to JSON, then reveal both JSON outputs in a single follow-up step.
    export_btn.click(
        fn=export_json,
        inputs=[hidden_name, result_output],
        outputs=[json_file, json_text],
    ).then(
        fn=lambda: (gr.update(visible=True), gr.update(visible=True)),
        outputs=[json_file, json_text],
    )
|
|
if __name__ == "__main__":
    # Start the Gradio server on all interfaces, port 7860, with a share link.
    launch_options = {
        "share": True,
        "server_name": "0.0.0.0",
        "server_port": 7860,
    }
    demo.launch(**launch_options)