Spaces:
Sleeping
Sleeping
File size: 3,797 Bytes
5654237 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 | from dotenv import load_dotenv
import os
import pandas as pd
import json
from google.cloud import vision
import google.generativeai as genai
from google.oauth2 import service_account
import re
# Initialized Modules
from modules.mapping import mapping_employee, mapping_merchant, mapping_product, mapping_unit
# Populate the process environment from a .env file before any of the
# os.getenv() lookups below run.
load_dotenv()
# Load the credential for Cloud-Vision-API model.
# The service-account key is supplied as a JSON string in an environment
# variable, not as a file path. os.getenv() returns None when the variable is
# unset, in which case json.loads() raises TypeError at import time.
service_account_info_str = os.getenv("GOOGLE_APPLICATION_CREDENTIALS_JSON")
service_account_info = json.loads(service_account_info_str)
CREDENTIALS = service_account.Credentials.from_service_account_info(service_account_info)
# Load the Gemini model settings (API key and model name) from the environment.
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
MODEL_NAME = os.getenv("MODEL_NAME")
# Register the API key with the genai library before building the model.
genai.configure(api_key=GEMINI_API_KEY)
# Gemini Model: shared module-level instance, passed to parse_image_text()
# by image_process().
LLM_model = genai.GenerativeModel(MODEL_NAME)
# Line Split Function
def line():
    """Print a 30-character rule of '=' to separate sections of console output."""
    rule = "".ljust(30, "=")
    print(rule)
# Image to raw text
def process_ocr(image_path):
    """Run Cloud Vision document text detection on an image file.

    This is a best-effort helper: any failure (unreadable file, client or
    network error, or an error reported inside the API response) is printed
    and turned into an empty string instead of being raised.

    Args:
        image_path: Path of the image file to read and submit for OCR.

    Returns:
        The ``description`` of the first text annotation in the response, or
        ``""`` when no text was detected or when OCR failed.
    """
    try:
        client = vision.ImageAnnotatorClient(credentials=CREDENTIALS)
        with open(image_path, "rb") as image_file:
            content = image_file.read()
        image = vision.Image(content=content)
        response = client.document_text_detection(image=image)
        # Per-image failures are reported in the response body rather than
        # raised by the client; without this check they would look exactly
        # like "no text found" and leave no trace in the output.
        if response.error.message:
            raise RuntimeError(response.error.message)
        # Extract detected text
        texts = response.text_annotations
        return texts[0].description if texts else ""
    except Exception as e:
        # Deliberately broad: callers expect "" rather than an exception.
        print(f"OCR failed: {e}")
        return ""
# Parsing image-text
def parse_image_text(text, extract_model):
    """Ask the LLM to extract order records from OCR'd invoice text.

    Args:
        text: Raw invoice text (Vietnamese) produced by OCR.
        extract_model: Object exposing ``generate_content(prompt)`` whose
            result has a ``.text`` attribute (the Gemini model in this file).

    Returns:
        A list of dicts, one per extracted order. An empty list is returned
        when ``text`` is empty/blank, when no JSON object can be found or
        decoded in the model output, or when the decoded object contains no
        order-like data. Exceptions raised by ``generate_content`` itself
        propagate to the caller.
    """
    # OCR yields "" on failure; sending an empty invoice to the model only
    # wastes a request and invites made-up orders.
    if not text or not text.strip():
        print("No OCR text to parse; skipping LLM extraction.")
        return []

    # NOTE(review): the Vietnamese labels look swapped relative to the English
    # keys ("đại lý mua" = buying agent is tagged seller, "đại lý bán" =
    # selling agent is tagged buyer). Left unchanged because downstream
    # mapping may rely on the current behaviour -- confirm before editing.
    prompt = f"""
Dưới đây là nội dung hóa đơn bằng tiếng Việt. Hãy trích xuất tên đại lý mua (seller), tên đại lý bán (buyer), tên sản phẩm (product_name), đơn vị tính (unit), số lượng theo từng đơn hàng (quantity), ngày đặt hàng (order_date).
Văn bản:
{text}
Trả về kết quả dạng JSON:
{{
"order_1": {{
"seller": "...",
"buyer": "...",
"product_name": "...",
"unit": "...",
"quantity": "...",
"order_date": "..."
}},
...
}}
"""
    response = extract_model.generate_content(prompt)
    try:
        content = response.text
        # Use regex to extract the JSON part: the model may wrap the object in
        # prose or a Markdown code fence, so take the outermost {...} span.
        match = re.search(r"\{[\s\S]*\}", content)
        if match is None:
            raise ValueError("No valid JSON found in Gemini output")
        extracted_json = json.loads(match.group(0))
    except Exception as e:
        print("Failed to parse JSON from LLM response:", e)
        return []

    # Keep only per-order objects; stray scalar fields (notes, totals, ...)
    # are not orders and would break callers that treat each item as a dict.
    orders = [entry for entry in extracted_json.values() if isinstance(entry, dict)]
    if orders:
        return orders

    # The model sometimes answers with a single flat order instead of the
    # requested {"order_1": {...}} wrapper; accept it as one order.
    order_fields = {"seller", "buyer", "product_name", "unit", "quantity", "order_date"}
    if order_fields & extracted_json.keys():
        return [extracted_json]
    return []
# Image Handling Function
def image_process(image_path, order_id):
    """Turn one invoice image into mapped order records tagged with an order id.

    The steps are: OCR the image, have the module-level Gemini model extract
    the orders from the text, run merchant mapping and then unit mapping
    (employee and product mapping are skipped), and finally set ``order_id``
    on every resulting item. Progress messages are printed along the way.

    Args:
        image_path: Path of the invoice image to process.
        order_id: Value stored under the ``"order_id"`` key of every item.

    Returns:
        The output of ``mapping_unit`` with ``"order_id"`` set on each item.
    """
    file_name = os.path.basename(image_path)
    print(f"Start process image file: {file_name}")
    line()

    # Step 1: image -> raw text
    raw_text = process_ocr(image_path=image_path)
    print(f"Successfully extract raw text. Text: {raw_text}")
    line()

    # Step 2: raw text -> list of extracted orders
    orders = parse_image_text(text=raw_text, extract_model=LLM_model)
    print("Extracted Information.")
    line()

    # Step 3: merchant mapping; paths are read from the environment right
    # before each call.
    merchant_json = os.getenv("MERCHANT_JSON_PATH")
    merchant_rules = os.getenv("NORMALIZATION_RULE_PATH")
    with_merchants = mapping_merchant(
        information=orders,
        json_path=merchant_json,
        normalization_rule=merchant_rules,
    )

    # Step 4: unit mapping on top of the merchant-mapped data.
    unit_json = os.getenv("UNIT_JSON_PATH")
    unit_rules = os.getenv("NORMALIZATION_RULE_PATH")
    processed_data = mapping_unit(
        information=with_merchants,
        json_path=unit_json,
        normalization_rule=unit_rules,
    )

    # Step 5: stamp every record with the caller-supplied order id.
    for record in processed_data:
        record["order_id"] = order_id
    print("Successfully mapped data (merchant + unit).")
    line()
    return processed_data
|