Spaces:

vinhngba2704
/

Multimodal-Image-Audio

Sleeping

File size: 3,797 Bytes
from dotenv import load_dotenv
import os
import pandas as pd
import json
from google.cloud import vision
import google.generativeai as genai
from google.oauth2 import service_account
import re

# Initialized Modules
from modules.mapping import mapping_employee, mapping_merchant, mapping_product, mapping_unit

# Load variables from a .env file into the environment before any of the
# os.getenv() calls below read their configuration.
load_dotenv()
# Load the credentials for the Cloud Vision API client.
# GOOGLE_APPLICATION_CREDENTIALS_JSON must hold the service-account key as a
# JSON string: it is parsed with json.loads, so if the variable is unset
# (os.getenv returns None) this module fails at import time with a TypeError.
service_account_info_str = os.getenv("GOOGLE_APPLICATION_CREDENTIALS_JSON")
service_account_info = json.loads(service_account_info_str)
CREDENTIALS = service_account.Credentials.from_service_account_info(service_account_info)
# Configure the Gemini SDK with the API key read from the environment;
# MODEL_NAME selects which model is instantiated below.
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
MODEL_NAME = os.getenv("MODEL_NAME")
genai.configure(api_key=GEMINI_API_KEY)

# Shared Gemini model instance; image_process passes it to parse_image_text
# as the extraction model.
LLM_model = genai.GenerativeModel(MODEL_NAME)

# Line Split Function
def line():
    """Print a row of 30 ``=`` characters to stdout as a visual separator."""
    divider = "".ljust(30, "=")
    print(divider)

# Image to raw text
def process_ocr(image_path):
    """Run Cloud Vision document text detection on an image file.

    This is a best-effort helper: every failure is printed and reported to
    the caller as an empty string instead of an exception.

    Args:
        image_path: Path of the image file to read and send to the Vision API.

    Returns:
        The ``description`` of the first text annotation in the response, or
        ``""`` when no text was detected, the API reported an error for the
        image, or any step raised.
    """
    try:
        client = vision.ImageAnnotatorClient(credentials=CREDENTIALS)

        with open(image_path, "rb") as image_file:
            content = image_file.read()

        image = vision.Image(content=content)
        response = client.document_text_detection(image=image)

        # The Vision API reports per-image failures in `response.error`
        # rather than raising, so without this check an API-side failure
        # would look exactly like "no text found" and leave no trace.
        if response.error.message:
            print(f"OCR failed: {response.error.message}")
            return ""

        # Extract detected text
        texts = response.text_annotations
        return texts[0].description if texts else ""
    except Exception as e:
        # Deliberate catch-all boundary: callers rely on "" meaning failure.
        print(f"OCR failed: {e}")
        return ""
    
# Parsing image-text
def parse_image_text(text, extract_model):
    """Ask the LLM to extract order records from OCR'd invoice text.

    Args:
        text: Raw invoice text (Vietnamese) to embed in the prompt.
        extract_model: Object exposing ``generate_content(prompt)`` whose
            result has a ``text`` attribute (e.g. a Gemini ``GenerativeModel``).

    Returns:
        A list of order dicts, one per dict-valued entry of the JSON object
        found in the model reply (the ``order_N`` keys are discarded). Returns
        ``[]`` when the reply contains no decodable JSON object. Exceptions
        raised by ``generate_content`` itself are not caught and propagate.
    """
    # NOTE(review): in the prompt, "đại lý mua" (the buying agent) is labelled
    # (seller) and "đại lý bán" (the selling agent) is labelled (buyer). This
    # looks swapped; the text is left untouched here because downstream
    # mapping may depend on the current behaviour. Confirm and fix together.
    prompt = f"""
    Dưới đây là nội dung hóa đơn bằng tiếng Việt. Hãy trích xuất tên đại lý mua (seller), tên đại lý bán (buyer), tên sản phẩm (product_name), đơn vị tính (unit), số lượng theo từng đơn hàng (quantity), ngày đặt hàng (order_date).

    Văn bản:
    {text}

    Trả về kết quả dạng JSON:
    {{
    "order_1": {{
        "seller": "...",
        "buyer": "...",
        "product_name": "...",
        "unit": "...",
        "quantity": "...",
        "order_date": "..."
    }},
    ...
    }}
    """
    response = extract_model.generate_content(prompt)

    try:
        content = response.text
        start = content.find("{")
        if start == -1:
            raise ValueError("No valid JSON found in Gemini output")
        # Decode only the first complete JSON object. Unlike a greedy
        # "first { to last }" match, this tolerates code fences and any
        # explanatory text (even text containing braces) after the JSON.
        payload, _ = json.JSONDecoder().raw_decode(content, start)
    except Exception as e:
        print("Failed to parse JSON from LLM response:", e)
        return []

    # Callers attach keys to every order, so only dict-shaped entries are
    # usable; anything else (e.g. a flat, un-nested reply) is dropped.
    orders = [order for order in payload.values() if isinstance(order, dict)]
    skipped = len(payload) - len(orders)
    if skipped:
        print(f"Ignored {skipped} non-object entries in LLM response")
    return orders  # List of orders
    
# Image Handling Function
def image_process(image_path, order_id):
    """Turn an invoice image into mapped order records.

    Pipeline: OCR the image, have the LLM extract orders from the text, map
    merchants and then units, and tag every resulting record with
    ``order_id``. Employee and product mapping are intentionally skipped.

    Args:
        image_path: Path of the invoice image to process.
        order_id: Identifier stored under the ``"order_id"`` key of every
            returned record.

    Returns:
        The records returned by ``mapping_unit``, each with ``"order_id"``
        set, or ``[]`` when OCR produced no text.
    """
    print(f"Start process image file: {os.path.basename(image_path)}")
    line()

    # Image to Text
    raw_text = process_ocr(image_path=image_path)
    if not raw_text.strip():
        # process_ocr returns "" on failure or when nothing was detected.
        # Sending an empty document to the LLM would only invite it to echo
        # or invent orders from the prompt's placeholder JSON, so stop here.
        print("No text extracted from image; skipping extraction and mapping.")
        line()
        return []
    print(f"Successfully extract raw text. Text: {raw_text}")
    line()

    # Text to JSON
    extracted_information = parse_image_text(
        text=raw_text,
        extract_model=LLM_model
    )
    print("Extracted Information.")
    line()

    # Mapping (both steps share the same normalization rule file)
    normalization_rule = os.getenv("NORMALIZATION_RULE_PATH")

    merchant_mapped_data = mapping_merchant(
        information=extracted_information,
        json_path=os.getenv("MERCHANT_JSON_PATH"),
        normalization_rule=normalization_rule
    )

    unit_merchant_mapped_data = mapping_unit(
        information=merchant_mapped_data,
        json_path=os.getenv("UNIT_JSON_PATH"),
        normalization_rule=normalization_rule
    )

    # Skipping employee and product mapping
    processed_data = unit_merchant_mapped_data

    # Assign order id
    for item in processed_data:
        item["order_id"] = order_id

    print("Successfully mapped data (merchant + unit).")
    line()

    return processed_data