# Experiments / json_handling.py
# credent007's picture
# Update json_handling.py
# d501a2d verified
import torch
from model_loader import model, processor, device
from processor_utils import load_input
from prompt import get_prompt
from inference import process_document
import json
# def process_whole_doc(file_path):
# images = load_input(file_path)
# complete_json = {}
# for image in images:
# json = process_documents(image)
# def process_whole_doc(file_path):
# images = load_input(file_path)
# complete_json = {}
# for image in images:
# partial_json = process_document(image)
# for part_name, part_value in partial_json.items():
# # Ensure structure exists
# if part_name not in complete_json:
# complete_json[part_name] = [{}]
# if not complete_json[part_name]:
# complete_json[part_name].append({})
# # Merge data
# if isinstance(part_value, list) and part_value:
# complete_json[part_name][0].update(part_value[0])
# return complete_json
def process_whole_doc(file_path):
    """Process every image of a document and return the results as one string.

    ``file_path`` is handed to ``load_input``; each item it yields is passed
    to ``process_document`` in order. The strings that come back are joined
    with commas and wrapped in square brackets, so two pages producing
    ``{"a": 1}`` and ``{"b": 2}`` give ``[{"a": 1},{"b": 2}]``. If
    ``load_input`` yields nothing, the result is ``"[]"``.

    The fragments are concatenated verbatim: nothing is parsed or validated
    here, so the result is a well-formed JSON array only when every fragment
    is itself valid JSON.

    Args:
        file_path: Location of the document, in whatever form ``load_input``
            accepts.

    Returns:
        str: ``"["`` + comma-separated per-image fragments + ``"]"``.

    Raises:
        TypeError: If ``process_document`` returns something other than a
            string for any image.
    """
    images = load_input(file_path)
    # str.join puts the separator between fragments only, which replaces the
    # manual "first" flag and avoids rebuilding the string on every iteration.
    fragments = (process_document(image) for image in images)  # each is a string
    return "[" + ",".join(fragments) + "]"
# complete_json = ""
# # PART_NAME = "PART-1 - BILL OF ENTRY SUMMARY" # you can change per doc type
# for i, image in enumerate(images, start=1):
# partial_json = process_document(image)
# # print(partial_json)
# complete_json = complete_json+partial_json
# # # 🔹 Extract only "result"
# # result_data = partial_json.get("result", {})
# # page_key = f"Page {i}"
# # Ensure structure exists
# # # if PART_NAME not in complete_json:
# # complete_json[] = [{}]
# # if not complete_json[PART_NAME]:
# # complete_json[PART_NAME].append({})
# # ✅ Merge directly
# # complete_json[page_key]=result_data
# return complete_json