import json

import torch

from model_loader import model, processor, device
from processor_utils import load_input
from prompt import get_prompt
from inference import process_document


# NOTE: earlier drafts of process_whole_doc (merging each page's parsed dict
# into a single dict keyed by part name, and storing each page's "result"
# under a "Page N" key) were removed as dead code. The current contract is a
# single string holding the per-page outputs wrapped in a JSON-style array.


def process_whole_doc(file_path) -> str:
    """Run ``process_document`` on every image of a document and combine the results.

    Args:
        file_path: Passed straight to ``load_input``, which yields the images
            to process (presumably one per page -- confirm in processor_utils).

    Returns:
        A string of the form ``"[<r1>,<r2>,...]"`` where each ``<r>`` is the
        raw return value of ``process_document`` for one image, in input
        order. Returns ``"[]"`` when ``load_input`` yields no images.

    Raises:
        TypeError: If ``process_document`` returns a non-string value.
    """
    images = load_input(file_path)

    # process_document is expected to return a string per image (assumed to be
    # JSON text -- it is NOT parsed or validated here, only concatenated).
    page_results = [process_document(image) for image in images]

    # str.join inserts the separators, so no "first item" bookkeeping is needed.
    return "[" + ",".join(page_results) + "]"