Experiments

Paused

App Files Files Community

Experiments / json_handling.py

credent007

Update json_handling.py

d501a2d verified 4 days ago

raw

history blame contribute delete

2.17 kB

	import torch
	from model_loader import model, processor, device
	from processor_utils import load_input
	from prompt import get_prompt
	from inference import process_document
	import json

	# def process_whole_doc(file_path):
	# images = load_input(file_path)
	# complete_json = {}
	# for image in images:
	# json = process_documents(image)
	# def process_whole_doc(file_path):
	# images = load_input(file_path)
	# complete_json = {}

	# for image in images:
	# partial_json = process_document(image)

	# for part_name, part_value in partial_json.items():

	# # Ensure structure exists
	# if part_name not in complete_json:
	# complete_json[part_name] = [{}]

	# if not complete_json[part_name]:
	# complete_json[part_name].append({})

	# # Merge data
	# if isinstance(part_value, list) and part_value:
	# complete_json[part_name][0].update(part_value[0])

	# return complete_json

	def process_whole_doc(file_path) -> str:
	    """Process every page of a document and combine the results into one JSON array string.

	    The file is expanded into pages with ``load_input`` and each page is
	    passed to ``process_document``. The per-page strings are joined with
	    commas and wrapped in square brackets.

	    Args:
	        file_path: Location of the input document, forwarded unchanged to
	            ``load_input``.

	    Returns:
	        A string of the form ``"[<page 1>,<page 2>,...]"``. When
	        ``load_input`` yields no pages the result is ``"[]"``.

	    Note:
	        The per-page strings are concatenated verbatim; nothing here parses
	        or validates them. Presumably each one is a complete JSON value --
	        TODO confirm against ``process_document``.
	    """
	    images = load_input(file_path)

	    # str.join places the separators itself, so no "first element" flag is
	    # needed and the result is built in one pass instead of repeated `+=`.
	    page_fragments = ",".join(process_document(image) for image in images)

	    return "[" + page_fragments + "]"
	# complete_json = ""


	# # PART_NAME = "PART-1 - BILL OF ENTRY SUMMARY" # you can change per doc type
	# for i, image in enumerate(images, start=1):
	# partial_json = process_document(image)
	# # print(partial_json)
	# complete_json = complete_json+partial_json
	# # # 🔹 Extract only "result"
	# # result_data = partial_json.get("result", {})
	# # page_key = f"Page {i}"
	# # Ensure structure exists
	# # # if PART_NAME not in complete_json:
	# # complete_json[] = [{}]

	# # if not complete_json[PART_NAME]:
	# # complete_json[PART_NAME].append({})

	# # ✅ Merge directly
	# # complete_json[page_key]=result_data

	# return complete_json