Spaces:
Paused
Paused
| import torch | |
| from model_loader import model, processor, device | |
| from processor_utils import load_input | |
| from prompt import get_prompt | |
| from inference import process_document | |
| import json | |
| # def process_whole_doc(file_path): | |
| # images = load_input(file_path) | |
| # complete_json = {} | |
| # for image in images: | |
| # json = process_documents(image) | |
| # def process_whole_doc(file_path): | |
| # images = load_input(file_path) | |
| # complete_json = {} | |
| # for image in images: | |
| # partial_json = process_document(image) | |
| # for part_name, part_value in partial_json.items(): | |
| # # Ensure structure exists | |
| # if part_name not in complete_json: | |
| # complete_json[part_name] = [{}] | |
| # if not complete_json[part_name]: | |
| # complete_json[part_name].append({}) | |
| # # Merge data | |
| # if isinstance(part_value, list) and part_value: | |
| # complete_json[part_name][0].update(part_value[0]) | |
| # return complete_json | |
def process_whole_doc(file_path):
    """Process every image of a document and return the results as one array string.

    The input is loaded with ``load_input``, ``process_document`` is called
    once per resulting image, and the per-image results are joined with
    commas and wrapped in square brackets.

    Args:
        file_path: Location of the input; passed unchanged to ``load_input``.

    Returns:
        A string of the form ``"[r1,r2,...]"`` holding the per-image results
        in input order, or ``"[]"`` when ``load_input`` yields no images.

    Raises:
        TypeError: If a per-image result is not a string.

    Note:
        Each per-image result is inserted verbatim; nothing is parsed or
        validated here. The output is therefore well-formed JSON only if
        every piece is itself a valid JSON value.
        NOTE(review): ``process_document`` is assumed to return a JSON string
        (per the original inline comment) -- confirm against its definition.
    """
    images = load_input(file_path)
    # str.join inserts separators only between items, so no "first item"
    # flag is needed and the result is built without repeated concatenation.
    partial_results = [process_document(image) for image in images]
    return "[" + ",".join(partial_results) + "]"
| # complete_json = "" | |
| # # PART_NAME = "PART-1 - BILL OF ENTRY SUMMARY" # you can change per doc type | |
| # for i, image in enumerate(images, start=1): | |
| # partial_json = process_document(image) | |
| # # print(partial_json) | |
| # complete_json = complete_json+partial_json | |
| # # # 🔹 Extract only "result" | |
| # # result_data = partial_json.get("result", {}) | |
| # # page_key = f"Page {i}" | |
| # # Ensure structure exists | |
| # # # if PART_NAME not in complete_json: | |
| # # complete_json[] = [{}] | |
| # # if not complete_json[PART_NAME]: | |
| # # complete_json[PART_NAME].append({}) | |
| # # ✅ Merge directly | |
| # # complete_json[page_key]=result_data | |
| # return complete_json | |