| import json |
| import os |
|
|
def reformat_json(input_file, output_file):
    """Rewrite a JSON annotation list as image-id / sentence-string records.

    The input file must contain a JSON list. Each entry is read through its
    ``"filename"`` key and its ``"sentences"`` list, and every sentence
    through its ``"tokens"`` list of strings. The output file receives a
    JSON list (indented by 4 spaces) of objects shaped as
    ``{"image_id": <filename>, "sentences": [<tokens joined by " ">, ...]}``,
    in the same order as the input.

    Args:
        input_file: Path of the JSON file to read.
        output_file: Path of the JSON file to write (overwritten if present).

    Raises:
        KeyError: If an entry lacks ``"filename"`` or ``"sentences"``, or a
            sentence lacks ``"tokens"``.
        json.JSONDecodeError: If ``input_file`` is not valid JSON.
    """
    # JSON text is UTF-8; pin the encoding instead of relying on the
    # platform's locale default, which would mis-decode non-ASCII tokens.
    with open(input_file, "r", encoding="utf-8") as f:
        data = json.load(f)

    new_data = [
        {
            "image_id": item["filename"],
            "sentences": [
                " ".join(sentence["tokens"]) for sentence in item["sentences"]
            ],
        }
        for item in data
    ]

    with open(output_file, "wt", encoding="utf-8") as f:
        json.dump(new_data, f, indent=4)
|
|
if __name__ == "__main__":
    # Step 1: reformat each split file found in the current directory,
    # writing ./reformat_<split>.json next to ./<split>.json.
    splits = ["train", "val", "test"]
    for split in splits:
        input_file = f"./{split}.json"
        output_file = f"./reformat_{split}.json"
        reformat_json(input_file, output_file)

    # Step 2: re-index the reformatted test split as a single mapping
    # {image_id: sentences}. Kept under the __main__ guard so that importing
    # this module never touches the filesystem. If two records share an
    # image_id, the later one replaces the earlier one.
    # UTF-8 is pinned so behavior does not depend on the platform locale.
    with open("./reformat_test.json", "r", encoding="utf-8") as f:
        data = json.load(f)
    new_data = {item["image_id"]: item["sentences"] for item in data}
    with open("./reformat_test_all.json", "wt", encoding="utf-8") as f:
        json.dump(new_data, f)
| |