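| ''' |
| Run this program first: it merges the answers file and the questions file into a single JSON file, which makes the later conversion step easier. |
| Usage is simple - see the example call at the end of the program: pass the paths of the two input files and the path of the output file. |
| ''' |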
|
|
| import json |
| from collections import defaultdict |
|
|
def deep_merge(base_dict, merge_dict):
    """Recursively merge ``merge_dict`` into ``base_dict`` in place.

    For every key of ``merge_dict``:

    * key absent from ``base_dict``: a deep copy of the value is inserted;
    * both values are dicts: they are merged recursively;
    * both values are lists: they are concatenated, base items first;
    * anything else (scalars, mismatched types): the base value is kept.

    Args:
        base_dict: Dictionary that receives the merged entries (mutated).
        merge_dict: Dictionary whose entries are merged in. It is not
            modified, and ``base_dict`` never ends up sharing mutable
            containers with it.

    Returns:
        The same ``base_dict`` object, after merging.
    """
    from copy import deepcopy  # local import keeps the function self-contained

    for key, incoming in merge_dict.items():
        if key not in base_dict:
            # Copy rather than alias: a later recursive merge into
            # base_dict[key] must not write through into merge_dict.
            base_dict[key] = deepcopy(incoming)
            continue

        current = base_dict[key]
        if isinstance(current, dict) and isinstance(incoming, dict):
            deep_merge(current, incoming)
        elif isinstance(current, list) and isinstance(incoming, list):
            base_dict[key] = current + deepcopy(incoming)
        # Any other conflict: the value already in base_dict wins.
    return base_dict
|
|
def merge_json_files(answers_file, questions_file, output_file):
    """Merge each question with its referenced answers and write JSON.

    ``answers_file`` must hold ``{"answers": [...]}`` where every item has an
    ``"id"`` key; ``questions_file`` must hold ``{"questions": [...]}``.
    For each question, every id listed under its ``"answers_ids"`` key that
    exists in the answers file is merged into a copy of the question via
    ``deep_merge``. Ids without a matching answer are skipped. The result is
    written to ``output_file`` as ``{"merged_data": [...]}``, keeping the
    order of the questions.

    Args:
        answers_file: Path of the JSON file containing the answers.
        questions_file: Path of the JSON file containing the questions.
        output_file: Path of the JSON file to create or overwrite.

    Raises:
        OSError: If a file cannot be opened.
        json.JSONDecodeError: If an input file is not valid JSON.
        KeyError: If a top-level ``"answers"`` / ``"questions"`` key or an
            answer's ``"id"`` key is missing.
    """
    from copy import deepcopy  # local import keeps the function self-contained

    # JSON is UTF-8 by specification; do not depend on the locale default,
    # especially since the output is written with ensure_ascii=False.
    with open(answers_file, encoding="utf-8") as f:
        answers = {item['id']: item for item in json.load(f)['answers']}

    with open(questions_file, encoding="utf-8") as f:
        questions = json.load(f)['questions']

    merged = []
    for q in questions:
        # Deep copies keep the loaded questions and answers pristine, so an
        # answer shared by several questions is merged identically each time.
        merged_q = deepcopy(q)
        # "or []" also covers an explicit null value for "answers_ids".
        for ans_id in q.get('answers_ids') or []:
            if ans_id in answers:
                merged_q = deep_merge(merged_q, deepcopy(answers[ans_id]))
        merged.append(merged_q)

    with open(output_file, 'w', encoding="utf-8") as f:
        json.dump({"merged_data": merged}, f, indent=2, ensure_ascii=False)
|
|
| |
# Usage example: merge the USGS answers and questions files into one JSON file.
merge_json_files(
    answers_file="/mnt/data/users/zys/proj/vlm_reasoning/unprocessed_data/Satellite/USGSanswers.json",
    questions_file="/mnt/data/users/zys/proj/vlm_reasoning/unprocessed_data/Satellite/USGSquestions.json",
    output_file='/mnt/data/users/zys/proj/vlm_reasoning/unprocessed_data/Satellite/merged_output.json',
)