| import csv |
| import json |
| from pathlib import Path |
|
|
def csv_to_json(csv_path, json_path, file_stem):
    """Convert a 4D-Bench object-captioning CSV into the benchmark JSON list.

    Each CSV row must provide a ``folder_name`` column plus five caption
    columns (``caption_1`` .. ``caption_5``).  One JSON entry is emitted per
    row, with the media path built as ``./data/<file_stem>/<folder_name>``.

    Args:
        csv_path: Path to the input CSV of human annotations.
        json_path: Path where the JSON list is written (overwritten if present).
        file_stem: Dataset folder name used to build each relative media path.

    Raises:
        KeyError: If a required column is missing from the CSV header.
        OSError: If either file cannot be opened.
    """
    json_data = []

    # newline='' is required by the csv module so quoted fields containing
    # embedded newlines (free-text captions) round-trip correctly.
    with open(csv_path, 'r', encoding='utf-8', newline='') as csv_file:
        csv_reader = csv.DictReader(csv_file)

        for index, row in enumerate(csv_reader):
            folder_name = row['folder_name']
            # POSIX-style relative path: ./data/<file_stem>/<folder_name>
            media_path = "./" + (Path("data") / file_stem / folder_name).as_posix()

            # Collect the five human-written captions in column order.
            answer = [row[f'caption_{i}'] for i in range(1, 6)]

            entry = {
                "index": index,
                "media_type": "Video",
                "media_paths": media_path,
                "description": "",
                "task_type": "Vision-Question-Answer",
                "question": ["Please generate descriptive captions for this multi-view video."],
                "question_type": "free-form",
                "annotations": {},
                "options": [],
                "answer": answer,
                "source": "4D-Bench",
                "domain": "Embodied_ai"
            }

            json_data.append(entry)

    with open(json_path, 'w', encoding='utf-8') as json_file:
        json.dump(json_data, json_file, indent=2)
|
|
| |
if __name__ == "__main__":
    # One-shot conversion run for the 4D_Object_Captioning split.
    DATASET = "4D_Object_Captioning"
    SOURCE_CSV = ("/mnt/data/users/zys/proj/vlm_reasoning/unprocessed_data/emb_ai/4d/"
                  "4D_Object_Captioning/data/human_annotations.csv")
    TARGET_JSON = "/mnt/data/users/zys/proj/vlm_reasoning/dataset/4D_Object_Captioning.json"

    csv_to_json(SOURCE_CSV, TARGET_JSON, DATASET)