| import json
|
| import os
|
| from PIL import Image
|
| import numpy as np
|
| from pycocotools.mask import encode, decode, frPyObjects
|
| from tqdm import tqdm
|
| import copy
|
| from natsort import natsorted
|
| import cv2
|
|
|
| import argparse
|
# Command-line interface. Arguments are parsed at module load time and the
# rest of the script reads them through the module-level ``args`` namespace.
parser = argparse.ArgumentParser()

# The three path arguments are mandatory, so no default is declared for them
# (a default on a required argument is never used).
parser.add_argument('--root_path', type=str, required=True,
                    help='Root path of the dataset')
parser.add_argument('--save_path', type=str, required=True,
                    help='Path to save the json file')
parser.add_argument('--split_path', type=str, required=True,
                    help='Path to the split file')

# Key looked up in the split file to obtain the list of videos to process.
parser.add_argument("--split", type=str, default="val",
                    help="Split to use (train/val/test)")

# Restricting the accepted values here makes a mistyped task fail at parse
# time instead of after annotation files have already been read.
parser.add_argument('--task', type=str, default='ego2exo',
                    choices=['ego2exo', 'exo2ego'],
                    help='Task type (ego2exo/exo2ego)')

args = parser.parse_args()
|
|
|
|
|
# Camera directories whose name contains this substring are treated as the
# egocentric view; every other camera directory is treated as exocentric.
EGO_CAM_MARKER = 'aria'

# Integer divisors applied to the native mask resolution before re-encoding.
# Ego masks are always halved and exo masks always quartered, regardless of
# which of the two plays the query or the target role.
EGO_DOWNSCALE = 2
EXO_DOWNSCALE = 4

# File listing the videos that must be skipped, one name per line.
MISSING_TAKES_PATH = "datasets/missing_takes.txt"


def split_cameras(cam_names):
    """Partition camera names into ``(ego_cams, exo_cams)``.

    A name goes to the ego list when it contains ``EGO_CAM_MARKER``; the
    relative order of the input is preserved inside each list.
    """
    ego_cams, exo_cams = [], []
    for name in cam_names:
        (ego_cams if EGO_CAM_MARKER in name else exo_cams).append(name)
    return ego_cams, exo_cams


def objects_shared_by(masks, objs, cam_a, cam_b):
    """Return the objects of ``objs`` that have an entry for both cameras.

    ``masks`` maps object name -> camera name -> per-frame data.
    """
    return [obj for obj in objs if cam_a in masks[obj] and cam_b in masks[obj]]


def select_exo_camera(masks, objs, ego, exo_cams):
    """Pick the exo camera that shares the most objects with ``ego``.

    Returns ``(exo_cam, shared_objs)``. Ties keep the earliest camera in
    ``exo_cams``. ``exo_cams`` must be non-empty.
    """
    best_cam = exo_cams[0]
    best_objs = objects_shared_by(masks, objs, ego, best_cam)
    for cam in exo_cams[1:]:
        candidate = objects_shared_by(masks, objs, ego, cam)
        # Strict comparison so that an earlier camera wins a tie.
        if len(candidate) > len(best_objs):
            best_cam, best_objs = cam, candidate
    return best_cam, best_objs


def resolve_task(task, ego, exo):
    """Map a task name to ``(query_cam, target_cam, query_scale, target_scale)``.

    Raises:
        ValueError: if ``task`` is neither ``'ego2exo'`` nor ``'exo2ego'``.
    """
    if task == 'ego2exo':
        return ego, exo, EGO_DOWNSCALE, EXO_DOWNSCALE
    if task == 'exo2ego':
        return exo, ego, EXO_DOWNSCALE, EGO_DOWNSCALE
    raise ValueError("Task must be either 'ego2exo' or 'exo2ego'.")


def encode_annotations(masks, objs, cam, frame_id, downscale, category_ids):
    """Build downscaled annotations for ``objs`` in one frame of one camera.

    Each object's mask is decoded once. An object is dropped when it has no
    mask for ``frame_id``, when its native mask is empty, or when the mask
    becomes empty after nearest-neighbour downscaling by ``downscale``.

    Returns:
        ``(kept_objs, anns, native_size)`` where ``anns[i]`` describes
        ``kept_objs[i]`` and ``native_size`` is the ``"size"`` entry of the
        first object whose native mask was non-empty (``None`` if there was
        no such object).
    """
    kept_objs = []
    anns = []
    native_size = None
    for obj in objs:
        frame_masks = masks[obj][cam]
        if frame_id not in frame_masks:
            continue
        rle = frame_masks[frame_id]
        mask = decode(rle)
        if mask.sum() == 0:
            continue
        if native_size is None:
            native_size = rle["size"]
        h, w = mask.shape
        # cv2.resize takes the destination size as (width, height).
        mask = cv2.resize(mask, (w // downscale, h // downscale),
                          interpolation=cv2.INTER_NEAREST)
        area = mask.sum().astype(float)
        if area == 0:
            continue
        encoded = encode(np.asfortranarray(mask))
        kept_objs.append(obj)
        anns.append(
            {
                'segmentation': {
                    # encode() returns bytes counts; JSON needs text.
                    'counts': encoded['counts'].decode('ascii'),
                    'size': encoded['size'],
                },
                'area': area,
                'category_id': float(category_ids[obj]),
            }
        )
    return kept_objs, anns, native_size


def build_take_samples(root_path, vid_name, task, next_img_id):
    """Build all samples for one video directory.

    Reads ``<root_path>/<vid_name>/annotation.json``, chooses the first ego
    camera and the exo camera sharing the most annotated objects with it,
    and emits one sample per frame that is annotated for the query camera
    and present as a file in the target camera directory.

    Args:
        root_path: dataset root; image paths in the samples are relative to it.
        vid_name: name of the video directory under ``root_path``.
        task: ``'ego2exo'`` or ``'exo2ego'``.
        next_img_id: value of ``new_img_id`` for the first emitted sample;
            later samples count up from it.

    Returns:
        A list of sample dicts. It is empty when the video has no ego camera,
        no exo camera, or no object annotated in both.
    """
    vid_root_path = os.path.join(root_path, vid_name)
    with open(os.path.join(vid_root_path, "annotation.json"), 'r') as fp:
        masks = json.load(fp)["masks"]

    objs = natsorted(list(masks.keys()))
    # Contiguous 1-based ids in natural-sort order of the object names.
    category_ids = {obj: cont_id for cont_id, obj in enumerate(objs, start=1)}

    cam_dirs = natsorted(
        [entry for entry in os.listdir(vid_root_path) if entry != "annotation.json"]
    )
    ego_cams, exo_cams = split_cameras(cam_dirs)
    if not ego_cams or not exo_cams:
        # Without both views no pair can be formed; skip instead of crashing.
        tqdm.write(f"Skipping {vid_name}: no ego or exo camera directory found")
        return []

    ego = ego_cams[0]
    exo, shared_objs = select_exo_camera(masks, objs, ego, exo_cams)
    if not shared_objs:
        return []

    query_cam, target_cam, query_scale, target_scale = resolve_task(task, ego, exo)
    vid_query_path = os.path.join(vid_root_path, query_cam)
    vid_target_path = os.path.join(vid_root_path, target_cam)
    target_frames = [f.split(".")[0] for f in natsorted(os.listdir(vid_target_path))]

    # The object with the most annotated query frames defines the candidate
    # frame list (first such object on ties).
    ref_obj = max(shared_objs, key=lambda obj: len(masks[obj][query_cam]))
    query_frames = natsorted(list(masks[ref_obj][query_cam].keys()))
    frames = [str(f) for f in natsorted(np.intersect1d(query_frames, target_frames))]

    samples = []
    for frame_id in frames:
        query_objs, query_anns, _ = encode_annotations(
            masks, shared_objs, query_cam, frame_id, query_scale, category_ids
        )
        if not query_anns:
            continue

        # Only objects that survived in the query view are considered targets.
        _, target_anns, native_size = encode_annotations(
            masks, query_objs, target_cam, frame_id, target_scale, category_ids
        )
        if not target_anns:
            continue

        # Keep query annotations only for objects also present in the target.
        target_ids = {ann['category_id'] for ann in target_anns}
        first_frame_anns = [ann for ann in query_anns if ann['category_id'] in target_ids]

        filename = f"{frame_id}.jpg"
        sample_img_relpath = os.path.relpath(
            os.path.join(vid_target_path, filename), root_path
        )
        first_frame_img_relpath = os.path.relpath(
            os.path.join(vid_query_path, filename), root_path
        )
        height, width = native_size
        samples.append(
            {
                'image': sample_img_relpath,
                'image_info': {
                    'file_name': sample_img_relpath,
                    'height': height // target_scale,
                    'width': width // target_scale,
                },
                'anns': target_anns,
                'first_frame_image': first_frame_img_relpath,
                'first_frame_anns': first_frame_anns,
                'new_img_id': next_img_id + len(samples),
                'video_name': vid_name,
            }
        )
    return samples


def main():
    """Build the dataset for ``args.split`` / ``args.task`` and save it as JSON."""
    # Fail fast on an invalid task before touching any file.
    resolve_task(args.task, None, None)

    root_path = args.root_path
    save_path = args.save_path

    with open(args.split_path, "r") as fp:
        data_set = json.load(fp)[args.split]

    with open(MISSING_TAKES_PATH, "r") as fp:
        # A set gives O(1) membership tests inside the video loop.
        missing_takes = {line.strip() for line in fp}

    egoexo_dataset = []
    for vid_name in tqdm(data_set):
        if vid_name in missing_takes:
            continue
        # Sample ids are consecutive, so the next id is the current count.
        egoexo_dataset.extend(
            build_take_samples(root_path, vid_name, args.task, len(egoexo_dataset))
        )

    with open(save_path, 'w') as f:
        json.dump(egoexo_dataset, f)
    print(f'Save at {save_path}. Total sample: {len(egoexo_dataset)}')


if __name__ == '__main__':
    main()
|
|
|