| import json |
| import os |
| from PIL import Image |
| import numpy as np |
| from pycocotools.mask import encode, decode, frPyObjects |
| from tqdm import tqdm |
| import copy |
| from natsort import natsorted |
| import cv2 |
|
|
|
|
| import argparse |
|
|
# Command-line interface: the single required flag selects which view
# supplies the query mask and which view is the target.
parser = argparse.ArgumentParser()
parser.add_argument(
    '--setting',
    choices=['ego-exo', 'exo-ego'],
    required=True,
    help="ego-exo or exo-ego",
)

# Parsed at module scope; the script body below reads ``args.setting``.
args = parser.parse_args()
|
|
# Down-scaling factors applied to the query masks before they are re-encoded.
# NOTE(review): presumably these match the resolution of the frames stored
# under the data root (ego at 1/2, exo at 1/4) — confirm against the
# frame-extraction step.
_EGO_MASK_DOWNSCALE = 2
_EXO_MASK_DOWNSCALE = 4


def _frame_path(vid_root_path, cam, idx):
    """Return the path of frame ``idx`` of camera ``cam`` inside a take folder."""
    return os.path.join(f'{vid_root_path}/{cam}/', f'{idx}.jpg')


def _rle_to_json(mask):
    """Encode a 2-D uint8 mask as a COCO RLE dict that ``json`` can dump.

    ``encode`` returns ``counts`` as bytes, so it is decoded to ASCII text.
    """
    rle = encode(np.asfortranarray(mask))
    return {
        'counts': rle['counts'].decode('ascii'),
        'size': rle["size"],
    }


def _query_annotation(rle, category_id, downscale):
    """Build the annotation of the query (first-frame) mask.

    The RLE is decoded, shrunk by ``downscale`` with nearest-neighbour
    interpolation (keeps the mask binary) and re-encoded; ``area`` is the
    pixel count of the shrunk mask.
    """
    mask = decode(rle)
    h, w = mask.shape
    mask = cv2.resize(
        mask, (w // downscale, h // downscale), interpolation=cv2.INTER_NEAREST
    )
    return {
        'segmentation': _rle_to_json(mask),
        'area': mask.sum().astype(float),
        'category_id': category_id,
    }


def _empty_annotation(height, width, category_id):
    """Build an all-zero placeholder annotation of the given size."""
    blank = np.zeros((height, width), dtype=np.uint8)
    return {
        'segmentation': _rle_to_json(blank),
        'area': float(0),
        'category_id': category_id,
    }


def _image_size(img_path, cache):
    """Return ``(height, width)`` of the image at ``img_path``.

    Results are memoised in ``cache`` so a frame that is the target of
    several objects is decoded only once.

    Raises:
        FileNotFoundError: if OpenCV cannot read the file (``cv2.imread``
            signals this by returning ``None`` instead of raising).
    """
    if img_path not in cache:
        img = cv2.imread(img_path)
        if img is None:
            raise FileNotFoundError(f"Could not read image: {img_path}")
        cache[img_path] = img.shape[:2]
    return cache[img_path]


if __name__ == '__main__':
    root_path = '/scratch/yuqian_fu/data_segswap_test'
    save_path = "/home/yuqian_fu/Projects/PSALM/exoego_test_framelevel_new.json"
    split_path = "/home/yuqian_fu/Projects/ego-exo4d-relation/correspondence/split.json"

    with open(split_path, "r") as fp:
        takes = json.load(fp)["test"]

    # The setting is constant for the whole run, so resolve it once instead
    # of branching inside the innermost loop.
    if args.setting == 'ego-exo':
        query_is_ego, query_downscale = True, _EGO_MASK_DOWNSCALE
    elif args.setting == 'exo-ego':
        query_is_ego, query_downscale = False, _EXO_MASK_DOWNSCALE
    else:
        raise Exception(f"Setting {args.setting} not recognized.")

    new_img_id = 0
    egoexo_dataset = []

    for take in tqdm(takes):
        vid_root_path = os.path.join(root_path, take)
        anno_path = os.path.join(vid_root_path, "annotation.json")
        with open(anno_path, 'r') as fp:
            masks = json.load(fp)["masks"]

        # Map object names to contiguous 1-based ids in natural sort order.
        coco_id_to_cont_id = {
            coco_id: cont_id + 1
            for cont_id, coco_id in enumerate(natsorted(masks.keys()))
        }

        # Target frames are shared between objects of a take; cache their
        # sizes so each JPEG is decoded at most once per take.
        size_cache = {}

        for obj, cam_masks in masks.items():
            category_id = float(coco_id_to_cont_id[obj])

            # Cameras whose name contains 'aria' are treated as ego views,
            # all others as exo views.
            exo_cams = [cam for cam in cam_masks if 'aria' not in cam]
            ego_cams = [cam for cam in cam_masks if 'aria' in cam]

            # The query annotation depends only on (query camera, frame),
            # not on the paired target camera, so build it once per object.
            query_cache = {}

            for ego in ego_cams:
                for exo in exo_cams:
                    if query_is_ego:
                        query_cam, target_cam = ego, exo
                    else:
                        query_cam, target_cam = exo, ego

                    for idx, rle in cam_masks[query_cam].items():
                        key = (query_cam, idx)
                        if key not in query_cache:
                            query_img_path = _frame_path(vid_root_path, query_cam, idx)
                            query_cache[key] = (
                                os.path.relpath(query_img_path, root_path),
                                [_query_annotation(rle, category_id, query_downscale)],
                            )
                        first_frame_img_relpath, first_frame_anns = query_cache[key]

                        sample_img_path = _frame_path(vid_root_path, target_cam, idx)
                        sample_img_relpath = os.path.relpath(sample_img_path, root_path)
                        h, w = _image_size(sample_img_path, size_cache)

                        # The target annotation is an all-zero mask with the
                        # target image's size.
                        # NOTE(review): presumably a placeholder because
                        # test-split targets are predicted — confirm.
                        anns = [_empty_annotation(h, w, category_id)]

                        image_info = {
                            'file_name': sample_img_relpath,
                            'height': h,
                            'width': w,
                        }
                        sample = {
                            'image': sample_img_relpath,
                            'image_info': image_info,
                            'anns': anns,
                            'first_frame_image': first_frame_img_relpath,
                            'first_frame_anns': first_frame_anns,
                            'new_img_id': new_img_id,
                            'video_name': take,
                        }

                        egoexo_dataset.append(sample)
                        new_img_id += 1

    with open(save_path, 'w') as f:
        json.dump(egoexo_dataset, f)
    print(f'Save at {save_path}. Total sample: {len(egoexo_dataset)}')