ObjectRelator-Original / datasets /build_egoexo_competition_align2segswap.py
YuqianFu's picture
Upload folder using huggingface_hub
625a17f verified
import json
import os
from PIL import Image
import numpy as np
from pycocotools.mask import encode, decode, frPyObjects
from tqdm import tqdm
import copy
from natsort import natsorted
import cv2
import argparse
# Defaults reproduce the locations this script was originally hard-coded to use.
DEFAULT_ROOT_PATH = '/scratch/yuqian_fu/data_segswap_test'
DEFAULT_SAVE_PATH = "/home/yuqian_fu/Projects/PSALM/exoego_test_framelevel_new.json"
DEFAULT_SPLIT_PATH = "/home/yuqian_fu/Projects/ego-exo4d-relation/correspondence/split.json"

# Integer factor by which the query-view mask is shrunk before re-encoding:
# ego (aria) query masks are halved, exo query masks are quartered.
# NOTE(review): presumably this matches the resolution of the frames stored
# under the data root -- confirm against the frame-extraction step.
QUERY_MASK_DOWNSCALE = {'ego-exo': 2, 'exo-ego': 4}


def build_parser():
    """Return the command-line parser for this script.

    ``--setting`` is required; the three path options are optional and
    default to the values the script previously hard-coded.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--setting', required=True, choices=['ego-exo', 'exo-ego'], help="ego-exo or exo-ego")
    parser.add_argument('--root_path', default=DEFAULT_ROOT_PATH,
                        help="directory holding one sub-directory per take")
    parser.add_argument('--split_path', default=DEFAULT_SPLIT_PATH,
                        help="split json; its 'test' entry lists the takes to process")
    parser.add_argument('--save_path', default=DEFAULT_SAVE_PATH,
                        help="where the resulting json is written")
    return parser


def split_cameras(cams):
    """Partition camera names into ``(ego_cams, exo_cams)``.

    A camera is treated as ego when its name contains ``'aria'``; every other
    camera is exo. Input order is preserved within each list.
    """
    ego_cams = [cam for cam in cams if 'aria' in cam]
    exo_cams = [cam for cam in cams if 'aria' not in cam]
    return ego_cams, exo_cams


def camera_pairs(ego_cams, exo_cams, setting):
    """Return ``(query_cam, target_cam)`` pairs for *setting*.

    Pairs are ordered ego-major, exo-minor (the nesting of the original
    loops). For ``'ego-exo'`` the ego camera is the query; for ``'exo-ego'``
    the exo camera is.

    Raises:
        ValueError: if *setting* is neither ``'ego-exo'`` nor ``'exo-ego'``.
    """
    if setting == 'ego-exo':
        return [(ego, exo) for ego in ego_cams for exo in exo_cams]
    if setting == 'exo-ego':
        return [(exo, ego) for ego in ego_cams for exo in exo_cams]
    raise ValueError(f"Setting {setting} not recognized.")


def rle_to_json(rle):
    """Return a JSON-serialisable copy of an RLE dict whose counts are bytes."""
    return {
        'counts': rle['counts'].decode('ascii'),
        'size': rle["size"],
    }


def make_annotation(mask, area, category_id):
    """Encode *mask* as RLE and wrap it with its area and category id."""
    return {
        'segmentation': rle_to_json(encode(np.asfortranarray(mask))),
        'area': area,
        'category_id': category_id,
    }


def image_size(img_path, size_cache):
    """Return ``(height, width)`` of the image at *img_path*.

    The same target frame is requested once per annotated object, so sizes
    are memoised in *size_cache* (a dict keyed by path) to avoid decoding the
    image repeatedly.

    Raises:
        FileNotFoundError: if OpenCV cannot read the image (``cv2.imread``
            signals failure by returning ``None`` rather than raising).
    """
    if img_path not in size_cache:
        img = cv2.imread(img_path)
        if img is None:
            raise FileNotFoundError(f"Could not read image: {img_path}")
        size_cache[img_path] = img.shape[:2]
    return size_cache[img_path]


def build_dataset(takes, root_path, setting):
    """Build the frame-level list of query/target samples.

    For every take, object, (ego, exo) camera pair and annotated query frame,
    one sample is produced. It holds the downscaled query mask under
    ``first_frame_anns`` and an all-zero placeholder mask, sized like the
    target image, under ``anns``.

    Args:
        takes: iterable of take names (sub-directories of *root_path*).
        root_path: data root; image paths in the samples are relative to it.
        setting: ``'ego-exo'`` or ``'exo-ego'``.

    Returns:
        A list of JSON-serialisable sample dicts; ``new_img_id`` equals the
        sample's position in the list.

    Raises:
        ValueError: if *setting* is not recognised.
        FileNotFoundError: if a target image cannot be read.
    """
    if setting not in QUERY_MASK_DOWNSCALE:
        raise ValueError(f"Setting {setting} not recognized.")
    scale = QUERY_MASK_DOWNSCALE[setting]

    # Collected samples.
    dataset = []
    size_cache = {}
    for take in tqdm(takes):
        # Load the annotations of this take.
        vid_root_path = os.path.join(root_path, take)
        with open(os.path.join(vid_root_path, "annotation.json"), 'r') as fp:
            annotations = json.load(fp)
        masks = annotations["masks"]
        # Map object names to contiguous, 1-based ids in natural sort order.
        coco_id_to_cont_id = {coco_id: cont_id + 1 for cont_id, coco_id in enumerate(natsorted(masks))}

        for obj, obj_masks in masks.items():
            category_id = float(coco_id_to_cont_id[obj])
            ego_cams, exo_cams = split_cameras(obj_masks)
            for query_cam, target_cam in camera_pairs(ego_cams, exo_cams, setting):
                for idx, rle in obj_masks[query_cam].items():
                    # Query view: real mask, downscaled by the per-setting factor.
                    query_img_path = os.path.join(vid_root_path, query_cam, f'{idx}.jpg')
                    query_mask = decode(rle)
                    h, w = query_mask.shape
                    query_mask = cv2.resize(query_mask, (w // scale, h // scale), interpolation=cv2.INTER_NEAREST)
                    first_frame_anns = [
                        make_annotation(query_mask, query_mask.sum().astype(float), category_id),
                    ]

                    # Target view: only the image size is needed; the mask is
                    # an all-zero placeholder.
                    target_img_path = os.path.join(vid_root_path, target_cam, f'{idx}.jpg')
                    target_img_relpath = os.path.relpath(target_img_path, root_path)
                    h, w = image_size(target_img_path, size_cache)
                    empty_mask = np.zeros((h, w), dtype=np.uint8)
                    anns = [make_annotation(empty_mask, float(0), category_id)]

                    # Gather everything describing this query/target pair.
                    dataset.append({
                        'image': target_img_relpath,
                        'image_info': {
                            'file_name': target_img_relpath,
                            'height': h,
                            'width': w,
                        },
                        'anns': anns,
                        'first_frame_image': os.path.relpath(query_img_path, root_path),
                        'first_frame_anns': first_frame_anns,
                        'new_img_id': len(dataset),
                        'video_name': take,
                    })
    return dataset


def main(argv=None):
    """Parse arguments, build the dataset for the test takes and save it.

    Args:
        argv: argument list to parse; ``None`` means ``sys.argv[1:]``.
    """
    args = build_parser().parse_args(argv)
    with open(args.split_path, "r") as fp:
        takes = json.load(fp)["test"]
    egoexo_dataset = build_dataset(takes, args.root_path, args.setting)
    with open(args.save_path, 'w') as f:
        json.dump(egoexo_dataset, f)
    print(f'Save at {args.save_path}. Total sample: {len(egoexo_dataset)}')


if __name__ == '__main__':
    main()