ObjectRelator-Original / datasets /build_egoexo_competition_align2segswap.py

Upload folder using huggingface_hub

625a17f verified 4 months ago

9.7 kB

	import json
	import os
	from PIL import Image
	import numpy as np
	from pycocotools.mask import encode, decode, frPyObjects
	from tqdm import tqdm
	import copy
	from natsort import natsorted
	import cv2


	import argparse

	parser = argparse.ArgumentParser(
	    description="Build the frame-level ego/exo test-pair JSON from per-take annotation.json files."
	)
	parser.add_argument('--setting', required=True, choices=['ego-exo', 'exo-ego'], help="ego-exo or exo-ego")
	# These three paths used to be hard-coded in the script body; the defaults
	# keep the previous values so existing invocations behave the same.
	parser.add_argument('--root_path', default='/scratch/yuqian_fu/data_segswap_test',
	                    help="directory holding one sub-folder per take")
	parser.add_argument('--save_path', default="/home/yuqian_fu/Projects/PSALM/exoego_test_framelevel_new.json",
	                    help="output JSON file (the default name is the same for both settings)")
	parser.add_argument('--split_path', default="/home/yuqian_fu/Projects/ego-exo4d-relation/correspondence/split.json",
	                    help="split JSON whose \"test\" entry lists the takes to process")

	# Integer divisors applied to the height and width of the decoded source
	# mask before it is re-encoded, depending on which camera type is the source.
	# NOTE(review): presumably the stored frames are smaller than the annotated
	# masks by these factors -- confirm against the data preparation step.
	EGO_MASK_DOWNSCALE = 2
	EXO_MASK_DOWNSCALE = 4


	def _rle_annotation(mask, category_id):
	    """Return a JSON-serialisable annotation dict for a binary uint8 mask.

	    The mask is run-length encoded with pycocotools; the resulting byte
	    string of counts is stored as ASCII text so that json can dump it.
	    """
	    rle = encode(np.asfortranarray(mask))
	    return {
	        'segmentation': {
	            'counts': rle['counts'].decode('ascii'),
	            'size': rle["size"],
	        },
	        'area': float(mask.sum()),
	        'category_id': category_id,
	    }


	def _build_take_samples(root_path, take, setting, first_img_id):
	    """Return the list of sample dicts for one take.

	    For every object, every (ego, exo) camera pair and every annotated frame
	    of the source camera, one sample is produced. It holds the down-scaled
	    source mask as ``first_frame_anns`` and an all-zero mask with the size of
	    the target image as ``anns``.

	    Args:
	        root_path: dataset root; image paths in the samples are relative to it.
	        take: name of the take sub-folder, also stored as ``video_name``.
	        setting: 'ego-exo' (ego camera is the source) or 'exo-ego'.
	        first_img_id: ``new_img_id`` given to the first sample; later samples
	            count up from it.

	    Raises:
	        FileNotFoundError: if a target frame cannot be read by OpenCV.
	    """
	    vid_root_path = os.path.join(root_path, take)
	    with open(os.path.join(vid_root_path, "annotation.json"), 'r') as fp:
	        masks = json.load(fp)["masks"]

	    # Object names in natural sort order are mapped to contiguous 1-based ids.
	    category_ids = {name: float(pos) for pos, name in enumerate(natsorted(masks), start=1)}

	    ego_is_source = setting == 'ego-exo'
	    downscale = EGO_MASK_DOWNSCALE if ego_is_source else EXO_MASK_DOWNSCALE

	    samples = []
	    for obj, cam_masks in masks.items():
	        # Cameras whose name contains 'aria' are treated as ego views, the rest as exo views.
	        ego_cams = [cam for cam in cam_masks if 'aria' in cam]
	        exo_cams = [cam for cam in cam_masks if 'aria' not in cam]

	        for ego in ego_cams:
	            for exo in exo_cams:
	                source_cam, target_cam = (ego, exo) if ego_is_source else (exo, ego)

	                for idx, source_rle in cam_masks[source_cam].items():
	                    # Source view: decode the annotated mask and shrink it.
	                    source_mask = decode(source_rle)
	                    src_h, src_w = source_mask.shape
	                    source_mask = cv2.resize(
	                        source_mask,
	                        (src_w // downscale, src_h // downscale),
	                        interpolation=cv2.INTER_NEAREST,
	                    )
	                    source_img_path = os.path.join(vid_root_path, source_cam, f'{idx}.jpg')

	                    # Target view: only the image size is needed, the mask is left empty.
	                    target_img_path = os.path.join(vid_root_path, target_cam, f'{idx}.jpg')
	                    target_img = cv2.imread(target_img_path)
	                    if target_img is None:
	                        # cv2.imread returns None instead of raising when a file is
	                        # missing or unreadable; fail with the offending path.
	                        raise FileNotFoundError(f"Cannot read target frame: {target_img_path}")
	                    height, width = target_img.shape[:2]
	                    target_img_relpath = os.path.relpath(target_img_path, root_path)

	                    samples.append({
	                        'image': target_img_relpath,
	                        'image_info': {
	                            'file_name': target_img_relpath,
	                            'height': height,
	                            'width': width,
	                        },
	                        'anns': [
	                            _rle_annotation(np.zeros((height, width), dtype=np.uint8), category_ids[obj]),
	                        ],
	                        'first_frame_image': os.path.relpath(source_img_path, root_path),
	                        'first_frame_anns': [_rle_annotation(source_mask, category_ids[obj])],
	                        'new_img_id': first_img_id + len(samples),
	                        'video_name': take,
	                    })
	    return samples


	def main(args):
	    """Build the pair dataset for all takes of the "test" split and save it as JSON.

	    Args:
	        args: namespace with ``setting``, ``root_path``, ``save_path`` and
	            ``split_path`` attributes, as produced by ``parser``.

	    Raises:
	        ValueError: if ``args.setting`` is neither 'ego-exo' nor 'exo-ego'.
	    """
	    # Validate once up front instead of inside the innermost camera-pair loop.
	    if args.setting not in ('ego-exo', 'exo-ego'):
	        raise ValueError(f"Setting {args.setting} not recognized.")

	    with open(args.split_path, "r") as fp:
	        takes = json.load(fp)["test"]

	    egoexo_dataset = []
	    for take in tqdm(takes):
	        # Ids run continuously over all takes, so each take starts at the current total.
	        egoexo_dataset.extend(
	            _build_take_samples(args.root_path, take, args.setting, first_img_id=len(egoexo_dataset))
	        )

	    with open(args.save_path, 'w') as f:
	        json.dump(egoexo_dataset, f)
	    print(f'Save at {args.save_path}. Total sample: {len(egoexo_dataset)}')


	if __name__ == '__main__':
	    # Parse only when run as a script, so importing this file has no side effects.
	    args = parser.parse_args()
	    main(args)