import argparse
import copy
import json
import os

import numpy as np
from PIL import Image
from pycocotools.mask import encode, decode, frPyObjects
from tqdm import tqdm


# Defaults reproduce the values that were previously hard-coded in the script.
DEFAULT_ROOT_PATH = '/work/yuqian_fu/Data/datasets/DAVIS'
SPLIT = 'trainval'
DEFAULT_FRAME_GAP = 20


def pair_frames(num_frames: int, gap: int) -> list:
    """Return the ``(reference_idx, target_idx)`` index pairs for one video.

    Every frame that still has a frame ``gap`` positions after it acts as a
    reference for that later frame. A video with ``gap`` frames or fewer
    yields no pairs.

    Raises:
        ValueError: if ``gap`` is smaller than 1.
    """
    if gap < 1:
        raise ValueError(f"gap must be a positive integer, got {gap}")
    return [(idx, idx + gap) for idx in range(num_frames - gap)]


def load_label_mask(path: str) -> np.ndarray:
    """Read an annotation image into an array and close the file handle."""
    with Image.open(path) as annotation_img:
        # np.array() forces the pixel data to load before the file is closed.
        return np.array(annotation_img)


def encode_instances(mask: np.ndarray) -> list:
    """Encode every non-zero pixel value of ``mask`` as one annotation dict.

    Each distinct non-zero value is treated as one instance (0 is excluded,
    presumably background). Annotations are returned in ascending order of
    the pixel value, with keys ``segmentation`` (RLE ``counts`` decoded to
    an ASCII string plus ``size``), ``area`` and ``category_id`` (both
    stored as floats).
    """
    annotations = []
    instance_values = np.unique(mask)
    for instance_value in instance_values[instance_values != 0]:
        binary_mask = (mask == instance_value).astype(np.uint8)
        rle = encode(np.asfortranarray(binary_mask))
        annotations.append(
            {
                'segmentation': {
                    # 'counts' comes back as bytes, which json cannot dump.
                    'counts': rle['counts'].decode('ascii'),
                    'size': rle['size'],
                },
                'area': float(binary_mask.sum()),
                'category_id': float(instance_value),
            }
        )
    return annotations


def match_reference_annotations(reference_anns: list, target_anns: list):
    """Select the reference annotations that correspond to ``target_anns``.

    Returns the reference annotation for each target annotation (matched on
    ``category_id``, in target order), so the result always has the same
    length as ``target_anns``. Returns ``None`` when the target contains an
    instance that is absent from the reference frame.
    """
    reference_by_id = {ann['category_id']: ann for ann in reference_anns}
    matched = []
    for target_ann in target_anns:
        reference_ann = reference_by_id.get(target_ann['category_id'])
        if reference_ann is None:
            return None
        matched.append(reference_ann)
    return matched


def build_video_samples(root_path: str, image_dir: str, annotation_dir: str,
                        video_name: str, gap: int, first_img_id: int) -> list:
    """Build all reference/target samples for a single video.

    Args:
        root_path: Dataset root; stored file paths are made relative to it.
        image_dir: Directory holding one sub-directory of frames per video.
        annotation_dir: Directory holding one sub-directory of masks per video.
        video_name: Name of the video sub-directory.
        gap: Number of frames between the reference and the target frame.
        first_img_id: ``new_img_id`` assigned to the first emitted sample;
            following samples count up from it (skipped pairs use no id).

    Returns:
        A list of sample dicts. Pairs whose target frame shows an instance
        missing from the reference frame are reported and skipped.

    Raises:
        ValueError: if frames and annotations of the video do not line up.
    """
    vid_path = os.path.join(image_dir, video_name)
    anno_path = os.path.join(annotation_dir, video_name)
    frame_list = sorted(os.listdir(vid_path))
    anno_list = sorted(os.listdir(anno_path))

    if len(frame_list) != len(anno_list):
        raise ValueError(
            f"Mismatch in {video_name}: {len(frame_list)} frames vs {len(anno_list)} annotations"
        )
    # Frames and masks are paired by position, so their names must agree too;
    # otherwise a stray file would silently shift every following pair.
    for frame_name, anno_name in zip(frame_list, anno_list):
        if os.path.splitext(frame_name)[0] != os.path.splitext(anno_name)[0]:
            raise ValueError(
                f"Mismatch in {video_name}: frame {frame_name} paired with annotation {anno_name}"
            )

    # Each frame is used as a target first and as a reference ``gap`` pairs
    # later, so its decoded annotations are kept until that second use.
    frame_cache = {}

    def frame_annotations(idx):
        if idx not in frame_cache:
            mask = load_label_mask(os.path.join(anno_path, anno_list[idx]))
            height, width = mask.shape
            frame_cache[idx] = (height, width, encode_instances(mask))
        return frame_cache[idx]

    samples = []
    for ref_idx, target_idx in pair_frames(len(frame_list), gap):
        _, _, reference_anns = frame_annotations(ref_idx)
        # A frame is never needed again once it has served as reference.
        frame_cache.pop(ref_idx, None)
        # Size is taken from the target mask because image_info describes the
        # target image.
        height, width, target_anns = frame_annotations(target_idx)

        first_frame_img_relpath = os.path.relpath(
            os.path.join(vid_path, frame_list[ref_idx]), root_path
        )
        sample_img_relpath = os.path.relpath(
            os.path.join(vid_path, frame_list[target_idx]), root_path
        )

        first_frame_anns = match_reference_annotations(reference_anns, target_anns)
        if first_frame_anns is None:
            print(f"Skip {sample_img_relpath}: new instance not in reference frame")
            continue

        samples.append(
            {
                'image': sample_img_relpath,
                'image_info': {
                    'file_name': sample_img_relpath,
                    'height': height,
                    'width': width,
                },
                'anns': target_anns,
                'first_frame_image': first_frame_img_relpath,
                'first_frame_anns': first_frame_anns,
                'new_img_id': first_img_id + len(samples),
                'video_name': video_name,
            }
        )
    return samples


def parse_args(argv=None):
    """Parse command-line options; the defaults match the original script."""
    parser = argparse.ArgumentParser(
        description='Build a JSON file of reference/target frame pairs from the DAVIS 2017 val list.'
    )
    parser.add_argument('--root-path', default=DEFAULT_ROOT_PATH,
                        help='DAVIS dataset root directory')
    parser.add_argument('--gap', type=int, default=DEFAULT_FRAME_GAP,
                        help='number of frames between reference and target frame')
    args = parser.parse_args(argv)
    if args.gap < 1:
        parser.error('--gap must be a positive integer')
    return args


def main(argv=None):
    """Read the val video list, build all samples and dump them as JSON."""
    args = parse_args(argv)
    root_path = args.root_path

    annotation_path = os.path.join(root_path, f'2017/{SPLIT}/Annotations/480p')
    image_path = os.path.join(root_path, f'2017/{SPLIT}/JPEGImages/480p')
    set_path = os.path.join(root_path, f'2017/{SPLIT}/ImageSets/2017/val.txt')
    save_path = os.path.join(root_path, f'2017/{SPLIT}_test_psalm_{args.gap}gap.json')

    with open(set_path, 'r', encoding='utf-8') as f:
        # Blank lines would resolve to the parent directory instead of a video.
        val_set = [line.strip() for line in f if line.strip()]

    DAVIS_dataset = []
    for val_name in tqdm(val_set):
        DAVIS_dataset.extend(
            build_video_samples(
                root_path, image_path, annotation_path, val_name,
                args.gap, first_img_id=len(DAVIS_dataset),
            )
        )

    with open(save_path, 'w', encoding='utf-8') as f:
        json.dump(DAVIS_dataset, f)
    print(f'Save at {save_path}. Total sample: {len(DAVIS_dataset)}')


if __name__ == '__main__':
    main()