| """
|
| Datasets file. Code adapted from LOST: https://github.com/valeoai/LOST
|
| """
|
| import os
|
| import torch
|
| import json
|
| import torchvision
|
| import numpy as np
|
| import skimage.io
|
|
|
| from PIL import Image
|
| from tqdm import tqdm
|
| from skimage.transform import resize
|
| from torchvision import transforms as pth_transforms
|
|
|
|
|
# Default preprocessing shared by the datasets in this module: convert the
# image to a tensor, then normalise each of the three channels.
# NOTE(review): the constants look like the usual ImageNet channel
# mean / std -- confirm against the backbone that consumes these tensors.
transform = pth_transforms.Compose(
    [
        pth_transforms.ToTensor(),
        pth_transforms.Normalize(
            mean=(0.485, 0.456, 0.406),
            std=(0.229, 0.224, 0.225),
        ),
    ]
)
|
|
|
class ImageDataset:
    """Single-image "dataset" built from one image file.

    Attributes set by the constructor:
        image_path: path the image was read from.
        name: last "/"-separated component of ``image_path``.
        img_size: the last two dimensions of the preprocessed tensor in
            reversed order (``[W, H]`` for a ``C x H x W`` tensor).
        dataloader: one-element list ``[[tensor, image_path]]``.
    """

    def __init__(self, image_path, resize=None):
        """
        Read and preprocess the image stored at ``image_path``.

        Args:
            image_path: path of the image file to open.
            resize: when not None, forwarded to ``pth_transforms.Resize`` and
                applied between the tensor conversion and the normalisation.
        """
        self.image_path = image_path
        self.name = image_path.split("/")[-1]

        # Read the image. ``convert`` forces the pixel data to be decoded
        # while the file is still open.
        with open(image_path, "rb") as f:
            img = Image.open(f)
            img = img.convert("RGB")

        # Build the preprocessing pipeline once instead of duplicating the
        # tensor conversion / normalisation in both branches.
        if resize is None:
            preprocess = transform
        else:
            preprocess = pth_transforms.Compose(
                [
                    pth_transforms.ToTensor(),
                    pth_transforms.Resize(resize),
                    pth_transforms.Normalize(
                        (0.485, 0.456, 0.406), (0.229, 0.224, 0.225)
                    ),
                ]
            )
        img = preprocess(img)

        # Last two tensor dims, reversed: this is the order expected by
        # ``PIL.Image.resize`` in ``load_image``.
        self.img_size = list(img.shape[-1:-3:-1])
        self.dataloader = [[img, image_path]]

    def get_image_name(self, *args, **kwargs):
        """Return the file name up to its first ".", ignoring all arguments."""
        return self.image_path.split("/")[-1].split(".")[0]

    def load_image(self, *args, **kwargs):
        """
        Reload the image as an RGB PIL image resized to ``self.img_size``.

        All arguments are ignored. The file is opened in a context manager so
        the underlying file handle is released as soon as the pixels have been
        copied, instead of waiting for garbage collection.
        """
        with Image.open(self.image_path) as img:
            return img.convert("RGB").resize(self.img_size)
|
|
|
class Dataset:
    """Wrapper around the torchvision VOC / COCO detection datasets.

    Supported ``dataset_name`` values are "VOC07", "VOC12" and "COCO20k".
    All data is looked up under the relative ``datasets/`` directory.
    """

    def __init__(self, dataset_name, dataset_set, remove_hards):
        """
        Build the dataloader

        Args:
            dataset_name: one of "VOC07", "VOC12" or "COCO20k".
            dataset_set: image set, forwarded to torchvision for VOC and used
                to build the COCO image directory name.
            remove_hards: when truthy, the list of "hard" images is computed
                (or read from its cache file) and stored in ``self.hards``.

        Raises:
            ValueError: unknown dataset name, or missing dataset directory.
        """
        self.dataset_name = dataset_name
        self.set = dataset_set

        if dataset_name == "VOC07":
            self.root_path = "datasets/VOC2007"
            self.year = "2007"
        elif dataset_name == "VOC12":
            self.root_path = "datasets/VOC2012"
            self.year = "2012"
        elif dataset_name == "COCO20k":
            self.year = "2014"
            self.root_path = f"datasets/COCO/images/{dataset_set}{self.year}"
            self.sel20k = 'datasets/coco_20k_filenames.txt'
            # The 20k annotation file is derived from the full train2014
            # annotations the first time it is needed.
            self.all_annfile = "datasets/COCO/annotations/instances_train2014.json"
            self.annfile = "datasets/instances_train2014_sel20k.json"
            self.sel_20k = get_sel_20k(self.sel20k)
            if not os.path.exists(self.annfile):
                select_coco_20k(self.sel20k, self.all_annfile)
            self.train2014 = get_train2014(self.annfile)
        else:
            raise ValueError("Unknown dataset.")

        if not os.path.exists(self.root_path):
            raise ValueError("Please follow the README to setup the datasets.")

        self.name = f"{self.dataset_name}_{self.set}"

        # Build the dataloader. Only the three names accepted above can reach
        # this point, so anything that is not VOC is COCO20k.
        if "VOC" in dataset_name:
            self.dataloader = torchvision.datasets.VOCDetection(
                self.root_path,
                year=self.year,
                image_set=self.set,
                transform=transform,
                download=False,
            )
        else:
            self.dataloader = torchvision.datasets.CocoDetection(
                self.root_path, annFile=self.annfile, transform=transform
            )

        # Set hards images that are not included
        self.remove_hards = remove_hards
        self.hards = []
        if remove_hards:
            self.name += "-nohards"
            self.hards = self.get_hards()
            print(f"Nb images discarded {len(self.hards)}")

    def load_image(self, im_name):
        """
        Load the image corresponding to the im_name

        Raises:
            ValueError: if the dataset is neither a VOC nor a COCO one.
        """
        if "VOC" in self.dataset_name:
            image = skimage.io.imread(f"./datasets/VOC{self.year}/VOCdevkit/VOC{self.year}/JPEGImages/{im_name}")
        elif "COCO" in self.dataset_name:
            image = skimage.io.imread(f"./datasets/COCO/images/train2014/{im_name}")
        else:
            raise ValueError("Unknown dataset.")
        return image

    def get_image_name(self, inp):
        """
        Return the image name

        Args:
            inp: annotation of one sample. For VOC the name is read from
                ``inp["annotation"]["filename"]``; for COCO the ``image_id``
                of the first annotation in ``inp`` is used.

        Raises:
            ValueError: if the dataset is neither a VOC nor a COCO one.
        """
        if "VOC" in self.dataset_name:
            im_name = inp["annotation"]["filename"]
        elif "COCO" in self.dataset_name:
            im_name = str(inp[0]["image_id"])
            # Assumes ``self.train2014['images']`` is stored in the same
            # order as the selection list, which is how ``select_coco_20k``
            # writes it when every selected id matches exactly one image.
            im_name = self.train2014['images'][self.sel_20k.index(im_name)]['file_name']
        else:
            raise ValueError("Unknown dataset.")

        return im_name

    def extract_gt(self, targets, im_name):
        """
        Return ``(boxes, classes)`` for one sample.

        ``im_name`` is accepted for interface compatibility but is not used.
        """
        if "VOC" in self.dataset_name:
            return extract_gt_VOC(targets, remove_hards=self.remove_hards)
        elif "COCO" in self.dataset_name:
            return extract_gt_COCO(targets, remove_iscrowd=True)
        else:
            raise ValueError("Unknown dataset")

    def extract_classes(self):
        """
        Return the list of classes present in the dataset.

        The list is cached in a text file in the current working directory
        (one class per line) and read back from it when the file exists.

        NOTE(review): entries read from the cache are strings, whereas a fresh
        COCO extraction yields the raw ``category_id`` values -- callers
        should not rely on the element type for COCO.

        Raises:
            ValueError: if the dataset is neither a VOC nor a COCO one.
        """
        if "VOC" in self.dataset_name:
            cls_path = f"classes_{self.set}_{self.year}.txt"
        elif "COCO" in self.dataset_name:
            # The attribute is ``dataset_name``; the former ``self.dataset``
            # does not exist and raised AttributeError.
            cls_path = f"classes_{self.dataset_name}_{self.set}_{self.year}.txt"
        else:
            raise ValueError("Unknown dataset")

        # Load if exists
        if os.path.exists(cls_path):
            with open(cls_path, "r") as f:
                return [line.strip() for line in f]

        print("Extract all classes from the dataset")
        if "VOC" in self.dataset_name:
            all_classes = self.extract_classes_VOC()
        else:
            all_classes = self.extract_classes_COCO()

        with open(cls_path, "w") as f:
            for s in all_classes:
                f.write(str(s) + "\n")

        return all_classes

    def extract_classes_VOC(self):
        """Collect the distinct object names of the dataset, in first-seen order."""
        all_classes = []
        for inp in tqdm(self.dataloader):
            for obj in inp[1]["annotation"]["object"]:
                if obj["name"] not in all_classes:
                    all_classes.append(obj["name"])

        return all_classes

    def extract_classes_COCO(self):
        """Collect the distinct ``category_id`` values, in first-seen order."""
        all_classes = []
        for inp in tqdm(self.dataloader):
            for obj in inp[1]:
                if obj["category_id"] not in all_classes:
                    all_classes.append(obj["category_id"])

        return all_classes

    def get_hards(self):
        """
        Return the indices of the images to discard.

        The indices are read from the cache file ``datasets/hard_*.txt`` when
        it exists. Otherwise they are computed with ``discard_hard_voc`` and
        written to that file. Hard images are only defined for VOC datasets:
        for any other dataset without a cache file an empty list is returned
        (previously this path failed on an unbound local).
        """
        hard_path = "datasets/hard_%s_%s_%s.txt" % (self.dataset_name, self.set, self.year)
        if os.path.exists(hard_path):
            with open(hard_path, "r") as f:
                return [int(line.strip()) for line in f]

        if "VOC" not in self.dataset_name:
            return []

        print("Discover hard images that should be discarded")
        hards = discard_hard_voc(self.dataloader)

        with open(hard_path, "w") as f:
            for s in hards:
                f.write(str(s) + "\n")

        return hards
|
|
|
|
|
def discard_hard_voc(dataloader):
    """
    Return the indices of the samples whose objects are all "hard".

    An object counts as hard when its "truncated" or "difficult" field equals
    the string "1". A sample is reported when the number of hard objects
    equals the number of objects, which also holds for a sample with no
    object at all.
    """
    discarded = []
    for index, sample in enumerate(tqdm(dataloader)):
        annotated = sample[1]["annotation"]["object"]

        # Every object is inspected (no short-circuit) before deciding.
        hard_count = sum(
            1 if (entry["truncated"] == "1" or entry["difficult"] == "1") else 0
            for entry in annotated
        )

        if hard_count == len(annotated):
            discarded.append(index)
    return discarded
|
|
|
|
|
def extract_gt_COCO(targets, remove_iscrowd=True):
    """
    Convert the annotations of one COCO sample to corner-format boxes.

    Args:
        targets: sequence of annotation dicts with the keys "iscrowd",
            "category_id" and "bbox" (``[x, y, width, height]``).
        remove_iscrowd: skip the annotations whose "iscrowd" equals 1.

    Returns:
        A tuple ``(boxes, classes)``: ``boxes`` is a numpy array built from
        the ``[x1, y1, x2, y2]`` lists (rounded to integers), ``classes`` the
        list of matching category ids.
    """
    boxes = []
    labels = []
    for annotation in targets:
        # Crowd regions are optionally ignored.
        if remove_iscrowd and annotation["iscrowd"] == 1:
            continue

        labels.append(annotation["category_id"])

        rect = annotation["bbox"]
        left, top = rect[0], rect[1]
        right, bottom = left + rect[2], top + rect[3]
        boxes.append([int(round(value)) for value in (left, top, right, bottom)])

    return np.asarray(boxes), labels
|
|
|
|
|
def extract_gt_VOC(targets, remove_hards=False):
    """
    Convert the annotation of one VOC sample to boxes and class names.

    Args:
        targets: dict whose ``["annotation"]["object"]`` entry is the list of
            object dicts ("name", "truncated", "difficult", "bndbox").
        remove_hards: skip the objects whose "truncated" or "difficult" field
            equals the string "1".

    Returns:
        A tuple ``(boxes, classes)``: ``boxes`` is a numpy array built from
        the ``[xmin - 1, ymin - 1, xmax, ymax]`` lists, ``classes`` the list
        of matching object names.
    """
    boxes = []
    labels = []
    for entry in targets["annotation"]["object"]:
        if remove_hards:
            if entry["truncated"] == "1" or entry["difficult"] == "1":
                continue

        labels.append(entry["name"])

        corners = entry["bndbox"]
        xmin = int(corners["xmin"])
        ymin = int(corners["ymin"])
        xmax = int(corners["xmax"])
        ymax = int(corners["ymax"])

        # Only the top-left corner is shifted by one.
        # NOTE(review): presumably converts 1-based inclusive pixel indices
        # to 0-based coordinates -- confirm against the VOC annotation spec.
        boxes.append([xmin - 1, ymin - 1, xmax, ymax])

    return np.asarray(boxes), labels
|
|
|
|
|
def bbox_iou(box1, box2, x1y1x2y2=True, GIoU=False, DIoU=False, CIoU=False, eps=1e-7):
    """
    Return the IoU (or a variant of it) between ``box1`` and ``box2``.

    Args:
        box1: tensor indexable as ``box1[0..3]`` (one box of 4 values).
        box2: tensor that is transposed before being indexed as
            ``box2[0..3]``, i.e. laid out as ``n x 4``.
        x1y1x2y2: when True the 4 values are corners ``(x1, y1, x2, y2)``,
            otherwise they are ``(center_x, center_y, width, height)``.
        GIoU, DIoU, CIoU: select the returned variant. DIoU takes precedence
            over CIoU, which takes precedence over GIoU.
        eps: small constant guarding the divisions.

    Returns:
        The plain IoU when no variant flag is set, otherwise the requested
        variant.
    """
    # ``math`` is not imported at module level, so the CIoU branch used to
    # raise NameError; import it locally to keep this block self-contained.
    import math

    box2 = box2.T

    # Get the coordinates of bounding boxes
    if x1y1x2y2:
        b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3]
        b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3]
    else:
        # Convert (center, size) to corners.
        b1_x1, b1_x2 = box1[0] - box1[2] / 2, box1[0] + box1[2] / 2
        b1_y1, b1_y2 = box1[1] - box1[3] / 2, box1[1] + box1[3] / 2
        b2_x1, b2_x2 = box2[0] - box2[2] / 2, box2[0] + box2[2] / 2
        b2_y1, b2_y2 = box2[1] - box2[3] / 2, box2[1] + box2[3] / 2

    # Intersection area (clamped so disjoint boxes give 0)
    inter = (torch.min(b1_x2, b2_x2) - torch.max(b1_x1, b2_x1)).clamp(0) * (
        torch.min(b1_y2, b2_y2) - torch.max(b1_y1, b2_y1)
    ).clamp(0)

    # Union area; eps on the heights also guards the w / h ratios used below
    w1, h1 = b1_x2 - b1_x1, b1_y2 - b1_y1 + eps
    w2, h2 = b2_x2 - b2_x1, b2_y2 - b2_y1 + eps
    union = w1 * h1 + w2 * h2 - inter + eps

    iou = inter / union
    if not (GIoU or DIoU or CIoU):
        return iou

    # Width / height of the smallest box enclosing both boxes
    cw = torch.max(b1_x2, b2_x2) - torch.min(b1_x1, b2_x1)
    ch = torch.max(b1_y2, b2_y2) - torch.min(b1_y1, b2_y1)

    if CIoU or DIoU:
        # Squared diagonal of the enclosing box
        c2 = cw ** 2 + ch ** 2 + eps
        # Squared distance between the two box centers
        rho2 = (
            (b2_x1 + b2_x2 - b1_x1 - b1_x2) ** 2
            + (b2_y1 + b2_y2 - b1_y1 - b1_y2) ** 2
        ) / 4
        if DIoU:
            return iou - rho2 / c2

        # CIoU: additionally penalise the aspect-ratio mismatch
        v = (4 / math.pi ** 2) * torch.pow(
            torch.atan(w2 / h2) - torch.atan(w1 / h1), 2
        )
        # alpha is treated as a constant: no gradient flows through it
        with torch.no_grad():
            alpha = v / (v - iou + (1 + eps))
        return iou - (rho2 / c2 + v * alpha)

    # GIoU: penalise the part of the enclosing box not covered by the union
    c_area = cw * ch + eps
    return iou - (c_area - union) / c_area
|
|
|
def get_sel_20k(sel_file):
    """
    Read the selection file and return the image ids it lists, as strings.

    Each line is expected to end with ``_<number>.<extension>``: the part
    after the last "_" and before the following "." is parsed as an integer
    (dropping leading zeros) and converted back to a string.
    """
    with open(sel_file, "r") as handle:
        file_names = [raw.replace("\n", "") for raw in handle.readlines()]

    image_ids = []
    for file_name in file_names:
        numbered_part = file_name.rsplit("_", 1)[-1]
        digits = numbered_part.partition(".")[0]
        image_ids.append(str(int(digits)))
    return image_ids
|
|
|
def get_train2014(all_annotations_file):
    """Parse the JSON annotation file at the given path and return its content."""
    with open(all_annotations_file, "r") as handle:
        return json.load(handle)
|
|
|
|
|
|
|
def select_coco_20k(sel_file, all_annotations_file):
    """
    Build the annotation file restricted to the selected COCO images.

    Args:
        sel_file: text file listing one selected image file name per line;
            the image id is the integer found after the last "_" and before
            the following ".".
        all_annotations_file: JSON file holding the "images", "annotations"
            and "categories" entries of the full set.

    The result is written to ``datasets/instances_train2014_sel20k.json``.
    Images and annotations are stored in the order of the selection file, and
    all categories are kept.
    """
    print('Building COCO 20k dataset.')

    # load all annotations
    with open(all_annotations_file, "r") as f:
        train2014 = json.load(f)

    # load selected images
    with open(sel_file, "r") as f:
        sel_20k = [line.replace("\n", "") for line in f]
    im20k = [str(int(s.split("_")[-1].split(".")[0])) for s in sel_20k]

    # Group the entries by image id once, instead of rescanning every
    # annotation and image for each selected id (which was quadratic).
    # Lists keep the original relative order of the entries.
    annos_by_image = {}
    for anno in train2014["annotations"]:
        annos_by_image.setdefault(anno["image_id"], []).append(anno)
    images_by_id = {}
    for image in train2014["images"]:
        images_by_id.setdefault(image["id"], []).append(image)

    new_anno = []
    new_images = []
    for i in tqdm(im20k):
        image_id = int(i)
        new_anno.extend(annos_by_image.get(image_id, []))
        new_images.extend(images_by_id.get(image_id, []))

    train2014_20k = {
        "images": new_images,
        "annotations": new_anno,
        "categories": train2014["categories"],
    }

    with open("datasets/instances_train2014_sel20k.json", "w") as outfile:
        json.dump(train2014_20k, outfile)

    # An empty selection file must not crash after the output was written.
    if im20k:
        print(f'im20k :{im20k[0]}')
    print('Done.')
|
|
|