Hang Zhou commited on
Commit
0103f17
·
verified ·
1 Parent(s): 5b662d1

Upload folder using huggingface_hub

Browse files
datasets/__init__.py ADDED
File without changes
datasets/base.py ADDED
@@ -0,0 +1,177 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import cv2
3
+ import albumentations as A
4
+ from torch.utils.data import Dataset
5
+ from .data_utils import *
6
+
7
class BaseDataset(Dataset):
    """Shared base class for the collage-construction datasets.

    Subclasses implement ``_get_sample`` and ``__len__``; this class provides
    the patch extraction, augmentation and collage-assembly helpers they share.
    """

    def __init__(self):
        # Placeholder sample store; subclasses manage their own sample lists.
        self.data = []

    def __getitem__(self, idx):
        # Delegate to the subclass-specific sample builder.
        item = self._get_sample(idx)
        return item

    def _get_sample(self, idx):
        # Implemented for each specific dataset
        pass

    def __len__(self):
        # We adjust the ratio of different dataset by setting the length.
        pass

    def aug_data_mask(self, image, mask):
        """Jointly augment an image and its mask (brightness/contrast jitter plus
        a rotation of up to 30 degrees with constant-border fill)."""
        transform = A.Compose([
            A.RandomBrightnessContrast(p=0.5),
            A.Rotate(limit=30, border_mode=cv2.BORDER_CONSTANT),
        ])

        transformed = transform(image=image.astype(np.uint8), mask=mask)
        transformed_image = transformed["image"]
        transformed_mask = transformed["mask"]
        return transformed_image, transformed_mask

    # def aug_patch(self, patch):
    #     transform = A.Compose([
    #         A.HorizontalFlip(p=0.2),
    #         A.RandomBrightnessContrast(brightness_limit=0.1, contrast_limit=0.1, p=0.3),
    #         A.Rotate(limit=15, border_mode=cv2.BORDER_REPLICATE, p=0.5),
    #     ])

    #     return transform(image=patch)["image"]

    def aug_patch(self, patch):
        """Augment an object patch while keeping its background pure white.

        Pixels with gray value < 250 are treated as foreground; after the
        flip/contrast/rotate transforms the background is re-filled with 255 so
        border-replication artifacts do not leak into the patch.
        """
        gray = cv2.cvtColor(patch, cv2.COLOR_RGB2GRAY)
        mask = (gray < 250).astype(np.float32)[:, :, None]

        transform = A.Compose([
            A.HorizontalFlip(p=0.2),
            A.RandomBrightnessContrast(brightness_limit=0.1, contrast_limit=0.1, p=0.3),
            A.Rotate(limit=15, border_mode=cv2.BORDER_REPLICATE, p=0.5),
        ])

        transformed = transform(image=patch.astype(np.uint8), mask=mask)
        aug_img = transformed["image"]
        aug_mask = transformed["mask"]
        # Keep the augmented foreground, force the background back to white.
        final_img = aug_img * aug_mask + 255 * (1 - aug_mask)

        return final_img.astype(np.uint8)

    def sample_timestep(self, max_step=1000):
        """Sample a timestep in [0, max_step); 70% of draws are biased into the
        lower half of the range. Returns a length-1 numpy array."""
        if np.random.rand() < 0.3:
            step = np.random.randint(0, max_step)
        else:
            step = np.random.randint(0, max_step // 2)
        return np.array([step])

    def get_patch(self, ref_image, ref_mask):
        '''
        extract compact patch and convert to 224x224 RGBA.
        ref_mask: [0, 1]
        '''

        # 1. Get the outline Box of the reference image
        y1, y2, x1, x2 = get_bbox_from_mask(ref_mask) # y1y2x1x2, obtain location from ref patch

        # 2. Background is set to white (255)
        ref_mask_3 = np.stack([ref_mask, ref_mask, ref_mask], -1)
        masked_ref_image = ref_image * ref_mask_3 + np.ones_like(ref_image) * 255 * (1 - ref_mask_3)

        # 3. Crop based on bounding boxes
        masked_ref_image = masked_ref_image[y1:y2, x1:x2, :]
        ref_mask_crop = ref_mask[y1:y2, x1:x2] # obtain a tight mask

        # 4. Dilate the patch and mask
        # Random margin factor in [1.1, 1.4].
        ratio = np.random.randint(11, 15) / 10
        masked_ref_image, ref_mask_crop = expand_image_mask(masked_ref_image, ref_mask_crop, ratio=ratio)

        # augmentation
        # masked_ref_image, ref_mask_crop = self.aug_data_mask(masked_ref_image, ref_mask_crop)

        # 5. Padding & Resize
        masked_ref_image = pad_to_square(masked_ref_image, pad_value=255)
        masked_ref_image = cv2.resize(masked_ref_image.astype(np.uint8), (224, 224))

        # Alpha channel: tight mask scaled to 0/255, squared and resized with
        # nearest-neighbor so it stays binary.
        m_local = ref_mask_crop[:, :, None] * 255
        m_local = pad_to_square(m_local, pad_value=0)
        m_local = cv2.resize(m_local.astype(np.uint8), (224, 224), interpolation=cv2.INTER_NEAREST)

        # Stack RGB + mask into a 4-channel RGBA patch.
        rgba_image = np.dstack((masked_ref_image.astype(np.uint8), m_local))

        return rgba_image

    def _construct_collage(self, image, object_0, object_1, mask_0, mask_1):
        """Assemble one training item from a scene image, two object patches and
        their masks.

        Returns a dict with:
          'jpg'      source image, 512x512x3, values in [-1, 1]
          'ref0'/'ref1' object patches, 224x224x3, values in [0, 1]
          'hint'     masked background + union-mask channel, 512x512x4
          'mask0'/'mask1' per-object region masks, 512x512
          'time_steps', 'object_num'
        """
        background = image.copy()
        image = pad_to_square(image, pad_value = 0, random = False).astype(np.uint8)
        image = cv2.resize(image.astype(np.uint8), (512,512)).astype(np.float32)
        image = image / 127.5 - 1.0
        item = {}
        item.update({'jpg': image.copy()}) # source image (checked) [-1, 1], 512x512x3

        # Patch 0: random white margin, augmentation, square pad, resize, normalize.
        ratio = np.random.randint(11, 15) / 10
        object_0 = expand_image(object_0, ratio=ratio)
        object_0 = self.aug_patch(object_0)
        object_0 = pad_to_square(object_0, pad_value = 255, random = False) # pad to square
        object_0 = cv2.resize(object_0.astype(np.uint8), (224,224) ).astype(np.uint8) # check 1
        object_0 = object_0 / 255
        item.update({'ref0': object_0.copy()}) # patch 0 (checked) [0, 1], 224x224x3

        # Patch 1: same pipeline as patch 0.
        ratio = np.random.randint(11, 15) / 10
        object_1 = expand_image(object_1, ratio=ratio)
        object_1 = self.aug_patch(object_1)
        object_1 = pad_to_square(object_1, pad_value = 255, random = False) # pad to square
        object_1 = cv2.resize(object_1.astype(np.uint8), (224,224) ).astype(np.uint8) # check 1
        object_1 = object_1 / 255
        item.update({'ref1': object_1.copy()}) # patch 1 (checked) [0, 1], 224x224x3

        # Zero canvases (same shape as the scene) for per-object and union masks.
        background_mask0 = background.copy() * 0.0
        background_mask1 = background.copy() * 0.0
        background_mask = background.copy() * 0.0

        # Black out the (randomly expanded) region around object 0 and mark it.
        box_yyxx = get_bbox_from_mask(mask_0)
        box_yyxx = expand_bbox(mask_0, box_yyxx, ratio=[1.1, 1.2]) #1.1 1.3
        y1, y2, x1, x2 = box_yyxx
        background[y1:y2, x1:x2,:] = 0
        background_mask0[y1:y2, x1:x2, :] = 1.0
        background_mask[y1:y2, x1:x2, :] = 1.0

        # Same for object 1.
        box_yyxx = get_bbox_from_mask(mask_1)
        box_yyxx = expand_bbox(mask_1, box_yyxx, ratio=[1.1, 1.2]) #1.1 1.3
        y1, y2, x1, x2 = box_yyxx
        background[y1:y2, x1:x2,:] = 0
        background_mask1[y1:y2, x1:x2, :] = 1.0
        background_mask[y1:y2, x1:x2, :] = 1.0

        # Square-pad and resize; masks are padded with the sentinel value 2 so the
        # padded area can be distinguished from real background after resizing.
        background = pad_to_square(background, pad_value = 0, random = False).astype(np.uint8)
        background = cv2.resize(background.astype(np.uint8), (512,512)).astype(np.float32)
        background_mask0 = pad_to_square(background_mask0, pad_value = 2, random = False).astype(np.uint8)
        background_mask1 = pad_to_square(background_mask1, pad_value = 2, random = False).astype(np.uint8)
        background_mask = pad_to_square(background_mask, pad_value = 2, random = False).astype(np.uint8)
        background_mask0 = cv2.resize(background_mask0.astype(np.uint8), (512,512), interpolation = cv2.INTER_NEAREST).astype(np.float32)
        background_mask1 = cv2.resize(background_mask1.astype(np.uint8), (512,512), interpolation = cv2.INTER_NEAREST).astype(np.float32)
        background_mask = cv2.resize(background_mask.astype(np.uint8), (512,512), interpolation = cv2.INTER_NEAREST).astype(np.float32)

        # Remap the padding sentinel to -1.
        background_mask0[background_mask0 == 2] = -1
        background_mask1[background_mask1 == 2] = -1
        background_mask[background_mask == 2] = -1

        # Single-channel 0/1 region masks for the two objects.
        # NOTE(review): these assignments alias background_mask0/1 and mutate them
        # in place; harmless here since the originals are not used afterwards.
        background_mask0_ = background_mask0
        background_mask0_[background_mask0_ == -1] = 0
        background_mask0_ = background_mask0_[:, :, 0]

        background_mask1_ = background_mask1
        background_mask1_[background_mask1_ == -1] = 0
        background_mask1_ = background_mask1_[:, :, 0]

        # Hint = normalized masked background + the union mask as a 4th channel.
        background = background / 127.5 - 1.0
        background = np.concatenate([background, background_mask[:,:,:1]] , -1)
        item.update({'hint': background.copy()})

        item.update({'mask0': background_mask0_.copy()})
        item.update({'mask1': background_mask1_.copy()})

        sampled_time_steps = self.sample_timestep()
        item['time_steps'] = sampled_time_steps
        item['object_num'] = 2

        return item
datasets/bdd100k.py ADDED
@@ -0,0 +1,173 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import cv2
3
+ import numpy as np
4
+ import os
5
+ from .data_utils import *
6
+ from .base import BaseDataset
7
+ from util.box_ops import compute_iou_matrix, draw_bboxes
8
+ from pathlib import Path
9
+ from pycocotools import mask as mask_utils
10
+ import shutil
11
+
12
+ IS_VERIFY = False
13
+
14
class BDD100KDataset(BaseDataset):
    """BDD100K two-object dataset.

    ``_intersect_2_obj`` scans raw BDD100K instance-segmentation labels and, for
    each image containing two sufficiently large objects with overlapping boxes,
    exports the image, the two object masks and their 224x224 RGBA patches into
    ``construct_dataset_dir``. ``_get_sample``/``__len__`` then serve those
    exported samples as collage training items.
    """

    def __init__(self, construct_dataset_dir, obj_thr=20, area_ratio=0.02):
        # obj_thr: object-count threshold (kept for interface parity; unused here).
        self.obj_thr = obj_thr
        self.construct_dataset_dir = construct_dataset_dir
        os.makedirs(Path(self.construct_dataset_dir), exist_ok=True)
        # area_ratio: minimum mask area as a fraction of the full image area.
        self.area_ratio = area_ratio
        self.sample_list = os.listdir(self.construct_dataset_dir)

    def _export_object(self, image, labels, label_index, image_name, stem):
        """Decode the RLE mask of ``labels[label_index]``, write ``<stem>_mask.png``
        and the RGBA patch ``<stem>.png`` under the sample folder, and return the
        decoded binary mask for reuse (e.g. verification overlays)."""
        out_dir = Path(self.construct_dataset_dir) / image_name[:-4]
        mask = mask_utils.decode(labels[label_index]['rle'])
        cv2.imwrite(str(out_dir / f"{stem}_mask.png"), 255 * mask)
        patch = self.get_patch(cv2.cvtColor(image, cv2.COLOR_BGR2RGB), mask)
        patch = cv2.cvtColor(patch, cv2.COLOR_RGB2BGR)
        cv2.imwrite(str(out_dir / f"{stem}.png"), patch)
        return mask

    @staticmethod
    def _overlay(canvas, mask, channel):
        """Alpha-blend a solid color (on BGR ``channel``) over ``canvas`` where
        ``mask`` is set; used only for verification renderings."""
        mask_color = np.stack([mask * 255] * 3, axis=-1).astype(np.uint8)
        highlight = np.zeros_like(canvas)
        highlight[:, :, channel] = 255
        alpha = 0.5
        return np.where(mask_color == 255, cv2.addWeighted(canvas, 1 - alpha, highlight, alpha, 0), canvas)

    def _intersect_2_obj(self, image_dir, samples, idx):
        """Export image/masks/patches for the pair of large objects with maximal
        bounding-box IoU, or skip the image (with a log line) when fewer than two
        large objects exist or no two boxes overlap."""
        self.image_dir = image_dir
        sample = samples[idx]
        image_name = sample['name']
        image_path = os.path.join(image_dir, image_name)
        image = cv2.imread(image_path)
        h, w = image.shape[0:2]
        image_area = h * w

        labels = sample['labels']

        # Keep only objects whose decoded mask covers enough of the image.
        obj_ids = []
        obj_bbox = []
        for i, obj in enumerate(labels):
            bbox = [obj['box2d']['x1'], obj['box2d']['y1'], obj['box2d']['x2'], obj['box2d']['y2']]
            mask = mask_utils.decode(obj['rle'])
            if np.sum(mask) > image_area * self.area_ratio:
                obj_ids.append(i)
                obj_bbox.append(bbox)

        if len(obj_bbox) < 2:
            print(f"[Info] Skip image index {image_name[:-4]} due to insufficient bbox.")
            return

        os.makedirs(Path(self.construct_dataset_dir) / image_name[:-4], exist_ok=True)
        bbox_xyxy = np.array(obj_bbox)

        if IS_VERIFY:
            image_with_boxes = draw_bboxes(image, bbox_xyxy)
            cv2.imwrite(str(Path(self.construct_dataset_dir) / image_name[:-4] / "bboxes_image.png"), image_with_boxes)

        # Pick the pair of filtered objects whose boxes overlap the most.
        iou_matrix = compute_iou_matrix(bbox_xyxy)
        np.fill_diagonal(iou_matrix, -1)  # Exclude self-comparisons (i.e., each box with itself)

        max_index = np.unravel_index(np.argmax(iou_matrix), iou_matrix.shape)
        index0, index1 = max_index[0], max_index[1]
        max_iou = iou_matrix[index0, index1]

        if max_iou <= 0:
            print(f"[Info] Skip image index {image_name[:-4]} due to no overlapping bboxes.")
            return

        dst = Path(self.construct_dataset_dir) / image_name[:-4] / "image.jpg"
        dst.parent.mkdir(parents=True, exist_ok=True)
        shutil.copy(image_path, dst)

        # BUG FIX: the original scan incremented its ``counter`` only inside the
        # matching branch, so the target object was found only when its index was
        # 0. ``obj_ids`` already stores direct indices into ``labels``, so the
        # counter machinery is unnecessary — index directly.
        mask0 = self._export_object(image, labels, obj_ids[index0], image_name, "object_0")
        mask1 = self._export_object(image, labels, obj_ids[index1], image_name, "object_1")

        if IS_VERIFY:
            image_with_boxes = self._overlay(image_with_boxes, mask0, channel=2)  # red: object 0
            image_with_boxes = self._overlay(image_with_boxes, mask1, channel=0)  # blue: object 1
            cv2.imwrite(str(Path(self.construct_dataset_dir) / image_name[:-4] / "highlighted_image.png"), image_with_boxes)

    def _get_sample(self, idx):
        """Load one exported sample from disk and build the collage training item."""
        sample_path = os.path.join(self.construct_dataset_dir, self.sample_list[idx])
        image = cv2.cvtColor(cv2.imread(os.path.join(sample_path, "image.jpg")), cv2.COLOR_BGR2RGB)
        object_0 = cv2.cvtColor(cv2.imread(os.path.join(sample_path, "object_0.png")), cv2.COLOR_BGR2RGB)
        object_1 = cv2.cvtColor(cv2.imread(os.path.join(sample_path, "object_1.png")), cv2.COLOR_BGR2RGB)
        mask_0 = cv2.imread(os.path.join(sample_path, "object_0_mask.png"), cv2.IMREAD_GRAYSCALE)
        mask_1 = cv2.imread(os.path.join(sample_path, "object_1_mask.png"), cv2.IMREAD_GRAYSCALE)
        collage = self._construct_collage(image, object_0, object_1, mask_0, mask_1)
        return collage

    def __len__(self):
        # One exported sample folder per dataset item.
        return len(os.listdir(self.construct_dataset_dir))
128
+
129
+
130
if __name__ == "__main__":
    '''
    two-object case: train/test: 1012/371
    '''
    import argparse

    parser = argparse.ArgumentParser(description="BDD100KDataset Analysis")
    parser.add_argument("--dataset_dir", type=str, required=True, help="Path to the dataset directory.")
    parser.add_argument("--construct_dataset_dir", type=str, default='bin', help="Path to the debug bin directory.")
    parser.add_argument("--dataset_name", type=str, default='bdd100k', help="Dataset name.")
    parser.add_argument('--is_train', action='store_true', help="Train/Test")
    parser.add_argument('--is_build_data', action='store_true', help="Build data")
    parser.add_argument('--is_multiple', action='store_true', help="Multiple/Two objects")
    parser.add_argument("--area_ratio", type=float, default=0.01171, help="Area ratio for filtering out small objects.")
    parser.add_argument("--obj_thr", type=int, default=20, help="Object threshold for filtering.")
    parser.add_argument("--index", type=int, default=0, help="Index of the sample to test.")
    args = parser.parse_args()

    # Pick the split-specific image folder, RLE label file and scan budget.
    if args.is_train:
        image_dir = Path(args.dataset_dir) / args.dataset_name / "images" / "10k" / "train"
        json_path = Path(args.dataset_dir) / args.dataset_name / "labels" / "ins_seg" / "rles" / "ins_seg_train.json"
        max_num = 7000
    else:
        image_dir = Path(args.dataset_dir) / args.dataset_name / "images" / "10k" / "val"
        json_path = Path(args.dataset_dir) / args.dataset_name / "labels" / "ins_seg" / "rles" / "ins_seg_val.json"
        max_num = 1000

    dataset = BDD100KDataset(
        construct_dataset_dir = args.construct_dataset_dir,
        obj_thr = args.obj_thr,
        area_ratio = args.area_ratio,
    )

    with open(json_path) as data_file:
        label = json.load(data_file)
    samples = label["frames"]

    # Either export the two-object samples, or sanity-check the exported folder
    # by building a collage from every sample.
    if args.is_build_data:
        if not args.is_multiple:
            for index in range(max_num):
                dataset._intersect_2_obj(image_dir, samples, index)
    else:
        for index in range(len(os.listdir(args.construct_dataset_dir))):
            collage = dataset._get_sample(index)
datasets/cityscapes.py ADDED
@@ -0,0 +1,221 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import cv2
2
+ import numpy as np
3
+ import os
4
+ from PIL import Image
5
+ from .data_utils import *
6
+ from .base import BaseDataset
7
+ import PIL.ImageDraw as ImageDraw
8
+ from util.box_ops import mask_to_bbox_xywh, compute_iou_matrix, draw_bboxes
9
+ from util.cityscapes_ops import Annotation, name2label
10
+ from pathlib import Path
11
+ import shutil
12
+
13
+ IS_VERIFY = False
14
+
15
class CityscapesDataset(BaseDataset):
    """Cityscapes two-object dataset.

    ``_intersect_2_obj`` rasterizes polygon annotations for car/truck/bus
    objects and, for every image containing two large objects with overlapping
    bounding boxes, exports the image, the two object masks and their 224x224
    RGBA patches into ``construct_dataset_dir``. ``_get_sample``/``__len__``
    then serve those exported samples as collage training items.
    """

    def __init__(self, construct_dataset_dir, obj_thr=20, area_ratio=0.02):
        # obj_thr: object-count threshold (kept for interface parity; unused here).
        self.obj_thr = obj_thr
        self.construct_dataset_dir = construct_dataset_dir
        os.makedirs(Path(self.construct_dataset_dir), exist_ok=True)
        # area_ratio: minimum mask area as a fraction of the full image area.
        self.area_ratio = area_ratio
        self.sample_list = os.listdir(self.construct_dataset_dir)

    def _intersect_2_obj(self, image_dir, json_dir, idx):
        """Export image/masks/patches for the pair of large car/truck/bus objects
        with maximal bounding-box IoU, or skip the image with a log line."""
        json_list = os.listdir(json_dir)
        # Annotation files are named '<stem>_gtFine_polygons.json' (21-char suffix).
        image_name = json_list[idx][:-21]
        image_path = os.path.join(image_dir, image_name+'_leftImg8bit.png')
        image = cv2.imread(image_path)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        json_path = os.path.join(json_dir, image_name+'_gtFine_polygons.json')
        annotation = Annotation()
        annotation.fromJsonFile(json_path)
        size = (annotation.imgWidth, annotation.imgHeight)
        image_area = size[0]*size[1]

        # the background
        background = name2label['unlabeled'].color

        obj_ids = []
        obj_areas = []
        obj_bbox = []
        counter = 0
        # loop over all objects
        for obj in annotation.objects:
            label = obj.label
            polygon = obj.polygon

            # 'xxxgroup' labels fall back to their base label.
            if (not label in name2label) and label.endswith('group'):
                label = label[:-len('group')]

            # only get car/truck/bus class
            if name2label[label].id !=26 and name2label[label].id !=27 and name2label[label].id !=28:
                continue

            # Rasterize the polygon to obtain the object mask.
            labelImg = Image.new("RGBA", size, background)
            drawer = ImageDraw.Draw(labelImg)
            drawer.polygon(polygon, fill=(255, 255, 255))
            mask = np.array(labelImg)[:, :, 0]
            area = np.sum(mask/255)
            bbox = mask_to_bbox_xywh(mask)

            # Keep only objects whose mask covers enough of the image.
            if area > image_area * self.area_ratio:
                obj_ids.append(counter)
                obj_areas.append(area)
                obj_bbox.append(bbox)

            # counter indexes the class-filtered objects only.
            counter += 1

        if len(obj_bbox) < 2:
            print(f"[Info] Skip image index {image_name} due to insufficient bbox.")
            return

        # filter by IOU
        bbox_xyxy = []
        for box in obj_bbox:
            x, y, w, h = box
            bbox_xyxy.append([x, y, x + w, y + h])
        bbox_xyxy = np.array(bbox_xyxy) # shape: [N, 4]

        if IS_VERIFY:
            os.makedirs(Path(self.construct_dataset_dir) / image_name, exist_ok=True)
            image_with_boxes = draw_bboxes(image, bbox_xyxy)
            cv2.imwrite(str(Path(self.construct_dataset_dir) / image_name / "bboxes_image.png"), cv2.cvtColor(image_with_boxes, cv2.COLOR_RGB2BGR))

        # Pick the pair of filtered objects whose boxes overlap the most.
        iou_matrix = compute_iou_matrix(bbox_xyxy)
        np.fill_diagonal(iou_matrix, -1) # Exclude self-comparisons (i.e., each box with itself)

        max_index = np.unravel_index(np.argmax(iou_matrix), iou_matrix.shape)
        index0, index1 = max_index[0], max_index[1]
        max_iou = iou_matrix[index0, index1]

        if max_iou <= 0:
            print(f"[Info] Skip image index {image_name} due to no overlapping bboxes.")
            return

        os.makedirs(Path(self.construct_dataset_dir) / image_name, exist_ok=True)
        dst = Path(self.construct_dataset_dir) / image_name / "image.jpg"
        dst.parent.mkdir(parents=True, exist_ok=True)
        shutil.copy(image_path, dst)

        # Re-scan the class-filtered objects to locate object 0; re-rasterize its
        # polygon and export the mask and the RGBA patch.
        counter = 0
        for obj in annotation.objects:
            label = obj.label
            polygon = obj.polygon

            if (not label in name2label) and label.endswith('group'):
                label = label[:-len('group')]

            # only get car/truck/bus class
            if name2label[label].id !=26 and name2label[label].id !=27 and name2label[label].id !=28:
                continue

            if counter == obj_ids[index0]:
                labelImg = Image.new("RGBA", size, background)
                drawer = ImageDraw.Draw(labelImg)
                drawer.polygon(polygon, fill=(255, 255, 255))
                mask = np.array(labelImg)[:, :, 0]/255
                cv2.imwrite(str(Path(self.construct_dataset_dir) / image_name / "object_0_mask.png"), 255*mask)
                patch = self.get_patch(image, mask)
                patch = cv2.cvtColor(patch, cv2.COLOR_RGB2BGR)
                cv2.imwrite(str(Path(self.construct_dataset_dir) / image_name / "object_0.png"), patch)
                break
            counter += 1

        if IS_VERIFY:
            # Overlay object 0 in red on the bbox rendering.
            mask_color = np.stack([mask * 255]*3, axis=-1).astype(np.uint8)
            highlight = np.zeros_like(image)
            highlight[:, :, 2] = 255 # red channel
            alpha = 0.5
            image_with_boxes = np.where(mask_color == 255, cv2.addWeighted(image_with_boxes, 1 - alpha, highlight, alpha, 0), image_with_boxes)


        # Same scan for object 1.
        counter = 0
        for obj in annotation.objects:
            label = obj.label
            polygon = obj.polygon

            if (not label in name2label) and label.endswith('group'):
                label = label[:-len('group')]

            # only get car/truck/bus class
            if name2label[label].id !=26 and name2label[label].id !=27 and name2label[label].id !=28:
                continue

            if counter == obj_ids[index1]:
                labelImg = Image.new("RGBA", size, background)
                drawer = ImageDraw.Draw(labelImg)
                drawer.polygon(polygon, fill=(255, 255, 255))
                mask = np.array(labelImg)[:, :, 0]/255
                cv2.imwrite(str(Path(self.construct_dataset_dir) / image_name / "object_1_mask.png"), 255*mask)
                patch = self.get_patch(image, mask)
                patch = cv2.cvtColor(patch, cv2.COLOR_RGB2BGR)
                cv2.imwrite(str(Path(self.construct_dataset_dir) / image_name / "object_1.png"), patch)
                break
            counter += 1

        if IS_VERIFY:
            # Overlay object 1 in blue and write the combined rendering.
            mask_color = np.stack([mask * 255]*3, axis=-1).astype(np.uint8)
            highlight = np.zeros_like(image)
            highlight[:, :, 0] = 255 # blue channel
            alpha = 0.5
            image_with_boxes = np.where(mask_color == 255, cv2.addWeighted(image_with_boxes, 1 - alpha, highlight, alpha, 0), image_with_boxes)
            cv2.imwrite(str(Path(self.construct_dataset_dir) / image_name / "highlighted_image.png"), cv2.cvtColor(image_with_boxes, cv2.COLOR_RGB2BGR))

    def _get_sample(self, idx):
        """Load one exported sample from disk and build the collage training item."""
        sample_path = os.path.join(self.construct_dataset_dir, self.sample_list[idx])
        image = cv2.cvtColor(cv2.imread(os.path.join(sample_path, "image.jpg")), cv2.COLOR_BGR2RGB)
        object_0 = cv2.cvtColor(cv2.imread(os.path.join(sample_path, "object_0.png")), cv2.COLOR_BGR2RGB)
        object_1 = cv2.cvtColor(cv2.imread(os.path.join(sample_path, "object_1.png")), cv2.COLOR_BGR2RGB)
        mask_0 = cv2.imread(os.path.join(sample_path, "object_0_mask.png"), cv2.IMREAD_GRAYSCALE)
        mask_1 = cv2.imread(os.path.join(sample_path, "object_1_mask.png"), cv2.IMREAD_GRAYSCALE)
        collage = self._construct_collage(image, object_0, object_1, mask_0, mask_1)
        return collage

    def __len__(self):
        # One exported sample folder per dataset item.
        return len(os.listdir(self.construct_dataset_dir))
177
+
178
+
179
if __name__ == "__main__":
    '''
    two-object case: train/test: 536/78
    '''
    import argparse

    parser = argparse.ArgumentParser(description="CityscapesDataset Analysis")
    parser.add_argument("--dataset_dir", type=str, required=True, help="Path to the dataset directory.")
    parser.add_argument("--construct_dataset_dir", type=str, default='bin', help="Path to the debug bin directory.")
    parser.add_argument("--dataset_name", type=str, default='Cityscapes', help="Dataset name.")
    parser.add_argument('--is_train', action='store_true', help="Train/Test")
    parser.add_argument('--is_build_data', action='store_true', help="Build data")
    parser.add_argument('--is_multiple', action='store_true', help="Multiple/Two objects")
    parser.add_argument("--area_ratio", type=float, default=0.01171, help="Area ratio for filtering out small objects.")
    parser.add_argument("--obj_thr", type=int, default=20, help="Object threshold for filtering.")
    parser.add_argument("--index", type=int, default=0, help="Index of the sample to test.")
    args = parser.parse_args()

    # Pick the split-specific image/json folders and scan budget.
    if args.is_train:
        image_dir = Path(args.dataset_dir) / args.dataset_name / "train" / "images"
        json_dir = Path(args.dataset_dir) / args.dataset_name / "train" / "jsons"
        max_num = 2975
    else:
        image_dir = Path(args.dataset_dir) / args.dataset_name / "val" / "images"
        json_dir = Path(args.dataset_dir) / args.dataset_name / "val" / "jsons"
        max_num = 500

    dataset = CityscapesDataset(
        construct_dataset_dir = args.construct_dataset_dir,
        obj_thr = args.obj_thr,
        area_ratio = args.area_ratio,
    )

    # Either export the two-object samples, or sanity-check the exported folder
    # by building a collage from every sample.
    if args.is_build_data:
        if not args.is_multiple:
            for index in range(max_num):
                dataset._intersect_2_obj(image_dir, json_dir, index)
    else:
        for index in range(len(os.listdir(args.construct_dataset_dir))):
            collage = dataset._get_sample(index)
219
+
220
+
221
+
datasets/data_utils.py ADDED
@@ -0,0 +1,161 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import cv2
3
+
4
def resize_and_pad(image, box):
    """Fit *image* inside the (y1, y2, x1, x2) box region, keeping its aspect
    ratio and filling the leftover area with white (255)."""
    y1, y2, x1, x2 = box
    box_h, box_w = y2 - y1, x2 - x1
    img_h, img_w = image.shape[0], image.shape[1]

    if box_w / box_h >= img_w / img_h:
        # Box is relatively wider than the image: match heights, pad columns.
        new_h = box_h
        new_w = int(img_w * box_h / img_h)
        image = cv2.resize(image, (new_w, new_h))
        left = (box_w - new_w) // 2
        right = box_w - new_w - left
        image = np.pad(image, ((0, 0), (left, right), (0, 0)), 'constant', constant_values=255)
    else:
        # Box is relatively taller: match widths, pad rows.
        new_w = box_w
        new_h = int(img_h * box_w / img_w)
        image = cv2.resize(image, (new_w, new_h))
        top = (box_h - new_h) // 2
        bottom = box_h - new_h - top
        image = np.pad(image, ((top, bottom), (0, 0), (0, 0)), 'constant', constant_values=255)
    return image
30
+
31
+
32
+
33
def expand_image_mask(image, mask, ratio=1.4):
    """Grow the canvas of *image* (white fill) and *mask* (zero fill) by *ratio*,
    keeping the original content centered."""
    src_h, src_w = image.shape[0], image.shape[1]
    dst_h, dst_w = int(src_h * ratio), int(src_w * ratio)
    top = int((dst_h - src_h) // 2)
    bottom = dst_h - src_h - top
    left = int((dst_w - src_w) // 2)
    right = dst_w - src_w - left

    image = np.pad(image, ((top, bottom), (left, right), (0, 0)), 'constant', constant_values=255)
    mask = np.pad(mask, ((top, bottom), (left, right)), 'constant', constant_values=0)
    return image, mask
46
+
47
+
48
def expand_image(image, ratio=1.4):
    """Grow the canvas of *image* by *ratio* with a white border, keeping the
    original content centered."""
    src_h, src_w = image.shape[0], image.shape[1]
    dst_h, dst_w = int(src_h * ratio), int(src_w * ratio)
    top = int((dst_h - src_h) // 2)
    bottom = dst_h - src_h - top
    left = int((dst_w - src_w) // 2)
    right = dst_w - src_w - left

    return np.pad(image, ((top, bottom), (left, right), (0, 0)), 'constant', constant_values=255)
59
+
60
+
61
def expand_bbox(mask, yyxx, ratio=[1.2, 2.0], min_crop=0):
    """Grow a (y1, y2, x1, x2) box about its center by a random factor drawn
    from *ratio* (0.1 granularity, upper bound exclusive), enforcing a minimum
    side of *min_crop* and clipping to the extent of *mask*."""
    y1, y2, x1, x2 = yyxx
    scale = np.random.randint(ratio[0] * 10, ratio[1] * 10) / 10
    height, width = mask.shape[0], mask.shape[1]
    cx, cy = 0.5 * (x1 + x2), 0.5 * (y1 + y2)
    new_h = max(scale * (y2 - y1 + 1), min_crop)
    new_w = max(scale * (x2 - x1 + 1), min_crop)

    y1 = max(0, int(cy - new_h * 0.5))
    y2 = min(height, int(cy + new_h * 0.5))
    x1 = max(0, int(cx - new_w * 0.5))
    x2 = min(width, int(cx + new_w * 0.5))
    return (y1, y2, x1, x2)
81
+
82
+
83
def box2squre(image, box):
    """Symmetrically widen the shorter side of a (y1, y2, x1, x2) box toward a
    square about its center, clipped to the image extent. (Odd side lengths may
    leave the result one pixel short of square because of integer halving.)"""
    img_h, img_w = image.shape[0], image.shape[1]
    y1, y2, x1, x2 = box
    cx = (x1 + x2) // 2
    cy = (y1 + y2) // 2
    h, w = y2 - y1, x2 - x1

    if h >= w:
        x1, x2 = cx - h // 2, cx + h // 2
    else:
        y1, y2 = cy - w // 2, cy + w // 2
    return (max(0, y1), min(img_h, y2), max(0, x1), min(img_w, x2))
101
+
102
+
103
def pad_to_square(image, pad_value = 255, random = False):
    """Pad the shorter dimension of a HxWxC image with *pad_value* so it becomes
    square. The split between the two sides is even by default, or drawn at
    random when *random* is true. Already-square input is returned untouched."""
    h, w = image.shape[0], image.shape[1]
    if h == w:
        return image

    total = abs(h - w)
    first = int(np.random.randint(0, total)) if random else int(total / 2)
    second = total - first

    if h > w:
        spec = ((0, 0), (first, second), (0, 0))
    else:
        spec = ((first, second), (0, 0), (0, 0))
    return np.pad(image, spec, 'constant', constant_values=pad_value)
122
+
123
def get_bbox_from_mask(mask):
    """Tight (y1, y2, x1, x2) box around the nonzero pixels of *mask*, with y2/x2
    inclusive. Near-empty masks (sum < 10) yield the full image extent."""
    h, w = mask.shape[0], mask.shape[1]

    if mask.sum() < 10:
        return 0, h, 0, w
    ys = np.where(np.any(mask, axis=1))[0]
    xs = np.where(np.any(mask, axis=0))[0]
    return (ys[0], ys[-1], xs[0], xs[-1])
133
+
134
def box_in_box(small_box, big_box):
    """Express *small_box* (y1, y2, x1, x2) relative to the top-left corner of
    *big_box*."""
    y1, y2, x1, x2 = small_box
    y_off, _, x_off, _ = big_box
    return (y1 - y_off, y2 - y_off, x1 - x_off, x2 - x_off)
139
+
140
def crop_back(pred, tar_image, extra_sizes, tar_box_yyxx_crop, tar_box_yyxx_crop2, is_masked=False):
    """Strip the square-padding from *pred* (using the pre-pad size H1xW1 from
    *extra_sizes*) and, when *is_masked*, paste only the two (y, x, y, x) target
    boxes back onto a copy of *tar_image*; otherwise return the unpadded *pred*."""
    H1, W1, H2, W2 = extra_sizes
    y1, x1, y2, x2 = tar_box_yyxx_crop
    y1_, x1_, y2_, x2_ = tar_box_yyxx_crop2
    m = 0  # margin pixels kept around the pasted region

    # Remove the symmetric padding added when the crop was squared.
    if H1 < W1:
        cut1 = int((W1 - H1) / 2)
        cut2 = W1 - H1 - cut1
        pred = pred[cut1:-cut2, :, :]
    elif H1 > W1:
        cut1 = int((H1 - W1) / 2)
        cut2 = H1 - W1 - cut1
        pred = pred[:, cut1:-cut2, :]

    if not is_masked:
        return pred

    # Copy the prediction into the target image only inside the two boxes.
    gen_image = tar_image.copy()
    gen_image[y1 + m:y2 - m, x1 + m:x2 - m, :] = pred[y1 + m:y2 - m, x1 + m:x2 - m, :]
    gen_image[y1_ + m:y2_ - m, x1_ + m:x2_ - m, :] = pred[y1_ + m:y2_ - m, x1_ + m:x2_ - m, :]
    return gen_image
datasets/lvis.py ADDED
@@ -0,0 +1,303 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import cv2
2
+ import numpy as np
3
+ import os
4
+ from .data_utils import *
5
+ from .base import BaseDataset
6
+ from lvis import LVIS
7
+ from pathlib import Path
8
+ from util.box_ops import compute_iou_matrix, draw_bboxes
9
+ import shutil
10
+
11
+ IS_VERIFY = False
12
+
13
class LVISDataset(BaseDataset):
    """Builds a two/three-overlapping-object dataset from LVIS annotations.

    ``_intersect_2_obj`` / ``_intersect_3_obj`` scan LVIS images, pick the
    objects whose bounding boxes overlap the most, and write per-image
    folders (image.jpg, object_k.png, object_k_mask.png) under
    ``construct_dataset_dir``.  ``_get_sample`` later reads those folders
    back and builds a collage via ``BaseDataset._construct_collage``.
    """

    def __init__(self, construct_dataset_dir, obj_thr=20, area_ratio=0.02):
        # obj_thr: object-count threshold (stored but not used in this class body).
        self.obj_thr = obj_thr
        # Root folder that receives one sub-folder per constructed sample.
        self.construct_dataset_dir = construct_dataset_dir
        os.makedirs(Path(self.construct_dataset_dir), exist_ok=True)
        # Objects smaller than area_ratio * image_area are discarded.
        self.area_ratio = area_ratio
        # Snapshot of already-constructed samples (folder names).
        self.sample_list = os.listdir(self.construct_dataset_dir)

    def _get_image_path(self, file_name):
        """Return the first existing path for ``file_name`` among ``self.image_dir``.

        Raises FileNotFoundError when the file exists in none of the dirs.
        """
        for img_dir in self.image_dir:
            path = img_dir / file_name
            if path.exists():
                return str(path)
        raise FileNotFoundError(f"File {file_name} not found in any of the image_dir.")

    def _intersect_2_obj(self, image_dir, lvis_api, imgs_info, annos, idx):
        """Extract the two most-overlapping large objects of image ``idx``.

        Writes image.jpg, object_{0,1}.png and object_{0,1}_mask.png into a
        per-image folder, or returns early (with an [Info] message) when
        fewer than two sufficiently large boxes exist or none overlap.
        """
        self.image_dir = image_dir
        # LVIS stores the COCO file name only inside coco_url.
        image_name = imgs_info[idx]['coco_url'].split('/')[-1]
        image_path = self._get_image_path(image_name)
        image = cv2.imread(image_path)

        h, w = image.shape[0:2]
        image_area = h*w

        anno = annos[idx]

        # filter by area
        obj_ids = []
        obj_areas = []
        obj_bbox = []
        for i in range(len(anno)):
            obj = anno[i]
            area = obj['area']
            bbox = obj['bbox'] # xyhw
            if area > image_area * self.area_ratio:
                obj_ids.append(i)
                obj_areas.append(area)
                obj_bbox.append(bbox)

        if len(obj_bbox) < 2:
            print(f"[Info] Skip image index {image_name[:-4]} due to insufficient bbox.")
            return

        # filter by IOU
        # Convert xywh boxes to xyxy for the IoU computation.
        bbox_xyxy = []
        for box in obj_bbox:
            x, y, w, h = box
            bbox_xyxy.append([x, y, x + w, y + h])
        bbox_xyxy = np.array(bbox_xyxy) # shape: [N, 4]

        if IS_VERIFY:
            os.makedirs(Path(self.construct_dataset_dir) / image_name[:-4], exist_ok=True)
            image_with_boxes = draw_bboxes(image, bbox_xyxy)
            cv2.imwrite(str(Path(self.construct_dataset_dir) / image_name[:-4] / "bboxes_image.png"), image_with_boxes)

        iou_matrix = compute_iou_matrix(bbox_xyxy)
        np.fill_diagonal(iou_matrix, -1) # Exclude self-comparisons (i.e., each box with itself)

        # Pick the pair with the largest IoU.
        max_index = np.unravel_index(np.argmax(iou_matrix), iou_matrix.shape)
        index0, index1 = max_index[0], max_index[1]
        max_iou = iou_matrix[index0, index1]

        if max_iou <= 0:
            print(f"[Info] Skip image index {image_name[:-4]} due to no overlapping bboxes.")
            return

        os.makedirs(Path(self.construct_dataset_dir) / image_name[:-4], exist_ok=True)
        dst = Path(self.construct_dataset_dir) / image_name[:-4] / "image.jpg"
        dst.parent.mkdir(parents=True, exist_ok=True)
        shutil.copy(image_path, dst)

        # NOTE(review): anno_id actually holds the full annotation record,
        # which is what lvis_api.ann_to_mask expects here.
        anno_id = anno[obj_ids[index0]]
        mask = lvis_api.ann_to_mask(anno_id)
        cv2.imwrite(str(Path(self.construct_dataset_dir) / image_name[:-4] / "object_0_mask.png"), 255*mask)
        # get_patch is defined on BaseDataset and works in RGB space.
        patch = self.get_patch(cv2.cvtColor(image, cv2.COLOR_BGR2RGB), mask)
        patch = cv2.cvtColor(patch, cv2.COLOR_RGB2BGR)
        cv2.imwrite(str(Path(self.construct_dataset_dir) / image_name[:-4] / "object_0.png"), patch)

        if IS_VERIFY:
            mask_color = np.stack([mask * 255]*3, axis=-1).astype(np.uint8)
            highlight = np.zeros_like(image)
            highlight[:, :, 2] = 255 # red channel
            alpha = 0.5
            image_with_boxes = np.where(mask_color == 255, cv2.addWeighted(image_with_boxes, 1 - alpha, highlight, alpha, 0), image_with_boxes)

        anno_id = anno[obj_ids[index1]]
        mask = lvis_api.ann_to_mask(anno_id)
        cv2.imwrite(str(Path(self.construct_dataset_dir) / image_name[:-4] / "object_1_mask.png"), 255*mask)
        patch = self.get_patch(cv2.cvtColor(image, cv2.COLOR_BGR2RGB), mask)
        patch = cv2.cvtColor(patch, cv2.COLOR_RGB2BGR)
        cv2.imwrite(str(Path(self.construct_dataset_dir) / image_name[:-4] / "object_1.png"), patch)

        if IS_VERIFY:
            mask_color = np.stack([mask * 255]*3, axis=-1).astype(np.uint8)
            highlight = np.zeros_like(image)
            highlight[:, :, 0] = 255 # blue channel
            alpha = 0.5
            image_with_boxes = np.where(mask_color == 255, cv2.addWeighted(image_with_boxes, 1 - alpha, highlight, alpha, 0), image_with_boxes)
            cv2.imwrite(str(Path(self.construct_dataset_dir) / image_name[:-4] / "highlighted_image.png"), image_with_boxes)

    def _intersect_3_obj(self, image_dir, lvis_api, imgs_info, annos, idx):
        """Extract three mutually-overlapping large objects of image ``idx``.

        An anchor object is chosen as the one with the largest sum of
        positive IoUs; its two best-overlapping partners complete the
        triple.  Output layout matches ``_intersect_2_obj`` plus an
        object_2 pair.
        """
        self.image_dir = image_dir
        image_name = imgs_info[idx]['coco_url'].split('/')[-1]
        image_path = self._get_image_path(image_name)
        image = cv2.imread(image_path)

        h, w = image.shape[0:2]
        image_area = h * w

        anno = annos[idx]

        # filter by area
        obj_ids = []
        obj_areas = []
        obj_bbox = []
        for i, obj in enumerate(anno):
            area = obj['area']
            bbox = obj['bbox'] # xywh
            if area > image_area * self.area_ratio:
                obj_ids.append(i)
                obj_areas.append(area)
                obj_bbox.append(bbox)

        if len(obj_bbox) < 3:
            print(f"[Info] Skip image index {image_name[:-4]} due to insufficient bbox (need >=3, got {len(obj_bbox)}).")
            return

        # calculate IOU matrix
        bbox_xyxy = []
        for box in obj_bbox:
            x, y, w_box, h_box = box
            bbox_xyxy.append([x, y, x + w_box, y + h_box])
        bbox_xyxy = np.array(bbox_xyxy) # shape: [N, 4]

        if IS_VERIFY:
            os.makedirs(Path(self.construct_dataset_dir) / image_name[:-4], exist_ok=True)
            image_with_boxes = draw_bboxes(image, bbox_xyxy)
            cv2.imwrite(str(Path(self.construct_dataset_dir) / image_name[:-4] / "bboxes_image.png"), image_with_boxes)

        iou_matrix = compute_iou_matrix(bbox_xyxy)
        np.fill_diagonal(iou_matrix, -1) # Exclude self-comparisons

        # find 3 overlapped objects
        positive_iou = np.where(iou_matrix > 0, iou_matrix, 0.0)
        row_sums = positive_iou.sum(axis=1)
        anchor = int(np.argmax(row_sums))

        # Partners ranked by IoU with the anchor, keeping only positive overlaps.
        partner_candidates = np.argsort(iou_matrix[anchor])[::-1]
        partners = [int(p) for p in partner_candidates if iou_matrix[anchor, p] > 0]

        if len(partners) < 2:
            print(f"[Info] Skip image index {image_name[:-4]} due to not enough overlapping bboxes for 3 objects.")
            return

        index0 = anchor
        index1 = partners[0]
        index2 = partners[1]

        max_iou_pair = max(iou_matrix[index0, index1], iou_matrix[index0, index2], iou_matrix[index1, index2])
        if max_iou_pair <= 0:
            print(f"[Info] Skip image index {image_name[:-4]} due to no overlapping bboxes.")
            return

        # copy original image
        out_dir = Path(self.construct_dataset_dir) / image_name[:-4]
        out_dir.mkdir(parents=True, exist_ok=True)
        dst = out_dir / "image.jpg"
        shutil.copy(image_path, dst)

        # first object
        anno_id = anno[obj_ids[index0]]
        mask0 = lvis_api.ann_to_mask(anno_id)
        cv2.imwrite(str(out_dir / "object_0_mask.png"), 255 * mask0)
        patch0 = self.get_patch(cv2.cvtColor(image, cv2.COLOR_BGR2RGB), mask0)
        patch0 = cv2.cvtColor(patch0, cv2.COLOR_RGB2BGR)
        cv2.imwrite(str(out_dir / "object_0.png"), patch0)

        if IS_VERIFY:
            mask_color = np.stack([mask0 * 255] * 3, axis=-1).astype(np.uint8)
            highlight = np.zeros_like(image)
            highlight[:, :, 2] = 255 # red channel
            alpha = 0.5
            image_with_boxes = np.where(
                mask_color == 255,
                cv2.addWeighted(image_with_boxes, 1 - alpha, highlight, alpha, 0),
                image_with_boxes
            )

        # second object
        anno_id = anno[obj_ids[index1]]
        mask1 = lvis_api.ann_to_mask(anno_id)
        cv2.imwrite(str(out_dir / "object_1_mask.png"), 255 * mask1)
        patch1 = self.get_patch(cv2.cvtColor(image, cv2.COLOR_BGR2RGB), mask1)
        patch1 = cv2.cvtColor(patch1, cv2.COLOR_RGB2BGR)
        cv2.imwrite(str(out_dir / "object_1.png"), patch1)

        if IS_VERIFY:
            mask_color = np.stack([mask1 * 255] * 3, axis=-1).astype(np.uint8)
            highlight = np.zeros_like(image)
            highlight[:, :, 0] = 255 # blue channel
            alpha = 0.5
            image_with_boxes = np.where(
                mask_color == 255,
                cv2.addWeighted(image_with_boxes, 1 - alpha, highlight, alpha, 0),
                image_with_boxes
            )

        # third object
        anno_id = anno[obj_ids[index2]]
        mask2 = lvis_api.ann_to_mask(anno_id)
        cv2.imwrite(str(out_dir / "object_2_mask.png"), 255 * mask2)
        patch2 = self.get_patch(cv2.cvtColor(image, cv2.COLOR_BGR2RGB), mask2)
        patch2 = cv2.cvtColor(patch2, cv2.COLOR_RGB2BGR)
        cv2.imwrite(str(out_dir / "object_2.png"), patch2)

        if IS_VERIFY:
            mask_color = np.stack([mask2 * 255] * 3, axis=-1).astype(np.uint8)
            highlight = np.zeros_like(image)
            highlight[:, :, 1] = 255 # green channel
            alpha = 0.5
            image_with_boxes = np.where(
                mask_color == 255,
                cv2.addWeighted(image_with_boxes, 1 - alpha, highlight, alpha, 0),
                image_with_boxes
            )
            cv2.imwrite(str(out_dir / "highlighted_image.png"), image_with_boxes)

    def _get_sample(self, idx):
        """Load the constructed folder ``self.sample_list[idx]`` and build a collage.

        Relies on ``BaseDataset._construct_collage`` (defined elsewhere).
        """
        sample_path = os.path.join(self.construct_dataset_dir, self.sample_list[idx])
        image = cv2.cvtColor(cv2.imread(os.path.join(sample_path, "image.jpg")), cv2.COLOR_BGR2RGB)
        object_0 = cv2.cvtColor(cv2.imread(os.path.join(sample_path, "object_0.png")), cv2.COLOR_BGR2RGB)
        object_1 = cv2.cvtColor(cv2.imread(os.path.join(sample_path, "object_1.png")), cv2.COLOR_BGR2RGB)
        mask_0 = cv2.imread(os.path.join(sample_path, "object_0_mask.png"), cv2.IMREAD_GRAYSCALE)
        mask_1 = cv2.imread(os.path.join(sample_path, "object_1_mask.png"), cv2.IMREAD_GRAYSCALE)
        collage = self._construct_collage(image, object_0, object_1, mask_0, mask_1)
        return collage

    def __len__(self):
        # Re-lists the directory on every call so newly built samples count.
        return len(os.listdir(self.construct_dataset_dir))
253
+
254
+
255
if __name__ == "__main__":
    '''
    two-object case: train/test: 34610/8859
    '''
    import argparse

    # CLI driver: builds the overlapping-object dataset from LVIS annotations.
    parser = argparse.ArgumentParser(description="LVISDataset Analysis")
    parser.add_argument("--dataset_dir", type=str, required=True, help="Path to the dataset directory.")
    parser.add_argument("--construct_dataset_dir", type=str, default='bin', help="Path to the debug bin directory.")
    parser.add_argument("--dataset_name", type=str, default='COCO', help="Dataset name.")
    parser.add_argument('--is_train', action='store_true', help="Train/Test")
    parser.add_argument('--is_build_data', action='store_true', help="Build data")
    parser.add_argument('--is_multiple', action='store_true', help="Multiple/Two objects")
    parser.add_argument("--area_ratio", type=float, default=0.01171, help="Area ratio for filtering out small objects.")
    parser.add_argument("--obj_thr", type=int, default=20, help="Object threshold for filtering.")
    parser.add_argument("--index", type=int, default=0, help="Index of the sample to test.")
    args = parser.parse_args()

    # LVIS reuses COCO images, so both splits' image folders are searched.
    image_dirs = [
        Path(args.dataset_dir) / args.dataset_name / "train2017",
        Path(args.dataset_dir) / args.dataset_name / "val2017",
    ]

    if args.is_train:
        json_path = Path(args.dataset_dir) / args.dataset_name / "lvis_v1/lvis_v1_train.json"
        max_num = 2000000
    else:
        json_path = Path(args.dataset_dir) / args.dataset_name / "lvis_v1/lvis_v1_val.json"
        max_num = 30000

    dataset = LVISDataset(
        construct_dataset_dir = args.construct_dataset_dir,
        obj_thr = args.obj_thr,
        area_ratio = args.area_ratio,
    )

    # Load the LVIS index once; annotations are grouped per image id.
    lvis_api = LVIS(json_path)
    img_ids = sorted(lvis_api.imgs.keys())
    imgs_info = lvis_api.load_imgs(img_ids)
    annos = [lvis_api.img_ann_map[img_id] for img_id in img_ids]

    if args.is_build_data:
        if not args.is_multiple:
            # NOTE(review): max_num may exceed len(img_ids); the loop would
            # then raise IndexError at the end — confirm intended behavior.
            for index in range(max_num):
                dataset._intersect_2_obj(image_dirs, lvis_api, imgs_info, annos, index)
                # dataset._intersect_3_obj(image_dirs, lvis_api, imgs_info, annos, index)
        else:
            # Replay already-constructed samples through collage building.
            for index in range(len(os.listdir(args.construct_dataset_dir))):
                collage = dataset._get_sample(index)
datasets/mapillary_vistas.py ADDED
@@ -0,0 +1,228 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import cv2
3
+ import numpy as np
4
+ import os
5
+ from PIL import Image
6
+ from .data_utils import *
7
+ from .base import BaseDataset
8
+ from util.box_ops import mask_to_bbox_xywh, compute_iou_matrix, draw_bboxes
9
+ from pathlib import Path
10
+ import shutil
11
+
12
+ IS_VERIFY = False
13
+
14
class MapillaryVistasDataset(BaseDataset):
    """Builds a two-overlapping-vehicle dataset from Mapillary Vistas.

    Instance maps encode class id and instance id in one uint16 value
    (value // 256 = class label, value % 256 = instance id).  Only vehicle
    classes (bus/car/caravan/truck) are considered; the most-overlapping
    pair is exported as per-image folders under ``construct_dataset_dir``.
    """

    def __init__(self, construct_dataset_dir, obj_thr=20, area_ratio=0.02):
        # obj_thr: object-count threshold (stored but not used in this class body).
        self.obj_thr = obj_thr
        self.construct_dataset_dir = construct_dataset_dir
        os.makedirs(Path(self.construct_dataset_dir), exist_ok=True)
        # Objects smaller than area_ratio * image_area are discarded.
        self.area_ratio = area_ratio
        self.sample_list = os.listdir(self.construct_dataset_dir)

    def _intersect_2_obj(self, image_dir, instance_dir, labels, idx):
        """Export the two most-overlapping vehicles of instance map ``idx``.

        ``labels`` is the Vistas config label list; it is currently unused
        (the vehicle class ids are hard-coded below).  Returns early with
        an [Info] message when fewer than two large vehicles exist or when
        none of their boxes overlap.
        """
        json_list = os.listdir(instance_dir)
        image_name = json_list[idx][:-4]
        image_path = os.path.join(image_dir, image_name+'.jpg')
        image = cv2.imread(image_path)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        instance_path = os.path.join(instance_dir, image_name+'.png')
        instance_image = Image.open(instance_path)
        instance_array = np.array(instance_image, dtype=np.uint16)

        # Split the packed uint16 map: high byte = class, low byte = instance.
        instance_label_array = np.array(instance_array / 256, dtype=np.uint8)
        instance_ids_array = np.array(instance_array % 256, dtype=np.uint8)

        img_h, img_w = image.shape[0:2]
        image_area = img_h*img_w

        # vehicle_keywords = ['car', 'truck', 'bus']
        # excluded_keywords = ['bicycle']

        # vehicle_ids = []
        # for idx, label in enumerate(labels):
        #     name = label['name'].lower()
        #     if any(k in name for k in vehicle_keywords) and not any(k in name for k in excluded_keywords):
        #         vehicle_ids.append(idx)

        '''
        ids: 107, 'name': 'object--vehicle--bus', 'readable': 'Bus', 'color': [0, 60, 100]
        ids: 108, 'name': 'object--vehicle--car', 'readable': 'Car', 'color': [0, 0, 142]
        ids: 109, 'name': 'object--vehicle--caravan', 'readable': 'Caravan', 'color': [0, 0, 90]
        ids: 114, 'name': 'object--vehicle--truck', 'readable': 'Truck', 'color': [0, 0, 70]
        '''

        target_class_ids = [107, 108, 109, 114]
        # NOTE(review): range(max_instance) below skips the instance whose
        # id equals max_instance — confirm whether that is intentional.
        max_instance = np.max(instance_ids_array)

        # Enumerate every (class, instance) combination; `counter` is the
        # flat index used again by the lookup loops further down.
        obj_ids = []
        obj_areas = []
        obj_bbox = []
        counter = 0
        for target_id in target_class_ids:
            semantic_mask = (instance_label_array == target_id)
            for idx in range(max_instance):
                instance_mask = (instance_ids_array == idx)
                mask = np.logical_and(semantic_mask, instance_mask).astype(np.uint8)
                area = np.sum(mask)
                bbox = mask_to_bbox_xywh(mask)
                if area > image_area * self.area_ratio:
                    obj_ids.append(counter)
                    obj_areas.append(area)
                    obj_bbox.append(bbox)
                counter += 1

        if len(obj_bbox) < 2:
            print(f"[Info] Skip image index {image_name} due to insufficient bbox.")
            return

        # filter by IOU
        bbox_xyxy = []
        for box in obj_bbox:
            x, y, w, h = box
            bbox_xyxy.append([x, y, x + w, y + h])
        bbox_xyxy = np.array(bbox_xyxy) # shape: [N, 4]
        os.makedirs(Path(self.construct_dataset_dir) / image_name, exist_ok=True)

        if IS_VERIFY:
            image_with_boxes = draw_bboxes(image, bbox_xyxy)
            cv2.imwrite(str(Path(self.construct_dataset_dir) / image_name / "bboxes_image.png"), cv2.cvtColor(image_with_boxes, cv2.COLOR_RGB2BGR))


        iou_matrix = compute_iou_matrix(bbox_xyxy)
        np.fill_diagonal(iou_matrix, -1) # Exclude self-comparisons (i.e., each box with itself)

        # Pick the pair with the largest IoU.
        max_index = np.unravel_index(np.argmax(iou_matrix), iou_matrix.shape)
        index0, index1 = max_index[0], max_index[1]
        max_iou = iou_matrix[index0, index1]

        if max_iou <= 0:
            print(f"[Info] Skip image index {image_name} due to no overlapping bboxes.")
            return

        dst = Path(self.construct_dataset_dir) / image_name / "image.jpg"
        dst.parent.mkdir(parents=True, exist_ok=True)
        shutil.copy(image_path, dst)

        # Re-walk the same (class, instance) enumeration to rebuild the mask
        # of the flat index obj_ids[index0].
        counter = 0
        found = False
        for target_id in target_class_ids:
            semantic_mask = (instance_label_array == target_id)
            for idx in range(max_instance):
                if counter == obj_ids[index0]:
                    instance_mask = (instance_ids_array == idx)
                    mask = np.logical_and(semantic_mask, instance_mask).astype(np.uint8)
                    found = True
                    break
                counter += 1
            if found:
                break
        cv2.imwrite(str(Path(self.construct_dataset_dir) / image_name / "object_0_mask.png"), 255*mask)
        # image is already RGB here, matching get_patch's expectation elsewhere.
        patch = self.get_patch(image, mask)
        patch = cv2.cvtColor(patch, cv2.COLOR_RGB2BGR)
        cv2.imwrite(str(Path(self.construct_dataset_dir) / image_name / "object_0.png"), patch)

        if IS_VERIFY:
            mask_color = np.stack([mask * 255]*3, axis=-1).astype(np.uint8)
            highlight = np.zeros_like(image)
            highlight[:, :, 2] = 255 # red channel
            alpha = 0.5
            image_with_boxes = np.where(mask_color == 255, cv2.addWeighted(image_with_boxes, 1 - alpha, highlight, alpha, 0), image_with_boxes)

        # Same lookup for the second object of the pair.
        counter = 0
        found = False
        for target_id in target_class_ids:
            semantic_mask = (instance_label_array == target_id)
            for idx in range(max_instance):
                if counter == obj_ids[index1]:
                    instance_mask = (instance_ids_array == idx)
                    mask = np.logical_and(semantic_mask, instance_mask).astype(np.uint8)
                    found = True
                    break
                counter += 1
            if found:
                break

        cv2.imwrite(str(Path(self.construct_dataset_dir) / image_name / "object_1_mask.png"), 255*mask)
        patch = self.get_patch(image, mask)
        patch = cv2.cvtColor(patch, cv2.COLOR_RGB2BGR)
        cv2.imwrite(str(Path(self.construct_dataset_dir) / image_name / "object_1.png"), patch)

        if IS_VERIFY:
            mask_color = np.stack([mask * 255]*3, axis=-1).astype(np.uint8)
            highlight = np.zeros_like(image)
            highlight[:, :, 0] = 255 # blue channel
            alpha = 0.5
            image_with_boxes = np.where(mask_color == 255, cv2.addWeighted(image_with_boxes, 1 - alpha, highlight, alpha, 0), image_with_boxes)
            cv2.imwrite(str(Path(self.construct_dataset_dir) / image_name / "highlighted_image.png"), cv2.cvtColor(image_with_boxes, cv2.COLOR_RGB2BGR))

    def _get_sample(self, idx):
        """Load the constructed folder ``self.sample_list[idx]`` and build a collage.

        Relies on ``BaseDataset._construct_collage`` (defined elsewhere).
        """
        sample_path = os.path.join(self.construct_dataset_dir, self.sample_list[idx])
        image = cv2.cvtColor(cv2.imread(os.path.join(sample_path, "image.jpg")), cv2.COLOR_BGR2RGB)
        object_0 = cv2.cvtColor(cv2.imread(os.path.join(sample_path, "object_0.png")), cv2.COLOR_BGR2RGB)
        object_1 = cv2.cvtColor(cv2.imread(os.path.join(sample_path, "object_1.png")), cv2.COLOR_BGR2RGB)
        mask_0 = cv2.imread(os.path.join(sample_path, "object_0_mask.png"), cv2.IMREAD_GRAYSCALE)
        mask_1 = cv2.imread(os.path.join(sample_path, "object_1_mask.png"), cv2.IMREAD_GRAYSCALE)
        collage = self._construct_collage(image, object_0, object_1, mask_0, mask_1)
        return collage

    def __len__(self):
        # Re-lists the directory on every call so newly built samples count.
        return len(os.listdir(self.construct_dataset_dir))
171
+
172
+
173
if __name__ == "__main__":
    '''
    two-object case: train/test: 603/190
    '''
    import argparse

    # CLI driver: builds the overlapping-vehicle dataset from Mapillary Vistas.
    parser = argparse.ArgumentParser(description="MapillaryVistasDataset Analysis")
    parser.add_argument("--dataset_dir", type=str, required=True, help="Path to the dataset directory.")
    parser.add_argument("--construct_dataset_dir", type=str, default='bin', help="Path to the debug bin directory.")
    parser.add_argument("--dataset_name", type=str, default='MVD', help="Dataset name.")
    parser.add_argument('--is_train', action='store_true', help="Train/Test")
    parser.add_argument('--is_build_data', action='store_true', help="Build data")
    parser.add_argument('--is_multiple', action='store_true', help="Multiple/Two objects")
    parser.add_argument("--area_ratio", type=float, default=0.01171, help="Area ratio for filtering out small objects.")
    parser.add_argument("--obj_thr", type=int, default=20, help="Object threshold for filtering.")
    parser.add_argument("--index", type=int, default=0, help="Index of the sample to test.")
    args = parser.parse_args()

    # Label metadata from the dataset config (v2.0 annotation scheme).
    version = "v2.0" # "v1.2"
    config_path = Path(args.dataset_dir) / args.dataset_name / f'config_{version}.json'
    with open(config_path) as config_file:
        config = json.load(config_file)
    labels = config['labels']

    if args.is_train:
        image_dir = Path(args.dataset_dir) / args.dataset_name / "training" / "images"
        instance_dir = Path(args.dataset_dir) / args.dataset_name / "training" / "v2.0" / "instances"
        max_num = 18000
    else:
        image_dir = Path(args.dataset_dir) / args.dataset_name / "validation" / "images"
        instance_dir = Path(args.dataset_dir) / args.dataset_name / "validation" / "v2.0" / "instances"
        max_num = 2000

    dataset = MapillaryVistasDataset(
        construct_dataset_dir = args.construct_dataset_dir,
        obj_thr = args.obj_thr,
        area_ratio = args.area_ratio,
    )

    if args.is_build_data:
        if not args.is_multiple:
            # NOTE(review): max_num may exceed the number of instance maps;
            # the loop would then raise IndexError — confirm intended behavior.
            for index in range(max_num):
                dataset._intersect_2_obj(image_dir, instance_dir, labels, index)
                print('Done index ', index)
        else:
            # Replay already-constructed samples through collage building.
            for index in range(len(os.listdir(args.construct_dataset_dir))):
                collage = dataset._get_sample(index)
220
+
221
+
222
+ '''
223
+ 25,000 high-resolution images
224
+ 124 semantic object categories
225
+ 100 instance-specifically annotated categories
226
+ Global reach, covering 6 continents
227
+ Variety of weather, season, time of day, camera, and viewpoint
228
+ '''
datasets/objects365.py ADDED
@@ -0,0 +1,200 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import cv2
3
+ import numpy as np
4
+ import os
5
+ from .data_utils import *
6
+ from .base import BaseDataset
7
+ from pycocotools import mask as mask_utils
8
+ from pathlib import Path
9
+ from util.box_ops import compute_iou_matrix, draw_bboxes
10
+ import shutil
11
+
12
+ IS_VERIFY = False
13
+ IS_BOX = False
14
+
15
def save_bboxes(bbox_xyxy, save_path="bboxes.txt"):
    """Append bounding boxes (xyxy) to a text file, one box per line.

    A single box given as a flat sequence is promoted to one row.  Values
    are written space-separated with two-decimal precision; the file is
    opened in append mode so repeated calls accumulate.
    """
    rows = np.atleast_2d(bbox_xyxy)
    with open(save_path, "a") as out_file:
        np.savetxt(out_file, rows, fmt="%.2f", delimiter=" ")
19
+
20
class Objects365Dataset(BaseDataset):
    """Builds a two-overlapping-object dataset from Objects365 labels.

    Per-image JSON label files (with COCO-style polygon segmentations)
    are walked recursively; for each image the pair of sufficiently large
    objects with the highest box IoU is exported to a per-image folder
    under ``construct_dataset_dir``.
    """

    def __init__(self, construct_dataset_dir, obj_thr=20, area_ratio=0.02):
        # obj_thr: object-count threshold (stored but not used in this class body).
        self.obj_thr = obj_thr
        self.construct_dataset_dir = construct_dataset_dir
        os.makedirs(Path(self.construct_dataset_dir), exist_ok=True)
        # Objects smaller than area_ratio * image_area are discarded.
        self.area_ratio = area_ratio
        self.sample_list = os.listdir(self.construct_dataset_dir)

    def _get_all_file_paths_recursive(self, root_dir):
        """Return absolute paths of every file under ``root_dir`` (recursive)."""
        all_files = []
        for dirpath, _, filenames in os.walk(root_dir):
            for f in filenames:
                abs_path = os.path.abspath(os.path.join(dirpath, f))
                all_files.append(abs_path)
        return all_files

    def _get_image_path(self, file_name):
        """Return the first existing path for ``file_name`` among ``self.image_dir``.

        Raises FileNotFoundError when the file exists in none of the dirs.
        """
        for img_dir in self.image_dir:
            path = img_dir / file_name
            if path.exists():
                return str(path)
        raise FileNotFoundError(f"File {file_name} not found in any of the image_dir.")

    def _intersect_2_obj(self, image_dir, json_dir, idx):
        """Export the two most-overlapping large objects of label file ``idx``.

        NOTE(review): the full recursive walk of ``json_dir`` is redone on
        every call — consider caching ``self.json_list`` across calls.
        """
        self.image_dir = image_dir
        self.json_list = self._get_all_file_paths_recursive(json_dir)
        json_path = self.json_list[idx]
        # Label files are named <image>.json inside a per-subset directory.
        image_name = json_path.split('/')[-1]
        image_subset = json_path.split('/')[-2]

        image_path = os.path.join(os.path.join(image_dir, image_subset), image_name[:-5]+'.jpg')
        image = cv2.imread(image_path)

        with open(json_path) as f:
            data = json.load(f)
            # image_id is loaded but currently unused below.
            image_id = data["image_id"]
            annotations = data["annotations"]

        img_h, img_w = image.shape[0:2]
        image_area = img_h*img_w

        anno = annotations

        # filter by area
        obj_ids = []
        obj_areas = []
        obj_bbox = []
        for i in range(len(anno)):
            obj = anno[i]
            area = obj['area']
            bbox = obj['bbox'] # xyhw
            if area > image_area * self.area_ratio:
                obj_ids.append(i)
                obj_areas.append(area)
                obj_bbox.append(bbox)

        if len(obj_bbox) < 2:
            print(f"[Info] Skip image index {image_name[:-5]} due to insufficient bbox.")
            return

        # filter by IOU
        bbox_xyxy = []
        for box in obj_bbox:
            x, y, w, h = box
            bbox_xyxy.append([x, y, x + w, y + h])
        bbox_xyxy = np.array(bbox_xyxy) # shape: [N, 4]

        if IS_VERIFY:
            os.makedirs(Path(self.construct_dataset_dir) / image_name[:-5], exist_ok=True)
            image_with_boxes = draw_bboxes(image, bbox_xyxy)
            cv2.imwrite(str(Path(self.construct_dataset_dir) / image_name[:-5] / "bboxes_image.png"), image_with_boxes)

        iou_matrix = compute_iou_matrix(bbox_xyxy)
        np.fill_diagonal(iou_matrix, -1) # Exclude self-comparisons (i.e., each box with itself)

        # Pick the pair with the largest IoU.
        max_index = np.unravel_index(np.argmax(iou_matrix), iou_matrix.shape)
        index0, index1 = max_index[0], max_index[1]
        max_iou = iou_matrix[index0, index1]

        if max_iou <= 0:
            print(f"[Info] Skip image index {image_name[:-5]} due to no overlapping bboxes.")
            return

        if IS_BOX:
            # Debug dump of the selected pair; note the machine-specific paths.
            save_bboxes(bbox_xyxy[index0], '/home/hang18/links/projects/rrg-vislearn/hang18/bboxes0.txt')
            save_bboxes(bbox_xyxy[index1], '/home/hang18/links/projects/rrg-vislearn/hang18/bboxes1.txt')

        os.makedirs(Path(self.construct_dataset_dir) / image_name[:-5], exist_ok=True)
        # cv2.imwrite(str(Path(self.construct_dataset_dir) / image_name[:-4] / "image.jpg"), image) # source image
        dst = Path(self.construct_dataset_dir) / image_name[:-5] / "image.jpg"
        dst.parent.mkdir(parents=True, exist_ok=True)
        shutil.copy(image_path, dst)

        # Rasterize the COCO-style polygon segmentation into a binary mask.
        segmentation = anno[obj_ids[index0]]["segmentation"]
        rles = mask_utils.frPyObjects(segmentation, img_h, img_w)
        rle = mask_utils.merge(rles)
        mask = mask_utils.decode(rle)
        cv2.imwrite(str(Path(self.construct_dataset_dir) / image_name[:-5] / "object_0_mask.png"), 255*mask)
        # get_patch is defined on BaseDataset and works in RGB space.
        patch = self.get_patch(cv2.cvtColor(image, cv2.COLOR_BGR2RGB), mask)
        patch = cv2.cvtColor(patch, cv2.COLOR_RGB2BGR)
        cv2.imwrite(str(Path(self.construct_dataset_dir) / image_name[:-5] / "object_0.png"), patch)

        if IS_VERIFY:
            mask_color = np.stack([mask * 255]*3, axis=-1).astype(np.uint8)
            highlight = np.zeros_like(image)
            highlight[:, :, 2] = 255 # red channel
            alpha = 0.5
            image_with_boxes = np.where(mask_color == 255, cv2.addWeighted(image_with_boxes, 1 - alpha, highlight, alpha, 0), image_with_boxes)

        segmentation = anno[obj_ids[index1]]["segmentation"]
        rles = mask_utils.frPyObjects(segmentation, img_h, img_w)
        rle = mask_utils.merge(rles)
        mask = mask_utils.decode(rle)
        cv2.imwrite(str(Path(self.construct_dataset_dir) / image_name[:-5] / "object_1_mask.png"), 255*mask)
        patch = self.get_patch(cv2.cvtColor(image, cv2.COLOR_BGR2RGB), mask)
        patch = cv2.cvtColor(patch, cv2.COLOR_RGB2BGR)
        cv2.imwrite(str(Path(self.construct_dataset_dir) / image_name[:-5] / "object_1.png"), patch)

        if IS_VERIFY:
            mask_color = np.stack([mask * 255]*3, axis=-1).astype(np.uint8)
            highlight = np.zeros_like(image)
            highlight[:, :, 0] = 255 # blue channel
            alpha = 0.5
            image_with_boxes = np.where(mask_color == 255, cv2.addWeighted(image_with_boxes, 1 - alpha, highlight, alpha, 0), image_with_boxes)
            cv2.imwrite(str(Path(self.construct_dataset_dir) / image_name[:-5] / "highlighted_image.png"), image_with_boxes)

    def _get_sample(self, idx):
        """Load the constructed folder ``self.sample_list[idx]`` and build a collage.

        Relies on ``BaseDataset._construct_collage`` (defined elsewhere).
        """
        sample_path = os.path.join(self.construct_dataset_dir, self.sample_list[idx])
        image = cv2.cvtColor(cv2.imread(os.path.join(sample_path, "image.jpg")), cv2.COLOR_BGR2RGB)
        object_0 = cv2.cvtColor(cv2.imread(os.path.join(sample_path, "object_0.png")), cv2.COLOR_BGR2RGB)
        object_1 = cv2.cvtColor(cv2.imread(os.path.join(sample_path, "object_1.png")), cv2.COLOR_BGR2RGB)
        mask_0 = cv2.imread(os.path.join(sample_path, "object_0_mask.png"), cv2.IMREAD_GRAYSCALE)
        mask_1 = cv2.imread(os.path.join(sample_path, "object_1_mask.png"), cv2.IMREAD_GRAYSCALE)
        collage = self._construct_collage(image, object_0, object_1, mask_0, mask_1)
        return collage

    def __len__(self):
        # Re-lists the directory on every call so newly built samples count.
        return len(os.listdir(self.construct_dataset_dir))
158
+
159
+
160
if __name__ == "__main__":
    '''
    two-object case: train/test: TODO/51791
    '''
    import argparse

    # CLI driver: builds the overlapping-object dataset from Objects365 labels.
    parser = argparse.ArgumentParser(description="Objects365Dataset Analysis")
    parser.add_argument("--dataset_dir", type=str, required=True, help="Path to the dataset directory.")
    parser.add_argument("--construct_dataset_dir", type=str, default='bin', help="Path to the debug bin directory.")
    parser.add_argument("--dataset_name", type=str, default='object365', help="Dataset name.")
    parser.add_argument('--is_train', action='store_true', help="Train/Test")
    parser.add_argument('--is_build_data', action='store_true', help="Build data")
    parser.add_argument('--is_multiple', action='store_true', help="Multiple/Two objects")
    parser.add_argument("--area_ratio", type=float, default=0.01171, help="Area ratio for filtering out small objects.")
    parser.add_argument("--obj_thr", type=int, default=20, help="Object threshold for filtering.")
    parser.add_argument("--index", type=int, default=0, help="Index of the sample to test.")
    args = parser.parse_args()

    if args.is_train:
        image_dir = Path(args.dataset_dir) / args.dataset_name / "images" / "train"
        json_dir = Path(args.dataset_dir) / args.dataset_name / "labels" / "train"
        max_num = 1742289
    else:
        image_dir = Path(args.dataset_dir) / args.dataset_name / "images" / "val"
        json_dir = Path(args.dataset_dir) / args.dataset_name / "labels" / "val"
        max_num = 80000

    dataset = Objects365Dataset(
        # json_dir = json_dir,
        construct_dataset_dir = args.construct_dataset_dir,
        obj_thr = args.obj_thr,
        area_ratio = args.area_ratio,
    )

    if args.is_build_data:
        if not args.is_multiple:
            # NOTE(review): max_num may exceed the number of label files;
            # the loop would then raise IndexError — confirm intended behavior.
            for index in range(0, max_num):
                dataset._intersect_2_obj(image_dir, json_dir, index)
        else:
            # Replay already-constructed samples through collage building.
            for index in range(len(os.listdir(args.construct_dataset_dir))):
                collage = dataset._get_sample(index)
datasets/viton_hd.py ADDED
@@ -0,0 +1,163 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import cv2
2
+ import numpy as np
3
+ import os
4
+ from PIL import Image
5
+ from .data_utils import *
6
+ from .base import BaseDataset
7
+ from pathlib import Path
8
+ from util.box_ops import mask_to_bbox_xywh, draw_bboxes, compute_iou_matrix
9
+ import shutil
10
+
11
+ IS_VERIFY = False
12
+
13
class VITONHDDataset(BaseDataset):
    """Two-object dataset built from VITON-HD images and their part-parse maps.

    Operates in two phases:
      1. ``_intersect_2_obj`` scans the raw VITON-HD assets and, for each
         usable image, materialises a folder under ``construct_dataset_dir``
         containing the original image plus the two largest segmented objects
         and their binary masks.
      2. ``_get_sample`` / ``__len__`` then serve those pre-built folders as
         collages (collage construction lives in the unseen
         ``_construct_collage`` helper on the base class).
    """

    def __init__(self, construct_dataset_dir, obj_thr=20, area_ratio=0.02):
        """
        Args:
            construct_dataset_dir: Root folder where per-image sample folders
                are written/read; created if missing.
            obj_thr: Object-count threshold. Stored but not referenced in this
                class's visible code (kept for parity with sibling datasets).
            area_ratio: Minimum object area as a fraction of full image area;
                smaller segments are discarded.
        """
        self.obj_thr = obj_thr
        self.construct_dataset_dir = construct_dataset_dir
        os.makedirs(Path(self.construct_dataset_dir), exist_ok=True)
        self.area_ratio = area_ratio
        # NOTE(review): snapshot taken at construction time. Folders created
        # later by _intersect_2_obj are counted by __len__ (which re-lists the
        # directory) but are NOT visible to _get_sample, which indexes this
        # cached list — confirm the build and consume phases never share one
        # instance.
        self.sample_list = os.listdir(self.construct_dataset_dir)

    def _intersect_2_obj(self, asset_dir, idx):
        """Build one sample folder from raw asset ``idx``.

        Reads ``asset_dir/image/<name>`` and its palette parse map from
        ``asset_dir/image-parse-v3/<name>.png``, keeps segments larger than
        ``area_ratio`` of the image, and writes the image plus the two
        largest objects (patch + mask each) into
        ``construct_dataset_dir/<name>/``. Skips (with an [Info] print and no
        output) when fewer than two segments survive filtering.

        Raises:
            IndexError: if ``idx`` exceeds the number of files in
                ``asset_dir/image``.
        """
        image_dir = os.path.join(asset_dir, 'image')
        image_list = os.listdir(image_dir)
        image_path = os.path.join(image_dir, image_list[idx])
        image_name = os.path.basename(image_path)
        # BGR, per cv2 convention. NOTE(review): returns None if the file is
        # unreadable — unchecked; .shape below would then raise.
        image = cv2.imread(image_path)

        mask_dir = os.path.join(asset_dir, 'image-parse-v3')
        # image_name[:-4] strips a 3-char extension (".jpg"); the parse map is
        # a palette ("P" mode) PNG whose pixel values are part labels.
        segmentation_path = os.path.join(mask_dir, image_name[:-4]+'.png')
        segmentation = Image.open(segmentation_path).convert('P')
        segmentation = np.array(segmentation)

        h, w = image.shape[0:2]
        image_area = h*w

        ids = np.unique(segmentation)
        ids = [ i for i in ids if i!=0 ] # remove background mask
        if len(ids) < 2:
            print(f"[Info] Skip image index {image_name[:-4]} due to insufficient bbox.")
            return

        # filter by area: keep only labels whose mask covers more than
        # area_ratio of the whole image.
        obj_ids = []
        obj_areas = []
        obj_bbox = []
        for i in ids:
            mask_id = (segmentation == int(i)).astype(np.uint8)
            bbox = mask_to_bbox_xywh(mask_id) # xyhw
            area = np.sum(mask_id)
            if area > image_area * self.area_ratio:
                obj_ids.append(i)
                obj_areas.append(area)
                obj_bbox.append(bbox)

        if len(obj_bbox) < 2:
            print(f"[Info] Skip image index {image_name[:-4]} due to insufficient bbox.")
            return

        # filter by IOU
        # Convert (x, y, w, h) boxes to (x1, y1, x2, y2). Note this unpacking
        # shadows the image h/w above — harmless, image_area is already fixed.
        bbox_xyxy = []
        for box in obj_bbox:
            x, y, w, h = box
            bbox_xyxy.append([x, y, x + w, y + h])
        bbox_xyxy = np.array(bbox_xyxy) # shape: [N, 4]

        if IS_VERIFY:
            # Debug artifact: image with all candidate boxes drawn.
            os.makedirs(Path(self.construct_dataset_dir) / image_name[:-4], exist_ok=True)
            image_with_boxes = draw_bboxes(image, bbox_xyxy)
            cv2.imwrite(str(Path(self.construct_dataset_dir) / image_name[:-4] / "bboxes_image.png"), image_with_boxes)

        # NOTE(review): iou_matrix is computed but never read afterwards — the
        # "filter by IOU" step appears unfinished in this version.
        iou_matrix = compute_iou_matrix(bbox_xyxy)
        np.fill_diagonal(iou_matrix, -1) # Exclude self-comparisons (i.e., each box with itself)

        # Rank surviving objects by area, largest first. The earlier
        # len(obj_bbox) < 2 guard guarantees at least two entries, so the
        # [0]/[1] accesses below are safe.
        sorted_obj_ids = np.argsort(obj_areas)[::-1]
        assert len(sorted_obj_ids) > 0

        index0 = sorted_obj_ids[0]
        index1 = sorted_obj_ids[1]

        # Copy the source image into the sample folder.
        os.makedirs(Path(self.construct_dataset_dir) / image_name[:-4], exist_ok=True)
        dst = Path(self.construct_dataset_dir) / image_name[:-4] / "image.jpg"
        dst.parent.mkdir(parents=True, exist_ok=True)
        shutil.copy(image_path, dst)

        # Largest object: write its binary mask (0/255) and its RGB patch.
        # get_patch (base class, not visible here) expects RGB, hence the
        # BGR<->RGB round-trip around it.
        mask = (segmentation == int(obj_ids[index0])).astype(np.uint8)
        cv2.imwrite(str(Path(self.construct_dataset_dir) / image_name[:-4] / "object_0_mask.png"), 255*mask)
        patch = self.get_patch(cv2.cvtColor(image, cv2.COLOR_BGR2RGB), mask)
        patch = cv2.cvtColor(patch, cv2.COLOR_RGB2BGR)
        cv2.imwrite(str(Path(self.construct_dataset_dir) / image_name[:-4] / "object_0.png"), patch)

        if IS_VERIFY:
            # Overlay object 0 in red on the debug box image.
            mask_color = np.stack([mask * 255]*3, axis=-1).astype(np.uint8)
            highlight = np.zeros_like(image)
            highlight[:, :, 2] = 255 # red channel
            alpha = 0.5
            image_with_boxes = np.where(mask_color == 255, cv2.addWeighted(image_with_boxes, 1 - alpha, highlight, alpha, 0), image_with_boxes)

        # Second-largest object: same mask + patch outputs.
        mask = (segmentation == int(obj_ids[index1])).astype(np.uint8)
        cv2.imwrite(str(Path(self.construct_dataset_dir) / image_name[:-4] / "object_1_mask.png"), 255*mask)
        patch = self.get_patch(cv2.cvtColor(image, cv2.COLOR_BGR2RGB), mask)
        patch = cv2.cvtColor(patch, cv2.COLOR_RGB2BGR)
        cv2.imwrite(str(Path(self.construct_dataset_dir) / image_name[:-4] / "object_1.png"), patch)

        if IS_VERIFY:
            # Overlay object 1 in blue and persist the combined visualization.
            mask_color = np.stack([mask * 255]*3, axis=-1).astype(np.uint8)
            highlight = np.zeros_like(image)
            highlight[:, :, 0] = 255 # blue channel
            alpha = 0.5
            image_with_boxes = np.where(mask_color == 255, cv2.addWeighted(image_with_boxes, 1 - alpha, highlight, alpha, 0), image_with_boxes)
            cv2.imwrite(str(Path(self.construct_dataset_dir) / image_name[:-4] / "highlighted_image.png"), image_with_boxes)

    def _get_sample(self, idx):
        """Load pre-built sample ``idx`` and return its constructed collage.

        Reads the image (as RGB), both object patches (as RGB) and both
        grayscale masks from the sample folder and delegates composition to
        ``_construct_collage`` (defined outside this class).
        """
        sample_path = os.path.join(self.construct_dataset_dir, self.sample_list[idx])
        image = cv2.cvtColor(cv2.imread(os.path.join(sample_path, "image.jpg")), cv2.COLOR_BGR2RGB)
        object_0 = cv2.cvtColor(cv2.imread(os.path.join(sample_path, "object_0.png")), cv2.COLOR_BGR2RGB)
        object_1 = cv2.cvtColor(cv2.imread(os.path.join(sample_path, "object_1.png")), cv2.COLOR_BGR2RGB)
        mask_0 = cv2.imread(os.path.join(sample_path, "object_0_mask.png"), cv2.IMREAD_GRAYSCALE)
        mask_1 = cv2.imread(os.path.join(sample_path, "object_1_mask.png"), cv2.IMREAD_GRAYSCALE)
        collage = self._construct_collage(image, object_0, object_1, mask_0, mask_1)
        return collage

    def __len__(self):
        # Counts the directory live (not the cached self.sample_list).
        return len(os.listdir(self.construct_dataset_dir))
124
+
125
+
126
if __name__ == "__main__":
    '''
    two-object case: train/test: 11626/2028
    '''
    import argparse

    parser = argparse.ArgumentParser(description="VITONHDDataset Analysis")
    parser.add_argument("--dataset_dir", type=str, required=True, help="Path to the dataset directory.")
    parser.add_argument("--construct_dataset_dir", type=str, default='bin', help="Path to the debug bin directory.")
    parser.add_argument("--dataset_name", type=str, default='VitonHD', help="Dataset name.")
    parser.add_argument('--is_train', action='store_true', help="Train/Test")
    parser.add_argument('--is_build_data', action='store_true', help="Build data")
    parser.add_argument('--is_multiple', action='store_true', help="Multiple/Two objects")
    parser.add_argument("--area_ratio", type=float, default=0.01171, help="Area ratio for filtering out small objects.")
    parser.add_argument("--obj_thr", type=int, default=20, help="Object threshold for filtering.")
    parser.add_argument("--index", type=int, default=0, help="Index of the sample to test.")
    args = parser.parse_args()

    # VITON-HD layout: <dataset_dir>/<dataset_name>/{train,test}/image, ...
    if args.is_train:
        asset_dir = Path(args.dataset_dir) / args.dataset_name / "train"
    else:
        asset_dir = Path(args.dataset_dir) / args.dataset_name / "test"

    dataset = VITONHDDataset(
        construct_dataset_dir = args.construct_dataset_dir,
        obj_thr = args.obj_thr,
        area_ratio = args.area_ratio,
    )

    # Upper bound on how many raw images to process.
    max_num = 20000

    if args.is_build_data:
        if not args.is_multiple:
            # Fix: bound the loop by the number of images actually present.
            # The previous hard-coded range(max_num) made _intersect_2_obj
            # index past the end of its image list (IndexError) whenever the
            # split contains fewer than max_num files (e.g. the 2032-image
            # test split).
            num_images = len(os.listdir(asset_dir / "image"))
            for index in range(min(max_num, num_images)):
                dataset._intersect_2_obj(asset_dir, index)
        else:
            # Consume already-built sample folders one by one (smoke test).
            for index in range(len(os.listdir(args.construct_dataset_dir))):
                collage = dataset._get_sample(index)
datasets/webdataset.py ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import webdataset as wds
2
+ from torch.utils.data import IterableDataset
3
+ from PIL import Image
4
+ import numpy as np
5
+ import cv2
6
+
7
class MultiWebDataset(IterableDataset):
    """Streams collage samples from WebDataset tar shards.

    Each shard record must contain five members (see the ``rename`` mapping
    in ``__iter__``): a background image, two object crops and their binary
    masks. Every record is normalised to numpy (H×W×3 uint8 images,
    {0, 255} uint8 masks) and handed to ``construct_collage_fn``; the
    resulting collage is yielded.
    """

    def __init__(
        self,
        urls,
        construct_collage_fn,
        shuffle_size=0,
        seed=0,
        decode_mode="pil",
    ):
        """
        Args:
            urls: Shard URL(s)/pattern(s) accepted by ``wds.WebDataset``.
            construct_collage_fn: Callable
                ``(bg, obj0, obj1, mask0, mask1) -> collage`` over numpy arrays.
            shuffle_size: In-memory sample shuffle buffer size; 0 disables it.
            seed: Stored for reproducibility. NOTE(review): not currently wired
                into the shard/sample shuffling — confirm intended behavior.
            decode_mode: Decode spec forwarded to ``wds.decode``
                (e.g. "pil", "rgb"). Default "pil" matches prior behavior.
        """
        super().__init__()
        self.urls = urls
        self.shuffle_size = shuffle_size
        self.seed = seed
        self.decode_mode = decode_mode
        self.construct_collage_fn = construct_collage_fn

    def _to_rgb_np(self, img):
        """Convert a decoded image to an H×W×3 numpy array.

        PIL images are converted to RGB; grayscale arrays are expanded to
        3 channels; RGBA arrays lose their alpha channel; 3-channel arrays
        pass through unchanged (assumed already RGB, which holds for the
        "pil" decode path).

        Raises:
            TypeError: for anything other than PIL.Image or numpy.ndarray.
        """
        if isinstance(img, Image.Image):
            return np.array(img.convert("RGB"))
        elif isinstance(img, np.ndarray):
            if img.ndim == 2:
                return cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
            if img.ndim == 3 and img.shape[2] == 4:
                return img[:, :, :3]
            return img
        else:
            raise TypeError(f"Unsupported image type: {type(img)}")

    def _to_mask_np(self, img):
        """Convert a decoded mask to a strictly binary {0, 255} uint8 H×W array.

        Thresholds at 127 so antialiased or compression-blurred masks come
        out binary. NOTE(review): the 3-channel branch uses COLOR_BGR2GRAY
        although the "pil" decode path yields RGB; the luma weights differ
        slightly, but the >127 threshold makes this immaterial for
        near-binary masks.

        Raises:
            TypeError: for anything other than PIL.Image or numpy.ndarray.
        """
        if isinstance(img, Image.Image):
            m = np.array(img.convert("L"))
        elif isinstance(img, np.ndarray):
            if img.ndim == 3:
                m = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            else:
                m = img
        else:
            raise TypeError(f"Unsupported mask type: {type(img)}")
        m = (m > 127).astype(np.uint8) * 255
        return m

    def __iter__(self):
        # Build the pipeline lazily so each DataLoader worker owns its own.
        ds = wds.WebDataset(self.urls, shardshuffle=True, empty_check=False)

        if self.shuffle_size and self.shuffle_size > 0:
            ds = ds.shuffle(self.shuffle_size)

        # Fix: honour the decode_mode constructor argument. It was previously
        # stored but ignored (decode hard-coded to "pil"); the default value
        # keeps existing callers' behavior unchanged.
        ds = ds.decode(self.decode_mode)

        # Map tar member names to the short keys used below.
        ds = ds.rename(
            bg="bg.jpg",
            obj0="obj0.png",
            mask0="mask0.png",
            obj1="obj1.png",
            mask1="mask1.png",
        )

        for sample in ds:
            bg = sample["bg"]
            obj0 = sample["obj0"]
            obj1 = sample["obj1"]
            mask0 = sample["mask0"]
            mask1 = sample["mask1"]

            bg_np = self._to_rgb_np(bg)
            obj0_np = self._to_rgb_np(obj0)
            obj1_np = self._to_rgb_np(obj1)
            mask0_np = self._to_mask_np(mask0)
            mask1_np = self._to_mask_np(mask1)

            collage = self.construct_collage_fn(
                bg_np, obj0_np, obj1_np, mask0_np, mask1_np
            )
            yield collage