| |
| """Optimize anchor settings on a specific dataset. |
| |
| This script provides two methods to optimize YOLO anchors: k-means |
| anchor clustering and differential evolution. Use ``--algorithm k-means`` |
| or ``--algorithm differential_evolution`` to switch between the two methods. |
| |
| Example: |
| Use k-means anchor cluster:: |
| |
| python tools/analysis_tools/optimize_anchors.py ${CONFIG} \ |
| --algorithm k-means --input-shape ${INPUT_SHAPE [WIDTH HEIGHT]} \ |
| --output-dir ${OUTPUT_DIR} |
| Use differential evolution to optimize anchors:: |
| |
| python tools/analysis_tools/optimize_anchors.py ${CONFIG} \ |
| --algorithm differential_evolution \ |
| --input-shape ${INPUT_SHAPE [WIDTH HEIGHT]} \ |
| --output-dir ${OUTPUT_DIR} |
| """ |
| import argparse |
| import os.path as osp |
|
|
| import numpy as np |
| import torch |
| from mmengine.config import Config |
| from mmengine.fileio import dump |
| from mmengine.logging import MMLogger |
| from mmengine.registry import init_default_scope |
| from mmengine.utils import ProgressBar |
| from scipy.optimize import differential_evolution |
|
|
| from mmdet.registry import DATASETS |
| from mmdet.structures.bbox import (bbox_cxcywh_to_xyxy, bbox_overlaps, |
| bbox_xyxy_to_cxcywh) |
| from mmdet.utils import replace_cfg_vals, update_data_root |
|
|
|
|
def parse_args():
    """Parse the command-line arguments of the anchor optimization script.

    Arguments are read from ``sys.argv``.

    Returns:
        argparse.Namespace: Parsed arguments with the attributes ``config``,
        ``device``, ``input_shape``, ``algorithm``, ``iters`` and
        ``output_dir``.
    """
    parser = argparse.ArgumentParser(description='Optimize anchor parameters.')
    parser.add_argument('config', help='Train config file path.')
    parser.add_argument(
        '--device', default='cuda:0', help='Device used for calculating.')
    parser.add_argument(
        '--input-shape',
        type=int,
        # The shape is always a (width, height) pair, so let argparse reject
        # any other number of values with a proper usage message.
        nargs=2,
        metavar=('WIDTH', 'HEIGHT'),
        default=[608, 608],
        help='input image size')
    parser.add_argument(
        '--algorithm',
        default='differential_evolution',
        # Trailing space keeps the two concatenated sentences separated.
        help='Algorithm used for anchor optimizing. '
        'Support k-means and differential_evolution for YOLO.')
    parser.add_argument(
        '--iters',
        default=1000,
        type=int,
        help='Maximum iterations for optimizer.')
    parser.add_argument(
        '--output-dir',
        default=None,
        type=str,
        help='Path to save anchor optimize result.')

    args = parser.parse_args()
    return args
|
|
|
|
class BaseAnchorOptimizer:
    """Base class for anchor optimizer.

    The constructor collects the width/height of every annotated bbox in the
    dataset and rescales them to the model input shape. Subclasses implement
    ``optimize``.

    Args:
        dataset (obj:`Dataset`): Dataset object.
        input_shape (list[int]): Input image shape of the model.
            Format in [width, height].
        logger (obj:`logging.Logger`): The logger for logging.
        device (str, optional): Device used for calculating.
            Default: 'cuda:0'
        out_dir (str, optional): Path to save anchor optimize result.
            Default: None

    Raises:
        ValueError: If no bbox could be collected from the dataset.
    """

    def __init__(self,
                 dataset,
                 input_shape,
                 logger,
                 device='cuda:0',
                 out_dir=None):
        self.dataset = dataset
        self.input_shape = input_shape
        self.logger = logger
        self.device = device
        self.out_dir = out_dir
        bbox_whs, img_shapes = self.get_whs_and_shapes()
        # Without any bbox the arrays are 1-D and the axis-1 reduction below
        # would fail with an unrelated-looking numpy error; fail clearly.
        if len(bbox_whs) == 0:
            raise ValueError(
                'No bboxes were collected from the dataset annotations, '
                'so anchors cannot be optimized.')
        # Per bbox: longest side of its image divided by the input
        # width / height.
        ratios = img_shapes.max(1, keepdims=True) / np.array([input_shape])

        # resize to input shape
        self.bbox_whs = bbox_whs / ratios

    def get_whs_and_shapes(self):
        """Get widths and heights of bboxes and shapes of images.

        Every entry of ``data_info['instances']`` contributes one row; the
        first four values of its ``'bbox'`` are treated as
        [x1, y1, x2, y2] corners.

        Returns:
            tuple[np.ndarray]: Array of bbox shapes and array of image
            shapes with shape (num_bboxes, 2) in [width, height] format.
        """
        self.logger.info('Collecting bboxes from annotation...')
        bbox_whs = []
        img_shapes = []
        prog_bar = ProgressBar(len(self.dataset))
        for idx in range(len(self.dataset)):
            data_info = self.dataset.get_data_info(idx)
            img_shape = np.array([data_info['width'], data_info['height']])
            gt_instances = data_info['instances']
            for instance in gt_instances:
                bbox = np.array(instance['bbox'])
                wh = bbox[2:4] - bbox[0:2]
                # The image shape is repeated once per bbox so both arrays
                # stay aligned row by row.
                img_shapes.append(img_shape)
                bbox_whs.append(wh)

            prog_bar.update()
        print('\n')
        bbox_whs = np.array(bbox_whs)
        img_shapes = np.array(img_shapes)
        self.logger.info(f'Collected {bbox_whs.shape[0]} bboxes.')
        return bbox_whs, img_shapes

    def get_zero_center_bbox_tensor(self):
        """Get a tensor of bboxes centered at (0, 0).

        Returns:
            Tensor: Tensor of bboxes with shape (num_bboxes, 4)
            in [xmin, ymin, xmax, ymax] format.
        """
        whs = torch.from_numpy(self.bbox_whs).to(
            self.device, dtype=torch.float32)
        # Build (cx, cy, w, h) rows with a zero center, then convert.
        bboxes = bbox_cxcywh_to_xyxy(
            torch.cat([torch.zeros_like(whs), whs], dim=1))
        return bboxes

    def optimize(self):
        """Run the optimization. Must be implemented by subclasses."""
        raise NotImplementedError

    def save_result(self, anchors, path=None):
        """Log the rounded anchors and optionally save them to a file.

        Args:
            anchors (Iterable): Pairs of (width, height) values.
            path (str, optional): Directory in which
                ``anchor_optimize_result.json`` is written. Nothing is
                written when it is empty or None. Default: None
        """
        # Cast explicitly so the result always holds built-in ints, whatever
        # numeric scalar type the anchors were given in.
        anchor_results = [[int(round(w)), int(round(h))] for w, h in anchors]
        self.logger.info(f'Anchor optimize result:{anchor_results}')
        if path:
            json_path = osp.join(path, 'anchor_optimize_result.json')
            dump(anchor_results, json_path)
            self.logger.info(f'Result saved in {json_path}')
|
|
|
|
class YOLOKMeansAnchorOptimizer(BaseAnchorOptimizer):
    r"""YOLO anchor optimizer using k-means. Code refer to `AlexeyAB/darknet.
    <https://github.com/AlexeyAB/darknet/blob/master/src/detector.c>`_.

    Args:
        num_anchors (int) : Number of anchors.
        iters (int): Maximum iterations for k-means.
        **kwargs: Keyword arguments forwarded to ``BaseAnchorOptimizer``.
    """

    def __init__(self, num_anchors, iters, **kwargs):

        super(YOLOKMeansAnchorOptimizer, self).__init__(**kwargs)
        self.num_anchors = num_anchors
        self.iters = iters

    def optimize(self):
        """Cluster the anchors and log/save the result."""
        anchors = self.kmeans_anchors()
        self.save_result(anchors, self.out_dir)

    def kmeans_anchors(self):
        """Cluster the zero-centered bboxes into ``num_anchors`` anchors.

        Returns:
            list: Anchor (width, height) pairs sorted by ascending area.
        """
        self.logger.info(
            f'Start cluster {self.num_anchors} YOLO anchors with K-means...')
        bboxes = self.get_zero_center_bbox_tensor()
        # Initial centers are bboxes drawn at random (with replacement).
        cluster_center_idx = torch.randint(
            0, bboxes.shape[0], (self.num_anchors, )).to(self.device)

        assignments = torch.zeros((bboxes.shape[0], )).to(self.device)
        cluster_centers = bboxes[cluster_center_idx]
        if self.num_anchors == 1:
            # All bboxes are assigned to cluster 0 already, so a single
            # maximization step yields the mean box.
            cluster_centers = self.kmeans_maximization(bboxes, assignments,
                                                       cluster_centers)
            anchors = bbox_xyxy_to_cxcywh(cluster_centers)[:, 2:].cpu().numpy()
            anchors = sorted(anchors, key=lambda x: x[0] * x[1])
            return anchors

        prog_bar = ProgressBar(self.iters)
        for i in range(self.iters):
            converged, assignments = self.kmeans_expectation(
                bboxes, assignments, cluster_centers)
            if converged:
                self.logger.info(f'K-means process has converged at iter {i}.')
                break
            cluster_centers = self.kmeans_maximization(bboxes, assignments,
                                                       cluster_centers)
            prog_bar.update()
        print('\n')
        # Mean over all bboxes of the best overlap with any center.
        avg_iou = bbox_overlaps(bboxes,
                                cluster_centers).max(1)[0].mean().item()

        # Keep only the (w, h) part of the centers.
        anchors = bbox_xyxy_to_cxcywh(cluster_centers)[:, 2:].cpu().numpy()
        anchors = sorted(anchors, key=lambda x: x[0] * x[1])
        self.logger.info(f'Anchor cluster finish. Average IOU: {avg_iou}')

        return anchors

    def kmeans_maximization(self, bboxes, assignments, centers):
        """Maximization part of EM algorithm(Expectation-Maximization)

        Every center is moved to the mean of the bboxes assigned to it.

        Args:
            bboxes (Tensor): Bboxes being clustered, one row per bbox.
            assignments (Tensor): Cluster index of every bbox.
            centers (Tensor): Current cluster centers, one row per cluster.

        Returns:
            Tensor: Updated centers with the same shape as ``centers``.
        """
        # Start from the previous centers so that a cluster without any
        # assigned bbox keeps its position instead of collapsing to an
        # all-zero box, which would end up as a useless [0, 0] anchor.
        new_centers = centers.clone()
        for i in range(centers.shape[0]):
            mask = (assignments == i)
            if mask.any():
                new_centers[i, :] = bboxes[mask].mean(0)
        return new_centers

    def kmeans_expectation(self, bboxes, assignments, centers):
        """Expectation part of EM algorithm(Expectation-Maximization)

        Args:
            bboxes (Tensor): Bboxes being clustered, one row per bbox.
            assignments (Tensor): Cluster index of every bbox from the
                previous step.
            centers (Tensor): Current cluster centers.

        Returns:
            tuple: ``(converged, closest)`` where ``closest`` holds, for
            every bbox, the index of the center with the largest overlap
            and ``converged`` tells whether it equals ``assignments``.
        """
        ious = bbox_overlaps(bboxes, centers)
        closest = ious.argmax(1)
        converged = (closest == assignments).all()
        return converged, closest
|
|
|
|
class YOLODEAnchorOptimizer(BaseAnchorOptimizer):
    """YOLO anchor optimizer using differential evolution algorithm.

    Args:
        num_anchors (int) : Number of anchors.
        iters (int): Maximum iterations for differential evolution.
        strategy (str): The differential evolution strategy to use.
            Should be one of:

                - 'best1bin'
                - 'best1exp'
                - 'rand1exp'
                - 'randtobest1exp'
                - 'currenttobest1exp'
                - 'best2exp'
                - 'rand2exp'
                - 'randtobest1bin'
                - 'currenttobest1bin'
                - 'best2bin'
                - 'rand2bin'
                - 'rand1bin'

            Default: 'best1bin'.
        population_size (int): Total population size of evolution algorithm.
            Default: 15.
        convergence_thr (float): Tolerance for convergence, the
            optimizing stops when ``np.std(pop) <= abs(convergence_thr)
            + convergence_thr * np.abs(np.mean(population_energies))``,
            respectively. Default: 0.0001.
        mutation (tuple[float]): Range of dithering randomly changes the
            mutation constant. Default: (0.5, 1).
        recombination (float): Recombination constant of crossover probability.
            Default: 0.7.
        **kwargs: Keyword arguments forwarded to ``BaseAnchorOptimizer``.
    """

    def __init__(self,
                 num_anchors,
                 iters,
                 strategy='best1bin',
                 population_size=15,
                 convergence_thr=0.0001,
                 mutation=(0.5, 1),
                 recombination=0.7,
                 **kwargs):
        super().__init__(**kwargs)

        # Search-space size and budget.
        self.num_anchors = num_anchors
        self.iters = iters
        # Settings handed to the evolution solver unchanged.
        self.strategy = strategy
        self.population_size = population_size
        self.convergence_thr = convergence_thr
        self.mutation = mutation
        self.recombination = recombination

    def optimize(self):
        """Evolve the anchors and log/save the result."""
        evolved = self.differential_evolution()
        self.save_result(evolved, self.out_dir)

    def differential_evolution(self):
        """Search the anchor sizes that minimize ``avg_iou_cost``.

        Returns:
            list[tuple]: Anchor (width, height) pairs sorted by ascending
            area.
        """
        zero_centered = self.get_zero_center_bbox_tensor()

        # Parameters are laid out as w0, h0, w1, h1, ...; each width may
        # range over [0, input width], each height over [0, input height].
        width_bound = (0, self.input_shape[0])
        height_bound = (0, self.input_shape[1])
        search_bounds = [width_bound, height_bound] * self.num_anchors

        solver_kwargs = dict(
            func=self.avg_iou_cost,
            bounds=search_bounds,
            args=(zero_centered, ),
            strategy=self.strategy,
            maxiter=self.iters,
            popsize=self.population_size,
            tol=self.convergence_thr,
            mutation=self.mutation,
            recombination=self.recombination,
            updating='immediate',
            disp=True)
        outcome = differential_evolution(**solver_kwargs)
        self.logger.info(
            f'Anchor evolution finish. Average IOU: {1 - outcome.fun}')

        solution = outcome.x
        anchors = list(zip(solution[::2], solution[1::2]))
        anchors.sort(key=lambda wh: wh[0] * wh[1])
        return anchors

    @staticmethod
    def avg_iou_cost(anchor_params, bboxes):
        """Cost of a set of anchors: one minus the mean best overlap.

        Args:
            anchor_params (Sequence): Flat anchor sizes laid out as
                w0, h0, w1, h1, ...; its length must be even.
            bboxes (Tensor): Zero-centered bboxes the anchors are matched
                against.

        Returns:
            float: ``1 - mean(max overlap of every bbox with any anchor)``.
        """
        assert len(anchor_params) % 2 == 0
        wh_pairs = [[width, height] for width, height in zip(
            anchor_params[::2], anchor_params[1::2])]
        anchor_whs = torch.tensor(wh_pairs).to(
            bboxes.device, dtype=bboxes.dtype)
        # Anchors are centered at the origin exactly like the bboxes.
        zero_centers = torch.zeros_like(anchor_whs)
        anchor_boxes = bbox_cxcywh_to_xyxy(
            torch.cat([zero_centers, anchor_whs], dim=1))
        best_ious = bbox_overlaps(bboxes, anchor_boxes).max(1)[0]
        return 1 - best_ious.mean().item()
|
|
|
|
def main():
    """Run anchor optimization for the config given on the command line.

    Loads the config, checks that the model uses a ``YOLOAnchorGenerator``,
    builds the innermost training dataset and runs the optimizer selected
    by ``--algorithm``.

    Raises:
        NotImplementedError: If ``--algorithm`` is neither ``k-means`` nor
            ``differential_evolution``.
    """
    logger = MMLogger.get_current_instance()
    args = parse_args()
    cfg = Config.fromfile(args.config)
    init_default_scope(cfg.get('default_scope', 'mmdet'))

    # Post-process the loaded config with the mmdet helpers.
    cfg = replace_cfg_vals(cfg)
    update_data_root(cfg)

    input_shape = args.input_shape
    assert len(input_shape) == 2

    anchor_type = cfg.model.bbox_head.anchor_generator.type
    assert anchor_type == 'YOLOAnchorGenerator', \
        f'Only support optimize YOLOAnchor, but get {anchor_type}.'

    # One anchor per base size, summed over all groups of base sizes.
    base_sizes = cfg.model.bbox_head.anchor_generator.base_sizes
    num_anchors = sum(len(sizes) for sizes in base_sizes)

    # Unwrap nested dataset configs down to the innermost one.
    dataset_cfg = cfg.train_dataloader
    while 'dataset' in dataset_cfg:
        dataset_cfg = dataset_cfg['dataset']
    dataset = DATASETS.build(dataset_cfg)

    optimizer_classes = {
        'k-means': YOLOKMeansAnchorOptimizer,
        'differential_evolution': YOLODEAnchorOptimizer,
    }
    optimizer_cls = optimizer_classes.get(args.algorithm)
    if optimizer_cls is None:
        raise NotImplementedError(
            f'Only support k-means and differential_evolution, '
            f'but get {args.algorithm}')

    optimizer = optimizer_cls(
        dataset=dataset,
        input_shape=input_shape,
        device=args.device,
        num_anchors=num_anchors,
        iters=args.iters,
        logger=logger,
        out_dir=args.output_dir)
    optimizer.optimize()


if __name__ == '__main__':
    main()
|
|