iasjkk committed on
Commit
a2b688c
·
verified ·
1 Parent(s): f5cb202

Update model.py

Browse files
Files changed (1) hide show
  1. model.py +81 -123
model.py CHANGED
@@ -1,12 +1,3 @@
1
- """
2
- Mask R-CNN
3
- The main Mask R-CNN model implementation.
4
-
5
- Copyright (c) 2017 Matterport, Inc.
6
- Licensed under the MIT License (see LICENSE for details)
7
- Written by Waleed Abdulla
8
- """
9
-
10
  import os
11
  import random
12
  import datetime
@@ -89,8 +80,6 @@ def compute_backbone_shapes(config, image_shape):
89
  # Resnet Graph
90
  ############################################################
91
 
92
- # Code adopted from:
93
- # https://github.com/fchollet/deep-learning-models/blob/master/resnet50.py
94
 
95
  def identity_block(input_tensor, kernel_size, filters, stage, block,
96
  use_bias=True, train_bn=True):
@@ -409,12 +398,7 @@ class PyramidROIAlign(KE.Layer):
409
  level_boxes = tf.stop_gradient(level_boxes)
410
  box_indices = tf.stop_gradient(box_indices)
411
 
412
- # Crop and Resize
413
- # From Mask R-CNN paper: "We sample four regular locations, so
414
- # that we can evaluate either max or average pooling. In fact,
415
- # interpolating only a single value at each bin center (without
416
- # pooling) is nearly as effective."
417
- #
418
  # Here we use the simplified approach of a single value per bin,
419
  # which is how it's done in tf.crop_and_resize()
420
  # Result: [batch * num_boxes, pool_height, pool_width, channels]
@@ -794,8 +778,8 @@ class DetectionLayer(KE.Layer):
794
 
795
  def call(self, inputs):
796
  rois = inputs[0]
797
- mrcnn_class = inputs[1]
798
- mrcnn_bbox = inputs[2]
799
  image_meta = inputs[3]
800
 
801
  # Get windows of images in normalized coordinates. Windows are the area
@@ -808,7 +792,7 @@ class DetectionLayer(KE.Layer):
808
 
809
  # Run detection refinement graph on each item in the batch
810
  detections_batch = utils.batch_slice(
811
- [rois, mrcnn_class, mrcnn_bbox, window],
812
  lambda x, y, w, z: refine_detections_graph(x, y, w, z, self.config),
813
  self.config.IMAGES_PER_GPU)
814
 
@@ -925,32 +909,32 @@ def fpn_classifier_graph(rois, feature_maps, image_meta,
925
  name="roi_align_classifier")([rois, image_meta] + feature_maps)
926
  # Two 1024 FC layers (implemented with Conv2D for consistency)
927
  x = KL.TimeDistributed(KL.Conv2D(fc_layers_size, (pool_size, pool_size), padding="valid"),
928
- name="mrcnn_class_conv1")(x)
929
- x = KL.TimeDistributed(BatchNorm(), name='mrcnn_class_bn1')(x, training=train_bn)
930
  x = KL.Activation('relu')(x)
931
  x = KL.TimeDistributed(KL.Conv2D(fc_layers_size, (1, 1)),
932
- name="mrcnn_class_conv2")(x)
933
- x = KL.TimeDistributed(BatchNorm(), name='mrcnn_class_bn2')(x, training=train_bn)
934
  x = KL.Activation('relu')(x)
935
 
936
  shared = KL.Lambda(lambda x: K.squeeze(K.squeeze(x, 3), 2),
937
  name="pool_squeeze")(x)
938
 
939
  # Classifier head
940
- mrcnn_class_logits = KL.TimeDistributed(KL.Dense(num_classes),
941
- name='mrcnn_class_logits')(shared)
942
- mrcnn_probs = KL.TimeDistributed(KL.Activation("softmax"),
943
- name="mrcnn_class")(mrcnn_class_logits)
944
 
945
  # BBox head
946
  # [batch, num_rois, NUM_CLASSES * (dy, dx, log(dh), log(dw))]
947
  x = KL.TimeDistributed(KL.Dense(num_classes * 4, activation='linear'),
948
- name='mrcnn_bbox_fc')(shared)
949
  # Reshape to [batch, num_rois, NUM_CLASSES, (dy, dx, log(dh), log(dw))]
950
  s = K.int_shape(x)
951
- mrcnn_bbox = KL.Reshape((s[1], num_classes, 4), name="mrcnn_bbox")(x)
952
 
953
- return mrcnn_class_logits, mrcnn_probs, mrcnn_bbox
954
 
955
 
956
  def build_fpn_mask_graph(rois, feature_maps, image_meta,
@@ -975,33 +959,33 @@ def build_fpn_mask_graph(rois, feature_maps, image_meta,
975
 
976
  # Conv layers
977
  x = KL.TimeDistributed(KL.Conv2D(256, (3, 3), padding="same"),
978
- name="mrcnn_mask_conv1")(x)
979
  x = KL.TimeDistributed(BatchNorm(),
980
- name='mrcnn_mask_bn1')(x, training=train_bn)
981
  x = KL.Activation('relu')(x)
982
 
983
  x = KL.TimeDistributed(KL.Conv2D(256, (3, 3), padding="same"),
984
- name="mrcnn_mask_conv2")(x)
985
  x = KL.TimeDistributed(BatchNorm(),
986
- name='mrcnn_mask_bn2')(x, training=train_bn)
987
  x = KL.Activation('relu')(x)
988
 
989
  x = KL.TimeDistributed(KL.Conv2D(256, (3, 3), padding="same"),
990
- name="mrcnn_mask_conv3")(x)
991
  x = KL.TimeDistributed(BatchNorm(),
992
- name='mrcnn_mask_bn3')(x, training=train_bn)
993
  x = KL.Activation('relu')(x)
994
 
995
  x = KL.TimeDistributed(KL.Conv2D(256, (3, 3), padding="same"),
996
- name="mrcnn_mask_conv4")(x)
997
  x = KL.TimeDistributed(BatchNorm(),
998
- name='mrcnn_mask_bn4')(x, training=train_bn)
999
  x = KL.Activation('relu')(x)
1000
 
1001
  x = KL.TimeDistributed(KL.Conv2DTranspose(256, (2, 2), strides=2, activation="relu"),
1002
- name="mrcnn_mask_deconv")(x)
1003
  x = KL.TimeDistributed(KL.Conv2D(num_classes, (1, 1), strides=1, activation="sigmoid"),
1004
- name="mrcnn_mask")(x)
1005
  return x
1006
 
1007
 
@@ -1073,7 +1057,7 @@ def rpn_bbox_loss_graph(config, target_bbox, rpn_match, rpn_bbox):
1073
  return loss
1074
 
1075
 
1076
- def mrcnn_class_loss_graph(target_class_ids, pred_class_logits,
1077
  active_class_ids):
1078
  """Loss for the classifier head of Mask RCNN.
1079
 
@@ -1109,7 +1093,7 @@ def mrcnn_class_loss_graph(target_class_ids, pred_class_logits,
1109
  return loss
1110
 
1111
 
1112
- def mrcnn_bbox_loss_graph(target_bbox, target_class_ids, pred_bbox):
1113
  """Loss for Mask R-CNN bounding box refinement.
1114
 
1115
  target_bbox: [batch, num_rois, (dy, dx, log(dh), log(dw))]
@@ -1140,7 +1124,7 @@ def mrcnn_bbox_loss_graph(target_bbox, target_class_ids, pred_bbox):
1140
  return loss
1141
 
1142
 
1143
- def mrcnn_mask_loss_graph(target_masks, target_class_ids, pred_masks):
1144
  """Mask binary cross-entropy loss for the masks head.
1145
 
1146
  target_masks: [batch, num_rois, height, width].
@@ -1180,7 +1164,7 @@ def mrcnn_mask_loss_graph(target_masks, target_class_ids, pred_masks):
1180
 
1181
 
1182
  ############################################################
1183
- # Data Generator
1184
  ############################################################
1185
 
1186
  def load_image_gt(dataset, config, image_id, augment=False, augmentation=None,
@@ -1189,9 +1173,6 @@ def load_image_gt(dataset, config, image_id, augment=False, augmentation=None,
1189
 
1190
  augment: (deprecated. Use augmentation instead). If true, apply random
1191
  image augmentation. Currently, only horizontal flipping is offered.
1192
- augmentation: Optional. An imgaug (https://github.com/aleju/imgaug) augmentation.
1193
- For example, passing imgaug.augmenters.Fliplr(0.5) flips images
1194
- right/left 50% of the time.
1195
  use_mini_mask: If False, returns full-size masks that are the same height
1196
  and width as the original image. These can be big, for example
1197
  1024x1024x100 (for 100 instances). Mini masks are smaller, typically,
@@ -1227,8 +1208,6 @@ def load_image_gt(dataset, config, image_id, augment=False, augmentation=None,
1227
  image = np.fliplr(image)
1228
  mask = np.fliplr(mask)
1229
 
1230
- # Augmentation
1231
- # This requires the imgaug lib (https://github.com/aleju/imgaug)
1232
  if augmentation:
1233
  import imgaug
1234
 
@@ -1636,11 +1615,6 @@ def data_generator(dataset, config, shuffle=True, augment=False, augmentation=No
1636
  dataset: The Dataset object to pick data from
1637
  config: The model config object
1638
  shuffle: If True, shuffles the samples before every epoch
1639
- augment: (deprecated. Use augmentation instead). If true, apply random
1640
- image augmentation. Currently, only horizontal flipping is offered.
1641
- augmentation: Optional. An imgaug (https://github.com/aleju/imgaug) augmentation.
1642
- For example, passing imgaug.augmenters.Fliplr(0.5) flips images
1643
- right/left 50% of the time.
1644
  random_rois: If > 0 then generate proposals to be used to train the
1645
  network classifier and mask heads. Useful if training
1646
  the Mask RCNN part without the RPN.
@@ -1723,7 +1697,7 @@ def data_generator(dataset, config, shuffle=True, augment=False, augmentation=No
1723
  rpn_rois = generate_random_rois(
1724
  image.shape, random_rois, gt_class_ids, gt_boxes)
1725
  if detection_targets:
1726
- rois, mrcnn_class_ids, mrcnn_bbox, mrcnn_mask =\
1727
  build_detection_targets(
1728
  rpn_rois, gt_class_ids, gt_boxes, gt_masks, config)
1729
 
@@ -1750,12 +1724,12 @@ def data_generator(dataset, config, shuffle=True, augment=False, augmentation=No
1750
  if detection_targets:
1751
  batch_rois = np.zeros(
1752
  (batch_size,) + rois.shape, dtype=rois.dtype)
1753
- batch_mrcnn_class_ids = np.zeros(
1754
- (batch_size,) + mrcnn_class_ids.shape, dtype=mrcnn_class_ids.dtype)
1755
- batch_mrcnn_bbox = np.zeros(
1756
- (batch_size,) + mrcnn_bbox.shape, dtype=mrcnn_bbox.dtype)
1757
- batch_mrcnn_mask = np.zeros(
1758
- (batch_size,) + mrcnn_mask.shape, dtype=mrcnn_mask.dtype)
1759
 
1760
  # If more instances than fits in the array, sub-sample from them.
1761
  if gt_boxes.shape[0] > config.MAX_GT_INSTANCES:
@@ -1777,9 +1751,9 @@ def data_generator(dataset, config, shuffle=True, augment=False, augmentation=No
1777
  batch_rpn_rois[b] = rpn_rois
1778
  if detection_targets:
1779
  batch_rois[b] = rois
1780
- batch_mrcnn_class_ids[b] = mrcnn_class_ids
1781
- batch_mrcnn_bbox[b] = mrcnn_bbox
1782
- batch_mrcnn_mask[b] = mrcnn_mask
1783
  b += 1
1784
 
1785
  # Batch full?
@@ -1793,10 +1767,10 @@ def data_generator(dataset, config, shuffle=True, augment=False, augmentation=No
1793
  if detection_targets:
1794
  inputs.extend([batch_rois])
1795
  # Keras requires that output and targets have the same number of dimensions
1796
- batch_mrcnn_class_ids = np.expand_dims(
1797
- batch_mrcnn_class_ids, -1)
1798
  outputs.extend(
1799
- [batch_mrcnn_class_ids, batch_mrcnn_bbox, batch_mrcnn_mask])
1800
 
1801
  yield inputs, outputs
1802
 
@@ -1814,10 +1788,10 @@ def data_generator(dataset, config, shuffle=True, augment=False, augmentation=No
1814
 
1815
 
1816
  ############################################################
1817
- # MaskRCNN Class
1818
  ############################################################
1819
 
1820
- class MaskRCNN():
1821
  """Encapsulates the Mask RCNN model functionality.
1822
 
1823
  The actual Keras model is in the keras_model property.
@@ -1922,7 +1896,7 @@ class MaskRCNN():
1922
 
1923
  # Note that P6 is used in RPN, but not in the classifier heads.
1924
  rpn_feature_maps = [P2, P3, P4, P5, P6]
1925
- mrcnn_feature_maps = [P2, P3, P4, P5]
1926
 
1927
  # Anchors
1928
  if mode == "training":
@@ -1991,13 +1965,13 @@ class MaskRCNN():
1991
 
1992
  # Network Heads
1993
  # TODO: verify that this handles zero padded ROIs
1994
- mrcnn_class_logits, mrcnn_class, mrcnn_bbox =\
1995
- fpn_classifier_graph(rois, mrcnn_feature_maps, input_image_meta,
1996
  config.POOL_SIZE, config.NUM_CLASSES,
1997
  train_bn=config.TRAIN_BN,
1998
  fc_layers_size=config.FPN_CLASSIF_FC_LAYERS_SIZE)
1999
 
2000
- mrcnn_mask = build_fpn_mask_graph(rois, mrcnn_feature_maps,
2001
  input_image_meta,
2002
  config.MASK_POOL_SIZE,
2003
  config.NUM_CLASSES,
@@ -2011,12 +1985,12 @@ class MaskRCNN():
2011
  [input_rpn_match, rpn_class_logits])
2012
  rpn_bbox_loss = KL.Lambda(lambda x: rpn_bbox_loss_graph(config, *x), name="rpn_bbox_loss")(
2013
  [input_rpn_bbox, input_rpn_match, rpn_bbox])
2014
- class_loss = KL.Lambda(lambda x: mrcnn_class_loss_graph(*x), name="mrcnn_class_loss")(
2015
- [target_class_ids, mrcnn_class_logits, active_class_ids])
2016
- bbox_loss = KL.Lambda(lambda x: mrcnn_bbox_loss_graph(*x), name="mrcnn_bbox_loss")(
2017
- [target_bbox, target_class_ids, mrcnn_bbox])
2018
- mask_loss = KL.Lambda(lambda x: mrcnn_mask_loss_graph(*x), name="mrcnn_mask_loss")(
2019
- [target_mask, target_class_ids, mrcnn_mask])
2020
 
2021
  # Model
2022
  inputs = [input_image, input_image_meta,
@@ -2024,15 +1998,15 @@ class MaskRCNN():
2024
  if not config.USE_RPN_ROIS:
2025
  inputs.append(input_rois)
2026
  outputs = [rpn_class_logits, rpn_class, rpn_bbox,
2027
- mrcnn_class_logits, mrcnn_class, mrcnn_bbox, mrcnn_mask,
2028
  rpn_rois, output_rois,
2029
  rpn_class_loss, rpn_bbox_loss, class_loss, bbox_loss, mask_loss]
2030
- model = KM.Model(inputs, outputs, name='mask_rcnn')
2031
  else:
2032
  # Network Heads
2033
  # Proposal classifier and BBox regressor heads
2034
- mrcnn_class_logits, mrcnn_class, mrcnn_bbox =\
2035
- fpn_classifier_graph(rpn_rois, mrcnn_feature_maps, input_image_meta,
2036
  config.POOL_SIZE, config.NUM_CLASSES,
2037
  train_bn=config.TRAIN_BN,
2038
  fc_layers_size=config.FPN_CLASSIF_FC_LAYERS_SIZE)
@@ -2040,25 +2014,25 @@ class MaskRCNN():
2040
  # Detections
2041
  # output is [batch, num_detections, (y1, x1, y2, x2, class_id, score)] in
2042
  # normalized coordinates
2043
- detections = DetectionLayer(config, name="mrcnn_detection")(
2044
- [rpn_rois, mrcnn_class, mrcnn_bbox, input_image_meta])
2045
 
2046
  # Create masks for detections
2047
  detection_boxes = KL.Lambda(lambda x: x[..., :4])(detections)
2048
- mrcnn_mask = build_fpn_mask_graph(detection_boxes, mrcnn_feature_maps,
2049
  input_image_meta,
2050
  config.MASK_POOL_SIZE,
2051
  config.NUM_CLASSES,
2052
  train_bn=config.TRAIN_BN)
2053
 
2054
  model = KM.Model([input_image, input_image_meta, input_anchors],
2055
- [detections, mrcnn_class, mrcnn_bbox,
2056
- mrcnn_mask, rpn_rois, rpn_class, rpn_bbox],
2057
- name='mask_rcnn')
2058
 
2059
  # Add multi-GPU support.
2060
  if config.GPU_COUNT > 1:
2061
- from mrcnn.parallel_model import ParallelModel
2062
  model = ParallelModel(model, config.GPU_COUNT)
2063
 
2064
  return model
@@ -2083,7 +2057,7 @@ class MaskRCNN():
2083
  dir_name = os.path.join(self.model_dir, dir_names[-1])
2084
  # Find the last checkpoint
2085
  checkpoints = next(os.walk(dir_name))[2]
2086
- checkpoints = filter(lambda f: f.startswith("mask_rcnn"), checkpoints)
2087
  checkpoints = sorted(checkpoints)
2088
  if not checkpoints:
2089
  import errno
@@ -2141,9 +2115,6 @@ class MaskRCNN():
2141
  Returns path to weights file.
2142
  """
2143
  from keras.utils.data_utils import get_file
2144
- TF_WEIGHTS_PATH_NO_TOP = 'https://github.com/fchollet/deep-learning-models/'\
2145
- 'releases/download/v0.2/'\
2146
- 'resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5'
2147
  weights_path = get_file('resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5',
2148
  TF_WEIGHTS_PATH_NO_TOP,
2149
  cache_subdir='models',
@@ -2164,7 +2135,7 @@ class MaskRCNN():
2164
  self.keras_model._per_input_losses = {}
2165
  loss_names = [
2166
  "rpn_class_loss", "rpn_bbox_loss",
2167
- "mrcnn_class_loss", "mrcnn_bbox_loss", "mrcnn_mask_loss"]
2168
  for name in loss_names:
2169
  layer = self.keras_model.get_layer(name)
2170
  if layer.output in self.keras_model.losses:
@@ -2250,10 +2221,7 @@ class MaskRCNN():
2250
  # If we have a model path with date and epochs use them
2251
  if model_path:
2252
  # Continue from where we left off. Get epoch and date from the file name
2253
- # A sample model path might look like:
2254
- # \path\to\logs\coco20171029T2315\mask_rcnn_coco_0001.h5 (Windows)
2255
- # /path/to/logs/coco20171029T2315/mask_rcnn_coco_0001.h5 (Linux)
2256
- regex = r".*[/\\][\w-]+(\d{4})(\d{2})(\d{2})T(\d{2})(\d{2})[/\\]mask\_rcnn\_[\w-]+(\d{4})\.h5"
2257
  m = re.match(regex, model_path)
2258
  if m:
2259
  now = datetime.datetime(int(m.group(1)), int(m.group(2)), int(m.group(3)),
@@ -2268,7 +2236,7 @@ class MaskRCNN():
2268
  self.config.NAME.lower(), now))
2269
 
2270
  # Path to save after each epoch. Include placeholders that get filled by Keras.
2271
- self.checkpoint_path = os.path.join(self.log_dir, "mask_rcnn_{}_*epoch*.h5".format(
2272
  self.config.NAME.lower()))
2273
  self.checkpoint_path = self.checkpoint_path.replace(
2274
  "*epoch*", "{epoch:04d}")
@@ -2290,12 +2258,6 @@ class MaskRCNN():
2290
  3+: Train Resnet stage 3 and up
2291
  4+: Train Resnet stage 4 and up
2292
  5+: Train Resnet stage 5 and up
2293
- augmentation: Optional. An imgaug (https://github.com/aleju/imgaug)
2294
- augmentation. For example, passing imgaug.augmenters.Fliplr(0.5)
2295
- flips images right/left 50% of the time. You can pass complex
2296
- augmentations as well. This augmentation applies 50% of the
2297
- time, and when it does it flips images right/left half the time
2298
- and adds a Gaussian blur with a random sigma in range 0 to 5.
2299
 
2300
  augmentation = imgaug.augmenters.Sometimes(0.5, [
2301
  imgaug.augmenters.Fliplr(0.5),
@@ -2312,11 +2274,11 @@ class MaskRCNN():
2312
  # Pre-defined layer regular expressions
2313
  layer_regex = {
2314
  # all layers but the backbone
2315
- "heads": r"(mrcnn\_.*)|(rpn\_.*)|(fpn\_.*)",
2316
  # From a specific Resnet stage and up
2317
- "3+": r"(res3.*)|(bn3.*)|(res4.*)|(bn4.*)|(res5.*)|(bn5.*)|(mrcnn\_.*)|(rpn\_.*)|(fpn\_.*)",
2318
- "4+": r"(res4.*)|(bn4.*)|(res5.*)|(bn5.*)|(mrcnn\_.*)|(rpn\_.*)|(fpn\_.*)",
2319
- "5+": r"(res5.*)|(bn5.*)|(mrcnn\_.*)|(rpn\_.*)|(fpn\_.*)",
2320
  # All layers
2321
  "all": ".*",
2322
  }
@@ -2352,10 +2314,6 @@ class MaskRCNN():
2352
  log("Checkpoint Path: {}".format(self.checkpoint_path))
2353
  self.set_trainable(layers)
2354
  self.compile(learning_rate, self.config.LEARNING_MOMENTUM)
2355
-
2356
- # Work-around for Windows: Keras fails on Windows when using
2357
- # multiprocessing workers. See discussion here:
2358
- # https://github.com/matterport/Mask_RCNN/issues/13#issuecomment-353124009
2359
  if os.name is 'nt':
2360
  workers = 0
2361
  else:
@@ -2414,14 +2372,14 @@ class MaskRCNN():
2414
  windows = np.stack(windows)
2415
  return molded_images, image_metas, windows
2416
 
2417
- def unmold_detections(self, detections, mrcnn_mask, original_image_shape,
2418
  image_shape, window):
2419
  """Reformats the detections of one image from the format of the neural
2420
  network output to a format suitable for use in the rest of the
2421
  application.
2422
 
2423
  detections: [N, (y1, x1, y2, x2, class_id, score)] in normalized coordinates
2424
- mrcnn_mask: [N, height, width, num_classes]
2425
  original_image_shape: [H, W, C] Original image shape before resizing
2426
  image_shape: [H, W, C] Shape of the image after resizing and padding
2427
  window: [y1, x1, y2, x2] Pixel coordinates of box in the image where the real
@@ -2442,7 +2400,7 @@ class MaskRCNN():
2442
  boxes = detections[:N, :4]
2443
  class_ids = detections[:N, 4].astype(np.int32)
2444
  scores = detections[:N, 5]
2445
- masks = mrcnn_mask[np.arange(N), :, :, class_ids]
2446
 
2447
  # Translate normalized coordinates in the resized image to pixel
2448
  # coordinates in the original image before resizing
@@ -2520,13 +2478,13 @@ class MaskRCNN():
2520
  log("image_metas", image_metas)
2521
  log("anchors", anchors)
2522
  # Run object detection
2523
- detections, _, _, mrcnn_mask, _, _, _ =\
2524
  self.keras_model.predict([molded_images, image_metas, anchors], verbose=0)
2525
  # Process detections
2526
  results = []
2527
  for i, image in enumerate(images):
2528
  final_rois, final_class_ids, final_scores, final_masks =\
2529
- self.unmold_detections(detections[i], mrcnn_mask[i],
2530
  image.shape, molded_images[i].shape,
2531
  windows[i])
2532
  results.append({
@@ -2577,14 +2535,14 @@ class MaskRCNN():
2577
  log("image_metas", image_metas)
2578
  log("anchors", anchors)
2579
  # Run object detection
2580
- detections, _, _, mrcnn_mask, _, _, _ =\
2581
  self.keras_model.predict([molded_images, image_metas, anchors], verbose=0)
2582
  # Process detections
2583
  results = []
2584
  for i, image in enumerate(molded_images):
2585
  window = [0, 0, image.shape[0], image.shape[1]]
2586
  final_rois, final_class_ids, final_scores, final_masks =\
2587
- self.unmold_detections(detections[i], mrcnn_mask[i],
2588
  image.shape, molded_images[i].shape,
2589
  window)
2590
  results.append({
@@ -2865,4 +2823,4 @@ def denorm_boxes_graph(boxes, shape):
2865
  h, w = tf.split(tf.cast(shape, tf.float32), 2)
2866
  scale = tf.concat([h, w, h, w], axis=-1) - tf.constant(1.0)
2867
  shift = tf.constant([0., 0., 1., 1.])
2868
- return tf.cast(tf.round(tf.multiply(boxes, scale) + shift), tf.int32)
 
 
 
 
 
 
 
 
 
 
1
  import os
2
  import random
3
  import datetime
 
80
  # Resnet Graph
81
  ############################################################
82
 
 
 
83
 
84
  def identity_block(input_tensor, kernel_size, filters, stage, block,
85
  use_bias=True, train_bn=True):
 
398
  level_boxes = tf.stop_gradient(level_boxes)
399
  box_indices = tf.stop_gradient(box_indices)
400
 
401
+
 
 
 
 
 
402
  # Here we use the simplified approach of a single value per bin,
403
  # which is how it's done in tf.crop_and_resize()
404
  # Result: [batch * num_boxes, pool_height, pool_width, channels]
 
778
 
779
  def call(self, inputs):
780
  rois = inputs[0]
781
+ bboxcnn_class = inputs[1]
782
+ bboxcnn_bbox = inputs[2]
783
  image_meta = inputs[3]
784
 
785
  # Get windows of images in normalized coordinates. Windows are the area
 
792
 
793
  # Run detection refinement graph on each item in the batch
794
  detections_batch = utils.batch_slice(
795
+ [rois, bboxcnn_class, bboxcnn_bbox, window],
796
  lambda x, y, w, z: refine_detections_graph(x, y, w, z, self.config),
797
  self.config.IMAGES_PER_GPU)
798
 
 
909
  name="roi_align_classifier")([rois, image_meta] + feature_maps)
910
  # Two 1024 FC layers (implemented with Conv2D for consistency)
911
  x = KL.TimeDistributed(KL.Conv2D(fc_layers_size, (pool_size, pool_size), padding="valid"),
912
+ name="bboxcnn_class_conv1")(x)
913
+ x = KL.TimeDistributed(BatchNorm(), name='bboxcnn_class_bn1')(x, training=train_bn)
914
  x = KL.Activation('relu')(x)
915
  x = KL.TimeDistributed(KL.Conv2D(fc_layers_size, (1, 1)),
916
+ name="bboxcnn_class_conv2")(x)
917
+ x = KL.TimeDistributed(BatchNorm(), name='bboxcnn_class_bn2')(x, training=train_bn)
918
  x = KL.Activation('relu')(x)
919
 
920
  shared = KL.Lambda(lambda x: K.squeeze(K.squeeze(x, 3), 2),
921
  name="pool_squeeze")(x)
922
 
923
  # Classifier head
924
+ bboxcnn_class_logits = KL.TimeDistributed(KL.Dense(num_classes),
925
+ name='bboxcnn_class_logits')(shared)
926
+ bboxcnn_probs = KL.TimeDistributed(KL.Activation("softmax"),
927
+ name="bboxcnn_class")(bboxcnn_class_logits)
928
 
929
  # BBox head
930
  # [batch, num_rois, NUM_CLASSES * (dy, dx, log(dh), log(dw))]
931
  x = KL.TimeDistributed(KL.Dense(num_classes * 4, activation='linear'),
932
+ name='bboxcnn_bbox_fc')(shared)
933
  # Reshape to [batch, num_rois, NUM_CLASSES, (dy, dx, log(dh), log(dw))]
934
  s = K.int_shape(x)
935
+ bboxcnn_bbox = KL.Reshape((s[1], num_classes, 4), name="bboxcnn_bbox")(x)
936
 
937
+ return bboxcnn_class_logits, bboxcnn_probs, bboxcnn_bbox
938
 
939
 
940
  def build_fpn_mask_graph(rois, feature_maps, image_meta,
 
959
 
960
  # Conv layers
961
  x = KL.TimeDistributed(KL.Conv2D(256, (3, 3), padding="same"),
962
+ name="bboxcnn_mask_conv1")(x)
963
  x = KL.TimeDistributed(BatchNorm(),
964
+ name='bboxcnn_mask_bn1')(x, training=train_bn)
965
  x = KL.Activation('relu')(x)
966
 
967
  x = KL.TimeDistributed(KL.Conv2D(256, (3, 3), padding="same"),
968
+ name="bboxcnn_mask_conv2")(x)
969
  x = KL.TimeDistributed(BatchNorm(),
970
+ name='bboxcnn_mask_bn2')(x, training=train_bn)
971
  x = KL.Activation('relu')(x)
972
 
973
  x = KL.TimeDistributed(KL.Conv2D(256, (3, 3), padding="same"),
974
+ name="bboxcnn_mask_conv3")(x)
975
  x = KL.TimeDistributed(BatchNorm(),
976
+ name='bboxcnn_mask_bn3')(x, training=train_bn)
977
  x = KL.Activation('relu')(x)
978
 
979
  x = KL.TimeDistributed(KL.Conv2D(256, (3, 3), padding="same"),
980
+ name="bboxcnn_mask_conv4")(x)
981
  x = KL.TimeDistributed(BatchNorm(),
982
+ name='bboxcnn_mask_bn4')(x, training=train_bn)
983
  x = KL.Activation('relu')(x)
984
 
985
  x = KL.TimeDistributed(KL.Conv2DTranspose(256, (2, 2), strides=2, activation="relu"),
986
+ name="bboxcnn_mask_deconv")(x)
987
  x = KL.TimeDistributed(KL.Conv2D(num_classes, (1, 1), strides=1, activation="sigmoid"),
988
+ name="bboxcnn_mask")(x)
989
  return x
990
 
991
 
 
1057
  return loss
1058
 
1059
 
1060
+ def bboxcnn_class_loss_graph(target_class_ids, pred_class_logits,
1061
  active_class_ids):
1062
  """Loss for the classifier head of Mask RCNN.
1063
 
 
1093
  return loss
1094
 
1095
 
1096
+ def bboxcnn_bbox_loss_graph(target_bbox, target_class_ids, pred_bbox):
1097
  """Loss for Mask R-CNN bounding box refinement.
1098
 
1099
  target_bbox: [batch, num_rois, (dy, dx, log(dh), log(dw))]
 
1124
  return loss
1125
 
1126
 
1127
+ def bboxcnn_mask_loss_graph(target_masks, target_class_ids, pred_masks):
1128
  """Mask binary cross-entropy loss for the masks head.
1129
 
1130
  target_masks: [batch, num_rois, height, width].
 
1164
 
1165
 
1166
  ############################################################
1167
+ # Data Gen
1168
  ############################################################
1169
 
1170
  def load_image_gt(dataset, config, image_id, augment=False, augmentation=None,
 
1173
 
1174
  augment: (deprecated. Use augmentation instead). If true, apply random
1175
  image augmentation. Currently, only horizontal flipping is offered.
 
 
 
1176
  use_mini_mask: If False, returns full-size masks that are the same height
1177
  and width as the original image. These can be big, for example
1178
  1024x1024x100 (for 100 instances). Mini masks are smaller, typically,
 
1208
  image = np.fliplr(image)
1209
  mask = np.fliplr(mask)
1210
 
 
 
1211
  if augmentation:
1212
  import imgaug
1213
 
 
1615
  dataset: The Dataset object to pick data from
1616
  config: The model config object
1617
  shuffle: If True, shuffles the samples before every epoch
 
 
 
 
 
1618
  random_rois: If > 0 then generate proposals to be used to train the
1619
  network classifier and mask heads. Useful if training
1620
  the Mask RCNN part without the RPN.
 
1697
  rpn_rois = generate_random_rois(
1698
  image.shape, random_rois, gt_class_ids, gt_boxes)
1699
  if detection_targets:
1700
+ rois, bboxcnn_class_ids, bboxcnn_bbox, bboxcnn_mask =\
1701
  build_detection_targets(
1702
  rpn_rois, gt_class_ids, gt_boxes, gt_masks, config)
1703
 
 
1724
  if detection_targets:
1725
  batch_rois = np.zeros(
1726
  (batch_size,) + rois.shape, dtype=rois.dtype)
1727
+ batch_bboxcnn_class_ids = np.zeros(
1728
+ (batch_size,) + bboxcnn_class_ids.shape, dtype=bboxcnn_class_ids.dtype)
1729
+ batch_bboxcnn_bbox = np.zeros(
1730
+ (batch_size,) + bboxcnn_bbox.shape, dtype=bboxcnn_bbox.dtype)
1731
+ batch_bboxcnn_mask = np.zeros(
1732
+ (batch_size,) + bboxcnn_mask.shape, dtype=bboxcnn_mask.dtype)
1733
 
1734
  # If more instances than fits in the array, sub-sample from them.
1735
  if gt_boxes.shape[0] > config.MAX_GT_INSTANCES:
 
1751
  batch_rpn_rois[b] = rpn_rois
1752
  if detection_targets:
1753
  batch_rois[b] = rois
1754
+ batch_bboxcnn_class_ids[b] = bboxcnn_class_ids
1755
+ batch_bboxcnn_bbox[b] = bboxcnn_bbox
1756
+ batch_bboxcnn_mask[b] = bboxcnn_mask
1757
  b += 1
1758
 
1759
  # Batch full?
 
1767
  if detection_targets:
1768
  inputs.extend([batch_rois])
1769
  # Keras requires that output and targets have the same number of dimensions
1770
+ batch_bboxcnn_class_ids = np.expand_dims(
1771
+ batch_bboxcnn_class_ids, -1)
1772
  outputs.extend(
1773
+ [batch_bboxcnn_class_ids, batch_bboxcnn_bbox, batch_bboxcnn_mask])
1774
 
1775
  yield inputs, outputs
1776
 
 
1788
 
1789
 
1790
  ############################################################
1791
+ # BBoxCNN Class
1792
  ############################################################
1793
 
1794
+ class BBoxCNN():
1795
  """Encapsulates the Mask RCNN model functionality.
1796
 
1797
  The actual Keras model is in the keras_model property.
 
1896
 
1897
  # Note that P6 is used in RPN, but not in the classifier heads.
1898
  rpn_feature_maps = [P2, P3, P4, P5, P6]
1899
+ bboxcnn_feature_maps = [P2, P3, P4, P5]
1900
 
1901
  # Anchors
1902
  if mode == "training":
 
1965
 
1966
  # Network Heads
1967
  # TODO: verify that this handles zero padded ROIs
1968
+ bboxcnn_class_logits, bboxcnn_class, bboxcnn_bbox =\
1969
+ fpn_classifier_graph(rois, bboxcnn_feature_maps, input_image_meta,
1970
  config.POOL_SIZE, config.NUM_CLASSES,
1971
  train_bn=config.TRAIN_BN,
1972
  fc_layers_size=config.FPN_CLASSIF_FC_LAYERS_SIZE)
1973
 
1974
+ bboxcnn_mask = build_fpn_mask_graph(rois, bboxcnn_feature_maps,
1975
  input_image_meta,
1976
  config.MASK_POOL_SIZE,
1977
  config.NUM_CLASSES,
 
1985
  [input_rpn_match, rpn_class_logits])
1986
  rpn_bbox_loss = KL.Lambda(lambda x: rpn_bbox_loss_graph(config, *x), name="rpn_bbox_loss")(
1987
  [input_rpn_bbox, input_rpn_match, rpn_bbox])
1988
+ class_loss = KL.Lambda(lambda x: bboxcnn_class_loss_graph(*x), name="bboxcnn_class_loss")(
1989
+ [target_class_ids, bboxcnn_class_logits, active_class_ids])
1990
+ bbox_loss = KL.Lambda(lambda x: bboxcnn_bbox_loss_graph(*x), name="bboxcnn_bbox_loss")(
1991
+ [target_bbox, target_class_ids, bboxcnn_bbox])
1992
+ mask_loss = KL.Lambda(lambda x: bboxcnn_mask_loss_graph(*x), name="bboxcnn_mask_loss")(
1993
+ [target_mask, target_class_ids, bboxcnn_mask])
1994
 
1995
  # Model
1996
  inputs = [input_image, input_image_meta,
 
1998
  if not config.USE_RPN_ROIS:
1999
  inputs.append(input_rois)
2000
  outputs = [rpn_class_logits, rpn_class, rpn_bbox,
2001
+ bboxcnn_class_logits, bboxcnn_class, bboxcnn_bbox, bboxcnn_mask,
2002
  rpn_rois, output_rois,
2003
  rpn_class_loss, rpn_bbox_loss, class_loss, bbox_loss, mask_loss]
2004
+ model = KM.Model(inputs, outputs, name='bboxcnn')
2005
  else:
2006
  # Network Heads
2007
  # Proposal classifier and BBox regressor heads
2008
+ bboxcnn_class_logits, bboxcnn_class, bboxcnn_bbox =\
2009
+ fpn_classifier_graph(rpn_rois, bboxcnn_feature_maps, input_image_meta,
2010
  config.POOL_SIZE, config.NUM_CLASSES,
2011
  train_bn=config.TRAIN_BN,
2012
  fc_layers_size=config.FPN_CLASSIF_FC_LAYERS_SIZE)
 
2014
  # Detections
2015
  # output is [batch, num_detections, (y1, x1, y2, x2, class_id, score)] in
2016
  # normalized coordinates
2017
+ detections = DetectionLayer(config, name="bboxcnn_detection")(
2018
+ [rpn_rois, bboxcnn_class, bboxcnn_bbox, input_image_meta])
2019
 
2020
  # Create masks for detections
2021
  detection_boxes = KL.Lambda(lambda x: x[..., :4])(detections)
2022
+ bboxcnn_mask = build_fpn_mask_graph(detection_boxes, bboxcnn_feature_maps,
2023
  input_image_meta,
2024
  config.MASK_POOL_SIZE,
2025
  config.NUM_CLASSES,
2026
  train_bn=config.TRAIN_BN)
2027
 
2028
  model = KM.Model([input_image, input_image_meta, input_anchors],
2029
+ [detections, bboxcnn_class, bboxcnn_bbox,
2030
+ bboxcnn_mask, rpn_rois, rpn_class, rpn_bbox],
2031
+ name='bboxcnn')
2032
 
2033
  # Add multi-GPU support.
2034
  if config.GPU_COUNT > 1:
2035
+ from bboxcnn.parallel_model import ParallelModel
2036
  model = ParallelModel(model, config.GPU_COUNT)
2037
 
2038
  return model
 
2057
  dir_name = os.path.join(self.model_dir, dir_names[-1])
2058
  # Find the last checkpoint
2059
  checkpoints = next(os.walk(dir_name))[2]
2060
+ checkpoints = filter(lambda f: f.startswith("bboxcnn"), checkpoints)
2061
  checkpoints = sorted(checkpoints)
2062
  if not checkpoints:
2063
  import errno
 
2115
  Returns path to weights file.
2116
  """
2117
  from keras.utils.data_utils import get_file
 
 
 
2118
  weights_path = get_file('resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5',
2119
  TF_WEIGHTS_PATH_NO_TOP,
2120
  cache_subdir='models',
 
2135
  self.keras_model._per_input_losses = {}
2136
  loss_names = [
2137
  "rpn_class_loss", "rpn_bbox_loss",
2138
+ "bboxcnn_class_loss", "bboxcnn_bbox_loss", "bboxcnn_mask_loss"]
2139
  for name in loss_names:
2140
  layer = self.keras_model.get_layer(name)
2141
  if layer.output in self.keras_model.losses:
 
2221
  # If we have a model path with date and epochs use them
2222
  if model_path:
2223
  # Continue from where we left off. Get epoch and date from the file name
2224
+ regex = r".*[/\\][\w-]+(\d{4})(\d{2})(\d{2})T(\d{2})(\d{2})[/\\]bboxcnn\_[\w-]+(\d{4})\.h5"
 
 
 
2225
  m = re.match(regex, model_path)
2226
  if m:
2227
  now = datetime.datetime(int(m.group(1)), int(m.group(2)), int(m.group(3)),
 
2236
  self.config.NAME.lower(), now))
2237
 
2238
  # Path to save after each epoch. Include placeholders that get filled by Keras.
2239
+ self.checkpoint_path = os.path.join(self.log_dir, "bboxcnn_{}_*epoch*.h5".format(
2240
  self.config.NAME.lower()))
2241
  self.checkpoint_path = self.checkpoint_path.replace(
2242
  "*epoch*", "{epoch:04d}")
 
2258
  3+: Train Resnet stage 3 and up
2259
  4+: Train Resnet stage 4 and up
2260
  5+: Train Resnet stage 5 and up
 
 
 
 
 
 
2261
 
2262
  augmentation = imgaug.augmenters.Sometimes(0.5, [
2263
  imgaug.augmenters.Fliplr(0.5),
 
2274
  # Pre-defined layer regular expressions
2275
  layer_regex = {
2276
  # all layers but the backbone
2277
+ "heads": r"(bboxcnn\_.*)|(rpn\_.*)|(fpn\_.*)",
2278
  # From a specific Resnet stage and up
2279
+ "3+": r"(res3.*)|(bn3.*)|(res4.*)|(bn4.*)|(res5.*)|(bn5.*)|(bboxcnn\_.*)|(rpn\_.*)|(fpn\_.*)",
2280
+ "4+": r"(res4.*)|(bn4.*)|(res5.*)|(bn5.*)|(bboxcnn\_.*)|(rpn\_.*)|(fpn\_.*)",
2281
+ "5+": r"(res5.*)|(bn5.*)|(bboxcnn\_.*)|(rpn\_.*)|(fpn\_.*)",
2282
  # All layers
2283
  "all": ".*",
2284
  }
 
2314
  log("Checkpoint Path: {}".format(self.checkpoint_path))
2315
  self.set_trainable(layers)
2316
  self.compile(learning_rate, self.config.LEARNING_MOMENTUM)
 
 
 
 
2317
  if os.name is 'nt':
2318
  workers = 0
2319
  else:
 
2372
  windows = np.stack(windows)
2373
  return molded_images, image_metas, windows
2374
 
2375
+ def unmold_detections(self, detections, bboxcnn_mask, original_image_shape,
2376
  image_shape, window):
2377
  """Reformats the detections of one image from the format of the neural
2378
  network output to a format suitable for use in the rest of the
2379
  application.
2380
 
2381
  detections: [N, (y1, x1, y2, x2, class_id, score)] in normalized coordinates
2382
+ bboxcnn_mask: [N, height, width, num_classes]
2383
  original_image_shape: [H, W, C] Original image shape before resizing
2384
  image_shape: [H, W, C] Shape of the image after resizing and padding
2385
  window: [y1, x1, y2, x2] Pixel coordinates of box in the image where the real
 
2400
  boxes = detections[:N, :4]
2401
  class_ids = detections[:N, 4].astype(np.int32)
2402
  scores = detections[:N, 5]
2403
+ masks = bboxcnn_mask[np.arange(N), :, :, class_ids]
2404
 
2405
  # Translate normalized coordinates in the resized image to pixel
2406
  # coordinates in the original image before resizing
 
2478
  log("image_metas", image_metas)
2479
  log("anchors", anchors)
2480
  # Run object detection
2481
+ detections, _, _, bboxcnn_mask, _, _, _ =\
2482
  self.keras_model.predict([molded_images, image_metas, anchors], verbose=0)
2483
  # Process detections
2484
  results = []
2485
  for i, image in enumerate(images):
2486
  final_rois, final_class_ids, final_scores, final_masks =\
2487
+ self.unmold_detections(detections[i], bboxcnn_mask[i],
2488
  image.shape, molded_images[i].shape,
2489
  windows[i])
2490
  results.append({
 
2535
  log("image_metas", image_metas)
2536
  log("anchors", anchors)
2537
  # Run object detection
2538
+ detections, _, _, bboxcnn_mask, _, _, _ =\
2539
  self.keras_model.predict([molded_images, image_metas, anchors], verbose=0)
2540
  # Process detections
2541
  results = []
2542
  for i, image in enumerate(molded_images):
2543
  window = [0, 0, image.shape[0], image.shape[1]]
2544
  final_rois, final_class_ids, final_scores, final_masks =\
2545
+ self.unmold_detections(detections[i], bboxcnn_mask[i],
2546
  image.shape, molded_images[i].shape,
2547
  window)
2548
  results.append({
 
2823
  h, w = tf.split(tf.cast(shape, tf.float32), 2)
2824
  scale = tf.concat([h, w, h, w], axis=-1) - tf.constant(1.0)
2825
  shift = tf.constant([0., 0., 1., 1.])
2826
+ return tf.cast(tf.round(tf.multiply(boxes, scale) + shift), tf.int32)