Update model.py
Browse files
model.py
CHANGED
|
@@ -1,12 +1,3 @@
|
|
| 1 |
-
"""
|
| 2 |
-
Mask R-CNN
|
| 3 |
-
The main Mask R-CNN model implementation.
|
| 4 |
-
|
| 5 |
-
Copyright (c) 2017 Matterport, Inc.
|
| 6 |
-
Licensed under the MIT License (see LICENSE for details)
|
| 7 |
-
Written by Waleed Abdulla
|
| 8 |
-
"""
|
| 9 |
-
|
| 10 |
import os
|
| 11 |
import random
|
| 12 |
import datetime
|
|
@@ -89,8 +80,6 @@ def compute_backbone_shapes(config, image_shape):
|
|
| 89 |
# Resnet Graph
|
| 90 |
############################################################
|
| 91 |
|
| 92 |
-
# Code adopted from:
|
| 93 |
-
# https://github.com/fchollet/deep-learning-models/blob/master/resnet50.py
|
| 94 |
|
| 95 |
def identity_block(input_tensor, kernel_size, filters, stage, block,
|
| 96 |
use_bias=True, train_bn=True):
|
|
@@ -409,12 +398,7 @@ class PyramidROIAlign(KE.Layer):
|
|
| 409 |
level_boxes = tf.stop_gradient(level_boxes)
|
| 410 |
box_indices = tf.stop_gradient(box_indices)
|
| 411 |
|
| 412 |
-
|
| 413 |
-
# From Mask R-CNN paper: "We sample four regular locations, so
|
| 414 |
-
# that we can evaluate either max or average pooling. In fact,
|
| 415 |
-
# interpolating only a single value at each bin center (without
|
| 416 |
-
# pooling) is nearly as effective."
|
| 417 |
-
#
|
| 418 |
# Here we use the simplified approach of a single value per bin,
|
| 419 |
# which is how it's done in tf.crop_and_resize()
|
| 420 |
# Result: [batch * num_boxes, pool_height, pool_width, channels]
|
|
@@ -794,8 +778,8 @@ class DetectionLayer(KE.Layer):
|
|
| 794 |
|
| 795 |
def call(self, inputs):
|
| 796 |
rois = inputs[0]
|
| 797 |
-
|
| 798 |
-
|
| 799 |
image_meta = inputs[3]
|
| 800 |
|
| 801 |
# Get windows of images in normalized coordinates. Windows are the area
|
|
@@ -808,7 +792,7 @@ class DetectionLayer(KE.Layer):
|
|
| 808 |
|
| 809 |
# Run detection refinement graph on each item in the batch
|
| 810 |
detections_batch = utils.batch_slice(
|
| 811 |
-
[rois,
|
| 812 |
lambda x, y, w, z: refine_detections_graph(x, y, w, z, self.config),
|
| 813 |
self.config.IMAGES_PER_GPU)
|
| 814 |
|
|
@@ -925,32 +909,32 @@ def fpn_classifier_graph(rois, feature_maps, image_meta,
|
|
| 925 |
name="roi_align_classifier")([rois, image_meta] + feature_maps)
|
| 926 |
# Two 1024 FC layers (implemented with Conv2D for consistency)
|
| 927 |
x = KL.TimeDistributed(KL.Conv2D(fc_layers_size, (pool_size, pool_size), padding="valid"),
|
| 928 |
-
name="
|
| 929 |
-
x = KL.TimeDistributed(BatchNorm(), name='
|
| 930 |
x = KL.Activation('relu')(x)
|
| 931 |
x = KL.TimeDistributed(KL.Conv2D(fc_layers_size, (1, 1)),
|
| 932 |
-
name="
|
| 933 |
-
x = KL.TimeDistributed(BatchNorm(), name='
|
| 934 |
x = KL.Activation('relu')(x)
|
| 935 |
|
| 936 |
shared = KL.Lambda(lambda x: K.squeeze(K.squeeze(x, 3), 2),
|
| 937 |
name="pool_squeeze")(x)
|
| 938 |
|
| 939 |
# Classifier head
|
| 940 |
-
|
| 941 |
-
name='
|
| 942 |
-
|
| 943 |
-
name="
|
| 944 |
|
| 945 |
# BBox head
|
| 946 |
# [batch, num_rois, NUM_CLASSES * (dy, dx, log(dh), log(dw))]
|
| 947 |
x = KL.TimeDistributed(KL.Dense(num_classes * 4, activation='linear'),
|
| 948 |
-
name='
|
| 949 |
# Reshape to [batch, num_rois, NUM_CLASSES, (dy, dx, log(dh), log(dw))]
|
| 950 |
s = K.int_shape(x)
|
| 951 |
-
|
| 952 |
|
| 953 |
-
return
|
| 954 |
|
| 955 |
|
| 956 |
def build_fpn_mask_graph(rois, feature_maps, image_meta,
|
|
@@ -975,33 +959,33 @@ def build_fpn_mask_graph(rois, feature_maps, image_meta,
|
|
| 975 |
|
| 976 |
# Conv layers
|
| 977 |
x = KL.TimeDistributed(KL.Conv2D(256, (3, 3), padding="same"),
|
| 978 |
-
name="
|
| 979 |
x = KL.TimeDistributed(BatchNorm(),
|
| 980 |
-
name='
|
| 981 |
x = KL.Activation('relu')(x)
|
| 982 |
|
| 983 |
x = KL.TimeDistributed(KL.Conv2D(256, (3, 3), padding="same"),
|
| 984 |
-
name="
|
| 985 |
x = KL.TimeDistributed(BatchNorm(),
|
| 986 |
-
name='
|
| 987 |
x = KL.Activation('relu')(x)
|
| 988 |
|
| 989 |
x = KL.TimeDistributed(KL.Conv2D(256, (3, 3), padding="same"),
|
| 990 |
-
name="
|
| 991 |
x = KL.TimeDistributed(BatchNorm(),
|
| 992 |
-
name='
|
| 993 |
x = KL.Activation('relu')(x)
|
| 994 |
|
| 995 |
x = KL.TimeDistributed(KL.Conv2D(256, (3, 3), padding="same"),
|
| 996 |
-
name="
|
| 997 |
x = KL.TimeDistributed(BatchNorm(),
|
| 998 |
-
name='
|
| 999 |
x = KL.Activation('relu')(x)
|
| 1000 |
|
| 1001 |
x = KL.TimeDistributed(KL.Conv2DTranspose(256, (2, 2), strides=2, activation="relu"),
|
| 1002 |
-
name="
|
| 1003 |
x = KL.TimeDistributed(KL.Conv2D(num_classes, (1, 1), strides=1, activation="sigmoid"),
|
| 1004 |
-
name="
|
| 1005 |
return x
|
| 1006 |
|
| 1007 |
|
|
@@ -1073,7 +1057,7 @@ def rpn_bbox_loss_graph(config, target_bbox, rpn_match, rpn_bbox):
|
|
| 1073 |
return loss
|
| 1074 |
|
| 1075 |
|
| 1076 |
-
def
|
| 1077 |
active_class_ids):
|
| 1078 |
"""Loss for the classifier head of Mask RCNN.
|
| 1079 |
|
|
@@ -1109,7 +1093,7 @@ def mrcnn_class_loss_graph(target_class_ids, pred_class_logits,
|
|
| 1109 |
return loss
|
| 1110 |
|
| 1111 |
|
| 1112 |
-
def
|
| 1113 |
"""Loss for Mask R-CNN bounding box refinement.
|
| 1114 |
|
| 1115 |
target_bbox: [batch, num_rois, (dy, dx, log(dh), log(dw))]
|
|
@@ -1140,7 +1124,7 @@ def mrcnn_bbox_loss_graph(target_bbox, target_class_ids, pred_bbox):
|
|
| 1140 |
return loss
|
| 1141 |
|
| 1142 |
|
| 1143 |
-
def
|
| 1144 |
"""Mask binary cross-entropy loss for the masks head.
|
| 1145 |
|
| 1146 |
target_masks: [batch, num_rois, height, width].
|
|
@@ -1180,7 +1164,7 @@ def mrcnn_mask_loss_graph(target_masks, target_class_ids, pred_masks):
|
|
| 1180 |
|
| 1181 |
|
| 1182 |
############################################################
|
| 1183 |
-
# Data
|
| 1184 |
############################################################
|
| 1185 |
|
| 1186 |
def load_image_gt(dataset, config, image_id, augment=False, augmentation=None,
|
|
@@ -1189,9 +1173,6 @@ def load_image_gt(dataset, config, image_id, augment=False, augmentation=None,
|
|
| 1189 |
|
| 1190 |
augment: (deprecated. Use augmentation instead). If true, apply random
|
| 1191 |
image augmentation. Currently, only horizontal flipping is offered.
|
| 1192 |
-
augmentation: Optional. An imgaug (https://github.com/aleju/imgaug) augmentation.
|
| 1193 |
-
For example, passing imgaug.augmenters.Fliplr(0.5) flips images
|
| 1194 |
-
right/left 50% of the time.
|
| 1195 |
use_mini_mask: If False, returns full-size masks that are the same height
|
| 1196 |
and width as the original image. These can be big, for example
|
| 1197 |
1024x1024x100 (for 100 instances). Mini masks are smaller, typically,
|
|
@@ -1227,8 +1208,6 @@ def load_image_gt(dataset, config, image_id, augment=False, augmentation=None,
|
|
| 1227 |
image = np.fliplr(image)
|
| 1228 |
mask = np.fliplr(mask)
|
| 1229 |
|
| 1230 |
-
# Augmentation
|
| 1231 |
-
# This requires the imgaug lib (https://github.com/aleju/imgaug)
|
| 1232 |
if augmentation:
|
| 1233 |
import imgaug
|
| 1234 |
|
|
@@ -1636,11 +1615,6 @@ def data_generator(dataset, config, shuffle=True, augment=False, augmentation=No
|
|
| 1636 |
dataset: The Dataset object to pick data from
|
| 1637 |
config: The model config object
|
| 1638 |
shuffle: If True, shuffles the samples before every epoch
|
| 1639 |
-
augment: (deprecated. Use augmentation instead). If true, apply random
|
| 1640 |
-
image augmentation. Currently, only horizontal flipping is offered.
|
| 1641 |
-
augmentation: Optional. An imgaug (https://github.com/aleju/imgaug) augmentation.
|
| 1642 |
-
For example, passing imgaug.augmenters.Fliplr(0.5) flips images
|
| 1643 |
-
right/left 50% of the time.
|
| 1644 |
random_rois: If > 0 then generate proposals to be used to train the
|
| 1645 |
network classifier and mask heads. Useful if training
|
| 1646 |
the Mask RCNN part without the RPN.
|
|
@@ -1723,7 +1697,7 @@ def data_generator(dataset, config, shuffle=True, augment=False, augmentation=No
|
|
| 1723 |
rpn_rois = generate_random_rois(
|
| 1724 |
image.shape, random_rois, gt_class_ids, gt_boxes)
|
| 1725 |
if detection_targets:
|
| 1726 |
-
rois,
|
| 1727 |
build_detection_targets(
|
| 1728 |
rpn_rois, gt_class_ids, gt_boxes, gt_masks, config)
|
| 1729 |
|
|
@@ -1750,12 +1724,12 @@ def data_generator(dataset, config, shuffle=True, augment=False, augmentation=No
|
|
| 1750 |
if detection_targets:
|
| 1751 |
batch_rois = np.zeros(
|
| 1752 |
(batch_size,) + rois.shape, dtype=rois.dtype)
|
| 1753 |
-
|
| 1754 |
-
(batch_size,) +
|
| 1755 |
-
|
| 1756 |
-
(batch_size,) +
|
| 1757 |
-
|
| 1758 |
-
(batch_size,) +
|
| 1759 |
|
| 1760 |
# If there are more instances than fit in the array, sub-sample from them.
|
| 1761 |
if gt_boxes.shape[0] > config.MAX_GT_INSTANCES:
|
|
@@ -1777,9 +1751,9 @@ def data_generator(dataset, config, shuffle=True, augment=False, augmentation=No
|
|
| 1777 |
batch_rpn_rois[b] = rpn_rois
|
| 1778 |
if detection_targets:
|
| 1779 |
batch_rois[b] = rois
|
| 1780 |
-
|
| 1781 |
-
|
| 1782 |
-
|
| 1783 |
b += 1
|
| 1784 |
|
| 1785 |
# Batch full?
|
|
@@ -1793,10 +1767,10 @@ def data_generator(dataset, config, shuffle=True, augment=False, augmentation=No
|
|
| 1793 |
if detection_targets:
|
| 1794 |
inputs.extend([batch_rois])
|
| 1795 |
# Keras requires that output and targets have the same number of dimensions
|
| 1796 |
-
|
| 1797 |
-
|
| 1798 |
outputs.extend(
|
| 1799 |
-
[
|
| 1800 |
|
| 1801 |
yield inputs, outputs
|
| 1802 |
|
|
@@ -1814,10 +1788,10 @@ def data_generator(dataset, config, shuffle=True, augment=False, augmentation=No
|
|
| 1814 |
|
| 1815 |
|
| 1816 |
############################################################
|
| 1817 |
-
#
|
| 1818 |
############################################################
|
| 1819 |
|
| 1820 |
-
class
|
| 1821 |
"""Encapsulates the Mask RCNN model functionality.
|
| 1822 |
|
| 1823 |
The actual Keras model is in the keras_model property.
|
|
@@ -1922,7 +1896,7 @@ class MaskRCNN():
|
|
| 1922 |
|
| 1923 |
# Note that P6 is used in RPN, but not in the classifier heads.
|
| 1924 |
rpn_feature_maps = [P2, P3, P4, P5, P6]
|
| 1925 |
-
|
| 1926 |
|
| 1927 |
# Anchors
|
| 1928 |
if mode == "training":
|
|
@@ -1991,13 +1965,13 @@ class MaskRCNN():
|
|
| 1991 |
|
| 1992 |
# Network Heads
|
| 1993 |
# TODO: verify that this handles zero padded ROIs
|
| 1994 |
-
|
| 1995 |
-
fpn_classifier_graph(rois,
|
| 1996 |
config.POOL_SIZE, config.NUM_CLASSES,
|
| 1997 |
train_bn=config.TRAIN_BN,
|
| 1998 |
fc_layers_size=config.FPN_CLASSIF_FC_LAYERS_SIZE)
|
| 1999 |
|
| 2000 |
-
|
| 2001 |
input_image_meta,
|
| 2002 |
config.MASK_POOL_SIZE,
|
| 2003 |
config.NUM_CLASSES,
|
|
@@ -2011,12 +1985,12 @@ class MaskRCNN():
|
|
| 2011 |
[input_rpn_match, rpn_class_logits])
|
| 2012 |
rpn_bbox_loss = KL.Lambda(lambda x: rpn_bbox_loss_graph(config, *x), name="rpn_bbox_loss")(
|
| 2013 |
[input_rpn_bbox, input_rpn_match, rpn_bbox])
|
| 2014 |
-
class_loss = KL.Lambda(lambda x:
|
| 2015 |
-
[target_class_ids,
|
| 2016 |
-
bbox_loss = KL.Lambda(lambda x:
|
| 2017 |
-
[target_bbox, target_class_ids,
|
| 2018 |
-
mask_loss = KL.Lambda(lambda x:
|
| 2019 |
-
[target_mask, target_class_ids,
|
| 2020 |
|
| 2021 |
# Model
|
| 2022 |
inputs = [input_image, input_image_meta,
|
|
@@ -2024,15 +1998,15 @@ class MaskRCNN():
|
|
| 2024 |
if not config.USE_RPN_ROIS:
|
| 2025 |
inputs.append(input_rois)
|
| 2026 |
outputs = [rpn_class_logits, rpn_class, rpn_bbox,
|
| 2027 |
-
|
| 2028 |
rpn_rois, output_rois,
|
| 2029 |
rpn_class_loss, rpn_bbox_loss, class_loss, bbox_loss, mask_loss]
|
| 2030 |
-
model = KM.Model(inputs, outputs, name='
|
| 2031 |
else:
|
| 2032 |
# Network Heads
|
| 2033 |
# Proposal classifier and BBox regressor heads
|
| 2034 |
-
|
| 2035 |
-
fpn_classifier_graph(rpn_rois,
|
| 2036 |
config.POOL_SIZE, config.NUM_CLASSES,
|
| 2037 |
train_bn=config.TRAIN_BN,
|
| 2038 |
fc_layers_size=config.FPN_CLASSIF_FC_LAYERS_SIZE)
|
|
@@ -2040,25 +2014,25 @@ class MaskRCNN():
|
|
| 2040 |
# Detections
|
| 2041 |
# output is [batch, num_detections, (y1, x1, y2, x2, class_id, score)] in
|
| 2042 |
# normalized coordinates
|
| 2043 |
-
detections = DetectionLayer(config, name="
|
| 2044 |
-
[rpn_rois,
|
| 2045 |
|
| 2046 |
# Create masks for detections
|
| 2047 |
detection_boxes = KL.Lambda(lambda x: x[..., :4])(detections)
|
| 2048 |
-
|
| 2049 |
input_image_meta,
|
| 2050 |
config.MASK_POOL_SIZE,
|
| 2051 |
config.NUM_CLASSES,
|
| 2052 |
train_bn=config.TRAIN_BN)
|
| 2053 |
|
| 2054 |
model = KM.Model([input_image, input_image_meta, input_anchors],
|
| 2055 |
-
[detections,
|
| 2056 |
-
|
| 2057 |
-
name='
|
| 2058 |
|
| 2059 |
# Add multi-GPU support.
|
| 2060 |
if config.GPU_COUNT > 1:
|
| 2061 |
-
from
|
| 2062 |
model = ParallelModel(model, config.GPU_COUNT)
|
| 2063 |
|
| 2064 |
return model
|
|
@@ -2083,7 +2057,7 @@ class MaskRCNN():
|
|
| 2083 |
dir_name = os.path.join(self.model_dir, dir_names[-1])
|
| 2084 |
# Find the last checkpoint
|
| 2085 |
checkpoints = next(os.walk(dir_name))[2]
|
| 2086 |
-
checkpoints = filter(lambda f: f.startswith("
|
| 2087 |
checkpoints = sorted(checkpoints)
|
| 2088 |
if not checkpoints:
|
| 2089 |
import errno
|
|
@@ -2141,9 +2115,6 @@ class MaskRCNN():
|
|
| 2141 |
Returns path to weights file.
|
| 2142 |
"""
|
| 2143 |
from keras.utils.data_utils import get_file
|
| 2144 |
-
TF_WEIGHTS_PATH_NO_TOP = 'https://github.com/fchollet/deep-learning-models/'\
|
| 2145 |
-
'releases/download/v0.2/'\
|
| 2146 |
-
'resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5'
|
| 2147 |
weights_path = get_file('resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5',
|
| 2148 |
TF_WEIGHTS_PATH_NO_TOP,
|
| 2149 |
cache_subdir='models',
|
|
@@ -2164,7 +2135,7 @@ class MaskRCNN():
|
|
| 2164 |
self.keras_model._per_input_losses = {}
|
| 2165 |
loss_names = [
|
| 2166 |
"rpn_class_loss", "rpn_bbox_loss",
|
| 2167 |
-
"
|
| 2168 |
for name in loss_names:
|
| 2169 |
layer = self.keras_model.get_layer(name)
|
| 2170 |
if layer.output in self.keras_model.losses:
|
|
@@ -2250,10 +2221,7 @@ class MaskRCNN():
|
|
| 2250 |
# If we have a model path with date and epochs use them
|
| 2251 |
if model_path:
|
| 2252 |
# Continue from where we left off. Get epoch and date from the file name
|
| 2253 |
-
|
| 2254 |
-
# \path\to\logs\coco20171029T2315\mask_rcnn_coco_0001.h5 (Windows)
|
| 2255 |
-
# /path/to/logs/coco20171029T2315/mask_rcnn_coco_0001.h5 (Linux)
|
| 2256 |
-
regex = r".*[/\\][\w-]+(\d{4})(\d{2})(\d{2})T(\d{2})(\d{2})[/\\]mask\_rcnn\_[\w-]+(\d{4})\.h5"
|
| 2257 |
m = re.match(regex, model_path)
|
| 2258 |
if m:
|
| 2259 |
now = datetime.datetime(int(m.group(1)), int(m.group(2)), int(m.group(3)),
|
|
@@ -2268,7 +2236,7 @@ class MaskRCNN():
|
|
| 2268 |
self.config.NAME.lower(), now))
|
| 2269 |
|
| 2270 |
# Path to save after each epoch. Include placeholders that get filled by Keras.
|
| 2271 |
-
self.checkpoint_path = os.path.join(self.log_dir, "
|
| 2272 |
self.config.NAME.lower()))
|
| 2273 |
self.checkpoint_path = self.checkpoint_path.replace(
|
| 2274 |
"*epoch*", "{epoch:04d}")
|
|
@@ -2290,12 +2258,6 @@ class MaskRCNN():
|
|
| 2290 |
3+: Train Resnet stage 3 and up
|
| 2291 |
4+: Train Resnet stage 4 and up
|
| 2292 |
5+: Train Resnet stage 5 and up
|
| 2293 |
-
augmentation: Optional. An imgaug (https://github.com/aleju/imgaug)
|
| 2294 |
-
augmentation. For example, passing imgaug.augmenters.Fliplr(0.5)
|
| 2295 |
-
flips images right/left 50% of the time. You can pass complex
|
| 2296 |
-
augmentations as well. This augmentation applies 50% of the
|
| 2297 |
-
time, and when it does it flips images right/left half the time
|
| 2298 |
-
and adds a Gaussian blur with a random sigma in range 0 to 5.
|
| 2299 |
|
| 2300 |
augmentation = imgaug.augmenters.Sometimes(0.5, [
|
| 2301 |
imgaug.augmenters.Fliplr(0.5),
|
|
@@ -2312,11 +2274,11 @@ class MaskRCNN():
|
|
| 2312 |
# Pre-defined layer regular expressions
|
| 2313 |
layer_regex = {
|
| 2314 |
# all layers but the backbone
|
| 2315 |
-
"heads": r"(
|
| 2316 |
# From a specific Resnet stage and up
|
| 2317 |
-
"3+": r"(res3.*)|(bn3.*)|(res4.*)|(bn4.*)|(res5.*)|(bn5.*)|(
|
| 2318 |
-
"4+": r"(res4.*)|(bn4.*)|(res5.*)|(bn5.*)|(
|
| 2319 |
-
"5+": r"(res5.*)|(bn5.*)|(
|
| 2320 |
# All layers
|
| 2321 |
"all": ".*",
|
| 2322 |
}
|
|
@@ -2352,10 +2314,6 @@ class MaskRCNN():
|
|
| 2352 |
log("Checkpoint Path: {}".format(self.checkpoint_path))
|
| 2353 |
self.set_trainable(layers)
|
| 2354 |
self.compile(learning_rate, self.config.LEARNING_MOMENTUM)
|
| 2355 |
-
|
| 2356 |
-
# Work-around for Windows: Keras fails on Windows when using
|
| 2357 |
-
# multiprocessing workers. See discussion here:
|
| 2358 |
-
# https://github.com/matterport/Mask_RCNN/issues/13#issuecomment-353124009
|
| 2359 |
if os.name is 'nt':
|
| 2360 |
workers = 0
|
| 2361 |
else:
|
|
@@ -2414,14 +2372,14 @@ class MaskRCNN():
|
|
| 2414 |
windows = np.stack(windows)
|
| 2415 |
return molded_images, image_metas, windows
|
| 2416 |
|
| 2417 |
-
def unmold_detections(self, detections,
|
| 2418 |
image_shape, window):
|
| 2419 |
"""Reformats the detections of one image from the format of the neural
|
| 2420 |
network output to a format suitable for use in the rest of the
|
| 2421 |
application.
|
| 2422 |
|
| 2423 |
detections: [N, (y1, x1, y2, x2, class_id, score)] in normalized coordinates
|
| 2424 |
-
|
| 2425 |
original_image_shape: [H, W, C] Original image shape before resizing
|
| 2426 |
image_shape: [H, W, C] Shape of the image after resizing and padding
|
| 2427 |
window: [y1, x1, y2, x2] Pixel coordinates of box in the image where the real
|
|
@@ -2442,7 +2400,7 @@ class MaskRCNN():
|
|
| 2442 |
boxes = detections[:N, :4]
|
| 2443 |
class_ids = detections[:N, 4].astype(np.int32)
|
| 2444 |
scores = detections[:N, 5]
|
| 2445 |
-
masks =
|
| 2446 |
|
| 2447 |
# Translate normalized coordinates in the resized image to pixel
|
| 2448 |
# coordinates in the original image before resizing
|
|
@@ -2520,13 +2478,13 @@ class MaskRCNN():
|
|
| 2520 |
log("image_metas", image_metas)
|
| 2521 |
log("anchors", anchors)
|
| 2522 |
# Run object detection
|
| 2523 |
-
detections, _, _,
|
| 2524 |
self.keras_model.predict([molded_images, image_metas, anchors], verbose=0)
|
| 2525 |
# Process detections
|
| 2526 |
results = []
|
| 2527 |
for i, image in enumerate(images):
|
| 2528 |
final_rois, final_class_ids, final_scores, final_masks =\
|
| 2529 |
-
self.unmold_detections(detections[i],
|
| 2530 |
image.shape, molded_images[i].shape,
|
| 2531 |
windows[i])
|
| 2532 |
results.append({
|
|
@@ -2577,14 +2535,14 @@ class MaskRCNN():
|
|
| 2577 |
log("image_metas", image_metas)
|
| 2578 |
log("anchors", anchors)
|
| 2579 |
# Run object detection
|
| 2580 |
-
detections, _, _,
|
| 2581 |
self.keras_model.predict([molded_images, image_metas, anchors], verbose=0)
|
| 2582 |
# Process detections
|
| 2583 |
results = []
|
| 2584 |
for i, image in enumerate(molded_images):
|
| 2585 |
window = [0, 0, image.shape[0], image.shape[1]]
|
| 2586 |
final_rois, final_class_ids, final_scores, final_masks =\
|
| 2587 |
-
self.unmold_detections(detections[i],
|
| 2588 |
image.shape, molded_images[i].shape,
|
| 2589 |
window)
|
| 2590 |
results.append({
|
|
@@ -2865,4 +2823,4 @@ def denorm_boxes_graph(boxes, shape):
|
|
| 2865 |
h, w = tf.split(tf.cast(shape, tf.float32), 2)
|
| 2866 |
scale = tf.concat([h, w, h, w], axis=-1) - tf.constant(1.0)
|
| 2867 |
shift = tf.constant([0., 0., 1., 1.])
|
| 2868 |
-
return tf.cast(tf.round(tf.multiply(boxes, scale) + shift), tf.int32)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import os
|
| 2 |
import random
|
| 3 |
import datetime
|
|
|
|
| 80 |
# Resnet Graph
|
| 81 |
############################################################
|
| 82 |
|
|
|
|
|
|
|
| 83 |
|
| 84 |
def identity_block(input_tensor, kernel_size, filters, stage, block,
|
| 85 |
use_bias=True, train_bn=True):
|
|
|
|
| 398 |
level_boxes = tf.stop_gradient(level_boxes)
|
| 399 |
box_indices = tf.stop_gradient(box_indices)
|
| 400 |
|
| 401 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 402 |
# Here we use the simplified approach of a single value per bin,
|
| 403 |
# which is how it's done in tf.crop_and_resize()
|
| 404 |
# Result: [batch * num_boxes, pool_height, pool_width, channels]
|
|
|
|
| 778 |
|
| 779 |
def call(self, inputs):
|
| 780 |
rois = inputs[0]
|
| 781 |
+
bboxcnn_class = inputs[1]
|
| 782 |
+
bboxcnn_bbox = inputs[2]
|
| 783 |
image_meta = inputs[3]
|
| 784 |
|
| 785 |
# Get windows of images in normalized coordinates. Windows are the area
|
|
|
|
| 792 |
|
| 793 |
# Run detection refinement graph on each item in the batch
|
| 794 |
detections_batch = utils.batch_slice(
|
| 795 |
+
[rois, bboxcnn_class, bboxcnn_bbox, window],
|
| 796 |
lambda x, y, w, z: refine_detections_graph(x, y, w, z, self.config),
|
| 797 |
self.config.IMAGES_PER_GPU)
|
| 798 |
|
|
|
|
| 909 |
name="roi_align_classifier")([rois, image_meta] + feature_maps)
|
| 910 |
# Two 1024 FC layers (implemented with Conv2D for consistency)
|
| 911 |
x = KL.TimeDistributed(KL.Conv2D(fc_layers_size, (pool_size, pool_size), padding="valid"),
|
| 912 |
+
name="bboxcnn_class_conv1")(x)
|
| 913 |
+
x = KL.TimeDistributed(BatchNorm(), name='bboxcnn_class_bn1')(x, training=train_bn)
|
| 914 |
x = KL.Activation('relu')(x)
|
| 915 |
x = KL.TimeDistributed(KL.Conv2D(fc_layers_size, (1, 1)),
|
| 916 |
+
name="bboxcnn_class_conv2")(x)
|
| 917 |
+
x = KL.TimeDistributed(BatchNorm(), name='bboxcnn_class_bn2')(x, training=train_bn)
|
| 918 |
x = KL.Activation('relu')(x)
|
| 919 |
|
| 920 |
shared = KL.Lambda(lambda x: K.squeeze(K.squeeze(x, 3), 2),
|
| 921 |
name="pool_squeeze")(x)
|
| 922 |
|
| 923 |
# Classifier head
|
| 924 |
+
bboxcnn_class_logits = KL.TimeDistributed(KL.Dense(num_classes),
|
| 925 |
+
name='bboxcnn_class_logits')(shared)
|
| 926 |
+
bboxcnn_probs = KL.TimeDistributed(KL.Activation("softmax"),
|
| 927 |
+
name="bboxcnn_class")(bboxcnn_class_logits)
|
| 928 |
|
| 929 |
# BBox head
|
| 930 |
# [batch, num_rois, NUM_CLASSES * (dy, dx, log(dh), log(dw))]
|
| 931 |
x = KL.TimeDistributed(KL.Dense(num_classes * 4, activation='linear'),
|
| 932 |
+
name='bboxcnn_bbox_fc')(shared)
|
| 933 |
# Reshape to [batch, num_rois, NUM_CLASSES, (dy, dx, log(dh), log(dw))]
|
| 934 |
s = K.int_shape(x)
|
| 935 |
+
bboxcnn_bbox = KL.Reshape((s[1], num_classes, 4), name="bboxcnn_bbox")(x)
|
| 936 |
|
| 937 |
+
return bboxcnn_class_logits, bboxcnn_probs, bboxcnn_bbox
|
| 938 |
|
| 939 |
|
| 940 |
def build_fpn_mask_graph(rois, feature_maps, image_meta,
|
|
|
|
| 959 |
|
| 960 |
# Conv layers
|
| 961 |
x = KL.TimeDistributed(KL.Conv2D(256, (3, 3), padding="same"),
|
| 962 |
+
name="bboxcnn_mask_conv1")(x)
|
| 963 |
x = KL.TimeDistributed(BatchNorm(),
|
| 964 |
+
name='bboxcnn_mask_bn1')(x, training=train_bn)
|
| 965 |
x = KL.Activation('relu')(x)
|
| 966 |
|
| 967 |
x = KL.TimeDistributed(KL.Conv2D(256, (3, 3), padding="same"),
|
| 968 |
+
name="bboxcnn_mask_conv2")(x)
|
| 969 |
x = KL.TimeDistributed(BatchNorm(),
|
| 970 |
+
name='bboxcnn_mask_bn2')(x, training=train_bn)
|
| 971 |
x = KL.Activation('relu')(x)
|
| 972 |
|
| 973 |
x = KL.TimeDistributed(KL.Conv2D(256, (3, 3), padding="same"),
|
| 974 |
+
name="bboxcnn_mask_conv3")(x)
|
| 975 |
x = KL.TimeDistributed(BatchNorm(),
|
| 976 |
+
name='bboxcnn_mask_bn3')(x, training=train_bn)
|
| 977 |
x = KL.Activation('relu')(x)
|
| 978 |
|
| 979 |
x = KL.TimeDistributed(KL.Conv2D(256, (3, 3), padding="same"),
|
| 980 |
+
name="bboxcnn_mask_conv4")(x)
|
| 981 |
x = KL.TimeDistributed(BatchNorm(),
|
| 982 |
+
name='bboxcnn_mask_bn4')(x, training=train_bn)
|
| 983 |
x = KL.Activation('relu')(x)
|
| 984 |
|
| 985 |
x = KL.TimeDistributed(KL.Conv2DTranspose(256, (2, 2), strides=2, activation="relu"),
|
| 986 |
+
name="bboxcnn_mask_deconv")(x)
|
| 987 |
x = KL.TimeDistributed(KL.Conv2D(num_classes, (1, 1), strides=1, activation="sigmoid"),
|
| 988 |
+
name="bboxcnn_mask")(x)
|
| 989 |
return x
|
| 990 |
|
| 991 |
|
|
|
|
| 1057 |
return loss
|
| 1058 |
|
| 1059 |
|
| 1060 |
+
def bboxcnn_class_loss_graph(target_class_ids, pred_class_logits,
|
| 1061 |
active_class_ids):
|
| 1062 |
"""Loss for the classifier head of Mask RCNN.
|
| 1063 |
|
|
|
|
| 1093 |
return loss
|
| 1094 |
|
| 1095 |
|
| 1096 |
+
def bboxcnn_bbox_loss_graph(target_bbox, target_class_ids, pred_bbox):
|
| 1097 |
"""Loss for Mask R-CNN bounding box refinement.
|
| 1098 |
|
| 1099 |
target_bbox: [batch, num_rois, (dy, dx, log(dh), log(dw))]
|
|
|
|
| 1124 |
return loss
|
| 1125 |
|
| 1126 |
|
| 1127 |
+
def bboxcnn_mask_loss_graph(target_masks, target_class_ids, pred_masks):
|
| 1128 |
"""Mask binary cross-entropy loss for the masks head.
|
| 1129 |
|
| 1130 |
target_masks: [batch, num_rois, height, width].
|
|
|
|
| 1164 |
|
| 1165 |
|
| 1166 |
############################################################
|
| 1167 |
+
# Data Gen
|
| 1168 |
############################################################
|
| 1169 |
|
| 1170 |
def load_image_gt(dataset, config, image_id, augment=False, augmentation=None,
|
|
|
|
| 1173 |
|
| 1174 |
augment: (deprecated. Use augmentation instead). If true, apply random
|
| 1175 |
image augmentation. Currently, only horizontal flipping is offered.
|
|
|
|
|
|
|
|
|
|
| 1176 |
use_mini_mask: If False, returns full-size masks that are the same height
|
| 1177 |
and width as the original image. These can be big, for example
|
| 1178 |
1024x1024x100 (for 100 instances). Mini masks are smaller, typically,
|
|
|
|
| 1208 |
image = np.fliplr(image)
|
| 1209 |
mask = np.fliplr(mask)
|
| 1210 |
|
|
|
|
|
|
|
| 1211 |
if augmentation:
|
| 1212 |
import imgaug
|
| 1213 |
|
|
|
|
| 1615 |
dataset: The Dataset object to pick data from
|
| 1616 |
config: The model config object
|
| 1617 |
shuffle: If True, shuffles the samples before every epoch
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1618 |
random_rois: If > 0 then generate proposals to be used to train the
|
| 1619 |
network classifier and mask heads. Useful if training
|
| 1620 |
the Mask RCNN part without the RPN.
|
|
|
|
| 1697 |
rpn_rois = generate_random_rois(
|
| 1698 |
image.shape, random_rois, gt_class_ids, gt_boxes)
|
| 1699 |
if detection_targets:
|
| 1700 |
+
rois, bboxcnn_class_ids, bboxcnn_bbox, bboxcnn_mask =\
|
| 1701 |
build_detection_targets(
|
| 1702 |
rpn_rois, gt_class_ids, gt_boxes, gt_masks, config)
|
| 1703 |
|
|
|
|
| 1724 |
if detection_targets:
|
| 1725 |
batch_rois = np.zeros(
|
| 1726 |
(batch_size,) + rois.shape, dtype=rois.dtype)
|
| 1727 |
+
batch_bboxcnn_class_ids = np.zeros(
|
| 1728 |
+
(batch_size,) + bboxcnn_class_ids.shape, dtype=bboxcnn_class_ids.dtype)
|
| 1729 |
+
batch_bboxcnn_bbox = np.zeros(
|
| 1730 |
+
(batch_size,) + bboxcnn_bbox.shape, dtype=bboxcnn_bbox.dtype)
|
| 1731 |
+
batch_bboxcnn_mask = np.zeros(
|
| 1732 |
+
(batch_size,) + bboxcnn_mask.shape, dtype=bboxcnn_mask.dtype)
|
| 1733 |
|
| 1734 |
# If there are more instances than fit in the array, sub-sample from them.
|
| 1735 |
if gt_boxes.shape[0] > config.MAX_GT_INSTANCES:
|
|
|
|
| 1751 |
batch_rpn_rois[b] = rpn_rois
|
| 1752 |
if detection_targets:
|
| 1753 |
batch_rois[b] = rois
|
| 1754 |
+
batch_bboxcnn_class_ids[b] = bboxcnn_class_ids
|
| 1755 |
+
batch_bboxcnn_bbox[b] = bboxcnn_bbox
|
| 1756 |
+
batch_bboxcnn_mask[b] = bboxcnn_mask
|
| 1757 |
b += 1
|
| 1758 |
|
| 1759 |
# Batch full?
|
|
|
|
| 1767 |
if detection_targets:
|
| 1768 |
inputs.extend([batch_rois])
|
| 1769 |
# Keras requires that output and targets have the same number of dimensions
|
| 1770 |
+
batch_bboxcnn_class_ids = np.expand_dims(
|
| 1771 |
+
batch_bboxcnn_class_ids, -1)
|
| 1772 |
outputs.extend(
|
| 1773 |
+
[batch_bboxcnn_class_ids, batch_bboxcnn_bbox, batch_bboxcnn_mask])
|
| 1774 |
|
| 1775 |
yield inputs, outputs
|
| 1776 |
|
|
|
|
| 1788 |
|
| 1789 |
|
| 1790 |
############################################################
|
| 1791 |
+
# BBoxCNN Class
|
| 1792 |
############################################################
|
| 1793 |
|
| 1794 |
+
class BBoxCNN():
|
| 1795 |
"""Encapsulates the Mask RCNN model functionality.
|
| 1796 |
|
| 1797 |
The actual Keras model is in the keras_model property.
|
|
|
|
| 1896 |
|
| 1897 |
# Note that P6 is used in RPN, but not in the classifier heads.
|
| 1898 |
rpn_feature_maps = [P2, P3, P4, P5, P6]
|
| 1899 |
+
bboxcnn_feature_maps = [P2, P3, P4, P5]
|
| 1900 |
|
| 1901 |
# Anchors
|
| 1902 |
if mode == "training":
|
|
|
|
| 1965 |
|
| 1966 |
# Network Heads
|
| 1967 |
# TODO: verify that this handles zero padded ROIs
|
| 1968 |
+
bboxcnn_class_logits, bboxcnn_class, bboxcnn_bbox =\
|
| 1969 |
+
fpn_classifier_graph(rois, bboxcnn_feature_maps, input_image_meta,
|
| 1970 |
config.POOL_SIZE, config.NUM_CLASSES,
|
| 1971 |
train_bn=config.TRAIN_BN,
|
| 1972 |
fc_layers_size=config.FPN_CLASSIF_FC_LAYERS_SIZE)
|
| 1973 |
|
| 1974 |
+
bboxcnn_mask = build_fpn_mask_graph(rois, bboxcnn_feature_maps,
|
| 1975 |
input_image_meta,
|
| 1976 |
config.MASK_POOL_SIZE,
|
| 1977 |
config.NUM_CLASSES,
|
|
|
|
| 1985 |
[input_rpn_match, rpn_class_logits])
|
| 1986 |
rpn_bbox_loss = KL.Lambda(lambda x: rpn_bbox_loss_graph(config, *x), name="rpn_bbox_loss")(
|
| 1987 |
[input_rpn_bbox, input_rpn_match, rpn_bbox])
|
| 1988 |
+
class_loss = KL.Lambda(lambda x: bboxcnn_class_loss_graph(*x), name="bboxcnn_class_loss")(
|
| 1989 |
+
[target_class_ids, bboxcnn_class_logits, active_class_ids])
|
| 1990 |
+
bbox_loss = KL.Lambda(lambda x: bboxcnn_bbox_loss_graph(*x), name="bboxcnn_bbox_loss")(
|
| 1991 |
+
[target_bbox, target_class_ids, bboxcnn_bbox])
|
| 1992 |
+
mask_loss = KL.Lambda(lambda x: bboxcnn_mask_loss_graph(*x), name="bboxcnn_mask_loss")(
|
| 1993 |
+
[target_mask, target_class_ids, bboxcnn_mask])
|
| 1994 |
|
| 1995 |
# Model
|
| 1996 |
inputs = [input_image, input_image_meta,
|
|
|
|
| 1998 |
if not config.USE_RPN_ROIS:
|
| 1999 |
inputs.append(input_rois)
|
| 2000 |
outputs = [rpn_class_logits, rpn_class, rpn_bbox,
|
| 2001 |
+
bboxcnn_class_logits, bboxcnn_class, bboxcnn_bbox, bboxcnn_mask,
|
| 2002 |
rpn_rois, output_rois,
|
| 2003 |
rpn_class_loss, rpn_bbox_loss, class_loss, bbox_loss, mask_loss]
|
| 2004 |
+
model = KM.Model(inputs, outputs, name='bboxcnn')
|
| 2005 |
else:
|
| 2006 |
# Network Heads
|
| 2007 |
# Proposal classifier and BBox regressor heads
|
| 2008 |
+
bboxcnn_class_logits, bboxcnn_class, bboxcnn_bbox =\
|
| 2009 |
+
fpn_classifier_graph(rpn_rois, bboxcnn_feature_maps, input_image_meta,
|
| 2010 |
config.POOL_SIZE, config.NUM_CLASSES,
|
| 2011 |
train_bn=config.TRAIN_BN,
|
| 2012 |
fc_layers_size=config.FPN_CLASSIF_FC_LAYERS_SIZE)
|
|
|
|
| 2014 |
# Detections
|
| 2015 |
# output is [batch, num_detections, (y1, x1, y2, x2, class_id, score)] in
|
| 2016 |
# normalized coordinates
|
| 2017 |
+
detections = DetectionLayer(config, name="bboxcnn_detection")(
|
| 2018 |
+
[rpn_rois, bboxcnn_class, bboxcnn_bbox, input_image_meta])
|
| 2019 |
|
| 2020 |
# Create masks for detections
|
| 2021 |
detection_boxes = KL.Lambda(lambda x: x[..., :4])(detections)
|
| 2022 |
+
bboxcnn_mask = build_fpn_mask_graph(detection_boxes, bboxcnn_feature_maps,
|
| 2023 |
input_image_meta,
|
| 2024 |
config.MASK_POOL_SIZE,
|
| 2025 |
config.NUM_CLASSES,
|
| 2026 |
train_bn=config.TRAIN_BN)
|
| 2027 |
|
| 2028 |
model = KM.Model([input_image, input_image_meta, input_anchors],
|
| 2029 |
+
[detections, bboxcnn_class, bboxcnn_bbox,
|
| 2030 |
+
bboxcnn_mask, rpn_rois, rpn_class, rpn_bbox],
|
| 2031 |
+
name='bboxcnn')
|
| 2032 |
|
| 2033 |
# Add multi-GPU support.
|
| 2034 |
if config.GPU_COUNT > 1:
|
| 2035 |
+
from bboxcnn.parallel_model import ParallelModel
|
| 2036 |
model = ParallelModel(model, config.GPU_COUNT)
|
| 2037 |
|
| 2038 |
return model
|
|
|
|
| 2057 |
dir_name = os.path.join(self.model_dir, dir_names[-1])
|
| 2058 |
# Find the last checkpoint
|
| 2059 |
checkpoints = next(os.walk(dir_name))[2]
|
| 2060 |
+
checkpoints = filter(lambda f: f.startswith("bboxcnn"), checkpoints)
|
| 2061 |
checkpoints = sorted(checkpoints)
|
| 2062 |
if not checkpoints:
|
| 2063 |
import errno
|
|
|
|
| 2115 |
Returns path to weights file.
|
| 2116 |
"""
|
| 2117 |
from keras.utils.data_utils import get_file
|
|
|
|
|
|
|
|
|
|
| 2118 |
weights_path = get_file('resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5',
|
| 2119 |
TF_WEIGHTS_PATH_NO_TOP,
|
| 2120 |
cache_subdir='models',
|
|
|
|
| 2135 |
self.keras_model._per_input_losses = {}
|
| 2136 |
loss_names = [
|
| 2137 |
"rpn_class_loss", "rpn_bbox_loss",
|
| 2138 |
+
"bboxcnn_class_loss", "bboxcnn_bbox_loss", "bboxcnn_mask_loss"]
|
| 2139 |
for name in loss_names:
|
| 2140 |
layer = self.keras_model.get_layer(name)
|
| 2141 |
if layer.output in self.keras_model.losses:
|
|
|
|
| 2221 |
# If we have a model path with date and epochs use them
|
| 2222 |
if model_path:
|
| 2223 |
# Continue from where we left off. Get epoch and date from the file name
|
| 2224 |
+
regex = r".*[/\\][\w-]+(\d{4})(\d{2})(\d{2})T(\d{2})(\d{2})[/\\]bboxcnn\_[\w-]+(\d{4})\.h5"
|
|
|
|
|
|
|
|
|
|
| 2225 |
m = re.match(regex, model_path)
|
| 2226 |
if m:
|
| 2227 |
now = datetime.datetime(int(m.group(1)), int(m.group(2)), int(m.group(3)),
|
|
|
|
| 2236 |
self.config.NAME.lower(), now))
|
| 2237 |
|
| 2238 |
# Path to save after each epoch. Include placeholders that get filled by Keras.
|
| 2239 |
+
self.checkpoint_path = os.path.join(self.log_dir, "bboxcnn_{}_*epoch*.h5".format(
|
| 2240 |
self.config.NAME.lower()))
|
| 2241 |
self.checkpoint_path = self.checkpoint_path.replace(
|
| 2242 |
"*epoch*", "{epoch:04d}")
|
|
|
|
| 2258 |
3+: Train Resnet stage 3 and up
|
| 2259 |
4+: Train Resnet stage 4 and up
|
| 2260 |
5+: Train Resnet stage 5 and up
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2261 |
|
| 2262 |
augmentation = imgaug.augmenters.Sometimes(0.5, [
|
| 2263 |
imgaug.augmenters.Fliplr(0.5),
|
|
|
|
| 2274 |
# Pre-defined layer regular expressions
|
| 2275 |
layer_regex = {
|
| 2276 |
# all layers but the backbone
|
| 2277 |
+
"heads": r"(bboxcnn\_.*)|(rpn\_.*)|(fpn\_.*)",
|
| 2278 |
# From a specific Resnet stage and up
|
| 2279 |
+
"3+": r"(res3.*)|(bn3.*)|(res4.*)|(bn4.*)|(res5.*)|(bn5.*)|(bboxcnn\_.*)|(rpn\_.*)|(fpn\_.*)",
|
| 2280 |
+
"4+": r"(res4.*)|(bn4.*)|(res5.*)|(bn5.*)|(bboxcnn\_.*)|(rpn\_.*)|(fpn\_.*)",
|
| 2281 |
+
"5+": r"(res5.*)|(bn5.*)|(bboxcnn\_.*)|(rpn\_.*)|(fpn\_.*)",
|
| 2282 |
# All layers
|
| 2283 |
"all": ".*",
|
| 2284 |
}
|
|
|
|
| 2314 |
log("Checkpoint Path: {}".format(self.checkpoint_path))
|
| 2315 |
self.set_trainable(layers)
|
| 2316 |
self.compile(learning_rate, self.config.LEARNING_MOMENTUM)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2317 |
if os.name is 'nt':
|
| 2318 |
workers = 0
|
| 2319 |
else:
|
|
|
|
| 2372 |
windows = np.stack(windows)
|
| 2373 |
return molded_images, image_metas, windows
|
| 2374 |
|
| 2375 |
+
def unmold_detections(self, detections, bboxcnn_mask, original_image_shape,
|
| 2376 |
image_shape, window):
|
| 2377 |
"""Reformats the detections of one image from the format of the neural
|
| 2378 |
network output to a format suitable for use in the rest of the
|
| 2379 |
application.
|
| 2380 |
|
| 2381 |
detections: [N, (y1, x1, y2, x2, class_id, score)] in normalized coordinates
|
| 2382 |
+
bboxcnn_mask: [N, height, width, num_classes]
|
| 2383 |
original_image_shape: [H, W, C] Original image shape before resizing
|
| 2384 |
image_shape: [H, W, C] Shape of the image after resizing and padding
|
| 2385 |
window: [y1, x1, y2, x2] Pixel coordinates of box in the image where the real
|
|
|
|
| 2400 |
boxes = detections[:N, :4]
|
| 2401 |
class_ids = detections[:N, 4].astype(np.int32)
|
| 2402 |
scores = detections[:N, 5]
|
| 2403 |
+
masks = bboxcnn_mask[np.arange(N), :, :, class_ids]
|
| 2404 |
|
| 2405 |
# Translate normalized coordinates in the resized image to pixel
|
| 2406 |
# coordinates in the original image before resizing
|
|
|
|
| 2478 |
log("image_metas", image_metas)
|
| 2479 |
log("anchors", anchors)
|
| 2480 |
# Run object detection
|
| 2481 |
+
detections, _, _, bboxcnn_mask, _, _, _ =\
|
| 2482 |
self.keras_model.predict([molded_images, image_metas, anchors], verbose=0)
|
| 2483 |
# Process detections
|
| 2484 |
results = []
|
| 2485 |
for i, image in enumerate(images):
|
| 2486 |
final_rois, final_class_ids, final_scores, final_masks =\
|
| 2487 |
+
self.unmold_detections(detections[i], bboxcnn_mask[i],
|
| 2488 |
image.shape, molded_images[i].shape,
|
| 2489 |
windows[i])
|
| 2490 |
results.append({
|
|
|
|
| 2535 |
log("image_metas", image_metas)
|
| 2536 |
log("anchors", anchors)
|
| 2537 |
# Run object detection
|
| 2538 |
+
detections, _, _, bboxcnn_mask, _, _, _ =\
|
| 2539 |
self.keras_model.predict([molded_images, image_metas, anchors], verbose=0)
|
| 2540 |
# Process detections
|
| 2541 |
results = []
|
| 2542 |
for i, image in enumerate(molded_images):
|
| 2543 |
window = [0, 0, image.shape[0], image.shape[1]]
|
| 2544 |
final_rois, final_class_ids, final_scores, final_masks =\
|
| 2545 |
+
self.unmold_detections(detections[i], bboxcnn_mask[i],
|
| 2546 |
image.shape, molded_images[i].shape,
|
| 2547 |
window)
|
| 2548 |
results.append({
|
|
|
|
| 2823 |
h, w = tf.split(tf.cast(shape, tf.float32), 2)
|
| 2824 |
scale = tf.concat([h, w, h, w], axis=-1) - tf.constant(1.0)
|
| 2825 |
shift = tf.constant([0., 0., 1., 1.])
|
| 2826 |
+
return tf.cast(tf.round(tf.multiply(boxes, scale) + shift), tf.int32)
|