"""
Copyright 2017-2018 Fizyr (https://fizyr.com)

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""import keras
import keras.backendimport keras.models
import keras_retinanet.layers
import keras_retinanet.models.retinanet
import keras_retinanet.backend.tensorflow_backend as backendfrom ..layers.roi import RoiAlign
from ..layers.upsample import Upsample
from ..layers.misc import Shape, ConcatenateBoxes, Cast


def default_mask_model(
    num_classes,
    pyramid_feature_size=256,
    mask_feature_size=256,
    roi_size=(14, 14),
    mask_size=(28, 28),
    name='mask_submodel',
    mask_dtype=keras.backend.floatx(),
    retinanet_dtype=keras.backend.floatx()
):
    """ Build the default mask submodel that is applied to every ROI.

    The model is four 3x3 convolutions, an upsampling step, one more 3x3
    convolution and a final 1x1 sigmoid convolution, each wrapped in
    TimeDistributed so it runs on every ROI along the second axis.

    NOTE: the dtype defaults are evaluated once, when this module is imported.

    # Arguments
        num_classes          : Number of filters of the final 1x1 sigmoid convolution.
        pyramid_feature_size : Number of channels of the incoming ROI features.
        mask_feature_size    : Number of filters of the intermediate 3x3 convolutions.
        roi_size             : Spatial size of the incoming ROI features.
        mask_size            : Size handed to the Upsample layer.
        name                 : Name of the returned model.
        mask_dtype           : Data type used inside the mask submodel.
        retinanet_dtype      : Data type of the surrounding model; when it differs from
                               mask_dtype, Cast layers are inserted at the input and output.

    # Returns
        A keras.models.Model taking an input of shape
        (None, roi_size[0], roi_size[1], pyramid_feature_size) (batch axis excluded)
        and returning the output of the 'roi_mask' layer (recast if needed).
    """
    options = {
        'kernel_size'        : 3,
        'strides'            : 1,
        'padding'            : 'same',
        'kernel_initializer' : keras.initializers.normal(mean=0.0, stddev=0.01, seed=None),
        'bias_initializer'   : 'zeros',
        'activation'         : 'relu',
    }

    inputs  = keras.layers.Input(shape=(None, roi_size[0], roi_size[1], pyramid_feature_size))
    outputs = inputs

    # cast to the desired data type, which may be different than
    # the one used for the underlying keras-retinanet model
    if mask_dtype != retinanet_dtype:
        outputs = keras.layers.TimeDistributed(
            Cast(dtype=mask_dtype),
            name='cast_masks')(outputs)

    for i in range(4):
        outputs = keras.layers.TimeDistributed(keras.layers.Conv2D(
            filters=mask_feature_size,
            **options
        ), name='roi_mask_{}'.format(i))(outputs)

    # perform upsampling + conv instead of deconv as in the paper
    # https://distill.pub/2016/deconv-checkerboard/
    outputs = keras.layers.TimeDistributed(
        Upsample(mask_size),
        name='roi_mask_upsample')(outputs)
    outputs = keras.layers.TimeDistributed(keras.layers.Conv2D(
        filters=mask_feature_size,
        **options
    ), name='roi_mask_features')(outputs)
    outputs = keras.layers.TimeDistributed(keras.layers.Conv2D(
        filters=num_classes,
        kernel_size=1,
        activation='sigmoid'
    ), name='roi_mask')(outputs)

    # cast back to the underlying keras-retinanet model data type
    if mask_dtype != retinanet_dtype:
        outputs = keras.layers.TimeDistributed(
            Cast(dtype=retinanet_dtype),
            name='recast_masks')(outputs)

    return keras.models.Model(inputs=inputs, outputs=outputs, name=name)


def default_roi_submodels(num_classes, mask_dtype=keras.backend.floatx(), retinanet_dtype=keras.backend.floatx()):
    """ Create the default list of ROI submodels.

    # Arguments
        num_classes     : Forwarded to default_mask_model.
        mask_dtype      : Forwarded to default_mask_model.
        retinanet_dtype : Forwarded to default_mask_model.

    # Returns
        A list with a single (name, model) tuple: ('masks', <default mask model>).
    """
    return [
        ('masks', default_mask_model(num_classes, mask_dtype=mask_dtype, retinanet_dtype=retinanet_dtype)),
    ]


def retinanet_mask(
    inputs,
    num_classes,
    retinanet_model=None,
    anchor_params=None,
    nms=True,
    class_specific_filter=True,
    name='retinanet-mask',
    roi_submodels=None,
    mask_dtype=keras.backend.floatx(),
    modifier=None,
    **kwargs
):
    """ Construct a RetinaNet mask model on top of a retinanet bbox model.

    This model uses the retinanet bbox model and appends a few layers to compute masks.

    # Arguments
        inputs                : The image input (keras.layers.Input). It is used directly as the image
                                tensor: for the bbox model, for clipping the boxes and as the model input.
        num_classes           : Number of classes to classify.
        retinanet_model       : keras_retinanet.models.retinanet model, returning regression and classification values.
                                If None, one is built from inputs, num_classes, anchor_params and **kwargs.
        anchor_params         : Struct containing anchor parameters. If None, default values are used.
        nms                   : Use NMS.
        class_specific_filter : Use class specific filtering.
        name                  : Name of the model.
        roi_submodels         : List of (name, model) tuples for processing ROIs. If None, default_roi_submodels is used.
        mask_dtype            : Data type of the masks, can be different from the main one.
                                Only used when roi_submodels is None.
        modifier              : Modifier for the underlying retinanet model, such as freeze.
        **kwargs              : Additional kwargs to pass to the retinanet bbox model (only used when
                                retinanet_model is None).

    # Returns
        Model with inputs as input and as output, in this order:
        ```
        [regression, classification, other[0], other[1], ...,      # outputs of the retinanet model
         boxes_<submodel>[0], ...,                                 # ConcatenateBoxes output per ROI submodel
         detections[0] (boxes), detections[1] (scores), ...,       # outputs of FilterDetections
         <submodel>[0], ...]                                       # raw output per ROI submodel
        ```
    """
    if anchor_params is None:
        anchor_params = keras_retinanet.utils.anchors.AnchorParameters.default

    if roi_submodels is None:
        # The default submodels are built while the global floatx is temporarily
        # switched to mask_dtype. Restore it in a finally clause so a failure
        # while building the submodels cannot leave the global dtype modified.
        retinanet_dtype = keras.backend.floatx()
        keras.backend.set_floatx(mask_dtype)
        try:
            roi_submodels = default_roi_submodels(num_classes, mask_dtype, retinanet_dtype)
        finally:
            keras.backend.set_floatx(retinanet_dtype)

    image = inputs
    image_shape = Shape()(image)

    if retinanet_model is None:
        retinanet_model = keras_retinanet.models.retinanet.retinanet(
            inputs=image,
            num_classes=num_classes,
            num_anchors=anchor_params.num_anchors(),
            **kwargs
        )

    if modifier:
        retinanet_model = modifier(retinanet_model)

    # parse outputs
    regression     = retinanet_model.outputs[0]
    classification = retinanet_model.outputs[1]
    other          = retinanet_model.outputs[2:]

    # NOTE: the loop variable is deliberately not called `name`; on Python 2 a
    # list comprehension variable leaks and would overwrite the `name` argument.
    features = [retinanet_model.get_layer(level_name).output for level_name in ['P3', 'P4', 'P5', 'P6', 'P7']]

    # build boxes
    anchors = keras_retinanet.models.retinanet.__build_anchors(anchor_params, features)
    boxes = keras_retinanet.layers.RegressBoxes(name='boxes')([anchors, regression])
    boxes = keras_retinanet.layers.ClipBoxes(name='clipped_boxes')([image, boxes])

    # filter detections (apply NMS / score threshold / select top-k)
    detections = keras_retinanet.layers.FilterDetections(
        nms                   = nms,
        class_specific_filter = class_specific_filter,
        max_detections        = 100,
        name                  = 'filtered_detections'
    )([boxes, classification] + other)

    # split up in known outputs and "other"
    boxes  = detections[0]
    scores = detections[1]

    # get the region of interest features
    rois = RoiAlign()([image_shape, boxes, scores] + features)

    # execute maskrcnn submodels
    maskrcnn_outputs = [submodel(rois) for _, submodel in roi_submodels]

    # concatenate boxes for loss computation
    trainable_outputs = [
        ConcatenateBoxes(name=submodel_name)([boxes, output])
        for (submodel_name, _), output in zip(roi_submodels, maskrcnn_outputs)
    ]

    # reconstruct the new output
    outputs = [regression, classification] + other + trainable_outputs + detections + maskrcnn_outputs

    return keras.models.Model(inputs=inputs, outputs=outputs, name=name)
"""
Copyright 2017-2018 Fizyr (https://fizyr.com)

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""import warningsimport keras
import keras_resnet
import keras_resnet.models
import keras_retinanet.models.resnet
from ..models import retinanet, Backbone


class ResNetBackbone(Backbone, keras_retinanet.models.resnet.ResNetBackbone):
    """ ResNet backbone that can additionally build a maskrcnn model. """

    def maskrcnn(self, *args, **kwargs):
        """ Returns a maskrcnn model using the correct backbone.
        """
        return resnet_maskrcnn(*args, backbone=self.backbone, **kwargs)


def resnet_maskrcnn(num_classes, backbone='resnet50', inputs=None, modifier=None, mask_dtype=keras.backend.floatx(), **kwargs):
    """ Construct a mask model on top of a ResNet backbone.

    NOTE: the mask_dtype default is evaluated once, when this module is imported.

    # Arguments
        num_classes : Number of classes, forwarded to retinanet.retinanet_mask.
        backbone    : Which ResNet to build: one of 'resnet50', 'resnet101' or 'resnet152'.
        inputs      : Input tensor of the network. If None, an input named 'image'
                      with shape (None, None, 3) is created.
        modifier    : Optional callable applied to the ResNet backbone model (it is
                      not forwarded to retinanet.retinanet_mask).
        mask_dtype  : Forwarded to retinanet.retinanet_mask.
        **kwargs    : Additional kwargs forwarded to retinanet.retinanet_mask.

    # Returns
        The model returned by retinanet.retinanet_mask.

    # Raises
        ValueError : If backbone is not one of the supported names.
    """
    # choose default input
    if inputs is None:
        inputs = keras.layers.Input(shape=(None, None, 3), name='image')

    # create the resnet backbone
    if backbone == 'resnet50':
        resnet = keras_resnet.models.ResNet50(inputs, include_top=False, freeze_bn=True)
    elif backbone == 'resnet101':
        resnet = keras_resnet.models.ResNet101(inputs, include_top=False, freeze_bn=True)
    elif backbone == 'resnet152':
        resnet = keras_resnet.models.ResNet152(inputs, include_top=False, freeze_bn=True)
    else:
        # Fail here with a clear message instead of an UnboundLocalError on
        # `resnet` further down.
        raise ValueError("Backbone ('{}') is invalid.".format(backbone))

    # invoke modifier if given
    if modifier:
        resnet = modifier(resnet)

    # create the full model; the first backbone output is skipped
    model = retinanet.retinanet_mask(
        inputs=inputs,
        num_classes=num_classes,
        backbone_layers=resnet.outputs[1:],
        mask_dtype=mask_dtype,
        **kwargs
    )

    return model


def resnet50_maskrcnn(num_classes, inputs=None, **kwargs):
    """ Shorthand for resnet_maskrcnn with backbone='resnet50'. """
    return resnet_maskrcnn(num_classes=num_classes, backbone='resnet50', inputs=inputs, **kwargs)


def resnet101_maskrcnn(num_classes, inputs=None, **kwargs):
    """ Shorthand for resnet_maskrcnn with backbone='resnet101'. """
    return resnet_maskrcnn(num_classes=num_classes, backbone='resnet101', inputs=inputs, **kwargs)


def resnet152_maskrcnn(num_classes, inputs=None, **kwargs):
    """ Shorthand for resnet_maskrcnn with backbone='resnet152'. """
    return resnet_maskrcnn(num_classes=num_classes, backbone='resnet152', inputs=inputs, **kwargs)
# 3. roi.py
# Contains the implementation of the RoiAlign class, including helper computations such as map_to_level.
import keras.backend
import keras.layers
import keras_retinanet.backend
from .. import backend


class RoiAlign(keras.layers.Layer):
    """ Layer that crops a fixed-size feature patch for every box.

    Each box is assigned to one of the given feature maps based on its size,
    and the patch is cropped from that map with backend.crop_and_resize.

    # Arguments
        crop_size : Size of the crops, passed to backend.crop_and_resize.
    """

    def __init__(self, crop_size=(14, 14), **kwargs):
        self.crop_size = crop_size

        super(RoiAlign, self).__init__(**kwargs)

    def map_to_level(self, boxes, canonical_size=224, canonical_level=1, min_level=0, max_level=4):
        """ Compute for every box the index of the feature map to crop from.

        The level is floor(canonical_level + log2(sqrt(w * h) / canonical_size + epsilon)),
        clipped to [min_level, max_level].

        # Arguments
            boxes           : Tensor whose columns 0..3 are read as (x1, y1, x2, y2).
            canonical_size  : Box size (sqrt of the area) that maps to canonical_level.
            canonical_level : Level assigned to a box of canonical_size.
            min_level       : Lowest level that is returned.
            max_level       : Highest level that is returned.

        # Returns
            Tensor with one level value per box.
        """
        left, top, right, bottom = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]

        width  = right - left
        height = bottom - top
        scale  = keras.backend.sqrt(width * height)

        unclipped = backend.floor(canonical_level + backend.log2(scale / canonical_size + keras.backend.epsilon()))
        return keras.backend.clip(unclipped, min_level, max_level)

    def call(self, inputs, **kwargs):
        """ Crop the ROI features for all boxes.

        # Arguments
            inputs : List of [image_shape, boxes, scores, feature_map_0, feature_map_1, ...].
                     Only the first batch element of boxes, scores and the feature maps is used.

        # Returns
            The crops for all boxes, in the order of the boxes, with a leading axis of size 1.
        """
        # TODO: Support batch_size > 1
        image_shape = keras.backend.cast(inputs[0], keras.backend.floatx())
        boxes       = keras.backend.stop_gradient(inputs[1][0])
        # NOTE: scores are read here but not used further in this method.
        scores      = keras.backend.stop_gradient(inputs[2][0])
        fpn         = [keras.backend.stop_gradient(feature_map[0]) for feature_map in inputs[3:]]

        # decide for every box which feature map it is cropped from
        target_levels = self.map_to_level(boxes)

        # handle every feature map on its own
        crops_per_level   = []
        indices_per_level = []
        for level, feature_map in enumerate(fpn):
            # pick the boxes assigned to this level
            level_indices = keras_retinanet.backend.where(keras.backend.equal(target_levels, level))
            indices_per_level.append(level_indices)

            selected  = keras_retinanet.backend.gather_nd(boxes, level_indices)
            fpn_shape = keras.backend.cast(keras.backend.shape(feature_map), dtype=keras.backend.floatx())

            # bring the boxes into the (y1, x1, y2, x2) layout handed to crop_and_resize
            # NOTE(review): assumes image_shape is (batch, height, width, ...) and
            # fpn_shape is (height, width, ...) - confirm against the Shape layer.
            x1 = selected[:, 0]
            y1 = selected[:, 1]
            x2 = selected[:, 2]
            y2 = selected[:, 3]
            normalized = keras.backend.stack([
                (y1 / image_shape[1] * fpn_shape[0]) / (fpn_shape[0] - 1),
                (x1 / image_shape[2] * fpn_shape[1]) / (fpn_shape[1] - 1),
                (y2 / image_shape[1] * fpn_shape[0] - 1) / (fpn_shape[0] - 1),
                (x2 / image_shape[2] * fpn_shape[1] - 1) / (fpn_shape[1] - 1),
            ], axis=1)

            # every box refers to index 0 of the single-element batch built below
            box_batch_indices = keras.backend.zeros((keras.backend.shape(normalized)[0],), dtype='int32')

            crops_per_level.append(backend.crop_and_resize(
                keras.backend.expand_dims(feature_map, axis=0),
                normalized,
                box_batch_indices,
                self.crop_size
            ))

        # merge the crops of all levels into one blob
        rois = keras.backend.concatenate(crops_per_level, axis=0)

        # scatter the crops back into the original box order
        all_indices = keras.backend.concatenate(indices_per_level, axis=0)
        rois_shape  = keras.backend.cast(keras.backend.shape(rois), 'int64')
        rois        = keras_retinanet.backend.scatter_nd(all_indices, rois, rois_shape)

        return keras.backend.expand_dims(rois, axis=0)

    def compute_output_shape(self, input_shape):
        """ Return (batch, None, crop_size[0], crop_size[1], channels).

        batch is taken from the second input shape and channels from the last
        axis of the fourth input shape (the first feature map).
        """
        batch_size = input_shape[1][0]
        channels   = input_shape[3][-1]
        return (batch_size, None, self.crop_size[0], self.crop_size[1], channels)

    def get_config(self):
        """ Return the base layer config extended with crop_size. """
        config = super(RoiAlign, self).get_config()
        config.update({
            'crop_size' : self.crop_size,
        })

        return config