1# Licensed to the Apache Software Foundation (ASF) under one
2# or more contributor license agreements.  See the NOTICE file
3# distributed with this work for additional information
4# regarding copyright ownership.  The ASF licenses this file
5# to you under the Apache License, Version 2.0 (the
6# "License"); you may not use this file except in compliance
7# with the License.  You may obtain a copy of the License at
8#
9#   http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing,
12# software distributed under the License is distributed on an
13# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14# KIND, either express or implied.  See the License for the
15# specific language governing permissions and limitations
16# under the License.
17
18"""References:
19
20Simonyan, Karen, and Andrew Zisserman. "Very deep convolutional networks for
21large-scale image recognition." arXiv preprint arXiv:1409.1556 (2014).
22"""
23
24import mxnet as mx
25import numpy as np
26
27
def get_feature(internel_layer, layers, filters, batch_norm=False, **kwargs):
    """Build the VGG convolutional feature extractor.

    Stacks groups of 3x3 convolutions (pad 1), each optionally followed by
    BatchNorm, always followed by ReLU; every group ends with a 2x2
    max-pooling layer with stride 2.

    Parameters
    ----------
    internel_layer : mx.sym.Symbol
        Input symbol the feature stack is appended to.
    layers : list of int
        Number of convolution layers in each group.
    filters : list of int
        Number of filters for each group (same length as ``layers``).
    batch_norm : bool, default False
        Insert a BatchNorm layer after every convolution.

    Returns
    -------
    mx.sym.Symbol
        Symbol producing the feature map after the last pooling layer.
    """
    net = internel_layer
    for group_idx, block_count in enumerate(layers, start=1):
        for conv_idx in range(1, block_count + 1):
            suffix = "%s_%s" % (group_idx, conv_idx)
            net = mx.sym.Convolution(
                data=net,
                kernel=(3, 3),
                pad=(1, 1),
                num_filter=filters[group_idx - 1],
                name="conv" + suffix,
            )
            if batch_norm:
                net = mx.symbol.BatchNorm(data=net, name="bn" + suffix)
            net = mx.sym.Activation(data=net, act_type="relu", name="relu" + suffix)
        net = mx.sym.Pooling(
            data=net,
            pool_type="max",
            kernel=(2, 2),
            stride=(2, 2),
            name="pool%s" % group_idx,
        )
    return net
53
54
def get_classifier(input_data, num_classes, **kwargs):
    """Build the VGG fully-connected classifier head (fc6 -> fc8).

    Two 4096-unit FC layers with ReLU + Dropout(0.5), followed by the final
    ``num_classes``-unit FC layer. Layer names match the classic VGG naming
    (fc6, relu6, drop6, fc7, relu7, drop7, fc8).

    Parameters
    ----------
    input_data : mx.sym.Symbol
        Feature symbol to classify.
    num_classes : int
        Number of output classes (units of the final FC layer).

    Returns
    -------
    mx.sym.Symbol
        The fc8 symbol (pre-softmax logits).
    """

    def _build(fc_kwargs):
        # Build the fc6-fc8 stack, forwarding extra kwargs to FullyConnected.
        fc6 = mx.sym.FullyConnected(data=flatten, num_hidden=4096, name="fc6", **fc_kwargs)
        relu6 = mx.sym.Activation(data=fc6, act_type="relu", name="relu6")
        drop6 = mx.sym.Dropout(data=relu6, p=0.5, name="drop6")
        fc7 = mx.sym.FullyConnected(data=drop6, num_hidden=4096, name="fc7", **fc_kwargs)
        relu7 = mx.sym.Activation(data=fc7, act_type="relu", name="relu7")
        drop7 = mx.sym.Dropout(data=relu7, p=0.5, name="drop7")
        return mx.sym.FullyConnected(
            data=drop7, num_hidden=num_classes, name="fc8", **fc_kwargs
        )

    flatten = mx.sym.Flatten(data=input_data, name="flatten")
    try:
        # Newer MXNet supports `flatten=False` on FullyConnected; probe for it.
        return _build({"flatten": False})
    except Exception:
        # Older MXNet: FullyConnected has no `flatten` kwarg — fall back.
        # (Was a bare `except:`, which also swallowed KeyboardInterrupt/SystemExit.)
        return _build({})
74
75
def get_symbol(num_classes, num_layers=11, batch_norm=False, dtype="float32", **kwargs):
    """Build the full VGG network symbol (features + classifier + softmax).

    Parameters
    ----------
    num_classes : int
        Number of classification classes.
    num_layers : int, default 11
        Number of layers for the variant of VGG. Options are 11, 13, 16, 19.
    batch_norm : bool, default False
        Use batch normalization after each convolution.
    dtype : str, "float32" or "float16"
        Data precision; float16 casts the input down and the logits back up.

    Returns
    -------
    mx.sym.Symbol
        Softmax output symbol of the network.

    Raises
    ------
    ValueError
        If ``num_layers`` is not one of 11, 13, 16, 19.
    """
    # (conv layers per group, filters per group) for each VGG variant.
    vgg_spec = {
        11: ([1, 1, 2, 2, 2], [64, 128, 256, 512, 512]),
        13: ([2, 2, 2, 2, 2], [64, 128, 256, 512, 512]),
        16: ([2, 2, 3, 3, 3], [64, 128, 256, 512, 512]),
        19: ([2, 2, 4, 4, 4], [64, 128, 256, 512, 512]),
    }
    if num_layers not in vgg_spec:
        raise ValueError(
            "Invalid num_layers {}. Possible choices are 11, 13, 16, 19.".format(num_layers)
        )
    layers, filters = vgg_spec[num_layers]
    data = mx.sym.Variable(name="data")
    if dtype == "float16":
        # Compute the network body in half precision.
        data = mx.sym.Cast(data=data, dtype=np.float16)
    feature = get_feature(data, layers, filters, batch_norm)
    classifier = get_classifier(feature, num_classes)
    if dtype == "float16":
        # Cast logits back to float32 so softmax is numerically stable.
        classifier = mx.sym.Cast(data=classifier, dtype=np.float32)
    symbol = mx.sym.softmax(data=classifier, name="softmax")
    return symbol
109