from __future__ import division
from __future__ import print_function

import argparse
import logging
import os
import sys
import time

logging.basicConfig(level=logging.INFO)

import numpy as np
import mxnet as mx
from tqdm import tqdm
from mxnet import nd
from mxnet import gluon
import gluoncv as gcv
gcv.utils.check_version('0.6.0')
from gluoncv import data as gdata
from gluoncv.data.batchify import Tuple, Stack, Pad
from gluoncv.data.transforms.presets.yolo import YOLO3DefaultValTransform
from gluoncv.utils.metrics.voc_detection import VOC07MApMetric
from gluoncv.utils.metrics.coco_detection import COCODetectionMetric
from mxnet.contrib.quantization import *
22
def parse_args():
    """Build the CLI for YOLO evaluation and return the parsed arguments."""
    p = argparse.ArgumentParser(description='Eval YOLO networks.')
    p.add_argument('--network', type=str, default='darknet53',
                   help="Base network name")
    p.add_argument('--algorithm', type=str, default='yolo3',
                   help='YOLO version, default is yolo3')
    p.add_argument('--deploy', action='store_true',
                   help='whether load static model for deployment')
    p.add_argument('--model-prefix', type=str, required=False,
                   help='load static model as hybridblock.')
    p.add_argument('--quantized', action='store_true',
                   help='use int8 pretrained model')
    p.add_argument('--data-shape', type=int, default=416,
                   help="Input data shape")
    p.add_argument('--batch-size', type=int, default=64,
                   help='Training mini-batch size')
    p.add_argument('--benchmark', action='store_true',
                   help="run dummy-data based benchmarking")
    p.add_argument('--num-iterations', type=int, default=100,
                   help="number of benchmarking iterations.")
    p.add_argument('--dataset', type=str, default='voc',
                   help='Training dataset.')
    p.add_argument('--num-workers', '-j', dest='num_workers', type=int,
                   default=4, help='Number of data workers')
    p.add_argument('--gpus', type=str, default='0',
                   help='Training with GPUs, you can specify 1,3 for example.')
    p.add_argument('--pretrained', type=str, default='True',
                   help='Load weights from previously saved parameters.')
    p.add_argument('--save-prefix', type=str, default='',
                   help='Saving parameter prefix')
    p.add_argument('--calibration', action='store_true',
                   help='quantize model')
    p.add_argument('--num-calib-batches', type=int, default=5,
                   help='number of batches for calibration')
    p.add_argument('--quantized-dtype', type=str, default='auto',
                   choices=['auto', 'int8', 'uint8'],
                   help='quantization destination data type for input data')
    p.add_argument('--calib-mode', type=str, default='naive',
                   help='calibration mode used for generating calibration table for the quantized symbol; supports'
                        ' 1. none: no calibration will be used. The thresholds for quantization will be calculated'
                        ' on the fly. This will result in inference speed slowdown and loss of accuracy'
                        ' in general.'
                        ' 2. naive: simply take min and max values of layer outputs as thresholds for'
                        ' quantization. In general, the inference accuracy worsens with more examples used in'
                        ' calibration. It is recommended to use `entropy` mode as it produces more accurate'
                        ' inference results.'
                        ' 3. entropy: calculate KL divergence of the fp32 output and quantized output for optimal'
                        ' thresholds. This mode is expected to produce the best inference accuracy of all three'
                        ' kinds of quantized models if the calibration dataset is representative enough of the'
                        ' inference dataset.')
    return p.parse_args()
75
def get_dataset(dataset, data_shape):
    """Return the validation dataset and its matching metric.

    NOTE(review): the 'coco' branch reads the module-level ``args`` for
    ``save_prefix``, so this must be called after ``parse_args`` has run
    in ``__main__`` — confirm before reusing this function elsewhere.
    """
    kind = dataset.lower()
    if kind == 'voc':
        val_dataset = gdata.VOCDetection(splits=[(2007, 'test')])
        val_metric = VOC07MApMetric(iou_thresh=0.5, class_names=val_dataset.classes)
        return val_dataset, val_metric
    if kind == 'coco':
        val_dataset = gdata.COCODetection(splits='instances_val2017', skip_empty=False)
        val_metric = COCODetectionMetric(
            val_dataset, args.save_prefix + '_eval', cleanup=True,
            data_shape=(data_shape, data_shape))
        return val_dataset, val_metric
    raise NotImplementedError('Dataset: {} not implemented.'.format(dataset))
88
def get_dataloader(val_dataset, data_shape, batch_size, num_workers):
    """Get dataloader."""
    # square input: width == height == data_shape
    transform = YOLO3DefaultValTransform(data_shape, data_shape)
    # stack images; pad variable-length label arrays with -1
    batchify = Tuple(Stack(), Pad(pad_val=-1))
    return gluon.data.DataLoader(
        val_dataset.transform(transform),
        batch_size,
        shuffle=False,
        last_batch='rollover',
        num_workers=num_workers,
        batchify_fn=batchify,
    )
97
def benchmarking(net, ctx, num_iteration, net_name, datashape=416, batch_size=64):
    """Time ``num_iteration`` forward passes of ``net`` on random input.

    The first 5 iterations are a warm-up and are excluded from the timing;
    immediately after the warm-up the network is exported to
    ``net_name``-symbol.json / params as a side effect. Returns the elapsed
    wall-clock seconds for the timed iterations only.

    Raises ValueError if ``num_iteration`` is not positive (previously this
    fell through to an unbound ``tic``/``toc`` and raised NameError).
    """
    if num_iteration <= 0:
        raise ValueError('num_iteration must be positive, got {}'.format(num_iteration))
    input_shape = (batch_size, 3) + (datashape, datashape)
    data = mx.random.uniform(-1.0, 1.0, shape=input_shape, ctx=ctx, dtype='float32')
    dryrun = 5
    tic = 0.0
    for i in range(dryrun + num_iteration):
        if i == dryrun:
            # export once after warm-up, before starting the clock
            net.export(net_name, 0)
            tic = time.time()
        ids, scores, bboxes = net(data)
        # asnumpy() blocks until the asynchronous computation has finished,
        # so the timing covers the actual forward pass
        ids.asnumpy()
        scores.asnumpy()
        bboxes.asnumpy()
    return time.time() - tic
112
def validate(net, val_data, ctx, classes, size, metric):
    """Run ``net`` over ``val_data``, feeding ``metric``, and return ``metric.get()``.

    A tqdm bar tracks progress over ``size`` samples; each batch is split
    across the contexts in ``ctx``.
    """
    net.collect_params().reset_ctx(ctx)
    metric.reset()
    net.hybridize()
    with tqdm(total=size) as pbar:
        for batch in val_data:
            imgs = gluon.utils.split_and_load(batch[0], ctx_list=ctx, batch_axis=0, even_split=False)
            labels = gluon.utils.split_and_load(batch[1], ctx_list=ctx, batch_axis=0, even_split=False)
            det_bboxes, det_ids, det_scores = [], [], []
            gt_bboxes, gt_ids, gt_difficults = [], [], []
            for img, lbl in zip(imgs, labels):
                ids, scores, bboxes = net(img)
                det_ids.append(ids)
                det_scores.append(scores)
                # clip predicted boxes to the image extent
                det_bboxes.append(bboxes.clip(0, batch[0].shape[2]))
                # labels pack [xmin, ymin, xmax, ymax, cls, (difficult)] on the last axis
                gt_ids.append(lbl.slice_axis(axis=-1, begin=4, end=5))
                gt_bboxes.append(lbl.slice_axis(axis=-1, begin=0, end=4))
                # the difficult flag is optional; pass None when it is absent
                gt_difficults.append(
                    lbl.slice_axis(axis=-1, begin=5, end=6) if lbl.shape[-1] > 5 else None)

            metric.update(det_bboxes, det_ids, det_scores, gt_bboxes, gt_ids, gt_difficults)
            pbar.update(batch[0].shape[0])
    return metric.get()
142
if __name__ == '__main__':
    args = parse_args()
    # basicConfig is already called at import time; just configure our logger
    logger = logging.getLogger('logger')
    logger.setLevel(logging.INFO)
    logger.info(args)

    # inference contexts: GPUs from --gpus, falling back to CPU
    ctx = [mx.gpu(int(i)) for i in args.gpus.split(',') if i.strip()]
    ctx = ctx if ctx else [mx.cpu()]

    # network
    net_name = '_'.join((args.algorithm, args.network, args.dataset))
    if args.quantized:
        net_name = '_'.join((net_name, 'int8'))
    args.save_prefix += net_name
    if not args.deploy:
        # --pretrained is a string: truthy values load model-zoo weights,
        # anything else is treated as a path to a parameter file
        if args.pretrained.lower() in ['true', '1', 'yes', 't']:
            net = gcv.model_zoo.get_model(net_name, pretrained=True)
        else:
            net = gcv.model_zoo.get_model(net_name, pretrained=False)
            net.load_parameters(args.pretrained.strip())
        net.set_nms(nms_thresh=0.45, nms_topk=400)
        net.hybridize()
    else:
        # load a previously exported static graph
        net_name = 'deploy'
        net = mx.gluon.SymbolBlock.imports('{}-symbol.json'.format(args.model_prefix),
              ['data'], '{}-0000.params'.format(args.model_prefix))
        net.hybridize(static_alloc=True, static_shape=True)

    if args.benchmark:
        print('-----benchmarking on %s -----'%net_name)
        elapsed = benchmarking(net, ctx=ctx[0], net_name=net_name,
                               num_iteration=args.num_iterations,
                               datashape=args.data_shape, batch_size=args.batch_size)
        speed = (args.batch_size * args.num_iterations) / elapsed
        print('Inference speed on %s, with batchsize %d is %.2f img/sec'%(net_name, args.batch_size, speed))
        sys.exit()

    # validation data
    val_dataset, val_metric = get_dataset(args.dataset, args.data_shape)
    val_data = get_dataloader(
        val_dataset, args.data_shape, args.batch_size, args.num_workers)
    classes = val_dataset.classes  # class names

    # calibration
    if args.calibration and not args.quantized:
        exclude_layers = []
        exclude_operators = ['Concat']
        # FIX: was `ctx != [mx.cpu()] > 0` — a chained comparison that raised
        # TypeError (list > int) whenever a GPU context was configured,
        # instead of the intended ValueError below
        if ctx != [mx.cpu()]:
            raise ValueError('currently only supports CPU with MKL-DNN backend')
        net = quantize_net(
            net, quantized_dtype='auto', exclude_layers=exclude_layers,
            exclude_operators=exclude_operators, calib_data=val_data,
            calib_mode=args.calib_mode, num_calib_examples=args.batch_size * args.num_calib_batches, ctx=ctx[0],
            logger=logger)
        dir_path = os.path.dirname(os.path.realpath(__file__))
        dst_dir = os.path.join(dir_path, 'model')
        if not os.path.isdir(dst_dir):
            os.mkdir(dst_dir)
        prefix = os.path.join(dst_dir, net_name +
                              '-quantized-' + args.calib_mode)
        logger.info('Saving quantized model at %s' % dst_dir)
        net.export(prefix, epoch=0)
        sys.exit()

    # evaluation
    names, values = validate(net, val_data, ctx, classes, len(val_dataset), val_metric)
    for k, v in zip(names, values):
        print(k, v)