1from __future__ import division 2from __future__ import print_function 3 4import sys 5import argparse 6import logging 7logging.basicConfig(level=logging.INFO) 8import time 9import numpy as np 10import mxnet as mx 11from tqdm import tqdm 12from mxnet import nd 13from mxnet import gluon 14import gluoncv as gcv 15gcv.utils.check_version('0.6.0') 16from gluoncv import data as gdata 17from gluoncv.data.batchify import Tuple, Stack, Pad 18from gluoncv.data.transforms.presets.yolo import YOLO3DefaultValTransform 19from gluoncv.utils.metrics.voc_detection import VOC07MApMetric 20from gluoncv.utils.metrics.coco_detection import COCODetectionMetric 21from mxnet.contrib.quantization import * 22 23def parse_args(): 24 parser = argparse.ArgumentParser(description='Eval YOLO networks.') 25 parser.add_argument('--network', type=str, default='darknet53', 26 help="Base network name") 27 parser.add_argument('--algorithm', type=str, default='yolo3', 28 help='YOLO version, default is yolo3') 29 parser.add_argument('--deploy', action='store_true', 30 help='whether load static model for deployment') 31 parser.add_argument('--model-prefix', type=str, required=False, 32 help='load static model as hybridblock.') 33 parser.add_argument('--quantized', action='store_true', 34 help='use int8 pretrained model') 35 parser.add_argument('--data-shape', type=int, default=416, 36 help="Input data shape") 37 parser.add_argument('--batch-size', type=int, default=64, 38 help='Training mini-batch size') 39 parser.add_argument('--benchmark', action='store_true', 40 help="run dummy-data based benchmarking") 41 parser.add_argument('--num-iterations', type=int, default=100, 42 help="number of benchmarking iterations.") 43 parser.add_argument('--dataset', type=str, default='voc', 44 help='Training dataset.') 45 parser.add_argument('--num-workers', '-j', dest='num_workers', type=int, 46 default=4, help='Number of data workers') 47 parser.add_argument('--gpus', type=str, default='0', 48 help='Training with GPUs, you can specify 1,3 for example.') 49 parser.add_argument('--pretrained', type=str, default='True', 50 help='Load weights from previously saved parameters.') 51 parser.add_argument('--save-prefix', type=str, default='', 52 help='Saving parameter prefix') 53 parser.add_argument('--calibration', action='store_true', 54 help='quantize model') 55 parser.add_argument('--num-calib-batches', type=int, default=5, 56 help='number of batches for calibration') 57 parser.add_argument('--quantized-dtype', type=str, default='auto', 58 choices=['auto', 'int8', 'uint8'], 59 help='quantization destination data type for input data') 60 parser.add_argument('--calib-mode', type=str, default='naive', 61 help='calibration mode used for generating calibration table for the quantized symbol; supports' 62 ' 1. none: no calibration will be used. The thresholds for quantization will be calculated' 63 ' on the fly. This will result in inference speed slowdown and loss of accuracy' 64 ' in general.' 65 ' 2. naive: simply take min and max values of layer outputs as thresholds for' 66 ' quantization. In general, the inference accuracy worsens with more examples used in' 67 ' calibration. It is recommended to use `entropy` mode as it produces more accurate' 68 ' inference results.' 69 ' 3. entropy: calculate KL divergence of the fp32 output and quantized output for optimal' 70 ' thresholds. This mode is expected to produce the best inference accuracy of all three' 71 ' kinds of quantized models if the calibration dataset is representative enough of the' 72 ' inference dataset.') 73 args = parser.parse_args() 74 return args 75 76def get_dataset(dataset, data_shape): 77 if dataset.lower() == 'voc': 78 val_dataset = gdata.VOCDetection(splits=[(2007, 'test')]) 79 val_metric = VOC07MApMetric(iou_thresh=0.5, class_names=val_dataset.classes) 80 elif dataset.lower() == 'coco': 81 val_dataset = gdata.COCODetection(splits='instances_val2017', skip_empty=False) 82 val_metric = COCODetectionMetric( 83 val_dataset, args.save_prefix + '_eval', cleanup=True, 84 data_shape=(data_shape, data_shape)) 85 else: 86 raise NotImplementedError('Dataset: {} not implemented.'.format(dataset)) 87 return val_dataset, val_metric 88 89def get_dataloader(val_dataset, data_shape, batch_size, num_workers): 90 """Get dataloader.""" 91 width, height = data_shape, data_shape 92 batchify_fn = Tuple(Stack(), Pad(pad_val=-1)) 93 val_loader = gluon.data.DataLoader( 94 val_dataset.transform(YOLO3DefaultValTransform(width, height)), 95 batch_size, False, last_batch='rollover', num_workers=num_workers, batchify_fn=batchify_fn,) 96 return val_loader 97 98def benchmarking(net, ctx, num_iteration, net_name, datashape=416, batch_size=64): 99 input_shape = (batch_size, 3) + (datashape, datashape) 100 data = mx.random.uniform(-1.0, 1.0, shape=input_shape, ctx=ctx, dtype='float32') 101 dryrun = 5 102 for i in range(dryrun + num_iteration): 103 if i == dryrun: 104 net.export(net_name, 0) 105 tic = time.time() 106 ids, scores, bboxes = net(data) 107 ids.asnumpy() 108 scores.asnumpy() 109 bboxes.asnumpy() 110 toc = time.time() - tic 111 return toc 112 113def validate(net, val_data, ctx, classes, size, metric): 114 """Test on validation dataset.""" 115 net.collect_params().reset_ctx(ctx) 116 metric.reset() 117 net.hybridize() 118 with tqdm(total=size) as pbar: 119 for ib, batch in enumerate(val_data): 120 data = gluon.utils.split_and_load(batch[0], ctx_list=ctx, batch_axis=0, even_split=False) 121 label = gluon.utils.split_and_load(batch[1], ctx_list=ctx, batch_axis=0, even_split=False) 122 det_bboxes = [] 123 det_ids = [] 124 det_scores = [] 125 gt_bboxes = [] 126 gt_ids = [] 127 gt_difficults = [] 128 for x, y in zip(data, label): 129 ids, scores, bboxes = net(x) 130 det_ids.append(ids) 131 det_scores.append(scores) 132 # clip to image size 133 det_bboxes.append(bboxes.clip(0, batch[0].shape[2])) 134 # split ground truths 135 gt_ids.append(y.slice_axis(axis=-1, begin=4, end=5)) 136 gt_bboxes.append(y.slice_axis(axis=-1, begin=0, end=4)) 137 gt_difficults.append(y.slice_axis(axis=-1, begin=5, end=6) if y.shape[-1] > 5 else None) 138 139 metric.update(det_bboxes, det_ids, det_scores, gt_bboxes, gt_ids, gt_difficults) 140 pbar.update(batch[0].shape[0]) 141 return metric.get() 142 143if __name__ == '__main__': 144 args = parse_args() 145 logging.basicConfig() 146 logger = logging.getLogger('logger') 147 logger.setLevel(logging.INFO) 148 logging.info(args) 149 150 # training contexts 151 ctx = [mx.gpu(int(i)) for i in args.gpus.split(',') if i.strip()] 152 ctx = ctx if ctx else [mx.cpu()] 153 154 # network 155 net_name = '_'.join((args.algorithm, args.network, args.dataset)) 156 if args.quantized: 157 net_name = '_'.join((net_name, 'int8')) 158 args.save_prefix += net_name 159 if not args.deploy: 160 if args.pretrained.lower() in ['true', '1', 'yes', 't']: 161 net = gcv.model_zoo.get_model(net_name, pretrained=True) 162 else: 163 net = gcv.model_zoo.get_model(net_name, pretrained=False) 164 net.load_parameters(args.pretrained.strip()) 165 net.set_nms(nms_thresh=0.45, nms_topk=400) 166 net.hybridize() 167 else: 168 net_name = 'deploy' 169 net = mx.gluon.SymbolBlock.imports('{}-symbol.json'.format(args.model_prefix), 170 ['data'], '{}-0000.params'.format(args.model_prefix)) 171 net.hybridize(static_alloc=True, static_shape=True) 172 173 if args.benchmark: 174 print('-----benchmarking on %s -----'%net_name) 175 speed = (args.batch_size*args.num_iterations)/benchmarking(net, ctx=ctx[0], net_name=net_name, num_iteration=args.num_iterations, 176 datashape=args.data_shape, batch_size=args.batch_size) 177 print('Inference speed on %s, with batchsize %d is %.2f img/sec'%(net_name, args.batch_size, speed)) 178 sys.exit() 179 180 # training data 181 val_dataset, val_metric = get_dataset(args.dataset, args.data_shape) 182 val_data = get_dataloader( 183 val_dataset, args.data_shape, args.batch_size, args.num_workers) 184 classes = val_dataset.classes # class names 185 186 # calibration 187 if args.calibration and not args.quantized: 188 exclude_layers = [] 189 exclude_operators = ['Concat'] 190 if ctx != [mx.cpu()] > 0: 191 raise ValueError('currently only supports CPU with MKL-DNN backend') 192 net = quantize_net( 193 net, quantized_dtype='auto', exclude_layers=exclude_layers, 194 exclude_operators=exclude_operators, calib_data=val_data, 195 calib_mode=args.calib_mode, num_calib_examples=args.batch_size * args.num_calib_batches, ctx=ctx[0], 196 logger=logger) 197 dir_path = os.path.dirname(os.path.realpath(__file__)) 198 dst_dir = os.path.join(dir_path, 'model') 199 if not os.path.isdir(dst_dir): 200 os.mkdir(dst_dir) 201 prefix = os.path.join(dst_dir, net_name + 202 '-quantized-' + args.calib_mode) 203 logger.info('Saving quantized model at %s' % dst_dir) 204 net.export(prefix, epoch=0) 205 sys.exit() 206 207 # training 208 names, values = validate(net, val_data, ctx, classes, len(val_dataset), val_metric) 209 for k, v in zip(names, values): 210 print(k, v) 211