1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one 3 * or more contributor license agreements. See the NOTICE file 4 * distributed with this work for additional information 5 * regarding copyright ownership. The ASF licenses this file 6 * to you under the Apache License, Version 2.0 (the 7 * "License"); you may not use this file except in compliance 8 * with the License. You may obtain a copy of the License at 9 * 10 * http://www.apache.org/licenses/LICENSE-2.0 11 * 12 * Unless required by applicable law or agreed to in writing, 13 * software distributed under the License is distributed on an 14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 * KIND, either express or implied. See the License for the 16 * specific language governing permissions and limitations 17 * under the License. 18 */ 19 20 /*! 21 * \file bounding_box.cc 22 * \brief Bounding box util functions and operators 23 * \author Joshua Zhang 24 */ 25 26 #include "./bounding_box-inl.h" 27 #include "../elemwise_op_common.h" 28 29 namespace mxnet { 30 namespace op { 31 DMLC_REGISTER_PARAMETER(BoxNMSParam); 32 DMLC_REGISTER_PARAMETER(BoxOverlapParam); 33 DMLC_REGISTER_PARAMETER(BipartiteMatchingParam); 34 DMLC_REGISTER_PARAMETER(BoxDecodeParam); 35 36 37 NNVM_REGISTER_OP(_contrib_box_nms) 38 .add_alias("_contrib_box_non_maximum_suppression") 39 .describe(R"code(Apply non-maximum suppression to input. 40 41 The output will be sorted in descending order according to `score`. Boxes with 42 overlaps larger than `overlap_thresh`, smaller scores and background boxes 43 will be removed and filled with -1, the corresponding position will be recorded 44 for backward propogation. 45 46 During back-propagation, the gradient will be copied to the original 47 position according to the input index. For positions that have been suppressed, 48 the in_grad will be assigned 0. 49 In summary, gradients are sticked to its boxes, will either be moved or discarded 50 according to its original index in input. 51 52 Input requirements:: 53 54 1. Input tensor have at least 2 dimensions, (n, k), any higher dims will be regarded 55 as batch, e.g. (a, b, c, d, n, k) == (a*b*c*d, n, k) 56 2. n is the number of boxes in each batch 57 3. k is the width of each box item. 58 59 By default, a box is [id, score, xmin, ymin, xmax, ymax, ...], 60 additional elements are allowed. 61 62 - `id_index`: optional, use -1 to ignore, useful if `force_suppress=False`, which means 63 we will skip highly overlapped boxes if one is `apple` while the other is `car`. 64 65 - `background_id`: optional, default=-1, class id for background boxes, useful 66 when `id_index >= 0` which means boxes with background id will be filtered before nms. 67 68 - `coord_start`: required, default=2, the starting index of the 4 coordinates. 69 Two formats are supported: 70 71 - `corner`: [xmin, ymin, xmax, ymax] 72 73 - `center`: [x, y, width, height] 74 75 - `score_index`: required, default=1, box score/confidence. 76 When two boxes overlap IOU > `overlap_thresh`, the one with smaller score will be suppressed. 77 78 - `in_format` and `out_format`: default='corner', specify in/out box formats. 79 80 Examples:: 81 82 x = [[0, 0.5, 0.1, 0.1, 0.2, 0.2], [1, 0.4, 0.1, 0.1, 0.2, 0.2], 83 [0, 0.3, 0.1, 0.1, 0.14, 0.14], [2, 0.6, 0.5, 0.5, 0.7, 0.8]] 84 box_nms(x, overlap_thresh=0.1, coord_start=2, score_index=1, id_index=0, 85 force_suppress=True, in_format='corner', out_typ='corner') = 86 [[2, 0.6, 0.5, 0.5, 0.7, 0.8], [0, 0.5, 0.1, 0.1, 0.2, 0.2], 87 [-1, -1, -1, -1, -1, -1], [-1, -1, -1, -1, -1, -1]] 88 out_grad = [[0.1, 0.1, 0.1, 0.1, 0.1, 0.1], [0.2, 0.2, 0.2, 0.2, 0.2, 0.2], 89 [0.3, 0.3, 0.3, 0.3, 0.3, 0.3], [0.4, 0.4, 0.4, 0.4, 0.4, 0.4]] 90 # exe.backward 91 in_grad = [[0.2, 0.2, 0.2, 0.2, 0.2, 0.2], [0, 0, 0, 0, 0, 0], 92 [0, 0, 0, 0, 0, 0], [0.1, 0.1, 0.1, 0.1, 0.1, 0.1]] 93 94 )code" ADD_FILELINE) 95 .set_num_inputs(1) 96 .set_num_outputs(2) 97 .set_attr_parser(ParamParser<BoxNMSParam>) 98 .set_attr<nnvm::FNumVisibleOutputs>("FNumVisibleOutputs", BoxNMSNumVisibleOutputs) 99 .set_attr<mxnet::FInferShape>("FInferShape", BoxNMSShape) 100 .set_attr<nnvm::FInferType>("FInferType", ElemwiseType<1, 2>) 101 .set_attr<FResourceRequest>("FResourceRequest", __anon810d622c0102(const NodeAttrs& attrs) 102 [](const NodeAttrs& attrs) { 103 return std::vector<ResourceRequest>{ResourceRequest::kTempSpace}; 104 }) 105 .set_attr<THasDeterministicOutput>("THasDeterministicOutput", true) 106 .set_attr<FCompute>("FCompute<cpu>", BoxNMSForward<cpu>) 107 .set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseOut{"_backward_contrib_box_nms"}) 108 .add_argument("data", "NDArray-or-Symbol", "The input") 109 .add_arguments(BoxNMSParam::__FIELDS__()); 110 111 NNVM_REGISTER_OP(_backward_contrib_box_nms) 112 .set_num_inputs(4) 113 .set_num_outputs(1) 114 .set_attr_parser(ParamParser<BoxNMSParam>) 115 .set_attr<nnvm::TIsBackward>("TIsBackward", true) 116 .set_attr<FCompute>("FCompute<cpu>", BoxNMSBackward<cpu>) 117 .add_arguments(BoxNMSParam::__FIELDS__()); 118 119 NNVM_REGISTER_OP(_contrib_box_iou) 120 .describe(R"doc(Bounding box overlap of two arrays. 121 The overlap is defined as Intersection-over-Union, aka, IOU. 122 - lhs: (a_1, a_2, ..., a_n, 4) array 123 - rhs: (b_1, b_2, ..., b_n, 4) array 124 - output: (a_1, a_2, ..., a_n, b_1, b_2, ..., b_n) array 125 126 Note:: 127 128 Zero gradients are back-propagated in this op for now. 129 130 Example:: 131 132 x = [[0.5, 0.5, 1.0, 1.0], [0.0, 0.0, 0.5, 0.5]] 133 y = [[0.25, 0.25, 0.75, 0.75]] 134 box_iou(x, y, format='corner') = [[0.1428], [0.1428]] 135 136 )doc" ADD_FILELINE) 137 .set_num_inputs(2) 138 .set_num_outputs(1) 139 .set_attr_parser(ParamParser<BoxOverlapParam>) 140 .set_attr<nnvm::FListInputNames>("FListInputNames", __anon810d622c0202(const NodeAttrs& attrs) 141 [](const NodeAttrs& attrs) { 142 return std::vector<std::string>{"lhs", "rhs"}; 143 }) 144 .set_attr<mxnet::FInferShape>("FInferShape", BoxOverlapShape) 145 .set_attr<nnvm::FInferType>("FInferType", ElemwiseType<2, 1>) 146 .set_attr<FCompute>("FCompute<cpu>", BoxOverlapForward<cpu>) 147 .set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseNone{"_backward_contrib_box_iou"}) 148 .add_argument("lhs", "NDArray-or-Symbol", "The first input") 149 .add_argument("rhs", "NDArray-or-Symbol", "The second input") 150 .add_arguments(BoxOverlapParam::__FIELDS__()); 151 152 NNVM_REGISTER_OP(_backward_contrib_box_iou) 153 .set_num_inputs(1) 154 .set_num_outputs(2) 155 .set_attr_parser(ParamParser<BoxOverlapParam>) 156 .set_attr<nnvm::TIsBackward>("TIsBackward", true) 157 .set_attr<FCompute>("FCompute<cpu>", BoxOverlapBackward<cpu>) 158 .add_arguments(BoxOverlapParam::__FIELDS__()); 159 160 NNVM_REGISTER_OP(_contrib_bipartite_matching) 161 .describe(R"doc(Compute bipartite matching. 162 The matching is performed on score matrix with shape [B, N, M] 163 - B: batch_size 164 - N: number of rows to match 165 - M: number of columns as reference to be matched against. 166 167 Returns: 168 x : matched column indices. -1 indicating non-matched elements in rows. 169 y : matched row indices. 170 171 Note:: 172 173 Zero gradients are back-propagated in this op for now. 174 175 Example:: 176 177 s = [[0.5, 0.6], [0.1, 0.2], [0.3, 0.4]] 178 x, y = bipartite_matching(x, threshold=1e-12, is_ascend=False) 179 x = [1, -1, 0] 180 y = [2, 0] 181 182 )doc" ADD_FILELINE) 183 .set_num_inputs(1) 184 .set_num_outputs(2) 185 .set_attr_parser(ParamParser<BipartiteMatchingParam>) 186 .set_attr<FResourceRequest>("FResourceRequest", __anon810d622c0302(const NodeAttrs& attrs) 187 [](const NodeAttrs& attrs) { 188 return std::vector<ResourceRequest>{ResourceRequest::kTempSpace}; 189 }) 190 .set_attr<THasDeterministicOutput>("THasDeterministicOutput", true) 191 .set_attr<mxnet::FInferShape>("FInferShape", MatchingShape) 192 .set_attr<nnvm::FInferType>("FInferType", ElemwiseType<1, 2>) 193 .set_attr<FCompute>("FCompute<cpu>", BipartiteMatchingForward<cpu>) 194 .set_attr<nnvm::FGradient>("FGradient", 195 ElemwiseGradUseNone{"_backward_contrib_bipartite_matching"}) 196 .add_argument("data", "NDArray-or-Symbol", "The input") 197 .add_arguments(BipartiteMatchingParam::__FIELDS__()); 198 199 NNVM_REGISTER_OP(_backward_contrib_bipartite_matching) 200 .set_num_inputs(2) 201 .set_num_outputs(1) 202 .set_attr_parser(ParamParser<BipartiteMatchingParam>) 203 .set_attr<nnvm::TIsBackward>("TIsBackward", true) 204 .set_attr<FCompute>("FCompute<cpu>", BipartiteMatchingBackward<cpu>) 205 .add_arguments(BipartiteMatchingParam::__FIELDS__()); 206 207 NNVM_REGISTER_OP(_contrib_box_encode) 208 .describe(R"doc(Encode bounding boxes training target with normalized center offsets. 209 Input bounding boxes are using corner type: `x_{min}, y_{min}, x_{max}, y_{max}`.) array 210 )doc" ADD_FILELINE) 211 .set_num_inputs(6) 212 .set_num_outputs(2) 213 .set_attr<nnvm::FListInputNames>("FListInputNames", __anon810d622c0402(const NodeAttrs& attrs) 214 [](const NodeAttrs& attrs) { 215 return std::vector<std::string>{"samples", "matches", "anchors", "refs", "means", "stds"}; 216 }) 217 .set_attr<mxnet::FInferShape>("FInferShape", BoxEncodeShape) 218 .set_attr<nnvm::FInferType>("FInferType", ElemwiseType<6, 2>) 219 .set_attr<FCompute>("FCompute<cpu>", BoxEncodeForward<cpu>) 220 .set_attr<nnvm::FGradient>("FGradient", MakeZeroGradNodes) 221 .add_argument("samples", "NDArray-or-Symbol", "(B, N) value +1 (positive), -1 (negative), " 222 "0 (ignore)") 223 .add_argument("matches", "NDArray-or-Symbol", "(B, N) value range [0, M)") 224 .add_argument("anchors", "NDArray-or-Symbol", "(B, N, 4) encoded in corner") 225 .add_argument("refs", "NDArray-or-Symbol", "(B, M, 4) encoded in corner") 226 .add_argument("means", "NDArray-or-Symbol", "(4,) Mean value to be subtracted from encoded values") 227 .add_argument("stds", "NDArray-or-Symbol", "(4,) Std value to be divided from encoded values"); 228 229 NNVM_REGISTER_OP(_contrib_box_decode) 230 .describe(R"doc(Decode bounding boxes training target with normalized center offsets. 231 Input bounding boxes are using corner type: `x_{min}, y_{min}, x_{max}, y_{max}` 232 or center type: `x, y, width, height.) array 233 )doc" ADD_FILELINE) 234 .set_num_inputs(2) 235 .set_num_outputs(1) 236 .set_attr_parser(ParamParser<BoxDecodeParam>) 237 .set_attr<nnvm::FListInputNames>("FListInputNames", __anon810d622c0502(const NodeAttrs& attrs) 238 [](const NodeAttrs& attrs) { 239 return std::vector<std::string>{"data", "anchors"}; 240 }) 241 .set_attr<mxnet::FInferShape>("FInferShape", BoxDecodeShape) 242 .set_attr<nnvm::FInferType>("FInferType", ElemwiseType<2, 1>) 243 .set_attr<FCompute>("FCompute<cpu>", BoxDecodeForward<cpu>) 244 .set_attr<nnvm::FGradient>("FGradient", MakeZeroGradNodes) 245 .add_argument("data", "NDArray-or-Symbol", "(B, N, 4) predicted bbox offset") 246 .add_argument("anchors", "NDArray-or-Symbol", "(1, N, 4) encoded in corner or center") 247 .add_arguments(BoxDecodeParam::__FIELDS__()); 248 249 } // namespace op 250 } // namespace mxnet 251