1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one
3  * or more contributor license agreements.  See the NOTICE file
4  * distributed with this work for additional information
5  * regarding copyright ownership.  The ASF licenses this file
6  * to you under the Apache License, Version 2.0 (the
7  * "License"); you may not use this file except in compliance
8  * with the License.  You may obtain a copy of the License at
9  *
10  *   http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing,
13  * software distributed under the License is distributed on an
14  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15  * KIND, either express or implied.  See the License for the
16  * specific language governing permissions and limitations
17  * under the License.
18  */
19 
20  /*!
21   * \file bounding_box.cc
22   * \brief Bounding box util functions and operators
23   * \author Joshua Zhang
24   */
25 
26 #include "./bounding_box-inl.h"
27 #include "../elemwise_op_common.h"
28 
29 namespace mxnet {
30 namespace op {
31 DMLC_REGISTER_PARAMETER(BoxNMSParam);
32 DMLC_REGISTER_PARAMETER(BoxOverlapParam);
33 DMLC_REGISTER_PARAMETER(BipartiteMatchingParam);
34 DMLC_REGISTER_PARAMETER(BoxDecodeParam);
35 
36 
37 NNVM_REGISTER_OP(_contrib_box_nms)
38 .add_alias("_contrib_box_non_maximum_suppression")
39 .describe(R"code(Apply non-maximum suppression to input.
40 
41 The output will be sorted in descending order according to `score`. Boxes with
42 overlaps larger than `overlap_thresh`, smaller scores and background boxes
43 will be removed and filled with -1, the corresponding position will be recorded
44 for backward propogation.
45 
46 During back-propagation, the gradient will be copied to the original
47 position according to the input index. For positions that have been suppressed,
48 the in_grad will be assigned 0.
49 In summary, gradients are sticked to its boxes, will either be moved or discarded
50 according to its original index in input.
51 
52 Input requirements::
53 
54   1. Input tensor have at least 2 dimensions, (n, k), any higher dims will be regarded
55   as batch, e.g. (a, b, c, d, n, k) == (a*b*c*d, n, k)
56   2. n is the number of boxes in each batch
57   3. k is the width of each box item.
58 
59 By default, a box is [id, score, xmin, ymin, xmax, ymax, ...],
60 additional elements are allowed.
61 
62 - `id_index`: optional, use -1 to ignore, useful if `force_suppress=False`, which means
63   we will skip highly overlapped boxes if one is `apple` while the other is `car`.
64 
65 - `background_id`: optional, default=-1, class id for background boxes, useful
66   when `id_index >= 0` which means boxes with background id will be filtered before nms.
67 
68 - `coord_start`: required, default=2, the starting index of the 4 coordinates.
69   Two formats are supported:
70 
71     - `corner`: [xmin, ymin, xmax, ymax]
72 
73     - `center`: [x, y, width, height]
74 
75 - `score_index`: required, default=1, box score/confidence.
76   When two boxes overlap IOU > `overlap_thresh`, the one with smaller score will be suppressed.
77 
78 - `in_format` and `out_format`: default='corner', specify in/out box formats.
79 
80 Examples::
81 
82   x = [[0, 0.5, 0.1, 0.1, 0.2, 0.2], [1, 0.4, 0.1, 0.1, 0.2, 0.2],
83        [0, 0.3, 0.1, 0.1, 0.14, 0.14], [2, 0.6, 0.5, 0.5, 0.7, 0.8]]
84   box_nms(x, overlap_thresh=0.1, coord_start=2, score_index=1, id_index=0,
85       force_suppress=True, in_format='corner', out_typ='corner') =
86       [[2, 0.6, 0.5, 0.5, 0.7, 0.8], [0, 0.5, 0.1, 0.1, 0.2, 0.2],
87        [-1, -1, -1, -1, -1, -1], [-1, -1, -1, -1, -1, -1]]
88   out_grad = [[0.1, 0.1, 0.1, 0.1, 0.1, 0.1], [0.2, 0.2, 0.2, 0.2, 0.2, 0.2],
89               [0.3, 0.3, 0.3, 0.3, 0.3, 0.3], [0.4, 0.4, 0.4, 0.4, 0.4, 0.4]]
90   # exe.backward
91   in_grad = [[0.2, 0.2, 0.2, 0.2, 0.2, 0.2], [0, 0, 0, 0, 0, 0],
92              [0, 0, 0, 0, 0, 0], [0.1, 0.1, 0.1, 0.1, 0.1, 0.1]]
93 
94 )code" ADD_FILELINE)
95 .set_num_inputs(1)
96 .set_num_outputs(2)
97 .set_attr_parser(ParamParser<BoxNMSParam>)
98 .set_attr<nnvm::FNumVisibleOutputs>("FNumVisibleOutputs", BoxNMSNumVisibleOutputs)
99 .set_attr<mxnet::FInferShape>("FInferShape", BoxNMSShape)
100 .set_attr<nnvm::FInferType>("FInferType", ElemwiseType<1, 2>)
101 .set_attr<FResourceRequest>("FResourceRequest",
__anon810d622c0102(const NodeAttrs& attrs) 102   [](const NodeAttrs& attrs) {
103     return std::vector<ResourceRequest>{ResourceRequest::kTempSpace};
104   })
105 .set_attr<THasDeterministicOutput>("THasDeterministicOutput", true)
106 .set_attr<FCompute>("FCompute<cpu>", BoxNMSForward<cpu>)
107 .set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseOut{"_backward_contrib_box_nms"})
108 .add_argument("data", "NDArray-or-Symbol", "The input")
109 .add_arguments(BoxNMSParam::__FIELDS__());
110 
111 NNVM_REGISTER_OP(_backward_contrib_box_nms)
112 .set_num_inputs(4)
113 .set_num_outputs(1)
114 .set_attr_parser(ParamParser<BoxNMSParam>)
115 .set_attr<nnvm::TIsBackward>("TIsBackward", true)
116 .set_attr<FCompute>("FCompute<cpu>", BoxNMSBackward<cpu>)
117 .add_arguments(BoxNMSParam::__FIELDS__());
118 
119 NNVM_REGISTER_OP(_contrib_box_iou)
120 .describe(R"doc(Bounding box overlap of two arrays.
121   The overlap is defined as Intersection-over-Union, aka, IOU.
122   - lhs: (a_1, a_2, ..., a_n, 4) array
123   - rhs: (b_1, b_2, ..., b_n, 4) array
124   - output: (a_1, a_2, ..., a_n, b_1, b_2, ..., b_n) array
125 
126   Note::
127 
128     Zero gradients are back-propagated in this op for now.
129 
130   Example::
131 
132     x = [[0.5, 0.5, 1.0, 1.0], [0.0, 0.0, 0.5, 0.5]]
133     y = [[0.25, 0.25, 0.75, 0.75]]
134     box_iou(x, y, format='corner') = [[0.1428], [0.1428]]
135 
136 )doc" ADD_FILELINE)
137 .set_num_inputs(2)
138 .set_num_outputs(1)
139 .set_attr_parser(ParamParser<BoxOverlapParam>)
140 .set_attr<nnvm::FListInputNames>("FListInputNames",
__anon810d622c0202(const NodeAttrs& attrs) 141   [](const NodeAttrs& attrs) {
142     return std::vector<std::string>{"lhs", "rhs"};
143   })
144 .set_attr<mxnet::FInferShape>("FInferShape", BoxOverlapShape)
145 .set_attr<nnvm::FInferType>("FInferType", ElemwiseType<2, 1>)
146 .set_attr<FCompute>("FCompute<cpu>", BoxOverlapForward<cpu>)
147 .set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseNone{"_backward_contrib_box_iou"})
148 .add_argument("lhs", "NDArray-or-Symbol", "The first input")
149 .add_argument("rhs", "NDArray-or-Symbol", "The second input")
150 .add_arguments(BoxOverlapParam::__FIELDS__());
151 
152 NNVM_REGISTER_OP(_backward_contrib_box_iou)
153 .set_num_inputs(1)
154 .set_num_outputs(2)
155 .set_attr_parser(ParamParser<BoxOverlapParam>)
156 .set_attr<nnvm::TIsBackward>("TIsBackward", true)
157 .set_attr<FCompute>("FCompute<cpu>", BoxOverlapBackward<cpu>)
158 .add_arguments(BoxOverlapParam::__FIELDS__());
159 
160 NNVM_REGISTER_OP(_contrib_bipartite_matching)
161 .describe(R"doc(Compute bipartite matching.
162   The matching is performed on score matrix with shape [B, N, M]
163   - B: batch_size
164   - N: number of rows to match
165   - M: number of columns as reference to be matched against.
166 
167   Returns:
168   x : matched column indices. -1 indicating non-matched elements in rows.
169   y : matched row indices.
170 
171   Note::
172 
173     Zero gradients are back-propagated in this op for now.
174 
175   Example::
176 
177     s = [[0.5, 0.6], [0.1, 0.2], [0.3, 0.4]]
178     x, y = bipartite_matching(x, threshold=1e-12, is_ascend=False)
179     x = [1, -1, 0]
180     y = [2, 0]
181 
182 )doc" ADD_FILELINE)
183 .set_num_inputs(1)
184 .set_num_outputs(2)
185 .set_attr_parser(ParamParser<BipartiteMatchingParam>)
186 .set_attr<FResourceRequest>("FResourceRequest",
__anon810d622c0302(const NodeAttrs& attrs) 187   [](const NodeAttrs& attrs) {
188     return std::vector<ResourceRequest>{ResourceRequest::kTempSpace};
189   })
190 .set_attr<THasDeterministicOutput>("THasDeterministicOutput", true)
191 .set_attr<mxnet::FInferShape>("FInferShape", MatchingShape)
192 .set_attr<nnvm::FInferType>("FInferType", ElemwiseType<1, 2>)
193 .set_attr<FCompute>("FCompute<cpu>", BipartiteMatchingForward<cpu>)
194 .set_attr<nnvm::FGradient>("FGradient",
195   ElemwiseGradUseNone{"_backward_contrib_bipartite_matching"})
196 .add_argument("data", "NDArray-or-Symbol", "The input")
197 .add_arguments(BipartiteMatchingParam::__FIELDS__());
198 
199 NNVM_REGISTER_OP(_backward_contrib_bipartite_matching)
200 .set_num_inputs(2)
201 .set_num_outputs(1)
202 .set_attr_parser(ParamParser<BipartiteMatchingParam>)
203 .set_attr<nnvm::TIsBackward>("TIsBackward", true)
204 .set_attr<FCompute>("FCompute<cpu>", BipartiteMatchingBackward<cpu>)
205 .add_arguments(BipartiteMatchingParam::__FIELDS__());
206 
207 NNVM_REGISTER_OP(_contrib_box_encode)
208 .describe(R"doc(Encode bounding boxes training target with normalized center offsets.
209     Input bounding boxes are using corner type: `x_{min}, y_{min}, x_{max}, y_{max}`.) array
210 )doc" ADD_FILELINE)
211 .set_num_inputs(6)
212 .set_num_outputs(2)
213 .set_attr<nnvm::FListInputNames>("FListInputNames",
__anon810d622c0402(const NodeAttrs& attrs) 214   [](const NodeAttrs& attrs) {
215     return std::vector<std::string>{"samples", "matches", "anchors", "refs", "means", "stds"};
216   })
217 .set_attr<mxnet::FInferShape>("FInferShape", BoxEncodeShape)
218 .set_attr<nnvm::FInferType>("FInferType", ElemwiseType<6, 2>)
219 .set_attr<FCompute>("FCompute<cpu>", BoxEncodeForward<cpu>)
220 .set_attr<nnvm::FGradient>("FGradient", MakeZeroGradNodes)
221 .add_argument("samples", "NDArray-or-Symbol", "(B, N) value +1 (positive), -1 (negative), "
222               "0 (ignore)")
223 .add_argument("matches", "NDArray-or-Symbol", "(B, N) value range [0, M)")
224 .add_argument("anchors", "NDArray-or-Symbol", "(B, N, 4) encoded in corner")
225 .add_argument("refs", "NDArray-or-Symbol", "(B, M, 4) encoded in corner")
226 .add_argument("means", "NDArray-or-Symbol", "(4,) Mean value to be subtracted from encoded values")
227 .add_argument("stds", "NDArray-or-Symbol", "(4,) Std value to be divided from encoded values");
228 
229 NNVM_REGISTER_OP(_contrib_box_decode)
230 .describe(R"doc(Decode bounding boxes training target with normalized center offsets.
231     Input bounding boxes are using corner type: `x_{min}, y_{min}, x_{max}, y_{max}`
232     or center type: `x, y, width, height.) array
233 )doc" ADD_FILELINE)
234 .set_num_inputs(2)
235 .set_num_outputs(1)
236 .set_attr_parser(ParamParser<BoxDecodeParam>)
237 .set_attr<nnvm::FListInputNames>("FListInputNames",
__anon810d622c0502(const NodeAttrs& attrs) 238   [](const NodeAttrs& attrs) {
239     return std::vector<std::string>{"data", "anchors"};
240   })
241 .set_attr<mxnet::FInferShape>("FInferShape", BoxDecodeShape)
242 .set_attr<nnvm::FInferType>("FInferType", ElemwiseType<2, 1>)
243 .set_attr<FCompute>("FCompute<cpu>", BoxDecodeForward<cpu>)
244 .set_attr<nnvm::FGradient>("FGradient", MakeZeroGradNodes)
245 .add_argument("data", "NDArray-or-Symbol", "(B, N, 4) predicted bbox offset")
246 .add_argument("anchors", "NDArray-or-Symbol", "(1, N, 4) encoded in corner or center")
247 .add_arguments(BoxDecodeParam::__FIELDS__());
248 
249 }  // namespace op
250 }  // namespace mxnet
251