# Tencent is pleased to support the open source community by making ncnn available.
#
# Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
#
# Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
# in compliance with the License. You may obtain a copy of the License at
#
# https://opensource.org/licenses/BSD-3-Clause
#
# Unless required by applicable law or agreed to in writing, software distributed
# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
# CONDITIONS OF ANY KIND, either express or implied. See the License for the
# specific language governing permissions and limitations under the License.

import numpy as np
import ncnn

from .model_store import get_model_file
from ..utils.objects import Detect_Object


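# Faster R-CNN object detector (ZF backbone, PASCAL VOC) running on the ncnn
# inference runtime. Detection is two-stage: one extractor pass produces
# region proposals, and a second pass classifies and refines each proposal.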
class Faster_RCNN:
    def __init__(
        self,
        img_width=600,
        img_height=600,
        num_threads=1,
        use_gpu=False,
        max_per_image=100,
        confidence_thresh=0.05,
        nms_threshold=0.3,
    ):
        self.img_width = img_width
        self.img_height = img_height
        self.num_threads = num_threads
        self.use_gpu = use_gpu

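        # per-channel BGR means from the original py-faster-rcnn Caffe setup;
        # the empty norm_vals list means mean subtraction is applied without
        # any per-channel scaling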
        self.mean_vals = [102.9801, 115.9465, 122.7717]
        self.norm_vals = []

        self.net = ncnn.Net()
        self.net.opt.use_vulkan_compute = self.use_gpu

        # original pretrained model from https://github.com/rbgirshick/py-faster-rcnn
        # py-faster-rcnn/models/pascal_voc/ZF/faster_rcnn_alt_opt/faster_rcnn_test.pt
        # https://dl.dropboxusercontent.com/s/o6ii098bu51d139/faster_rcnn_models.tgz?dl=0
        # ZF_faster_rcnn_final.caffemodel
        # the converted ncnn model files are available at https://github.com/nihui/ncnn-assets/tree/master/models
        self.net.load_param(get_model_file("ZF_faster_rcnn_final.param"))
        self.net.load_model(get_model_file("ZF_faster_rcnn_final.bin"))

        self.max_per_image = max_per_image
        self.confidence_thresh = confidence_thresh
        self.nms_threshold = nms_threshold

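        # index 0 is the background class, followed by the 20 PASCAL VOC
        # object categories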
        self.class_names = [
            "background",
            "aeroplane",
            "bicycle",
            "bird",
            "boat",
            "bottle",
            "bus",
            "car",
            "cat",
            "chair",
            "cow",
            "diningtable",
            "dog",
            "horse",
            "motorbike",
            "person",
            "pottedplant",
            "sheep",
            "sofa",
            "train",
            "tvmonitor",
        ]

    def __del__(self):
        self.net = None

    def __call__(self, img):
        # resize so that the shorter image side matches the target size,
        # preserving the aspect ratio
        h = img.shape[0]
        w = img.shape[1]
        scale = 1.0
        if w < h:
            scale = float(self.img_width) / w
            w = self.img_width
            h = int(h * scale)
        else:
            scale = float(self.img_height) / h
            h = self.img_height
            w = int(w * scale)

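        # convert the BGR uint8 numpy image to an ncnn.Mat, resizing to
        # (w, h) in the same step, then subtract the per-channel means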
        mat_in = ncnn.Mat.from_pixels_resize(
            img, ncnn.Mat.PixelType.PIXEL_BGR, img.shape[1], img.shape[0], w, h
        )
        mat_in.substract_mean_normalize(self.mean_vals, self.norm_vals)

        # method 1: build the Mat directly from a numpy array
        # im_info = ncnn.Mat(np.array([h, w, scale], dtype=np.float32))

        # method 2: allocate an ncnn.Mat and fill it element by element
        im_info = ncnn.Mat(3)
        im_info[0] = h
        im_info[1] = w
        im_info[2] = scale

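        # stage 1: run the shared convolutional layers and the region proposal
        # network; the proposal layer consumes im_info (resized height, width,
        # scale) and emits one region proposal per channel of "rois"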
        ex1 = self.net.create_extractor()
        ex1.set_num_threads(self.num_threads)

        ex1.input("data", mat_in)
        ex1.input("im_info", im_info)

        ret1, conv5_relu5 = ex1.extract("conv5_relu5")
        ret2, rois = ex1.extract("rois")

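        # stage 2: feed each proposal through the ROI pooling head to get
        # per-class scores (cls_prob) and box regression deltas (bbox_pred)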
        class_candidates = []
        for i in range(rois.c):
            ex2 = self.net.create_extractor()

            roi = rois.channel(i)  # get single roi
            ex2.input("conv5_relu5", conv5_relu5)
            ex2.input("rois", roi)

            ret1, bbox_pred = ex2.extract("bbox_pred")
            ret2, cls_prob = ex2.extract("cls_prob")

            num_class = cls_prob.w
            while len(class_candidates) < num_class:
                class_candidates.append([])

            # find the class id with the highest score
            label = 0
            score = 0.0
            for j in range(num_class):
                class_score = cls_prob[j]
                if class_score > score:
                    label = j
                    score = class_score

            # ignore background and low-score detections
            if label == 0 or score <= self.confidence_thresh:
                continue

            # print("%d = %f" % (label, score))

            # unscale the proposal coordinates back to the original image size
            x1 = roi[0] / scale
            y1 = roi[1] / scale
            x2 = roi[2] / scale
            y2 = roi[3] / scale

            pb_w = x2 - x1 + 1
            pb_h = y2 - y1 + 1

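            # the deltas use the standard Faster R-CNN box parameterization:
            # the proposal center moves by (dx, dy) in units of the proposal
            # width/height, and the size is scaled by exp(dw) and exp(dh)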
            # apply bbox regression
            dx = bbox_pred[label * 4]
            dy = bbox_pred[label * 4 + 1]
            dw = bbox_pred[label * 4 + 2]
            dh = bbox_pred[label * 4 + 3]

            cx = x1 + pb_w * 0.5
            cy = y1 + pb_h * 0.5

            obj_cx = cx + pb_w * dx
            obj_cy = cy + pb_h * dy

            obj_w = pb_w * np.exp(dw)
            obj_h = pb_h * np.exp(dh)

            obj_x1 = obj_cx - obj_w * 0.5
            obj_y1 = obj_cy - obj_h * 0.5
            obj_x2 = obj_cx + obj_w * 0.5
            obj_y2 = obj_cy + obj_h * 0.5

            # clip to the original image boundary
            obj_x1 = np.maximum(np.minimum(obj_x1, float(img.shape[1] - 1)), 0.0)
            obj_y1 = np.maximum(np.minimum(obj_y1, float(img.shape[0] - 1)), 0.0)
            obj_x2 = np.maximum(np.minimum(obj_x2, float(img.shape[1] - 1)), 0.0)
            obj_y2 = np.maximum(np.minimum(obj_y2, float(img.shape[0] - 1)), 0.0)

            # append object
            obj = Detect_Object()
            obj.rect.x = obj_x1
            obj.rect.y = obj_y1
            obj.rect.w = obj_x2 - obj_x1 + 1
            obj.rect.h = obj_y2 - obj_y1 + 1
            obj.label = label
            obj.prob = score

            class_candidates[label].append(obj)

        # post-process: per-class NMS, then keep only the highest-scoring
        # max_per_image detections overall
        objects = []
        for candidates in class_candidates:
            if len(candidates) == 0:
                continue

            candidates.sort(key=lambda obj: obj.prob, reverse=True)

            picked = self.nms_sorted_bboxes(candidates, self.nms_threshold)

            for z in picked:
                objects.append(candidates[z])

        objects.sort(key=lambda obj: obj.prob, reverse=True)

        objects = objects[: self.max_per_image]

        return objects

    def nms_sorted_bboxes(self, objects, nms_threshold):
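        # greedy non-maximum suppression over boxes pre-sorted by descending
        # score: a box is kept only if its IoU with every previously kept box
        # does not exceed nms_threshold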
        picked = []

        n = len(objects)

        areas = np.zeros((n,), dtype=np.float32)
        for i in range(n):
            areas[i] = objects[i].rect.area()

        for i in range(n):
            a = objects[i]

            keep = True
            for p in picked:
                b = objects[p]

                # intersection over union
                inter_area = a.rect.intersection_area(b.rect)
                union_area = areas[i] + areas[p] - inter_area
                if inter_area / union_area > nms_threshold:
                    keep = False

            if keep:
                picked.append(i)

        return picked
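

# Example usage (a minimal sketch; assumes OpenCV is installed, and
# "demo.jpg" is a hypothetical input image on disk):
#
#   import cv2
#
#   img = cv2.imread("demo.jpg")  # HWC, BGR, uint8
#   detector = Faster_RCNN(num_threads=4)
#   objects = detector(img)
#   for obj in objects:
#       print(detector.class_names[obj.label], obj.prob)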