# Tencent is pleased to support the open source community by making ncnn available.
#
# Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
#
# Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
# in compliance with the License. You may obtain a copy of the License at
#
# https://opensource.org/licenses/BSD-3-Clause
#
# Unless required by applicable law or agreed to in writing, software distributed
# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
# CONDITIONS OF ANY KIND, either express or implied. See the License for the
# specific language governing permissions and limitations under the License.

import numpy as np
import ncnn
from .model_store import get_model_file
from ..utils.objects import Detect_Object


class RFCN:
    def __init__(
        self,
        target_size=224,
        max_per_image=100,
        confidence_thresh=0.6,
        nms_threshold=0.3,
        num_threads=1,
        use_gpu=False,
    ):
        self.target_size = target_size
        self.max_per_image = max_per_image
        self.confidence_thresh = confidence_thresh
        self.nms_threshold = nms_threshold
        self.num_threads = num_threads
        self.use_gpu = use_gpu
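
        # Caffe-style per-channel BGR means; norm_vals is left empty, so
        # substract_mean_normalize() only subtracts the means without scaling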
        self.mean_vals = [102.9801, 115.9465, 122.7717]
        self.norm_vals = []

        self.net = ncnn.Net()
        self.net.opt.use_vulkan_compute = self.use_gpu

        # original pretrained model from https://github.com/YuwenXiong/py-R-FCN
        # https://github.com/YuwenXiong/py-R-FCN/blob/master/models/pascal_voc/ResNet-50/rfcn_end2end/test_agnostic.prototxt
        # https://1drv.ms/u/s!AoN7vygOjLIQqUWHpY67oaC7mopf
        # resnet50_rfcn_final.caffemodel
        # the converted ncnn model can be found at https://github.com/nihui/ncnn-assets/tree/master/models
        self.net.load_param(get_model_file("rfcn_end2end.param"))
        self.net.load_model(get_model_file("rfcn_end2end.bin"))
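
        # the 20 PASCAL VOC object classes, with "background" at index 0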
        self.class_names = [
            "background",
            "aeroplane",
            "bicycle",
            "bird",
            "boat",
            "bottle",
            "bus",
            "car",
            "cat",
            "chair",
            "cow",
            "diningtable",
            "dog",
            "horse",
            "motorbike",
            "person",
            "pottedplant",
            "sheep",
            "sofa",
            "train",
            "tvmonitor",
        ]

    def __del__(self):
        self.net = None

    def __call__(self, img):
        h = img.shape[0]
        w = img.shape[1]
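
        # scale the shorter image side to target_size, keeping the aspect ratio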
        scale = 1.0
        if w < h:
            scale = float(self.target_size) / w
            w = self.target_size
            h = h * scale
        else:
            scale = float(self.target_size) / h
            h = self.target_size
            w = w * scale

        mat_in = ncnn.Mat.from_pixels_resize(
            img,
            ncnn.Mat.PixelType.PIXEL_BGR,
            img.shape[1],
            img.shape[0],
            int(w),
            int(h),
        )
        mat_in.substract_mean_normalize(self.mean_vals, self.norm_vals)
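
        # im_info carries the (height, width, scale) of the resized input,
        # consumed by the proposal layer inside the network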
        im_info = ncnn.Mat(3)
        im_info[0] = h
        im_info[1] = w
        im_info[2] = scale

        # step 1: extract the shared feature maps and all ROIs
        ex1 = self.net.create_extractor()
        ex1.set_num_threads(self.num_threads)
        ex1.input("data", mat_in)
        ex1.input("im_info", im_info)

        ret1, rfcn_cls = ex1.extract("rfcn_cls")
        ret2, rfcn_bbox = ex1.extract("rfcn_bbox")
        ret3, rois = ex1.extract("rois")  # all ROIs proposed by the region proposal network
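
        # step 2 re-runs the position-sensitive pooling and prediction part of
        # the graph once per ROI, feeding back the score maps extracted above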
        # step 2: extract the bbox and score for each ROI
        class_candidates = []
        for i in range(rois.c):
            ex2 = self.net.create_extractor()

            roi = rois.channel(i)  # get a single ROI
            ex2.input("rfcn_cls", rfcn_cls)
            ex2.input("rfcn_bbox", rfcn_bbox)
            ex2.input("rois", roi)

            ret1, bbox_pred = ex2.extract("bbox_pred")
            ret2, cls_prob = ex2.extract("cls_prob")

            num_class = cls_prob.w
            while len(class_candidates) < num_class:
                class_candidates.append([])

            # find the class id with the highest score
            label = 0
            score = 0.0
            for j in range(num_class):
                class_score = cls_prob[j]
                if class_score > score:
                    label = j
                    score = class_score

            # ignore background or low-score detections
            if label == 0 or score <= self.confidence_thresh:
                continue
            # print("%d = %f" % (label, score))

            # unscale the ROI back to the original image size
            x1 = roi[0] / scale
            y1 = roi[1] / scale
            x2 = roi[2] / scale
            y2 = roi[3] / scale

            pb_w = x2 - x1 + 1
            pb_h = y2 - y1 + 1

            # apply the class-agnostic bbox regression
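            # bbox_pred holds 4 deltas for the background class followed by 4
            # for the foreground box, hence the offset of 4; the deltas shift
            # the box center by a fraction of its size and rescale it by
            # exp(dw) and exp(dh)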
            dx = bbox_pred[4]
            dy = bbox_pred[4 + 1]
            dw = bbox_pred[4 + 2]
            dh = bbox_pred[4 + 3]

            cx = x1 + pb_w * 0.5
            cy = y1 + pb_h * 0.5

            obj_cx = cx + pb_w * dx
            obj_cy = cy + pb_h * dy

            obj_w = pb_w * np.exp(dw)
            obj_h = pb_h * np.exp(dh)

            obj_x1 = obj_cx - obj_w * 0.5
            obj_y1 = obj_cy - obj_h * 0.5
            obj_x2 = obj_cx + obj_w * 0.5
            obj_y2 = obj_cy + obj_h * 0.5

            # clip to the image boundary
            obj_x1 = np.maximum(np.minimum(obj_x1, float(img.shape[1] - 1)), 0.0)
            obj_y1 = np.maximum(np.minimum(obj_y1, float(img.shape[0] - 1)), 0.0)
            obj_x2 = np.maximum(np.minimum(obj_x2, float(img.shape[1] - 1)), 0.0)
            obj_y2 = np.maximum(np.minimum(obj_y2, float(img.shape[0] - 1)), 0.0)

            # append the object
            obj = Detect_Object()
            obj.rect.x = obj_x1
            obj.rect.y = obj_y1
            obj.rect.w = obj_x2 - obj_x1 + 1
            obj.rect.h = obj_y2 - obj_y1 + 1
            obj.label = label
            obj.prob = score

            class_candidates[label].append(obj)

        # post process: per-class NMS, then keep the top max_per_image
        # detections across all classes
        objects = []
        for candidates in class_candidates:
            if len(candidates) == 0:
                continue

            candidates.sort(key=lambda obj: obj.prob, reverse=True)

            picked = self.nms_sorted_bboxes(candidates, self.nms_threshold)

            for z in picked:
                objects.append(candidates[z])

        objects.sort(key=lambda obj: obj.prob, reverse=True)

        objects = objects[: self.max_per_image]

        return objects
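
    # greedy non-maximum suppression over boxes already sorted by descending
    # score; returns the indices of the boxes to keep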
    def nms_sorted_bboxes(self, objects, nms_threshold):
        picked = []

        n = len(objects)

        areas = np.zeros((n,), dtype=np.float32)
        for i in range(n):
            areas[i] = objects[i].rect.area()

        for i in range(n):
            a = objects[i]

            keep = True
            for j in picked:
                b = objects[j]

                # intersection over union
                inter_area = a.rect.intersection_area(b.rect)
                union_area = areas[i] + areas[j] - inter_area
                if inter_area / union_area > nms_threshold:
                    keep = False
                    break

            if keep:
                picked.append(i)

        return picked
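
# Minimal usage sketch, not part of the original example. It assumes OpenCV
# (cv2) is installed, that the model files can be fetched by get_model_file(),
# and that this module sits at ncnn.model_zoo.rfcn as in the upstream layout,
# so the relative imports resolve when run as
# `python -m ncnn.model_zoo.rfcn path/to/image.jpg`.
if __name__ == "__main__":
    import sys

    import cv2

    image = cv2.imread(sys.argv[1])  # BGR, as expected by PIXEL_BGR above
    detector = RFCN(num_threads=4)
    for det in detector(image):
        print(
            "%s %.3f at x=%.1f y=%.1f w=%.1f h=%.1f"
            % (
                detector.class_names[int(det.label)],
                det.prob,
                det.rect.x,
                det.rect.y,
                det.rect.w,
                det.rect.h,
            )
        )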