# Tencent is pleased to support the open source community by making ncnn available.
#
# Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
#
# Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
# in compliance with the License. You may obtain a copy of the License at
#
# https://opensource.org/licenses/BSD-3-Clause
#
# Unless required by applicable law or agreed to in writing, software distributed
# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
# CONDITIONS OF ANY KIND, either express or implied. See the License for the
# specific language governing permissions and limitations under the License.

import numpy as np
import ncnn
from .model_store import get_model_file
from ..utils.objects import Detect_Object


class Faster_RCNN:
    """Two-stage Faster R-CNN detector running on an ncnn network.

    The network is loaded from the ``ZF_faster_rcnn_final`` param/bin pair
    resolved through ``get_model_file``. Calling an instance with an image
    returns a list of ``Detect_Object`` detections.
    """

    def __init__(
        self,
        img_width=600,
        img_height=600,
        num_threads=1,
        use_gpu=False,
        max_per_image=100,
        confidence_thresh=0.05,
        nms_threshold=0.3,
    ):
        """Load the model and store the detection settings.

        Args:
            img_width: Target size used for the width when the input image is
                narrower than it is tall.
            img_height: Target size used for the height otherwise.
            num_threads: Thread count passed to every ncnn extractor.
            use_gpu: Enables ``use_vulkan_compute`` on the network options.
            max_per_image: Upper bound on the number of returned detections.
            confidence_thresh: ROIs whose best class score is not strictly
                greater than this value are discarded.
            nms_threshold: IoU above which a lower-scored box of the same
                class is suppressed.
        """
        self.img_width = img_width
        self.img_height = img_height
        self.num_threads = num_threads
        self.use_gpu = use_gpu

        # Per-channel means subtracted from the input; no scaling is applied
        # (empty norm_vals).
        self.mean_vals = [102.9801, 115.9465, 122.7717]
        self.norm_vals = []

        self.net = ncnn.Net()
        self.net.opt.use_vulkan_compute = self.use_gpu

        # original pretrained model from https://github.com/rbgirshick/py-faster-rcnn
        # py-faster-rcnn/models/pascal_voc/ZF/faster_rcnn_alt_opt/faster_rcnn_test.pt
        # https://dl.dropboxusercontent.com/s/o6ii098bu51d139/faster_rcnn_models.tgz?dl=0
        # ZF_faster_rcnn_final.caffemodel
        # the ncnn model https://github.com/nihui/ncnn-assets/tree/master/models
        self.net.load_param(get_model_file("ZF_faster_rcnn_final.param"))
        self.net.load_model(get_model_file("ZF_faster_rcnn_final.bin"))

        self.max_per_image = max_per_image
        self.confidence_thresh = confidence_thresh
        self.nms_threshold = nms_threshold

        # Index in this list == class id produced by the network; id 0 is
        # background and is never reported.
        self.class_names = [
            "background",
            "aeroplane",
            "bicycle",
            "bird",
            "boat",
            "bottle",
            "bus",
            "car",
            "cat",
            "chair",
            "cow",
            "diningtable",
            "dog",
            "horse",
            "motorbike",
            "person",
            "pottedplant",
            "sheep",
            "sofa",
            "train",
            "tvmonitor",
        ]

    def __del__(self):
        # Drop the reference to the network when the wrapper is collected.
        self.net = None

    def __call__(self, img):
        """Detect objects in ``img``.

        Args:
            img: Image array indexed as ``img.shape[0]`` = height and
                ``img.shape[1]`` = width; it is handed to ncnn as
                ``PIXEL_BGR``. NOTE(review): presumably an HxWx3 uint8 BGR
                array (e.g. from OpenCV) — confirm against callers.

        Returns:
            A list of ``Detect_Object`` sorted by descending ``prob`` and
            truncated to ``max_per_image``. Rectangles are expressed in the
            coordinates of the original (unscaled) image.
        """
        img_h = img.shape[0]
        img_w = img.shape[1]

        # scale to target detect size: the shorter side is resized to the
        # configured target and the other side follows the same factor
        if img_w < img_h:
            scale = float(self.img_width) / img_w
            w = self.img_width
            h = int(img_h * scale)
        else:
            scale = float(self.img_height) / img_h
            h = self.img_height
            w = int(img_w * scale)

        mat_in = ncnn.Mat.from_pixels_resize(
            img, ncnn.Mat.PixelType.PIXEL_BGR, img_w, img_h, w, h
        )
        mat_in.substract_mean_normalize(self.mean_vals, self.norm_vals)

        # method 1 use numpy to Mat interface
        # im_info = ncnn.Mat(np.array([h, w, scale], dtype=np.float32))

        # method 2 use ncnn.Mat interface
        im_info = ncnn.Mat(3)
        im_info[0] = h
        im_info[1] = w
        im_info[2] = scale

        # stage 1: shared feature map and region proposals
        ex1 = self.net.create_extractor()
        ex1.set_num_threads(self.num_threads)

        ex1.input("data", mat_in)
        ex1.input("im_info", im_info)

        _, conv5_relu5 = ex1.extract("conv5_relu5")
        _, rois = ex1.extract("rois")

        # clip limits in original-image coordinates
        x_max = float(img_w - 1)
        y_max = float(img_h - 1)

        # stage 2: classify and refine every proposal individually
        class_candidates = []
        for i in range(rois.c):
            ex2 = self.net.create_extractor()
            # honour the configured thread count for the second stage as well
            ex2.set_num_threads(self.num_threads)

            roi = rois.channel(i)  # get single roi
            ex2.input("conv5_relu5", conv5_relu5)
            ex2.input("rois", roi)

            _, bbox_pred = ex2.extract("bbox_pred")
            _, cls_prob = ex2.extract("cls_prob")

            num_class = cls_prob.w
            # one candidate bucket per class id, grown on demand
            while len(class_candidates) < num_class:
                class_candidates.append([])

            # find class id with highest score
            label = 0
            score = 0.0
            for j in range(num_class):
                class_score = cls_prob[j]
                if class_score > score:
                    label = j
                    score = class_score

            # ignore background or low score
            if label == 0 or score <= self.confidence_thresh:
                continue

            # unscale to image size
            x1 = roi[0] / scale
            y1 = roi[1] / scale
            x2 = roi[2] / scale
            y2 = roi[3] / scale

            pb_w = x2 - x1 + 1
            pb_h = y2 - y1 + 1

            # apply bbox regression: 4 deltas per class, laid out contiguously
            dx = bbox_pred[label * 4]
            dy = bbox_pred[label * 4 + 1]
            dw = bbox_pred[label * 4 + 2]
            dh = bbox_pred[label * 4 + 3]

            cx = x1 + pb_w * 0.5
            cy = y1 + pb_h * 0.5

            obj_cx = cx + pb_w * dx
            obj_cy = cy + pb_h * dy

            obj_w = pb_w * np.exp(dw)
            obj_h = pb_h * np.exp(dh)

            obj_x1 = obj_cx - obj_w * 0.5
            obj_y1 = obj_cy - obj_h * 0.5
            obj_x2 = obj_cx + obj_w * 0.5
            obj_y2 = obj_cy + obj_h * 0.5

            # clip to the original image bounds
            obj_x1 = np.maximum(np.minimum(obj_x1, x_max), 0.0)
            obj_y1 = np.maximum(np.minimum(obj_y1, y_max), 0.0)
            obj_x2 = np.maximum(np.minimum(obj_x2, x_max), 0.0)
            obj_y2 = np.maximum(np.minimum(obj_y2, y_max), 0.0)

            # append object
            obj = Detect_Object()
            obj.rect.x = obj_x1
            obj.rect.y = obj_y1
            obj.rect.w = obj_x2 - obj_x1 + 1
            obj.rect.h = obj_y2 - obj_y1 + 1
            obj.label = label
            obj.prob = score

            class_candidates[label].append(obj)

        # post process: per-class NMS, then a global top-k by score
        objects = []
        for candidates in class_candidates:
            if not candidates:
                continue

            # nms_sorted_bboxes expects best-first ordering
            candidates.sort(key=lambda obj: obj.prob, reverse=True)

            picked = self.nms_sorted_bboxes(candidates, self.nms_threshold)
            objects.extend(candidates[z] for z in picked)

        objects.sort(key=lambda obj: obj.prob, reverse=True)

        return objects[: self.max_per_image]

    def nms_sorted_bboxes(self, objects, nms_threshold):
        """Greedy non-maximum suppression over boxes already sorted best-first.

        Args:
            objects: Sequence of detections exposing ``rect.area()`` and
                ``rect.intersection_area(other_rect)``; earlier entries take
                priority over later ones.
            nms_threshold: A box is dropped when its IoU with any already
                kept box is strictly greater than this value.

        Returns:
            Indices into ``objects`` of the boxes that survive, in ascending
            order.
        """
        picked = []

        n = len(objects)

        areas = np.zeros((n,), dtype=np.float32)
        for i, obj in enumerate(objects):
            areas[i] = obj.rect.area()

        for i, a in enumerate(objects):
            keep = True
            for k in picked:
                # intersection over union
                inter_area = a.rect.intersection_area(objects[k].rect)
                union_area = areas[i] + areas[k] - inter_area
                # A zero union means both boxes are degenerate; treat that as
                # "no overlap" instead of dividing by zero.
                if union_area > 0 and inter_area / union_area > nms_threshold:
                    keep = False
                    # one overlapping kept box is enough to suppress
                    break

            if keep:
                picked.append(i)

        return picked