# Tencent is pleased to support the open source community by making ncnn available.
#
# Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
#
# Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
# in compliance with the License. You may obtain a copy of the License at
#
# https://opensource.org/licenses/BSD-3-Clause
#
# Unless required by applicable law or agreed to in writing, software distributed
# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
# CONDITIONS OF ANY KIND, either express or implied. See the License for the
# specific language governing permissions and limitations under the License.

import numpy as np
import ncnn
from .model_store import get_model_file
from ..utils.objects import Detect_Object


class RFCN:
    """Two-stage R-FCN object detector backed by an ncnn network.

    Stage one runs the network on the resized image to obtain the shared
    ``rfcn_cls`` / ``rfcn_bbox`` maps and the region proposals (``rois``).
    Stage two scores and regresses every proposal individually, after which
    per-class non-maximum suppression is applied.
    """

    def __init__(
        self,
        target_size=224,
        max_per_image=100,
        confidence_thresh=0.6,
        nms_threshold=0.3,
        num_threads=1,
        use_gpu=False,
    ):
        """Load the R-FCN model files and store the detection settings.

        Args:
            target_size: Length (in pixels) the shorter image side is
                resized to before inference.
            max_per_image: Maximum number of detections returned per call.
            confidence_thresh: Proposals whose best class score is not
                strictly above this value are dropped.
            nms_threshold: IoU above which a lower-scored box of the same
                class is suppressed.
            num_threads: Thread count handed to every ncnn extractor.
            use_gpu: Enables ncnn's Vulkan compute path when true.
        """
        self.target_size = target_size
        self.max_per_image = max_per_image
        self.confidence_thresh = confidence_thresh
        self.nms_threshold = nms_threshold
        self.num_threads = num_threads
        self.use_gpu = use_gpu

        # Per-channel means subtracted from the input; an empty norm list is
        # passed alongside, so only the mean subtraction values are supplied.
        self.mean_vals = [102.9801, 115.9465, 122.7717]
        self.norm_vals = []

        self.net = ncnn.Net()
        self.net.opt.use_vulkan_compute = self.use_gpu

        # original pretrained model from https://github.com/YuwenXiong/py-R-FCN
        # https://github.com/YuwenXiong/py-R-FCN/blob/master/models/pascal_voc/ResNet-50/rfcn_end2end/test_agnostic.prototxt
        # https://1drv.ms/u/s!AoN7vygOjLIQqUWHpY67oaC7mopf
        # resnet50_rfcn_final.caffemodel
        # the ncnn model https://github.com/nihui/ncnn-assets/tree/master/models
        self.net.load_param(get_model_file("rfcn_end2end.param"))
        self.net.load_model(get_model_file("rfcn_end2end.bin"))

        # Index 0 is the background class and is never reported.
        self.class_names = [
            "background",
            "aeroplane",
            "bicycle",
            "bird",
            "boat",
            "bottle",
            "bus",
            "car",
            "cat",
            "chair",
            "cow",
            "diningtable",
            "dog",
            "horse",
            "motorbike",
            "person",
            "pottedplant",
            "sheep",
            "sofa",
            "train",
            "tvmonitor",
        ]

    def __del__(self):
        # Drop the reference so the underlying network can be released.
        self.net = None

    def __call__(self, img):
        """Detect objects in ``img``.

        Args:
            img: Image array indexed as ``shape[0]`` = height and
                ``shape[1]`` = width; it is handed to ncnn as ``PIXEL_BGR``.
                NOTE(review): presumably an HxWx3 uint8 array such as the
                result of ``cv2.imread`` -- confirm against callers.

        Returns:
            A list of ``Detect_Object`` sorted by descending ``prob`` and
            truncated to ``max_per_image`` entries. Box coordinates are in
            the original image's pixel space.
        """
        img_h = img.shape[0]
        img_w = img.shape[1]

        # Resize so that the shorter side equals target_size. The scaled
        # sizes are truncated to int so that im_info reports exactly the
        # dimensions of the tensor that is fed to the network.
        scale = float(self.target_size) / min(img_w, img_h)
        if img_w < img_h:
            w = int(self.target_size)
            h = int(img_h * scale)
        else:
            h = int(self.target_size)
            w = int(img_w * scale)

        mat_in = ncnn.Mat.from_pixels_resize(
            img,
            ncnn.Mat.PixelType.PIXEL_BGR,
            img_w,
            img_h,
            w,
            h,
        )
        # "substract" is the spelling used by the ncnn API.
        mat_in.substract_mean_normalize(self.mean_vals, self.norm_vals)

        im_info = ncnn.Mat(3)
        im_info[0] = h
        im_info[1] = w
        im_info[2] = scale

        # step1, extract feature and all rois
        ex1 = self.net.create_extractor()
        ex1.set_num_threads(self.num_threads)
        ex1.input("data", mat_in)
        ex1.input("im_info", im_info)

        _, rfcn_cls = ex1.extract("rfcn_cls")
        _, rfcn_bbox = ex1.extract("rfcn_bbox")
        _, rois = ex1.extract("rois")  # all rois

        # Upper bounds used when clipping boxes to the original image.
        max_x = float(img_w - 1)
        max_y = float(img_h - 1)

        # step2, extract bbox and score for each roi
        class_candidates = []
        for i in range(rois.c):
            ex2 = self.net.create_extractor()
            # Honour the configured thread count in the second stage too,
            # consistent with the first-stage extractor.
            ex2.set_num_threads(self.num_threads)

            roi = rois.channel(i)  # get single roi
            ex2.input("rfcn_cls", rfcn_cls)
            ex2.input("rfcn_bbox", rfcn_bbox)
            ex2.input("rois", roi)

            _, bbox_pred = ex2.extract("bbox_pred")
            _, cls_prob = ex2.extract("cls_prob")

            # Make sure there is one candidate bucket per class.
            num_class = cls_prob.w
            while len(class_candidates) < num_class:
                class_candidates.append([])

            # find class id with highest score
            label = 0
            score = 0.0
            for j in range(num_class):
                class_score = cls_prob[j]
                if class_score > score:
                    label = j
                    score = class_score

            # ignore background or low score
            if label == 0 or score <= self.confidence_thresh:
                continue

            # unscale to image size
            x1 = roi[0] / scale
            y1 = roi[1] / scale
            x2 = roi[2] / scale
            y2 = roi[3] / scale

            pb_w = x2 - x1 + 1
            pb_h = y2 - y1 + 1

            # apply bbox regression; the deltas are read from offsets 4..7
            # of bbox_pred regardless of the winning class.
            dx = bbox_pred[4]
            dy = bbox_pred[4 + 1]
            dw = bbox_pred[4 + 2]
            dh = bbox_pred[4 + 3]

            cx = x1 + pb_w * 0.5
            cy = y1 + pb_h * 0.5

            obj_cx = cx + pb_w * dx
            obj_cy = cy + pb_h * dy

            obj_w = pb_w * np.exp(dw)
            obj_h = pb_h * np.exp(dh)

            # clip the regressed corners to the original image
            obj_x1 = np.maximum(np.minimum(obj_cx - obj_w * 0.5, max_x), 0.0)
            obj_y1 = np.maximum(np.minimum(obj_cy - obj_h * 0.5, max_y), 0.0)
            obj_x2 = np.maximum(np.minimum(obj_cx + obj_w * 0.5, max_x), 0.0)
            obj_y2 = np.maximum(np.minimum(obj_cy + obj_h * 0.5, max_y), 0.0)

            # append object
            obj = Detect_Object()
            obj.rect.x = obj_x1
            obj.rect.y = obj_y1
            obj.rect.w = obj_x2 - obj_x1 + 1
            obj.rect.h = obj_y2 - obj_y1 + 1
            obj.label = label
            obj.prob = score

            class_candidates[label].append(obj)

        # post process: per-class NMS, then a global sort by score
        objects = []
        for candidates in class_candidates:
            if not candidates:
                continue

            # nms_sorted_bboxes expects its input sorted by descending score.
            candidates.sort(key=lambda obj: obj.prob, reverse=True)
            picked = self.nms_sorted_bboxes(candidates, self.nms_threshold)
            objects.extend(candidates[z] for z in picked)

        objects.sort(key=lambda obj: obj.prob, reverse=True)

        return objects[: self.max_per_image]

    def nms_sorted_bboxes(self, objects, nms_threshold):
        """Greedy non-maximum suppression over score-sorted boxes.

        Args:
            objects: Sequence of items exposing ``rect.area()`` and
                ``rect.intersection_area(other_rect)``, ordered from the
                highest to the lowest score.
            nms_threshold: A box is dropped when its IoU with any already
                kept box is strictly greater than this value.

        Returns:
            Indices into ``objects`` of the boxes that were kept, in
            ascending order.
        """
        areas = np.array(
            [obj.rect.area() for obj in objects], dtype=np.float32
        )

        picked = []
        for i, candidate in enumerate(objects):
            keep = True
            for j in picked:
                # intersection over union
                inter_area = candidate.rect.intersection_area(objects[j].rect)
                union_area = areas[i] + areas[j] - inter_area
                # A non-positive union (degenerate boxes) cannot overlap;
                # the guard also avoids dividing by zero.
                if union_area > 0 and inter_area / union_area > nms_threshold:
                    keep = False
                    # One suppressing box is enough; stop scanning.
                    break

            if keep:
                picked.append(i)

        return picked