"""Mask RCNN default config"""
# pylint: disable=unused-variable,missing-function-docstring,bad-whitespace,missing-class-docstring
# from typing import Union, Tuple
from autocfg import dataclass, field


@dataclass
class MaskRCNN:
    # Network-level options for the Mask R-CNN model: backbone, NMS, ROI pooling,
    # anchor generation, RPN, FPN, box head, input size and mask head.

    # Name of the base feature network (backbone).
    backbone = 'resnet50_v1b'
    # Threshold of the final R-CNN non-maximum suppression.
    # A value < 0 or > 1 disables NMS.
    nms_thresh = 0.5
    # Run R-CNN NMS on the top k detection results only.
    # Use -1 to disable so that every detection result takes part in NMS.
    nms_topk = -1
    # Keep only the top `post_nms` detection results and discard the rest.
    # Use -1 to return all detections.
    post_nms = -1
    # ROI pooling mode. 'pool' and 'align' are currently supported.
    roi_mode = 'align'
    # (height, width) of the ROI region.
    roi_size = (7, 7)
    # Feature map stride with respect to the original image, i.e. usually the ratio
    # between the original image size and the feature map size.
    # With FPN this is a tuple of ints.
    strides = (4, 8, 16, 32, 64)
    # Clip bounding box predictions to prevent exponentiation from overflowing.
    clip = 4.14

    # Anchors generation
    # ------------------
    # The width (and height) of the reference anchor box.
    anchor_base_size = 16
    # Anchor shapes are derived from a scale and an aspect ratio as follows:
    # .. math::
    # width_{anchor} = size_{base} \times scale \times \sqrt{ 1 / ratio}
    # height_{anchor} = size_{base} \times scale \times \sqrt{ratio}
    #
    # The aspect ratios of anchor boxes. Expected to be a list or tuple.
    anchor_aspect_ratio = (0.5, 1, 2)
    # The scales (areas) of anchor boxes, used as `scale` in the formula above.
    anchor_scales = (2, 4, 8, 16, 32)

    # Size (H, W) allocated for the anchor map.
    # Anchors are usually generated once for a large feature map, e.g. 128x128.
    # At inference time, inputs of varying size can then crop the anchors they
    # need from this large map instead of re-generating anchors per input.
    anchor_alloc_size = (384, 384)

    # Number of channels used in the RPN convolutional layers.
    rpn_channel = 256
    # IOU threshold of the RPN NMS, used to remove overlapping proposals.
    rpn_nms_thresh = 0.7
    # Maximum number of ground-truth objects per example. This is only an upper
    # bound and need not be precise, but a very large number may slow down training.
    max_num_gt = 100
    # Gluon normalization layer to use. The default None selects the frozen
    # batch normalization layer.
    norm_layer = None

    # FPN Options
    # -----------
    # Whether to use FPN.
    use_fpn = True
    # Number of filters of the FPN output layers.
    num_fpn_filters = 256

    # Number of convolution layers in the box head when batch normalization is not frozen.
    num_box_head_conv = 4
    # Number of filters of the convolution layers in the box head.
    # Only applicable when batch normalization is not frozen.
    num_box_head_conv_filters = 256
    # Number of hidden units of the last fully connected layer in the box head.
    num_box_head_dense_filters = 1024

    # Size of the short side of the input image.
    image_short = 800
    # Maximum size of the long side of the input image.
    image_max_size = 1333

    # Whether to enable the custom model.
    custom_model = True
    # Whether to use automatic mixed precision.
    amp = False
    # Whether to allocate memory statically.
    static_alloc = False

    # Ratio of mask output roi / input roi.
    # For a model with FPN this is typically 2.
    target_roi_scale = 2
    # Number of convolution blocks before the deconv layer of the mask head.
    # For an FPN network this is typically 4.
    num_mask_head_convs = 4


@dataclass
class TrainCfg:
    # Training options: schedule, solver, RPN / R-CNN target sampling and logging.

    # Whether to load the ImageNet pre-trained base network.
    pretrained_base = True
    # Batch size during training.
    batch_size = 1
    # Starting epoch.
    start_epoch = 0
    # Total number of training epochs.
    epochs = 26

    # Solver
    # ------
    # Learning rate.
    lr = 0.01
    # Decay rate of the learning rate.
    lr_decay = 0.1
    # Epochs at which the learning rate decays.
    lr_decay_epoch = (20, 24)
    # Learning rate scheduler mode. Options are step, poly and cosine.
    lr_mode = 'step'
    # Number of warmup iterations.
    lr_warmup = 500
    # Starting learning rate warmup factor.
    lr_warmup_factor = 1.0 / 3.0
    # Gradient clipping.
    clip_gradient = -1
    # Momentum.
    momentum = 0.9
    # Weight decay.
    wd = 1e-4

    # RPN options
    # -----------
    # Number of top proposals kept before NMS when training the RPN.
    rpn_train_pre_nms = 12000
    # Number of top proposals returned after NMS when training the RPN.
    # Will be set to rpn_train_pre_nms if it is larger than rpn_train_pre_nms.
    rpn_train_post_nms = 2000
    # Transition point from L1 to L2 loss of the RPN box regression.
    # Set to 0.0 to make the loss simply L1.
    rpn_smoothl1_rho = 0.001
    # Proposals whose size is smaller than ``rpn_min_size`` are discarded.
    rpn_min_size = 1

    # R-CNN options
    # -------------
    # Number of samples for R-CNN targets.
    rcnn_num_samples = 512
    # Samples with IOU larger than ``rcnn_pos_iou_thresh`` are regarded as positive.
    rcnn_pos_iou_thresh = 0.5
    # ``rcnn_pos_ratio`` defines how many positive samples
    # (``rcnn_pos_ratio * rcnn_num_samples``) are to be sampled.
    rcnn_pos_ratio = 0.25
    # Transition point from L1 to L2 loss of the R-CNN box regression.
    # Set to 0.0 to make the loss simply L1.
    rcnn_smoothl1_rho = 0.001

    # Misc
    # ----
    # Logging interval, in iterations.
    log_interval = 100
    # Random seed.
    seed = 233
    # Whether to enable verbose logging.
    verbose = False
    # Number of executor threads used for scheduling ops.
    # More threads may incur a higher GPU memory footprint but may speed up
    # throughput. Note that it is set to 1 when horovod is used.
    executor_threads = 4


@dataclass
class ValidCfg:
    # Validation options.

    # Number of top proposals kept before NMS when testing the RPN.
    rpn_test_pre_nms = 6000
    # Number of top proposals returned after NMS when testing the RPN.
    # Will be set to rpn_test_pre_nms if it is larger than rpn_test_pre_nms.
    rpn_test_post_nms = 1000
    # Epoch interval between validations.
    val_interval = 1


@dataclass
class MaskRCNNCfg:
    # Top-level configuration: groups the model, training and validation
    # sections together with dataset / device / checkpoint options.

    # Nested configuration sections; each one gets a fresh default instance.
    mask_rcnn: MaskRCNN = field(default_factory=MaskRCNN)
    train: TrainCfg = field(default_factory=TrainCfg)
    valid: ValidCfg = field(default_factory=ValidCfg)
    # Dataset name, e.g. 'coco', 'voc'.
    dataset = 'coco'
    # GPUs to train with; for example you can specify (1,3).
    gpus = (0,)
    # Previously saved parameters to resume from,
    # for example ./faster_rcnn_xxx_0123.params. Defaults to an empty string.
    resume = ''
    # Prefix used when saving parameters.
    save_prefix = ''
    # Epoch interval for saving parameters; the best model is always saved.
    save_interval = 1
    # Use MXNet Horovod for distributed training. Must be run with OpenMPI.
    horovod = False
    # Number of data workers. A larger number can accelerate data loading
    # if your CPU and GPUs are powerful.
    num_workers = 16
    # KV store option. local, device, nccl, dist_sync, dist_device_sync and
    # dist_async are available.
    kv_store = 'nccl'
    # Whether to disable hybridizing the model. Memory usage and speed will decrease.
    disable_hybridization = False
    # Use the NVIDIA MSCOCO API. Make sure it is installed first.
    use_ext = False