1"""Mask RCNN default config"""
2# pylint: disable=unused-variable,missing-function-docstring,bad-whitespace,missing-class-docstring
3# from typing import Union, Tuple
4from autocfg import dataclass, field
5
@dataclass
class MaskRCNN:
    # Model-level options of the Mask R-CNN config: backbone, NMS, ROI pooling,
    # anchor generation, RPN, FPN, box head, input size and mask head.
    # Field names, order and defaults are part of the config interface.

    # Backbone network.
    backbone = 'resnet50_v1b'  # base feature network
    # Final R-CNN non-maximum suppression threshold.
    # Specify a value < 0 or > 1 to disable NMS.
    nms_thresh = 0.5
    # Apply R-CNN NMS to the top k detection results. Use -1 to disable,
    # so that every detection result is used in NMS.
    nms_topk = -1
    # Only return the top `post_nms` detection results; the rest are discarded.
    # Set to -1 to return all detections.
    post_nms = -1
    # ROI pooling mode. Currently supports 'pool' and 'align'.
    roi_mode = 'align'
    # (height, width) of the ROI region.
    roi_size = (7, 7)
    # Feature map stride with respect to the original image.
    # This is usually the ratio between original image size and feature map size.
    # For FPN, use a tuple of ints.
    strides = (4, 8, 16, 32, 64)
    # Clip bounding box predictions to prevent exponentiation from overflowing.
    clip = 4.14

    # Anchors generation
    # ------------------
    # The width (and height) of the reference anchor box.
    anchor_base_size = 16
    # The aspect ratios of anchor boxes. We expect it to be a list or tuple.
    anchor_aspect_ratio = (0.5, 1, 2)
    # The scales of anchor boxes, applied to ``anchor_base_size``.
    # We use the following form to compute the shapes of anchors:
    # .. math::
    # width_{anchor} = size_{base} \times scale \times \sqrt{ 1 / ratio}
    # height_{anchor} = size_{base} \times scale \times \sqrt{ratio}
    anchor_scales = (2, 4, 8, 16, 32)

    # Allocate size for the anchor boxes as (H, W).
    # Usually we generate enough anchors for a large feature map, e.g. 128x128.
    # Later, in inference, we can have variable input sizes,
    # at which time we can crop the corresponding anchors from this large
    # anchor map, so we can skip re-generating anchors for each input.
    anchor_alloc_size = (384, 384)

    # Number of channels used in RPN convolutional layers.
    rpn_channel = 256
    # IOU threshold for NMS. It is used to remove overlapping proposals.
    rpn_nms_thresh = 0.7
    # Maximum ground-truth number for each example. This is only an upper bound, not
    # necessarily very precise. However, using a very big number may impact the training speed.
    max_num_gt = 100
    # Gluon normalization layer to use. The default is None, which will use a frozen
    # batch normalization layer.
    norm_layer = None

    # FPN Options
    # -----------
    # Whether to use FPN.
    use_fpn = True
    # Number of filters for FPN output layers.
    num_fpn_filters = 256

    # Number of convolution layers to use in box head if batch normalization is not frozen.
    num_box_head_conv = 4
    # Number of filters for convolution layers in box head.
    # Only applicable if batch normalization is not frozen.
    num_box_head_conv_filters = 256
    # Number of hidden units for the last fully connected layer in box head.
    num_box_head_dense_filters = 1024

    # Input image short side size.
    image_short = 800
    # Maximum size of input image long side.
    image_max_size = 1333

    # Whether to enable custom model.
    custom_model = True
    # Whether to use automatic mixed precision.
    amp = False
    # Whether to allocate memory statically.
    static_alloc = False

    # Ratio of mask output roi / input roi.
    # For a model with FPN, this is typically 2.
    target_roi_scale = 2
    # Number of convolution blocks before the deconv layer for the mask head.
    # For an FPN network this is typically 4.
    num_mask_head_convs = 4
93
94
@dataclass
class TrainCfg:
    # Training options of the Mask R-CNN config: schedule, solver,
    # RPN / R-CNN sampling and miscellaneous runtime settings.
    # Field names, order and defaults are part of the config interface.

    # Whether to load the ImageNet pre-trained base.
    pretrained_base = True
    # Batch size during training.
    batch_size = 1
    # Starting epoch.
    start_epoch = 0
    # Total number of epochs for training.
    epochs = 26

    # Solver
    # ------
    # Learning rate.
    lr = 0.01
    # Decay rate of learning rate.
    lr_decay = 0.1
    # Epochs at which the learning rate decays.
    lr_decay_epoch = (20, 24)
    # Learning rate scheduler mode. Options are step, poly and cosine.
    lr_mode = 'step'
    # Number of iterations for warmup.
    lr_warmup = 500
    # Starting lr warmup factor.
    lr_warmup_factor = 1. / 3.
    # Gradient clipping.
    clip_gradient = -1
    # Momentum.
    momentum = 0.9
    # Weight decay.
    wd = 1e-4

    # RPN options
    # -----------
    # Filter top proposals before NMS in training of RPN.
    rpn_train_pre_nms = 12000
    # Return top proposal results after NMS in training of RPN.
    # Will be set to rpn_train_pre_nms if it is larger than rpn_train_pre_nms.
    rpn_train_post_nms = 2000
    # RPN box regression transition point from L1 to L2 loss.
    # Set to 0.0 to make the loss simply L1.
    rpn_smoothl1_rho = 0.001
    # Proposals whose size is smaller than ``rpn_min_size`` will be discarded.
    rpn_min_size = 1

    # R-CNN options
    # -------------
    # Number of samples for R-CNN targets.
    rcnn_num_samples = 512
    # Samples with IOU larger than ``rcnn_pos_iou_thresh`` are regarded as positive.
    rcnn_pos_iou_thresh = 0.5
    # ``rcnn_pos_ratio`` defines how many positive samples
    # (``rcnn_pos_ratio * rcnn_num_samples``) are to be sampled.
    rcnn_pos_ratio = 0.25
    # R-CNN box regression transition point from L1 to L2 loss.
    # Set to 0.0 to make the loss simply L1.
    rcnn_smoothl1_rho = 0.001

    # Misc
    # ----
    # Log interval in terms of iterations.
    log_interval = 100
    # Seed value, presumably for random number generators -- confirm against
    # the code that consumes this config.
    seed = 233
    # Whether to enable verbose logging.
    verbose = False
    # Number of threads for the executor for scheduling ops.
    # More threads may incur a higher GPU memory footprint,
    # but may speed up throughput. Note that when horovod is used,
    # it is set to 1.
    executor_threads = 4
165
166
@dataclass
class ValidCfg:
    # Validation options of the Mask R-CNN config: RPN proposal limits at
    # test time and how often validation runs.

    # Filter top proposals before NMS in testing of RPN.
    rpn_test_pre_nms = 6000
    # Return top proposal results after NMS in testing of RPN.
    # Will be set to rpn_test_pre_nms if it is larger than rpn_test_pre_nms.
    rpn_test_post_nms = 1000
    # Epoch interval for validation.
    val_interval = 1
176
@dataclass
class MaskRCNNCfg:
    # Top-level Mask R-CNN config: nests the model, training and validation
    # sub-configs and holds the dataset / device / checkpoint options.

    # Nested sub-configs; each gets a fresh instance via ``default_factory``.
    mask_rcnn: MaskRCNN = field(default_factory=MaskRCNN)
    train: TrainCfg = field(default_factory=TrainCfg)
    valid: ValidCfg = field(default_factory=ValidCfg)
    # Dataset name, e.g. 'coco', 'voc'.
    dataset = 'coco'
    # Training with GPUs, you can specify (1,3) for example.
    gpus = (0,)
    # Resume from previously saved parameters if set (the default is an empty string).
    # For example, you can resume from ./mask_rcnn_xxx_0123.params.
    resume = ''
    # Saving parameter prefix.
    save_prefix = ''
    # Saving parameters epoch interval, best model will always be saved.
    save_interval = 1
    # Use MXNet Horovod for distributed training. Must be run with OpenMPI.
    horovod = False
    # Number of data workers. You can use a larger number to accelerate data loading
    # if your CPU and GPUs are powerful.
    num_workers = 16
    # KV store options. local, device, nccl, dist_sync, dist_device_sync,
    # dist_async are available.
    kv_store = 'nccl'
    # Whether to disable hybridization of the model. Memory usage and speed will decrease.
    disable_hybridization = False
    # Use NVIDIA MSCOCO API. Make sure you install it first.
    use_ext = False
204