# pylint: disable=line-too-long,too-many-lines,missing-docstring,arguments-differ,unused-argument
import mxnet as mx
from mxnet import init
from mxnet.gluon import nn
from mxnet.gluon.nn import HybridBlock
from ..resnetv1b import resnet18_v1b, resnet34_v1b, resnet50_v1b, resnet101_v1b, resnet152_v1b

__all__ = ['resnet18_v1b_sthsthv2', 'resnet34_v1b_sthsthv2', 'resnet50_v1b_sthsthv2',
           'resnet101_v1b_sthsthv2', 'resnet152_v1b_sthsthv2', 'resnet18_v1b_kinetics400',
           'resnet34_v1b_kinetics400', 'resnet50_v1b_kinetics400', 'resnet101_v1b_kinetics400',
           'resnet152_v1b_kinetics400', 'resnet50_v1b_ucf101', 'resnet50_v1b_hmdb51',
           'resnet50_v1b_custom', 'resnet18_v1b_custom']

# Supported depth -> (backbone constructor, expansion). The expansion scales the
# width of the final feature vector: ``feat_dim = 512 * expansion``.
_BACKBONES = {
    18: (resnet18_v1b, 1),
    34: (resnet34_v1b, 1),
    50: (resnet50_v1b, 4),
    101: (resnet101_v1b, 4),
    152: (resnet152_v1b, 4),
}


class ActionRecResNetV1b(HybridBlock):
    r"""ResNet models for video action recognition
    Deep Residual Learning for Image Recognition, CVPR 2016
    https://arxiv.org/abs/1512.03385

    Parameters
    ----------
    depth : int
        Depth of ResNet, from {18, 34, 50, 101, 152}.
    nclass : int
        Number of classes in the training dataset.
    pretrained_base : bool or str, optional, default is True.
        Passed as ``pretrained`` to the backbone constructor; the extra
        layers (dropout and the output dense layer) are created fresh.
    dropout_ratio : float, default is 0.5.
        The dropout rate of a dropout layer.
        The larger the value, the more strength to prevent overfitting.
    init_std : float, default is 0.01.
        Standard deviation value when initialize the dense layers.
    num_segments : int, default is 1.
        Number of segments used to evenly divide a video.
    num_crop : int, default is 1.
        Number of crops used during evaluation, choices are 1, 3 or 10.
    partial_bn : bool, default False.
        Accepted for API compatibility; this class does not read it.
    **kwargs
        Forwarded to the backbone constructor.

    Raises
    ------
    ValueError
        If ``depth`` is not one of the supported depths.

    Input: a single video frame or N images from N segments when num_segments > 1
    Output: class scores with ``nclass`` entries per video, after averaging the
    features of its ``num_segments * num_crop`` frames.
    """
    def __init__(self, depth, nclass, pretrained_base=True,
                 dropout_ratio=0.5, init_std=0.01,
                 num_segments=1, num_crop=1,
                 partial_bn=False, **kwargs):
        super(ActionRecResNetV1b, self).__init__()

        # Fail fast: the old code only printed a message here and then crashed
        # later with an unrelated AttributeError/NameError.
        if depth not in _BACKBONES:
            raise ValueError('No such ResNet configuration for depth=%s' % (depth,))
        build_backbone, expansion = _BACKBONES[depth]
        pretrained_model = build_backbone(pretrained=pretrained_base, **kwargs)
        self.expansion = expansion

        self.dropout_ratio = dropout_ratio
        self.init_std = init_std
        self.feat_dim = 512 * self.expansion
        self.num_segments = num_segments
        self.num_crop = num_crop

        with self.name_scope():
            # Reuse every backbone stage up to (and including) flattening.
            self.conv1 = pretrained_model.conv1
            self.bn1 = pretrained_model.bn1
            self.relu = pretrained_model.relu
            self.maxpool = pretrained_model.maxpool
            self.layer1 = pretrained_model.layer1
            self.layer2 = pretrained_model.layer2
            self.layer3 = pretrained_model.layer3
            self.layer4 = pretrained_model.layer4
            self.avgpool = pretrained_model.avgpool
            self.flat = pretrained_model.flat
            # New classification head; only this dense layer is initialized here.
            self.drop = nn.Dropout(rate=self.dropout_ratio)
            self.output = nn.Dense(units=nclass, in_units=self.feat_dim,
                                   weight_initializer=init.Normal(sigma=self.init_std))
            self.output.initialize()

    def hybrid_forward(self, F, x):
        """Run the backbone, average features per video, and classify."""
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.avgpool(x)
        x = self.flat(x)
        x = self.drop(x)

        # segmental consensus: group the per-frame features of each video and
        # average them over the segment/crop axis.
        x = F.reshape(x, shape=(-1, self.num_segments * self.num_crop, self.feat_dim))
        x = F.mean(x, axis=1)

        x = self.output(x)
        return x


def _sthsthv2_classes():
    """Return the Something-Something-V2 class list (imported only when called)."""
    from ...data import SomethingSomethingV2Attr
    return SomethingSomethingV2Attr().classes


def _kinetics400_classes():
    """Return the Kinetics400 class list (imported only when called)."""
    from ...data import Kinetics400Attr
    return Kinetics400Attr().classes


def _ucf101_classes():
    """Return the UCF101 class list (imported only when called)."""
    from ...data import UCF101Attr
    return UCF101Attr().classes


def _hmdb51_classes():
    """Return the HMDB51 class list (imported only when called)."""
    from ...data import HMDB51Attr
    return HMDB51Attr().classes


def _build_action_model(depth, model_name, load_classes, nclass, pretrained,
                        partial_bn, num_segments, num_crop, root, ctx,
                        dropout_ratio=0.5, init_std=0.01):
    """Build an ``ActionRecResNetV1b`` and optionally load model-zoo weights.

    Parameters
    ----------
    depth : int
        Backbone depth passed to ``ActionRecResNetV1b``.
    model_name : str
        Name handed to ``get_model_file`` when ``pretrained`` is truthy.
    load_classes : callable
        Zero-argument callable returning the dataset class list; only called
        when ``pretrained`` is truthy.
    nclass, partial_bn, num_segments, num_crop, dropout_ratio, init_std
        Forwarded unchanged to ``ActionRecResNetV1b``.
    pretrained : bool or str
        If truthy, load weights (the value is passed as ``tag``) and set
        ``model.classes``.
    root : str
        Directory passed to ``get_model_file``.
    ctx : Context
        Context the parameters are reset to before returning.

    Returns
    -------
    ActionRecResNetV1b
    """
    model = ActionRecResNetV1b(depth=depth,
                               nclass=nclass,
                               partial_bn=partial_bn,
                               num_segments=num_segments,
                               num_crop=num_crop,
                               dropout_ratio=dropout_ratio,
                               init_std=init_std)

    if pretrained:
        from ..model_store import get_model_file
        model.load_parameters(get_model_file(model_name, tag=pretrained, root=root))
        model.classes = load_classes()
    model.collect_params().reset_ctx(ctx)
    return model


def _build_custom_model(depth, kinetics_name, nclass, pretrained, partial_bn,
                        num_segments, num_crop, ctx, use_kinetics_pretrain):
    """Build a custom-class model, optionally seeded from a Kinetics400 model.

    When ``use_kinetics_pretrain`` is truthy and ``pretrained`` is falsy, the
    model named ``kinetics_name`` is fetched with pretrained weights and all
    of its parameters except the last two are copied, position by position,
    into the new model. The parameters are then reset to ``ctx``.
    """
    model = ActionRecResNetV1b(depth=depth,
                               nclass=nclass,
                               partial_bn=partial_bn,
                               num_segments=num_segments,
                               num_crop=num_crop,
                               dropout_ratio=0.5,
                               init_std=0.01)

    if use_kinetics_pretrain and not pretrained:
        from gluoncv.model_zoo import get_model
        kinetics_model = get_model(kinetics_name, nclass=400, pretrained=True)
        source_params = kinetics_model.collect_params()
        target_params = model.collect_params()
        source_names = list(source_params.keys())
        target_names = list(target_params.keys())
        # Internal invariant: both networks share one architecture, so the
        # positional pairing below is only meaningful if the counts agree.
        assert len(source_names) == len(target_names)

        # skip the last dense layer (the final two entries -- presumably its
        # weight and bias), whose shape depends on ``nclass``.
        for src_name, dst_name in zip(source_names[:-2], target_names[:-2]):
            target_params[dst_name].set_data(source_params[src_name].data())
    model.collect_params().reset_ctx(ctx)
    return model


def resnet18_v1b_sthsthv2(nclass=174, pretrained=False, pretrained_base=True,
                          use_tsn=False, partial_bn=False,
                          num_segments=1, num_crop=1, root='~/.mxnet/models',
                          ctx=mx.cpu(), **kwargs):
    r"""ResNet18 model trained on Something-Something-V2 dataset.

    Parameters
    ----------
    nclass : int.
        Number of categories in the dataset.
    pretrained : bool or str.
        Boolean value controls whether to load the default pretrained weights for model.
        String value represents the hashtag for a certain version of pretrained weights.
    pretrained_base : bool or str, optional, default is True.
        Accepted for API compatibility; not forwarded by this function, so the
        backbone is built with the ``ActionRecResNetV1b`` default.
    use_tsn : bool, default False.
        Accepted for API compatibility; not used by this function.
    partial_bn : bool, default False.
        Forwarded to ``ActionRecResNetV1b``, which does not read it.
    num_segments : int, default is 1.
        Number of segments used to evenly divide a video.
    num_crop : int, default is 1.
        Number of crops used during evaluation, choices are 1, 3 or 10.
    root : str, default '~/.mxnet/models'
        Location for keeping the model parameters.
    ctx : Context, default CPU.
        The context the model parameters are reset to.

    Returns
    -------
    ActionRecResNetV1b
        The network; ``classes`` is set only when ``pretrained`` is truthy.
    """
    return _build_action_model(18, 'resnet18_v1b_sthsthv2', _sthsthv2_classes,
                               nclass, pretrained, partial_bn,
                               num_segments, num_crop, root, ctx)


def resnet34_v1b_sthsthv2(nclass=174, pretrained=False, pretrained_base=True,
                          use_tsn=False, partial_bn=False,
                          num_segments=1, num_crop=1, root='~/.mxnet/models',
                          ctx=mx.cpu(), **kwargs):
    r"""ResNet34 model trained on Something-Something-V2 dataset.

    Parameters
    ----------
    nclass : int.
        Number of categories in the dataset.
    pretrained : bool or str.
        Boolean value controls whether to load the default pretrained weights for model.
        String value represents the hashtag for a certain version of pretrained weights.
    pretrained_base : bool or str, optional, default is True.
        Accepted for API compatibility; not forwarded by this function, so the
        backbone is built with the ``ActionRecResNetV1b`` default.
    use_tsn : bool, default False.
        Accepted for API compatibility; not used by this function.
    partial_bn : bool, default False.
        Forwarded to ``ActionRecResNetV1b``, which does not read it.
    num_segments : int, default is 1.
        Number of segments used to evenly divide a video.
    num_crop : int, default is 1.
        Number of crops used during evaluation, choices are 1, 3 or 10.
    root : str, default '~/.mxnet/models'
        Location for keeping the model parameters.
    ctx : Context, default CPU.
        The context the model parameters are reset to.

    Returns
    -------
    ActionRecResNetV1b
        The network; ``classes`` is set only when ``pretrained`` is truthy.
    """
    return _build_action_model(34, 'resnet34_v1b_sthsthv2', _sthsthv2_classes,
                               nclass, pretrained, partial_bn,
                               num_segments, num_crop, root, ctx)


def resnet50_v1b_sthsthv2(nclass=174, pretrained=False, pretrained_base=True,
                          use_tsn=False, partial_bn=False,
                          num_segments=1, num_crop=1, root='~/.mxnet/models',
                          ctx=mx.cpu(), **kwargs):
    r"""ResNet50 model trained on Something-Something-V2 dataset.

    Parameters
    ----------
    nclass : int.
        Number of categories in the dataset.
    pretrained : bool or str.
        Boolean value controls whether to load the default pretrained weights for model.
        String value represents the hashtag for a certain version of pretrained weights.
    pretrained_base : bool or str, optional, default is True.
        Accepted for API compatibility; not forwarded by this function, so the
        backbone is built with the ``ActionRecResNetV1b`` default.
    use_tsn : bool, default False.
        Accepted for API compatibility; not used by this function.
    partial_bn : bool, default False.
        Forwarded to ``ActionRecResNetV1b``, which does not read it.
    num_segments : int, default is 1.
        Number of segments used to evenly divide a video.
    num_crop : int, default is 1.
        Number of crops used during evaluation, choices are 1, 3 or 10.
    root : str, default '~/.mxnet/models'
        Location for keeping the model parameters.
    ctx : Context, default CPU.
        The context the model parameters are reset to.

    Returns
    -------
    ActionRecResNetV1b
        The network; ``classes`` is set only when ``pretrained`` is truthy.
    """
    return _build_action_model(50, 'resnet50_v1b_sthsthv2', _sthsthv2_classes,
                               nclass, pretrained, partial_bn,
                               num_segments, num_crop, root, ctx)


def resnet101_v1b_sthsthv2(nclass=174, pretrained=False, pretrained_base=True,
                           use_tsn=False, partial_bn=False,
                           num_segments=1, num_crop=1, root='~/.mxnet/models',
                           ctx=mx.cpu(), **kwargs):
    r"""ResNet101 model trained on Something-Something-V2 dataset.

    Parameters
    ----------
    nclass : int.
        Number of categories in the dataset.
    pretrained : bool or str.
        Boolean value controls whether to load the default pretrained weights for model.
        String value represents the hashtag for a certain version of pretrained weights.
    pretrained_base : bool or str, optional, default is True.
        Accepted for API compatibility; not forwarded by this function, so the
        backbone is built with the ``ActionRecResNetV1b`` default.
    use_tsn : bool, default False.
        Accepted for API compatibility; not used by this function.
    partial_bn : bool, default False.
        Forwarded to ``ActionRecResNetV1b``, which does not read it.
    num_segments : int, default is 1.
        Number of segments used to evenly divide a video.
    num_crop : int, default is 1.
        Number of crops used during evaluation, choices are 1, 3 or 10.
    root : str, default '~/.mxnet/models'
        Location for keeping the model parameters.
    ctx : Context, default CPU.
        The context the model parameters are reset to.

    Returns
    -------
    ActionRecResNetV1b
        The network; ``classes`` is set only when ``pretrained`` is truthy.
    """
    return _build_action_model(101, 'resnet101_v1b_sthsthv2', _sthsthv2_classes,
                               nclass, pretrained, partial_bn,
                               num_segments, num_crop, root, ctx)


def resnet152_v1b_sthsthv2(nclass=174, pretrained=False, pretrained_base=True,
                           use_tsn=False, partial_bn=False,
                           num_segments=1, num_crop=1, root='~/.mxnet/models',
                           ctx=mx.cpu(), **kwargs):
    r"""ResNet152 model trained on Something-Something-V2 dataset.

    Parameters
    ----------
    nclass : int.
        Number of categories in the dataset.
    pretrained : bool or str.
        Boolean value controls whether to load the default pretrained weights for model.
        String value represents the hashtag for a certain version of pretrained weights.
    pretrained_base : bool or str, optional, default is True.
        Accepted for API compatibility; not forwarded by this function, so the
        backbone is built with the ``ActionRecResNetV1b`` default.
    use_tsn : bool, default False.
        Accepted for API compatibility; not used by this function.
    partial_bn : bool, default False.
        Forwarded to ``ActionRecResNetV1b``, which does not read it.
    num_segments : int, default is 1.
        Number of segments used to evenly divide a video.
    num_crop : int, default is 1.
        Number of crops used during evaluation, choices are 1, 3 or 10.
    root : str, default '~/.mxnet/models'
        Location for keeping the model parameters.
    ctx : Context, default CPU.
        The context the model parameters are reset to.

    Returns
    -------
    ActionRecResNetV1b
        The network; ``classes`` is set only when ``pretrained`` is truthy.
    """
    return _build_action_model(152, 'resnet152_v1b_sthsthv2', _sthsthv2_classes,
                               nclass, pretrained, partial_bn,
                               num_segments, num_crop, root, ctx)


def resnet18_v1b_kinetics400(nclass=400, pretrained=False, pretrained_base=True,
                             use_tsn=False, partial_bn=False,
                             num_segments=1, num_crop=1, root='~/.mxnet/models',
                             ctx=mx.cpu(), **kwargs):
    r"""ResNet18 model trained on Kinetics400 dataset.

    Parameters
    ----------
    nclass : int.
        Number of categories in the dataset.
    pretrained : bool or str.
        Boolean value controls whether to load the default pretrained weights for model.
        String value represents the hashtag for a certain version of pretrained weights.
    pretrained_base : bool or str, optional, default is True.
        Accepted for API compatibility; not forwarded by this function, so the
        backbone is built with the ``ActionRecResNetV1b`` default.
    use_tsn : bool, default False.
        Accepted for API compatibility; not used by this function.
    partial_bn : bool, default False.
        Forwarded to ``ActionRecResNetV1b``, which does not read it.
    num_segments : int, default is 1.
        Number of segments used to evenly divide a video.
    num_crop : int, default is 1.
        Number of crops used during evaluation, choices are 1, 3 or 10.
    root : str, default '~/.mxnet/models'
        Location for keeping the model parameters.
    ctx : Context, default CPU.
        The context the model parameters are reset to.

    Returns
    -------
    ActionRecResNetV1b
        The network; ``classes`` is set only when ``pretrained`` is truthy.
    """
    return _build_action_model(18, 'resnet18_v1b_kinetics400', _kinetics400_classes,
                               nclass, pretrained, partial_bn,
                               num_segments, num_crop, root, ctx)


def resnet34_v1b_kinetics400(nclass=400, pretrained=False, pretrained_base=True,
                             use_tsn=False, partial_bn=False,
                             num_segments=1, num_crop=1, root='~/.mxnet/models',
                             ctx=mx.cpu(), **kwargs):
    r"""ResNet34 model trained on Kinetics400 dataset.

    Parameters
    ----------
    nclass : int.
        Number of categories in the dataset.
    pretrained : bool or str.
        Boolean value controls whether to load the default pretrained weights for model.
        String value represents the hashtag for a certain version of pretrained weights.
    pretrained_base : bool or str, optional, default is True.
        Accepted for API compatibility; not forwarded by this function, so the
        backbone is built with the ``ActionRecResNetV1b`` default.
    use_tsn : bool, default False.
        Accepted for API compatibility; not used by this function.
    partial_bn : bool, default False.
        Forwarded to ``ActionRecResNetV1b``, which does not read it.
    num_segments : int, default is 1.
        Number of segments used to evenly divide a video.
    num_crop : int, default is 1.
        Number of crops used during evaluation, choices are 1, 3 or 10.
    root : str, default '~/.mxnet/models'
        Location for keeping the model parameters.
    ctx : Context, default CPU.
        The context the model parameters are reset to.

    Returns
    -------
    ActionRecResNetV1b
        The network; ``classes`` is set only when ``pretrained`` is truthy.
    """
    return _build_action_model(34, 'resnet34_v1b_kinetics400', _kinetics400_classes,
                               nclass, pretrained, partial_bn,
                               num_segments, num_crop, root, ctx)


def resnet50_v1b_kinetics400(nclass=400, pretrained=False, pretrained_base=True,
                             use_tsn=False, partial_bn=False,
                             num_segments=1, num_crop=1, root='~/.mxnet/models',
                             ctx=mx.cpu(), **kwargs):
    r"""ResNet50 model trained on Kinetics400 dataset.

    Parameters
    ----------
    nclass : int.
        Number of categories in the dataset.
    pretrained : bool or str.
        Boolean value controls whether to load the default pretrained weights for model.
        String value represents the hashtag for a certain version of pretrained weights.
    pretrained_base : bool or str, optional, default is True.
        Accepted for API compatibility; not forwarded by this function, so the
        backbone is built with the ``ActionRecResNetV1b`` default.
    use_tsn : bool, default False.
        Accepted for API compatibility; not used by this function.
    partial_bn : bool, default False.
        Forwarded to ``ActionRecResNetV1b``, which does not read it.
    num_segments : int, default is 1.
        Number of segments used to evenly divide a video.
    num_crop : int, default is 1.
        Number of crops used during evaluation, choices are 1, 3 or 10.
    root : str, default '~/.mxnet/models'
        Location for keeping the model parameters.
    ctx : Context, default CPU.
        The context the model parameters are reset to.

    Returns
    -------
    ActionRecResNetV1b
        The network; ``classes`` is set only when ``pretrained`` is truthy.
    """
    return _build_action_model(50, 'resnet50_v1b_kinetics400', _kinetics400_classes,
                               nclass, pretrained, partial_bn,
                               num_segments, num_crop, root, ctx)


def resnet101_v1b_kinetics400(nclass=400, pretrained=False, pretrained_base=True,
                              use_tsn=False, partial_bn=False,
                              num_segments=1, num_crop=1, root='~/.mxnet/models',
                              ctx=mx.cpu(), **kwargs):
    r"""ResNet101 model trained on Kinetics400 dataset.

    Parameters
    ----------
    nclass : int.
        Number of categories in the dataset.
    pretrained : bool or str.
        Boolean value controls whether to load the default pretrained weights for model.
        String value represents the hashtag for a certain version of pretrained weights.
    pretrained_base : bool or str, optional, default is True.
        Accepted for API compatibility; not forwarded by this function, so the
        backbone is built with the ``ActionRecResNetV1b`` default.
    use_tsn : bool, default False.
        Accepted for API compatibility; not used by this function.
    partial_bn : bool, default False.
        Forwarded to ``ActionRecResNetV1b``, which does not read it.
    num_segments : int, default is 1.
        Number of segments used to evenly divide a video.
    num_crop : int, default is 1.
        Number of crops used during evaluation, choices are 1, 3 or 10.
    root : str, default '~/.mxnet/models'
        Location for keeping the model parameters.
    ctx : Context, default CPU.
        The context the model parameters are reset to.

    Returns
    -------
    ActionRecResNetV1b
        The network; ``classes`` is set only when ``pretrained`` is truthy.
    """
    return _build_action_model(101, 'resnet101_v1b_kinetics400', _kinetics400_classes,
                               nclass, pretrained, partial_bn,
                               num_segments, num_crop, root, ctx)


def resnet152_v1b_kinetics400(nclass=400, pretrained=False, pretrained_base=True,
                              use_tsn=False, partial_bn=False,
                              num_segments=1, num_crop=1, root='~/.mxnet/models',
                              ctx=mx.cpu(), **kwargs):
    r"""ResNet152 model trained on Kinetics400 dataset.

    Parameters
    ----------
    nclass : int.
        Number of categories in the dataset.
    pretrained : bool or str.
        Boolean value controls whether to load the default pretrained weights for model.
        String value represents the hashtag for a certain version of pretrained weights.
    pretrained_base : bool or str, optional, default is True.
        Accepted for API compatibility; not forwarded by this function, so the
        backbone is built with the ``ActionRecResNetV1b`` default.
    use_tsn : bool, default False.
        Accepted for API compatibility; not used by this function.
    partial_bn : bool, default False.
        Forwarded to ``ActionRecResNetV1b``, which does not read it.
    num_segments : int, default is 1.
        Number of segments used to evenly divide a video.
    num_crop : int, default is 1.
        Number of crops used during evaluation, choices are 1, 3 or 10.
    root : str, default '~/.mxnet/models'
        Location for keeping the model parameters.
    ctx : Context, default CPU.
        The context the model parameters are reset to.

    Returns
    -------
    ActionRecResNetV1b
        The network; ``classes`` is set only when ``pretrained`` is truthy.
    """
    return _build_action_model(152, 'resnet152_v1b_kinetics400', _kinetics400_classes,
                               nclass, pretrained, partial_bn,
                               num_segments, num_crop, root, ctx)


def resnet50_v1b_ucf101(nclass=101, pretrained=False, pretrained_base=True,
                        use_tsn=False, partial_bn=False,
                        num_segments=1, num_crop=1, root='~/.mxnet/models',
                        ctx=mx.cpu(), **kwargs):
    r"""ResNet50 model trained on UCF101 dataset.

    Built with ``dropout_ratio=0.9`` and ``init_std=0.001``.

    Parameters
    ----------
    nclass : int.
        Number of categories in the dataset.
    pretrained : bool or str.
        Boolean value controls whether to load the default pretrained weights for model.
        String value represents the hashtag for a certain version of pretrained weights.
    pretrained_base : bool or str, optional, default is True.
        Accepted for API compatibility; not forwarded by this function, so the
        backbone is built with the ``ActionRecResNetV1b`` default.
    use_tsn : bool, default False.
        Accepted for API compatibility; not used by this function.
    partial_bn : bool, default False.
        Forwarded to ``ActionRecResNetV1b``, which does not read it.
    num_segments : int, default is 1.
        Number of segments used to evenly divide a video.
    num_crop : int, default is 1.
        Number of crops used during evaluation, choices are 1, 3 or 10.
    root : str, default '~/.mxnet/models'
        Location for keeping the model parameters.
    ctx : Context, default CPU.
        The context the model parameters are reset to.

    Returns
    -------
    ActionRecResNetV1b
        The network; ``classes`` is set only when ``pretrained`` is truthy.
    """
    return _build_action_model(50, 'resnet50_v1b_ucf101', _ucf101_classes,
                               nclass, pretrained, partial_bn,
                               num_segments, num_crop, root, ctx,
                               dropout_ratio=0.9, init_std=0.001)


def resnet50_v1b_hmdb51(nclass=51, pretrained=False, pretrained_base=True,
                        use_tsn=False, partial_bn=False,
                        num_segments=1, num_crop=1, root='~/.mxnet/models',
                        ctx=mx.cpu(), **kwargs):
    r"""ResNet50 model trained on HMDB51 dataset.

    Built with ``dropout_ratio=0.9`` and ``init_std=0.001``.

    Parameters
    ----------
    nclass : int.
        Number of categories in the dataset.
    pretrained : bool or str.
        Boolean value controls whether to load the default pretrained weights for model.
        String value represents the hashtag for a certain version of pretrained weights.
    pretrained_base : bool or str, optional, default is True.
        Accepted for API compatibility; not forwarded by this function, so the
        backbone is built with the ``ActionRecResNetV1b`` default.
    use_tsn : bool, default False.
        Accepted for API compatibility; not used by this function.
    partial_bn : bool, default False.
        Forwarded to ``ActionRecResNetV1b``, which does not read it.
    num_segments : int, default is 1.
        Number of segments used to evenly divide a video.
    num_crop : int, default is 1.
        Number of crops used during evaluation, choices are 1, 3 or 10.
    root : str, default '~/.mxnet/models'
        Location for keeping the model parameters.
    ctx : Context, default CPU.
        The context the model parameters are reset to.

    Returns
    -------
    ActionRecResNetV1b
        The network; ``classes`` is set only when ``pretrained`` is truthy.
    """
    return _build_action_model(50, 'resnet50_v1b_hmdb51', _hmdb51_classes,
                               nclass, pretrained, partial_bn,
                               num_segments, num_crop, root, ctx,
                               dropout_ratio=0.9, init_std=0.001)


def resnet50_v1b_custom(nclass=400, pretrained=False, pretrained_base=True,
                        use_tsn=False, partial_bn=False,
                        num_segments=1, num_crop=1, root='~/.mxnet/models',
                        ctx=mx.cpu(), use_kinetics_pretrain=True, **kwargs):
    r"""ResNet50 model customized for any dataset.

    Parameters
    ----------
    nclass : int.
        Number of categories in the dataset.
    pretrained : bool or str.
        If truthy, the Kinetics400 weight transfer is skipped. This function
        does not load any model-zoo weights for the custom model itself.
    pretrained_base : bool or str, optional, default is True.
        Accepted for API compatibility; not forwarded by this function, so the
        backbone is built with the ``ActionRecResNetV1b`` default.
    use_tsn : bool, default False.
        Accepted for API compatibility; not used by this function.
    partial_bn : bool, default False.
        Forwarded to ``ActionRecResNetV1b``, which does not read it.
    num_segments : int, default is 1.
        Number of segments used to evenly divide a video.
    num_crop : int, default is 1.
        Number of crops used during evaluation, choices are 1, 3 or 10.
    root : str, default '~/.mxnet/models'
        Accepted for API compatibility; not used by this function.
    ctx : Context, default CPU.
        The context the model parameters are reset to.
    use_kinetics_pretrain : bool, default True.
        Whether to load pretrained weights on Kinetics400 dataset as model initialization.

    Returns
    -------
    ActionRecResNetV1b
    """
    return _build_custom_model(50, 'resnet50_v1b_kinetics400', nclass, pretrained,
                               partial_bn, num_segments, num_crop, ctx,
                               use_kinetics_pretrain)


def resnet18_v1b_custom(nclass=400, pretrained=False, pretrained_base=True,
                        use_tsn=False, partial_bn=False, use_kinetics_pretrain=True,
                        num_segments=1, num_crop=1, root='~/.mxnet/models',
                        ctx=mx.cpu(), **kwargs):
    r"""ResNet18 model customized for any dataset.

    Parameters
    ----------
    nclass : int.
        Number of categories in the dataset.
    pretrained : bool or str.
        If truthy, the Kinetics400 weight transfer is skipped. This function
        does not load any model-zoo weights for the custom model itself.
    pretrained_base : bool or str, optional, default is True.
        Accepted for API compatibility; not forwarded by this function, so the
        backbone is built with the ``ActionRecResNetV1b`` default.
    use_tsn : bool, default False.
        Accepted for API compatibility; not used by this function.
    partial_bn : bool, default False.
        Forwarded to ``ActionRecResNetV1b``, which does not read it.
    use_kinetics_pretrain : bool, default True.
        Whether to load pretrained weights on Kinetics400 dataset as model initialization.
    num_segments : int, default is 1.
        Number of segments used to evenly divide a video.
    num_crop : int, default is 1.
        Number of crops used during evaluation, choices are 1, 3 or 10.
    root : str, default '~/.mxnet/models'
        Accepted for API compatibility; not used by this function.
    ctx : Context, default CPU.
        The context the model parameters are reset to.

    Returns
    -------
    ActionRecResNetV1b
    """
    return _build_custom_model(18, 'resnet18_v1b_kinetics400', nclass, pretrained,
                               partial_bn, num_segments, num_crop, ctx,
                               use_kinetics_pretrain)