1# pylint: disable=line-too-long,too-many-lines,missing-docstring,arguments-differ,unused-argument
2import mxnet as mx
3from mxnet import init
4from mxnet.gluon import nn
5from mxnet.gluon.nn import HybridBlock
6from ..resnetv1b import resnet18_v1b, resnet34_v1b, resnet50_v1b, resnet101_v1b, resnet152_v1b
7
# Names exported by `from <this module> import *`.
__all__ = ['resnet18_v1b_sthsthv2', 'resnet34_v1b_sthsthv2', 'resnet50_v1b_sthsthv2',
           'resnet101_v1b_sthsthv2', 'resnet152_v1b_sthsthv2', 'resnet18_v1b_kinetics400',
           'resnet34_v1b_kinetics400', 'resnet50_v1b_kinetics400', 'resnet101_v1b_kinetics400',
           'resnet152_v1b_kinetics400', 'resnet50_v1b_ucf101', 'resnet50_v1b_hmdb51',
           'resnet50_v1b_custom', 'resnet18_v1b_custom']
13
class ActionRecResNetV1b(HybridBlock):
    r"""ResNet models for video action recognition
    Deep Residual Learning for Image Recognition, CVPR 2016
    https://arxiv.org/abs/1512.03385

    Parameters
    ----------
    depth : int
        Depth of ResNet, from {18, 34, 50, 101, 152}.
    nclass : int
        Number of classes in the training dataset.
    pretrained_base : bool or str, optional, default is True.
        Forwarded as the `pretrained` argument of the backbone ResNet
        constructor. The dropout and output layers are always newly created.
    dropout_ratio : float, default is 0.5.
        The dropout rate of the dropout layer placed after the pooled features.
    init_std : float, default is 0.01.
        Standard deviation of the normal initializer used for the output
        dense layer.
    num_segments : int, default is 1.
        Number of segments used to evenly divide a video.
    num_crop : int, default is 1.
        Number of crops used during evaluation, choices are 1, 3 or 10.
    partial_bn : bool, default False.
        Accepted for interface compatibility; it is not read by this class.
    **kwargs
        Extra keyword arguments forwarded to the backbone ResNet constructor.

    Raises
    ------
    ValueError
        If `depth` is not one of the supported ResNet depths.

    Input: a single video frame or N images from N segments when num_segments > 1
    Output: one vector of `nclass` scores per group of
    `num_segments * num_crop` input images
    """
    def __init__(self, depth, nclass, pretrained_base=True,
                 dropout_ratio=0.5, init_std=0.01,
                 num_segments=1, num_crop=1,
                 partial_bn=False, **kwargs):
        super(ActionRecResNetV1b, self).__init__()

        # depth -> (backbone constructor, channel expansion factor used to
        # derive the width of the final feature vector).
        backbones = {
            18: (resnet18_v1b, 1),
            34: (resnet34_v1b, 1),
            50: (resnet50_v1b, 4),
            101: (resnet101_v1b, 4),
            152: (resnet152_v1b, 4),
        }
        if depth not in backbones:
            # Fail early with a clear error instead of printing and then
            # crashing later on an undefined backbone / expansion.
            raise ValueError(
                'No such ResNet configuration for depth={!r}, expected one of {}'.format(
                    depth, sorted(backbones)))

        build_backbone, self.expansion = backbones[depth]
        pretrained_model = build_backbone(pretrained=pretrained_base, **kwargs)

        self.dropout_ratio = dropout_ratio
        self.init_std = init_std
        self.feat_dim = 512 * self.expansion
        self.num_segments = num_segments
        self.num_crop = num_crop

        with self.name_scope():
            # Reuse the backbone's layers up to (and including) flattening.
            self.conv1 = pretrained_model.conv1
            self.bn1 = pretrained_model.bn1
            self.relu = pretrained_model.relu
            self.maxpool = pretrained_model.maxpool
            self.layer1 = pretrained_model.layer1
            self.layer2 = pretrained_model.layer2
            self.layer3 = pretrained_model.layer3
            self.layer4 = pretrained_model.layer4
            self.avgpool = pretrained_model.avgpool
            self.flat = pretrained_model.flat
            # New classification head for the action recognition dataset.
            self.drop = nn.Dropout(rate=self.dropout_ratio)
            self.output = nn.Dense(units=nclass, in_units=self.feat_dim,
                                   weight_initializer=init.Normal(sigma=self.init_std))
            # Only the new dense layer is initialized here; the reused
            # backbone layers are left as the backbone constructor made them.
            self.output.initialize()

    def hybrid_forward(self, F, x):
        """Run the backbone, average features over segments/crops, classify."""
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.avgpool(x)
        x = self.flat(x)
        x = self.drop(x)

        # segmental consensus: group the per-image features into
        # (-1, num_segments * num_crop, feat_dim) and average over axis 1.
        x = F.reshape(x, shape=(-1, self.num_segments * self.num_crop, self.feat_dim))
        x = F.mean(x, axis=1)

        x = self.output(x)
        return x
110
def resnet18_v1b_sthsthv2(nclass=174, pretrained=False, pretrained_base=True,
                          use_tsn=False, partial_bn=False,
                          num_segments=1, num_crop=1, root='~/.mxnet/models',
                          ctx=mx.cpu(), **kwargs):
    r"""Build a ResNet18-v1b action recognition network for Something-Something-V2.

    Parameters
    ----------
    nclass : int, default is 174.
        Number of categories in the dataset.
    pretrained : bool or str, default is False.
        When truthy, the 'resnet18_v1b_sthsthv2' weight file is located with
        `get_model_file` (this value is passed as `tag`) and loaded, and the
        dataset class names are attached as `model.classes`.
    pretrained_base : bool or str, optional, default is True.
        Accepted for interface compatibility; it is not forwarded to
        `ActionRecResNetV1b` by this function.
    use_tsn : bool, default is False.
        Accepted but not used by this function.
    partial_bn : bool, default is False.
        Forwarded to `ActionRecResNetV1b`.
    num_segments : int, default is 1.
        Number of segments used to evenly divide a video.
    num_crop : int, default is 1.
        Number of crops used during evaluation, choices are 1, 3 or 10.
    root : str, default is '~/.mxnet/models'.
        Location for keeping the model parameters.
    ctx : Context, default CPU.
        Context the parameters are reset to before the network is returned.
    **kwargs
        Accepted but not used by this function.

    Returns
    -------
    ActionRecResNetV1b
        The constructed network.
    """
    net = ActionRecResNetV1b(depth=18, nclass=nclass,
                             dropout_ratio=0.5, init_std=0.01,
                             num_segments=num_segments, num_crop=num_crop,
                             partial_bn=partial_bn)

    if pretrained:
        from ..model_store import get_model_file
        weight_file = get_model_file('resnet18_v1b_sthsthv2',
                                     tag=pretrained, root=root)
        net.load_parameters(weight_file)
        from ...data import SomethingSomethingV2Attr
        net.classes = SomethingSomethingV2Attr().classes

    net.collect_params().reset_ctx(ctx)
    return net
155
def resnet34_v1b_sthsthv2(nclass=174, pretrained=False, pretrained_base=True,
                          use_tsn=False, partial_bn=False,
                          num_segments=1, num_crop=1, root='~/.mxnet/models',
                          ctx=mx.cpu(), **kwargs):
    r"""Build a ResNet34-v1b action recognition network for Something-Something-V2.

    Parameters
    ----------
    nclass : int, default is 174.
        Number of categories in the dataset.
    pretrained : bool or str, default is False.
        When truthy, the 'resnet34_v1b_sthsthv2' weight file is located with
        `get_model_file` (this value is passed as `tag`) and loaded, and the
        dataset class names are attached as `model.classes`.
    pretrained_base : bool or str, optional, default is True.
        Accepted for interface compatibility; it is not forwarded to
        `ActionRecResNetV1b` by this function.
    use_tsn : bool, default is False.
        Accepted but not used by this function.
    partial_bn : bool, default is False.
        Forwarded to `ActionRecResNetV1b`.
    num_segments : int, default is 1.
        Number of segments used to evenly divide a video.
    num_crop : int, default is 1.
        Number of crops used during evaluation, choices are 1, 3 or 10.
    root : str, default is '~/.mxnet/models'.
        Location for keeping the model parameters.
    ctx : Context, default CPU.
        Context the parameters are reset to before the network is returned.
    **kwargs
        Accepted but not used by this function.

    Returns
    -------
    ActionRecResNetV1b
        The constructed network.
    """
    net = ActionRecResNetV1b(depth=34, nclass=nclass,
                             dropout_ratio=0.5, init_std=0.01,
                             num_segments=num_segments, num_crop=num_crop,
                             partial_bn=partial_bn)

    if pretrained:
        from ..model_store import get_model_file
        weight_file = get_model_file('resnet34_v1b_sthsthv2',
                                     tag=pretrained, root=root)
        net.load_parameters(weight_file)
        from ...data import SomethingSomethingV2Attr
        net.classes = SomethingSomethingV2Attr().classes

    net.collect_params().reset_ctx(ctx)
    return net
200
def resnet50_v1b_sthsthv2(nclass=174, pretrained=False, pretrained_base=True,
                          use_tsn=False, partial_bn=False,
                          num_segments=1, num_crop=1, root='~/.mxnet/models',
                          ctx=mx.cpu(), **kwargs):
    r"""Build a ResNet50-v1b action recognition network for Something-Something-V2.

    Parameters
    ----------
    nclass : int, default is 174.
        Number of categories in the dataset.
    pretrained : bool or str, default is False.
        When truthy, the 'resnet50_v1b_sthsthv2' weight file is located with
        `get_model_file` (this value is passed as `tag`) and loaded, and the
        dataset class names are attached as `model.classes`.
    pretrained_base : bool or str, optional, default is True.
        Accepted for interface compatibility; it is not forwarded to
        `ActionRecResNetV1b` by this function.
    use_tsn : bool, default is False.
        Accepted but not used by this function.
    partial_bn : bool, default is False.
        Forwarded to `ActionRecResNetV1b`.
    num_segments : int, default is 1.
        Number of segments used to evenly divide a video.
    num_crop : int, default is 1.
        Number of crops used during evaluation, choices are 1, 3 or 10.
    root : str, default is '~/.mxnet/models'.
        Location for keeping the model parameters.
    ctx : Context, default CPU.
        Context the parameters are reset to before the network is returned.
    **kwargs
        Accepted but not used by this function.

    Returns
    -------
    ActionRecResNetV1b
        The constructed network.
    """
    net = ActionRecResNetV1b(depth=50, nclass=nclass,
                             dropout_ratio=0.5, init_std=0.01,
                             num_segments=num_segments, num_crop=num_crop,
                             partial_bn=partial_bn)

    if pretrained:
        from ..model_store import get_model_file
        weight_file = get_model_file('resnet50_v1b_sthsthv2',
                                     tag=pretrained, root=root)
        net.load_parameters(weight_file)
        from ...data import SomethingSomethingV2Attr
        net.classes = SomethingSomethingV2Attr().classes

    net.collect_params().reset_ctx(ctx)
    return net
245
def resnet101_v1b_sthsthv2(nclass=174, pretrained=False, pretrained_base=True,
                           use_tsn=False, partial_bn=False,
                           num_segments=1, num_crop=1, root='~/.mxnet/models',
                           ctx=mx.cpu(), **kwargs):
    r"""Build a ResNet101-v1b action recognition network for Something-Something-V2.

    Parameters
    ----------
    nclass : int, default is 174.
        Number of categories in the dataset.
    pretrained : bool or str, default is False.
        When truthy, the 'resnet101_v1b_sthsthv2' weight file is located with
        `get_model_file` (this value is passed as `tag`) and loaded, and the
        dataset class names are attached as `model.classes`.
    pretrained_base : bool or str, optional, default is True.
        Accepted for interface compatibility; it is not forwarded to
        `ActionRecResNetV1b` by this function.
    use_tsn : bool, default is False.
        Accepted but not used by this function.
    partial_bn : bool, default is False.
        Forwarded to `ActionRecResNetV1b`.
    num_segments : int, default is 1.
        Number of segments used to evenly divide a video.
    num_crop : int, default is 1.
        Number of crops used during evaluation, choices are 1, 3 or 10.
    root : str, default is '~/.mxnet/models'.
        Location for keeping the model parameters.
    ctx : Context, default CPU.
        Context the parameters are reset to before the network is returned.
    **kwargs
        Accepted but not used by this function.

    Returns
    -------
    ActionRecResNetV1b
        The constructed network.
    """
    net = ActionRecResNetV1b(depth=101, nclass=nclass,
                             dropout_ratio=0.5, init_std=0.01,
                             num_segments=num_segments, num_crop=num_crop,
                             partial_bn=partial_bn)

    if pretrained:
        from ..model_store import get_model_file
        weight_file = get_model_file('resnet101_v1b_sthsthv2',
                                     tag=pretrained, root=root)
        net.load_parameters(weight_file)
        from ...data import SomethingSomethingV2Attr
        net.classes = SomethingSomethingV2Attr().classes

    net.collect_params().reset_ctx(ctx)
    return net
290
def resnet152_v1b_sthsthv2(nclass=174, pretrained=False, pretrained_base=True,
                           use_tsn=False, partial_bn=False,
                           num_segments=1, num_crop=1, root='~/.mxnet/models',
                           ctx=mx.cpu(), **kwargs):
    r"""Build a ResNet152-v1b action recognition network for Something-Something-V2.

    Parameters
    ----------
    nclass : int, default is 174.
        Number of categories in the dataset.
    pretrained : bool or str, default is False.
        When truthy, the 'resnet152_v1b_sthsthv2' weight file is located with
        `get_model_file` (this value is passed as `tag`) and loaded, and the
        dataset class names are attached as `model.classes`.
    pretrained_base : bool or str, optional, default is True.
        Accepted for interface compatibility; it is not forwarded to
        `ActionRecResNetV1b` by this function.
    use_tsn : bool, default is False.
        Accepted but not used by this function.
    partial_bn : bool, default is False.
        Forwarded to `ActionRecResNetV1b`.
    num_segments : int, default is 1.
        Number of segments used to evenly divide a video.
    num_crop : int, default is 1.
        Number of crops used during evaluation, choices are 1, 3 or 10.
    root : str, default is '~/.mxnet/models'.
        Location for keeping the model parameters.
    ctx : Context, default CPU.
        Context the parameters are reset to before the network is returned.
    **kwargs
        Accepted but not used by this function.

    Returns
    -------
    ActionRecResNetV1b
        The constructed network.
    """
    net = ActionRecResNetV1b(depth=152, nclass=nclass,
                             dropout_ratio=0.5, init_std=0.01,
                             num_segments=num_segments, num_crop=num_crop,
                             partial_bn=partial_bn)

    if pretrained:
        from ..model_store import get_model_file
        weight_file = get_model_file('resnet152_v1b_sthsthv2',
                                     tag=pretrained, root=root)
        net.load_parameters(weight_file)
        from ...data import SomethingSomethingV2Attr
        net.classes = SomethingSomethingV2Attr().classes

    net.collect_params().reset_ctx(ctx)
    return net
335
def resnet18_v1b_kinetics400(nclass=400, pretrained=False, pretrained_base=True,
                             use_tsn=False, partial_bn=False,
                             num_segments=1, num_crop=1, root='~/.mxnet/models',
                             ctx=mx.cpu(), **kwargs):
    r"""Build a ResNet18-v1b action recognition network for Kinetics400.

    Parameters
    ----------
    nclass : int, default is 400.
        Number of categories in the dataset.
    pretrained : bool or str, default is False.
        When truthy, the 'resnet18_v1b_kinetics400' weight file is located
        with `get_model_file` (this value is passed as `tag`) and loaded, and
        the dataset class names are attached as `model.classes`.
    pretrained_base : bool or str, optional, default is True.
        Accepted for interface compatibility; it is not forwarded to
        `ActionRecResNetV1b` by this function.
    use_tsn : bool, default is False.
        Accepted but not used by this function.
    partial_bn : bool, default is False.
        Forwarded to `ActionRecResNetV1b`.
    num_segments : int, default is 1.
        Number of segments used to evenly divide a video.
    num_crop : int, default is 1.
        Number of crops used during evaluation, choices are 1, 3 or 10.
    root : str, default is '~/.mxnet/models'.
        Location for keeping the model parameters.
    ctx : Context, default CPU.
        Context the parameters are reset to before the network is returned.
    **kwargs
        Accepted but not used by this function.

    Returns
    -------
    ActionRecResNetV1b
        The constructed network.
    """
    net = ActionRecResNetV1b(depth=18, nclass=nclass,
                             dropout_ratio=0.5, init_std=0.01,
                             num_segments=num_segments, num_crop=num_crop,
                             partial_bn=partial_bn)

    if pretrained:
        from ..model_store import get_model_file
        weight_file = get_model_file('resnet18_v1b_kinetics400',
                                     tag=pretrained, root=root)
        net.load_parameters(weight_file)
        from ...data import Kinetics400Attr
        net.classes = Kinetics400Attr().classes

    net.collect_params().reset_ctx(ctx)
    return net
380
def resnet34_v1b_kinetics400(nclass=400, pretrained=False, pretrained_base=True,
                             use_tsn=False, partial_bn=False,
                             num_segments=1, num_crop=1, root='~/.mxnet/models',
                             ctx=mx.cpu(), **kwargs):
    r"""Build a ResNet34-v1b action recognition network for Kinetics400.

    Parameters
    ----------
    nclass : int, default is 400.
        Number of categories in the dataset.
    pretrained : bool or str, default is False.
        When truthy, the 'resnet34_v1b_kinetics400' weight file is located
        with `get_model_file` (this value is passed as `tag`) and loaded, and
        the dataset class names are attached as `model.classes`.
    pretrained_base : bool or str, optional, default is True.
        Accepted for interface compatibility; it is not forwarded to
        `ActionRecResNetV1b` by this function.
    use_tsn : bool, default is False.
        Accepted but not used by this function.
    partial_bn : bool, default is False.
        Forwarded to `ActionRecResNetV1b`.
    num_segments : int, default is 1.
        Number of segments used to evenly divide a video.
    num_crop : int, default is 1.
        Number of crops used during evaluation, choices are 1, 3 or 10.
    root : str, default is '~/.mxnet/models'.
        Location for keeping the model parameters.
    ctx : Context, default CPU.
        Context the parameters are reset to before the network is returned.
    **kwargs
        Accepted but not used by this function.

    Returns
    -------
    ActionRecResNetV1b
        The constructed network.
    """
    net = ActionRecResNetV1b(depth=34, nclass=nclass,
                             dropout_ratio=0.5, init_std=0.01,
                             num_segments=num_segments, num_crop=num_crop,
                             partial_bn=partial_bn)

    if pretrained:
        from ..model_store import get_model_file
        weight_file = get_model_file('resnet34_v1b_kinetics400',
                                     tag=pretrained, root=root)
        net.load_parameters(weight_file)
        from ...data import Kinetics400Attr
        net.classes = Kinetics400Attr().classes

    net.collect_params().reset_ctx(ctx)
    return net
425
def resnet50_v1b_kinetics400(nclass=400, pretrained=False, pretrained_base=True,
                             use_tsn=False, partial_bn=False,
                             num_segments=1, num_crop=1, root='~/.mxnet/models',
                             ctx=mx.cpu(), **kwargs):
    r"""Build a ResNet50-v1b action recognition network for Kinetics400.

    Parameters
    ----------
    nclass : int, default is 400.
        Number of categories in the dataset.
    pretrained : bool or str, default is False.
        When truthy, the 'resnet50_v1b_kinetics400' weight file is located
        with `get_model_file` (this value is passed as `tag`) and loaded, and
        the dataset class names are attached as `model.classes`.
    pretrained_base : bool or str, optional, default is True.
        Accepted for interface compatibility; it is not forwarded to
        `ActionRecResNetV1b` by this function.
    use_tsn : bool, default is False.
        Accepted but not used by this function.
    partial_bn : bool, default is False.
        Forwarded to `ActionRecResNetV1b`.
    num_segments : int, default is 1.
        Number of segments used to evenly divide a video.
    num_crop : int, default is 1.
        Number of crops used during evaluation, choices are 1, 3 or 10.
    root : str, default is '~/.mxnet/models'.
        Location for keeping the model parameters.
    ctx : Context, default CPU.
        Context the parameters are reset to before the network is returned.
    **kwargs
        Accepted but not used by this function.

    Returns
    -------
    ActionRecResNetV1b
        The constructed network.
    """
    net = ActionRecResNetV1b(depth=50, nclass=nclass,
                             dropout_ratio=0.5, init_std=0.01,
                             num_segments=num_segments, num_crop=num_crop,
                             partial_bn=partial_bn)

    if pretrained:
        from ..model_store import get_model_file
        weight_file = get_model_file('resnet50_v1b_kinetics400',
                                     tag=pretrained, root=root)
        net.load_parameters(weight_file)
        from ...data import Kinetics400Attr
        net.classes = Kinetics400Attr().classes

    net.collect_params().reset_ctx(ctx)
    return net
470
def resnet101_v1b_kinetics400(nclass=400, pretrained=False, pretrained_base=True,
                              use_tsn=False, partial_bn=False,
                              num_segments=1, num_crop=1, root='~/.mxnet/models',
                              ctx=mx.cpu(), **kwargs):
    r"""Build a ResNet101-v1b action recognition network for Kinetics400.

    Parameters
    ----------
    nclass : int, default is 400.
        Number of categories in the dataset.
    pretrained : bool or str, default is False.
        When truthy, the 'resnet101_v1b_kinetics400' weight file is located
        with `get_model_file` (this value is passed as `tag`) and loaded, and
        the dataset class names are attached as `model.classes`.
    pretrained_base : bool or str, optional, default is True.
        Accepted for interface compatibility; it is not forwarded to
        `ActionRecResNetV1b` by this function.
    use_tsn : bool, default is False.
        Accepted but not used by this function.
    partial_bn : bool, default is False.
        Forwarded to `ActionRecResNetV1b`.
    num_segments : int, default is 1.
        Number of segments used to evenly divide a video.
    num_crop : int, default is 1.
        Number of crops used during evaluation, choices are 1, 3 or 10.
    root : str, default is '~/.mxnet/models'.
        Location for keeping the model parameters.
    ctx : Context, default CPU.
        Context the parameters are reset to before the network is returned.
    **kwargs
        Accepted but not used by this function.

    Returns
    -------
    ActionRecResNetV1b
        The constructed network.
    """
    net = ActionRecResNetV1b(depth=101, nclass=nclass,
                             dropout_ratio=0.5, init_std=0.01,
                             num_segments=num_segments, num_crop=num_crop,
                             partial_bn=partial_bn)

    if pretrained:
        from ..model_store import get_model_file
        weight_file = get_model_file('resnet101_v1b_kinetics400',
                                     tag=pretrained, root=root)
        net.load_parameters(weight_file)
        from ...data import Kinetics400Attr
        net.classes = Kinetics400Attr().classes

    net.collect_params().reset_ctx(ctx)
    return net
515
def resnet152_v1b_kinetics400(nclass=400, pretrained=False, pretrained_base=True,
                              use_tsn=False, partial_bn=False,
                              num_segments=1, num_crop=1, root='~/.mxnet/models',
                              ctx=mx.cpu(), **kwargs):
    r"""Build a ResNet152-v1b action recognition network for Kinetics400.

    Parameters
    ----------
    nclass : int, default is 400.
        Number of categories in the dataset.
    pretrained : bool or str, default is False.
        When truthy, the 'resnet152_v1b_kinetics400' weight file is located
        with `get_model_file` (this value is passed as `tag`) and loaded, and
        the dataset class names are attached as `model.classes`.
    pretrained_base : bool or str, optional, default is True.
        Accepted for interface compatibility; it is not forwarded to
        `ActionRecResNetV1b` by this function.
    use_tsn : bool, default is False.
        Accepted but not used by this function.
    partial_bn : bool, default is False.
        Forwarded to `ActionRecResNetV1b`.
    num_segments : int, default is 1.
        Number of segments used to evenly divide a video.
    num_crop : int, default is 1.
        Number of crops used during evaluation, choices are 1, 3 or 10.
    root : str, default is '~/.mxnet/models'.
        Location for keeping the model parameters.
    ctx : Context, default CPU.
        Context the parameters are reset to before the network is returned.
    **kwargs
        Accepted but not used by this function.

    Returns
    -------
    ActionRecResNetV1b
        The constructed network.
    """
    net = ActionRecResNetV1b(depth=152, nclass=nclass,
                             dropout_ratio=0.5, init_std=0.01,
                             num_segments=num_segments, num_crop=num_crop,
                             partial_bn=partial_bn)

    if pretrained:
        from ..model_store import get_model_file
        weight_file = get_model_file('resnet152_v1b_kinetics400',
                                     tag=pretrained, root=root)
        net.load_parameters(weight_file)
        from ...data import Kinetics400Attr
        net.classes = Kinetics400Attr().classes

    net.collect_params().reset_ctx(ctx)
    return net
560
def resnet50_v1b_ucf101(nclass=101, pretrained=False, pretrained_base=True,
                        use_tsn=False, partial_bn=False,
                        num_segments=1, num_crop=1, root='~/.mxnet/models',
                        ctx=mx.cpu(), **kwargs):
    r"""Build a ResNet50-v1b action recognition network for UCF101.

    The network is created with a dropout ratio of 0.9 and an output-layer
    initialization standard deviation of 0.001.

    Parameters
    ----------
    nclass : int, default is 101.
        Number of categories in the dataset.
    pretrained : bool or str, default is False.
        When truthy, the 'resnet50_v1b_ucf101' weight file is located with
        `get_model_file` (this value is passed as `tag`) and loaded, and the
        dataset class names are attached as `model.classes`.
    pretrained_base : bool or str, optional, default is True.
        Accepted for interface compatibility; it is not forwarded to
        `ActionRecResNetV1b` by this function.
    use_tsn : bool, default is False.
        Accepted but not used by this function.
    partial_bn : bool, default is False.
        Forwarded to `ActionRecResNetV1b`.
    num_segments : int, default is 1.
        Number of segments used to evenly divide a video.
    num_crop : int, default is 1.
        Number of crops used during evaluation, choices are 1, 3 or 10.
    root : str, default is '~/.mxnet/models'.
        Location for keeping the model parameters.
    ctx : Context, default CPU.
        Context the parameters are reset to before the network is returned.
    **kwargs
        Accepted but not used by this function.

    Returns
    -------
    ActionRecResNetV1b
        The constructed network.
    """
    net = ActionRecResNetV1b(depth=50, nclass=nclass,
                             dropout_ratio=0.9, init_std=0.001,
                             num_segments=num_segments, num_crop=num_crop,
                             partial_bn=partial_bn)

    if pretrained:
        from ..model_store import get_model_file
        weight_file = get_model_file('resnet50_v1b_ucf101',
                                     tag=pretrained, root=root)
        net.load_parameters(weight_file)
        from ...data import UCF101Attr
        net.classes = UCF101Attr().classes

    net.collect_params().reset_ctx(ctx)
    return net
605
def resnet50_v1b_hmdb51(nclass=51, pretrained=False, pretrained_base=True,
                        use_tsn=False, partial_bn=False,
                        num_segments=1, num_crop=1, root='~/.mxnet/models',
                        ctx=mx.cpu(), **kwargs):
    r"""Build a ResNet50-v1b action recognition network for HMDB51.

    The network is created with a dropout ratio of 0.9 and an output-layer
    initialization standard deviation of 0.001.

    Parameters
    ----------
    nclass : int, default is 51.
        Number of categories in the dataset.
    pretrained : bool or str, default is False.
        When truthy, the 'resnet50_v1b_hmdb51' weight file is located with
        `get_model_file` (this value is passed as `tag`) and loaded, and the
        dataset class names are attached as `model.classes`.
    pretrained_base : bool or str, optional, default is True.
        Accepted for interface compatibility; it is not forwarded to
        `ActionRecResNetV1b` by this function.
    use_tsn : bool, default is False.
        Accepted but not used by this function.
    partial_bn : bool, default is False.
        Forwarded to `ActionRecResNetV1b`.
    num_segments : int, default is 1.
        Number of segments used to evenly divide a video.
    num_crop : int, default is 1.
        Number of crops used during evaluation, choices are 1, 3 or 10.
    root : str, default is '~/.mxnet/models'.
        Location for keeping the model parameters.
    ctx : Context, default CPU.
        Context the parameters are reset to before the network is returned.
    **kwargs
        Accepted but not used by this function.

    Returns
    -------
    ActionRecResNetV1b
        The constructed network.
    """
    net = ActionRecResNetV1b(depth=50, nclass=nclass,
                             dropout_ratio=0.9, init_std=0.001,
                             num_segments=num_segments, num_crop=num_crop,
                             partial_bn=partial_bn)

    if pretrained:
        from ..model_store import get_model_file
        weight_file = get_model_file('resnet50_v1b_hmdb51',
                                     tag=pretrained, root=root)
        net.load_parameters(weight_file)
        from ...data import HMDB51Attr
        net.classes = HMDB51Attr().classes

    net.collect_params().reset_ctx(ctx)
    return net
650
def resnet50_v1b_custom(nclass=400, pretrained=False, pretrained_base=True,
                        use_tsn=False, partial_bn=False,
                        num_segments=1, num_crop=1, root='~/.mxnet/models',
                        ctx=mx.cpu(), use_kinetics_pretrain=True, **kwargs):
    r"""ResNet50 model customized for any dataset.

    Builds an ``ActionRecResNetV1b`` of depth 50 with ``nclass`` outputs and,
    by default, initializes everything except the final dense layer from the
    ``resnet50_v1b_kinetics400`` model.

    Parameters
    ----------
    nclass : int.
        Number of categories in the dataset.
    pretrained : bool or str.
        Any truthy value disables the Kinetics400 initialization. This function
        does not itself load dataset-specific weights for a truthy value.
    pretrained_base : bool or str, optional, default is True.
        Accepted for signature parity with the other model constructors in this
        file; it is not forwarded to the network constructor here.
    use_tsn : bool, default False.
        Accepted for signature parity; not read by this function.
    ctx : Context, default CPU.
        The context the returned model's parameters are moved to.
    root : str, default $MXNET_HOME/models
        Location for keeping the model parameters. Used when fetching the
        Kinetics400 weights.
    num_segments : int, default is 1.
        Number of segments used to evenly divide a video.
    num_crop : int, default is 1.
        Number of crops used during evaluation, choices are 1, 3 or 10.
    partial_bn : bool, default False.
        Freeze all batch normalization layers during training except the first layer.
    use_kinetics_pretrain : bool, default True.
        Whether to load pretrained weights on Kinetics400 dataset as model initialization.

    Returns
    -------
    ActionRecResNetV1b
        The constructed network with its parameters reset to ``ctx``.

    Raises
    ------
    AssertionError
        If the Kinetics400 model and the new model do not expose the same
        number of parameters, in which case a positional copy is not possible.
    """
    model = ActionRecResNetV1b(depth=50,
                               nclass=nclass,
                               partial_bn=partial_bn,
                               num_segments=num_segments,
                               num_crop=num_crop,
                               dropout_ratio=0.5,
                               init_std=0.01)

    if use_kinetics_pretrain and not pretrained:
        from gluoncv.model_zoo import get_model
        # Honor the caller's `root` so the weights are cached where requested
        # instead of always landing in the default location.
        kinetics_model = get_model('resnet50_v1b_kinetics400', nclass=400,
                                   pretrained=True, root=root)
        source_params = kinetics_model.collect_params()
        target_params = model.collect_params()

        source_names = list(source_params.keys())
        target_names = list(target_params.keys())
        num_params = len(source_names)
        # Explicit raise (not `assert`) so the guard survives `python -O`;
        # the exception type is kept for backward compatibility.
        if num_params != len(target_names):
            raise AssertionError(
                'Cannot transfer Kinetics400 weights: source model has %d parameters '
                'but target model has %d.' % (num_params, len(target_names)))

        # Parameters are matched purely by position, since the two networks are
        # built by the same class and differ only in `nclass`.
        for i, (source_name, target_name) in enumerate(zip(source_names, target_names)):
            if i + 2 == num_params:
                # skip the last dense layer (its trailing two parameters),
                # whose shape depends on `nclass`
                break
            target_params[target_name].set_data(source_params[source_name].data())
    model.collect_params().reset_ctx(ctx)
    return model
706
707
def resnet18_v1b_custom(nclass=400, pretrained=False, pretrained_base=True,
                        use_tsn=False, partial_bn=False, use_kinetics_pretrain=True,
                        num_segments=1, num_crop=1, root='~/.mxnet/models',
                        ctx=mx.cpu(), **kwargs):
    r"""ResNet18 model customized for any dataset.

    Builds an ``ActionRecResNetV1b`` of depth 18 with ``nclass`` outputs and,
    by default, initializes everything except the final dense layer from the
    ``resnet18_v1b_kinetics400`` model.

    Parameters
    ----------
    nclass : int.
        Number of categories in the dataset.
    pretrained : bool or str.
        Any truthy value disables the Kinetics400 initialization. This function
        does not itself load dataset-specific weights for a truthy value.
    pretrained_base : bool or str, optional, default is True.
        Accepted for signature parity with the other model constructors in this
        file; it is not forwarded to the network constructor here.
    use_tsn : bool, default False.
        Accepted for signature parity; not read by this function.
    use_kinetics_pretrain : bool, default True.
        Whether to load pretrained weights on Kinetics400 dataset as model initialization.
    ctx : Context, default CPU.
        The context the returned model's parameters are moved to.
    root : str, default $MXNET_HOME/models
        Location for keeping the model parameters. Used when fetching the
        Kinetics400 weights.
    num_segments : int, default is 1.
        Number of segments used to evenly divide a video.
    num_crop : int, default is 1.
        Number of crops used during evaluation, choices are 1, 3 or 10.
    partial_bn : bool, default False.
        Freeze all batch normalization layers during training except the first layer.

    Returns
    -------
    ActionRecResNetV1b
        The constructed network with its parameters reset to ``ctx``.

    Raises
    ------
    AssertionError
        If the Kinetics400 model and the new model do not expose the same
        number of parameters, in which case a positional copy is not possible.
    """
    model = ActionRecResNetV1b(depth=18,
                               nclass=nclass,
                               partial_bn=partial_bn,
                               num_segments=num_segments,
                               num_crop=num_crop,
                               dropout_ratio=0.5,
                               init_std=0.01)

    if use_kinetics_pretrain and not pretrained:
        from gluoncv.model_zoo import get_model
        # Honor the caller's `root` so the weights are cached where requested
        # instead of always landing in the default location.
        kinetics_model = get_model('resnet18_v1b_kinetics400', nclass=400,
                                   pretrained=True, root=root)
        source_params = kinetics_model.collect_params()
        target_params = model.collect_params()

        source_names = list(source_params.keys())
        target_names = list(target_params.keys())
        num_params = len(source_names)
        # Explicit raise (not `assert`) so the guard survives `python -O`;
        # the exception type is kept for backward compatibility.
        if num_params != len(target_names):
            raise AssertionError(
                'Cannot transfer Kinetics400 weights: source model has %d parameters '
                'but target model has %d.' % (num_params, len(target_names)))

        # Parameters are matched purely by position, since the two networks are
        # built by the same class and differ only in `nclass`.
        for i, (source_name, target_name) in enumerate(zip(source_names, target_names)):
            if i + 2 == num_params:
                # skip the last dense layer (its trailing two parameters),
                # whose shape depends on `nclass`
                break
            target_params[target_name].set_data(source_params[source_name].data())
    model.collect_params().reset_ctx(ctx)
    return model
761