# pylint: disable=line-too-long,too-many-lines,missing-docstring
"""UCF101 video action classification dataset.
Code adapted from https://github.com/open-mmlab/mmaction and
https://github.com/bryanyzhu/two-stream-pytorch"""
import os
from ..video_custom import VideoClsCustom

__all__ = ['UCF101']


class UCF101(VideoClsCustom):
    """Load the UCF101 video action recognition dataset.

    This class only supplies UCF101-specific default values; every argument is
    forwarded unchanged, in positional order, to ``VideoClsCustom.__init__``.

    Refer to :doc:`../build/examples_datasets/ucf101` for the description of
    this dataset and how to prepare it.

    Parameters
    ----------
    root : str, default '~/.mxnet/datasets/ucf101/rawframes'.
        Path to the root folder storing the dataset.
        The default is expanded with ``os.path.expanduser`` when the module is imported.
    setting : str, default '~/.mxnet/datasets/ucf101/ucfTrainTestlist/ucf101_train_split_1_rawframes.txt'.
        A text file describing the dataset, one video sample per line.
        There are three items in each line: (1) video path; (2) video length and (3) video label.
        The default is expanded with ``os.path.expanduser`` when the module is imported.
    train : bool, default True.
        Whether to load the training or validation set.
    test_mode : bool, default False.
        Whether to perform evaluation on the test set.
        Usually a three-crop or ten-crop evaluation strategy is involved.
    name_pattern : str, default 'img_%05d.jpg'.
        The naming pattern of the decoded video frames.
        For example, img_00012.jpg.
    video_ext : str, default 'mp4'.
        If video_loader is set to True, please specify the video format accordingly.
    is_color : bool, default True.
        Whether the loaded image is color or grayscale.
    modality : str, default 'rgb'.
        Input modalities, we support only rgb video frames for now.
        Will add support for rgb difference image and optical flow image later.
    num_segments : int, default 1.
        Number of segments to evenly divide the video into clips.
        A useful technique to obtain global video-level information.
        Limin Wang, etal, Temporal Segment Networks: Towards Good Practices for Deep Action Recognition, ECCV 2016.
    num_crop : int, default 1.
        Number of crops for each image.
        Common choices are three crops and ten crops during evaluation.
    new_length : int, default 1.
        The length of input video clip. Default is a single image, but it can be multiple video frames.
        For example, new_length=16 means we will extract a video clip of consecutive 16 frames.
    new_step : int, default 1.
        Temporal sampling rate. For example, new_step=1 means we will extract a video clip of consecutive frames.
        new_step=2 means we will extract a video clip of every other frame.
    new_width : int, default 340.
        Scale the width of loaded image to 'new_width' for later multiscale cropping and resizing.
    new_height : int, default 256.
        Scale the height of loaded image to 'new_height' for later multiscale cropping and resizing.
    target_width : int, default 224.
        Scale the width of transformed image to the same 'target_width' for batch forwarding.
    target_height : int, default 224.
        Scale the height of transformed image to the same 'target_height' for batch forwarding.
    temporal_jitter : bool, default False.
        Whether to temporally jitter if new_step > 1.
    video_loader : bool, default False.
        Whether to use video loader to load data.
    use_decord : bool, default False.
        Whether to use Decord video loader to load data. Otherwise use mmcv video loader.
    slowfast : bool, default False.
        If set to True, use data loader designed for SlowFast network.
        Christoph Feichtenhofer, etal, SlowFast Networks for Video Recognition, ICCV 2019.
    slow_temporal_stride : int, default 16.
        The temporal stride for sparse sampling of video frames in slow branch of a SlowFast network.
    fast_temporal_stride : int, default 2.
        The temporal stride for sparse sampling of video frames in fast branch of a SlowFast network.
    data_aug : str, default 'v1'.
        Different types of data augmentation. Supports v1, v2, v3 and v4.
    lazy_init : bool, default False.
        If set to True, build a dataset instance without loading any dataset.
    transform : function, default None.
        A function that takes data and label and transforms them.
    """
    def __init__(self,
                 root=os.path.expanduser('~/.mxnet/datasets/ucf101/rawframes'),
                 setting=os.path.expanduser('~/.mxnet/datasets/ucf101/ucfTrainTestlist/ucf101_train_split_1_rawframes.txt'),
                 train=True,
                 test_mode=False,
                 name_pattern='img_%05d.jpg',
                 video_ext='mp4',
                 is_color=True,
                 modality='rgb',
                 num_segments=1,
                 num_crop=1,
                 new_length=1,
                 new_step=1,
                 new_width=340,
                 new_height=256,
                 target_width=224,
                 target_height=224,
                 temporal_jitter=False,
                 video_loader=False,
                 use_decord=False,
                 slowfast=False,
                 slow_temporal_stride=16,
                 fast_temporal_stride=2,
                 data_aug='v1',
                 lazy_init=False,
                 transform=None):

        # Arguments are passed positionally, so their order here must stay in
        # sync with the parameter order of VideoClsCustom.__init__.
        super(UCF101, self).__init__(root, setting, train, test_mode, name_pattern,
                                     video_ext, is_color, modality, num_segments,
                                     num_crop, new_length, new_step, new_width, new_height,
                                     target_width, target_height, temporal_jitter,
                                     video_loader, use_decord, slowfast, slow_temporal_stride,
                                     fast_temporal_stride, data_aug, lazy_init, transform)


class UCF101Attr(object):
    """Static attributes of the UCF101 dataset.

    Attributes
    ----------
    num_class : int
        Number of action categories, 101.
    classes : list of str
        Names of the action categories. A new list is created for every instance.
    """
    def __init__(self):
        self.num_class = 101
        # NOTE(review): presumably the position of a name in this list is the
        # integer label used in the setting files -- confirm before reordering.
        self.classes = ['ApplyEyeMakeup', 'ApplyLipstick', 'Archery', 'BabyCrawling', 'BalanceBeam',
                        'BandMarching', 'BaseballPitch', 'Basketball', 'BasketballDunk', 'BenchPress',
                        'Biking', 'Billiards', 'BlowDryHair', 'BlowingCandles', 'BodyWeightSquats',
                        'Bowling', 'BoxingPunchingBag', 'BoxingSpeedBag', 'BreastStroke', 'BrushingTeeth',
                        'CleanAndJerk', 'CliffDiving', 'CricketBowling', 'CricketShot', 'CuttingInKitchen',
                        'Diving', 'Drumming', 'Fencing', 'FieldHockeyPenalty', 'FloorGymnastics', 'FrisbeeCatch',
                        'FrontCrawl', 'GolfSwing', 'Haircut', 'HammerThrow', 'Hammering', 'HandstandPushups',
                        'HandstandWalking', 'HeadMassage', 'HighJump', 'HorseRace', 'HorseRiding', 'HulaHoop',
                        'IceDancing', 'JavelinThrow', 'JugglingBalls', 'JumpRope', 'JumpingJack', 'Kayaking',
                        'Knitting', 'LongJump', 'Lunges', 'MilitaryParade', 'Mixing', 'MoppingFloor', 'Nunchucks',
                        'ParallelBars', 'PizzaTossing', 'PlayingCello', 'PlayingDaf', 'PlayingDhol', 'PlayingFlute',
                        'PlayingGuitar', 'PlayingPiano', 'PlayingSitar', 'PlayingTabla', 'PlayingViolin',
                        'PoleVault', 'PommelHorse', 'PullUps', 'Punch', 'PushUps', 'Rafting', 'RockClimbingIndoor',
                        'RopeClimbing', 'Rowing', 'SalsaSpin', 'ShavingBeard', 'Shotput', 'SkateBoarding',
                        'Skiing', 'Skijet', 'SkyDiving', 'SoccerJuggling', 'SoccerPenalty', 'StillRings',
                        'SumoWrestling', 'Surfing', 'Swing', 'TableTennisShot', 'TaiChi', 'TennisSwing',
                        'ThrowDiscus', 'TrampolineJumping', 'Typing', 'UnevenBars', 'VolleyballSpiking',
                        'WalkingWithDog', 'WallPushups', 'WritingOnBoard', 'YoYo']