# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.

"""
Action recognition video dataset for Kinetics-{400,600,700}
<https://deepmind.com/research/open-source/open-source-datasets/kinetics/>
"""

from typing import Any, Callable, Dict, Optional, Type

import torch
from pytorchvideo.data.clip_sampling import ClipSampler

from .labeled_video_dataset import labeled_video_dataset, LabeledVideoDataset


def Kinetics(
    data_path: str,
    clip_sampler: ClipSampler,
    video_sampler: Type[torch.utils.data.Sampler] = torch.utils.data.RandomSampler,
    transform: Optional[Callable[[Dict[str, Any]], Dict[str, Any]]] = None,
    video_path_prefix: str = "",
    decode_audio: bool = True,
    decoder: str = "pyav",
) -> LabeledVideoDataset:
    """
    A helper function to create a ``LabeledVideoDataset`` object for the
    Kinetics dataset.

    This is a thin wrapper: it records the API usage and forwards every
    argument, unchanged and in order, to ``labeled_video_dataset``.

    Args:
        data_path (str): Path to the data. The path type defines how the data
            should be read:

            * For a file path, the file is read and each line is parsed into a
              video path and label.
            * For a directory, the directory structure defines the classes
              (i.e. each subdirectory is a class).

        clip_sampler (ClipSampler): Defines how clips should be sampled from each
            video. See the clip sampling documentation for more information.

        video_sampler (Type[torch.utils.data.Sampler]): Sampler for the internal
            video container. This defines the order videos are decoded and,
            if necessary, the distributed split. Defaults to
            ``torch.utils.data.RandomSampler``.

        transform (Callable): This callable is evaluated on the clip output before
            the clip is returned. It can be used for user defined preprocessing and
            augmentations to the clips. See the ``LabeledVideoDataset`` class for
            clip output format. Defaults to ``None``.

        video_path_prefix (str): Path to root directory with the videos that are
            loaded in ``LabeledVideoDataset``. All the video paths before loading
            are prefixed with this path. Defaults to the empty string.

        decode_audio (bool): If True, also decode audio from video. Defaults to
            ``True``.

        decoder (str): Defines which type of decoder is used to decode a video.
            Defaults to ``"pyav"``.

    Returns:
        LabeledVideoDataset: The dataset object produced by
        ``labeled_video_dataset`` for the given arguments.
    """

    # Report use of this entry point through torch's API-usage logging hook.
    torch._C._log_api_usage_once("PYTORCHVIDEO.dataset.Kinetics")

    # Arguments are forwarded positionally, in the same order as this
    # function's own signature.
    return labeled_video_dataset(
        data_path,
        clip_sampler,
        video_sampler,
        transform,
        video_path_prefix,
        decode_audio,
        decoder,
    )