1# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2
3from typing import Any, Callable, Dict, Optional, Type
4
5import torch
6from pytorchvideo.data.clip_sampling import ClipSampler
7
8from .labeled_video_dataset import labeled_video_dataset, LabeledVideoDataset
9
10
11"""
12    Action recognition video dataset for Kinetics-{400,600,700}
13    <https://deepmind.com/research/open-source/open-source-datasets/kinetics/>
14"""
15
16
def Kinetics(
    data_path: str,
    clip_sampler: ClipSampler,
    video_sampler: Type[torch.utils.data.Sampler] = torch.utils.data.RandomSampler,
    transform: Optional[Callable[[Dict[str, Any]], Dict[str, Any]]] = None,
    video_path_prefix: str = "",
    decode_audio: bool = True,
    decoder: str = "pyav",
) -> LabeledVideoDataset:
    """
    Build a ``LabeledVideoDataset`` for the Kinetics dataset.

    This is a thin convenience wrapper: every argument is forwarded, unchanged
    and in the same order, to ``labeled_video_dataset``.

    Args:
        data_path (str): Location of the data. How it is read depends on what
            the path points to:

            * A file: the file is read and each line is parsed into a video
              path and a label.
            * A directory: the directory layout defines the classes (each
              subdirectory is one class).

        clip_sampler (ClipSampler): Determines how clips are sampled from each
                video. See the clip sampling documentation for details.

        video_sampler (Type[torch.utils.data.Sampler]): Sampler used for the
                internal video container. It defines the order in which videos
                are decoded and, if necessary, the distributed split.

        transform (Callable): Callable applied to the clip output before the
                clip is returned. Use it for user defined preprocessing and
                augmentations. See the ``LabeledVideoDataset`` class for the
                clip output format.

        video_path_prefix (str): Root directory of the videos loaded in
                ``LabeledVideoDataset``. Every video path is prefixed with this
                path before loading.

        decode_audio (bool): If True, also decode audio from video.

        decoder (str): Type of decoder used to decode a video.

    Returns:
        LabeledVideoDataset: The dataset object produced by
        ``labeled_video_dataset`` for the given arguments.

    """

    # Report use of this entry point via torch's API-usage logging hook.
    torch._C._log_api_usage_once("PYTORCHVIDEO.dataset.Kinetics")

    # Arguments are passed positionally, in the order they were received.
    dataset = labeled_video_dataset(
        data_path,
        clip_sampler,
        video_sampler,
        transform,
        video_path_prefix,
        decode_audio,
        decoder,
    )
    return dataset
71