1"""This script is largely borrowed from https://github.com/open-mmlab/mmaction.
2"""
3
4import argparse
5import sys
6import os
7import os.path as osp
8import glob
9import fnmatch
10import random
11import zipfile
12from pipes import quote
13from multiprocessing import Pool, current_process
14import csv
15
def dump_frames(vid_item):
    """Decode one video into numbered JPEG frames.

    Frames are written as ``img_00001.jpg``, ``img_00002.jpg``, ... into
    ``<args.out_dir>/<vid_name>``, where ``vid_name`` is ``vid_path`` cut at
    its first ``'.'``. Settings are read from the module-level ``args``
    (``out_dir``, ``new_width``, ``new_height``), not from a parameter.

    Parameters
    ----------
    vid_item : tuple
        ``(full_path, vid_path, vid_id)``: the video file to read, its path
        relative to the source folder, and its index (unused here).

    Returns
    -------
    bool
        Always ``True``.
    """
    # Imported inside the function, so mmcv is only needed when it runs.
    from gluoncv.utils.filesystem import try_import_mmcv
    mmcv = try_import_mmcv()

    full_path, vid_path, vid_id = vid_item
    vid_name = vid_path.split('.')[0]
    out_full_path = osp.join(args.out_dir, vid_name)
    try:
        os.mkdir(out_full_path)
    except OSError:
        # Best effort; presumably the folder already exists from a prior run.
        pass

    # Frames are resized only when both target dimensions are positive.
    resize = args.new_width > 0 and args.new_height > 0
    vr = mmcv.VideoReader(full_path)
    num_frames = len(vr)
    num_written = 0
    for i in range(num_frames):
        # Fetch the frame once and reuse it instead of indexing repeatedly.
        frame = vr[i]
        if frame is None:
            print('[Warning] length inconsistent! '
                  'Early stop with {} out of {} frames'.format(
                      num_written, num_frames))
            break
        if resize:
            frame = mmcv.imresize(frame, (args.new_width, args.new_height))
        mmcv.imwrite(
            frame, '{}/img_{:05d}.jpg'.format(out_full_path, i + 1))
        num_written += 1
    # Report what was actually written, which is fewer than len(vr) after an
    # early stop.
    print('{} done with {} frames'.format(vid_name, num_written))
    sys.stdout.flush()
    return True
44
45
def run_optical_flow(vid_item, dev_id=0):
    """Run the ``extract_gpu`` executable on one video.

    The executable is looked up at ``<args.df_path>/build/extract_gpu`` and
    invoked through the shell. Output paths are placed under
    ``<args.out_dir>/<vid_name>``, where ``vid_name`` is ``vid_path`` cut at
    its first ``'.'``. Settings come from the module-level ``args``.

    Parameters
    ----------
    vid_item : tuple
        ``(full_path, vid_path, vid_id)``: the video file, its path relative
        to the source folder, and its index (used only in the log message).
    dev_id : int, optional
        GPU index to use when this function is called outside a
        multiprocessing worker. Inside a worker the index is derived from
        the worker number modulo ``args.num_gpu``.

    Returns
    -------
    bool
        Always ``True``; the exit status of the command is not checked.
    """
    full_path, vid_path, vid_id = vid_item
    vid_name = vid_path.split('.')[0]
    out_full_path = osp.join(args.out_dir, vid_name)
    try:
        os.mkdir(out_full_path)
    except OSError:
        # Best effort; presumably the folder already exists from a prior run.
        pass

    # ``_identity`` is a private multiprocessing attribute. It is empty in
    # the main process, where indexing it would raise IndexError, so in that
    # case the caller-supplied ``dev_id`` is kept.
    identity = current_process()._identity
    if identity:
        dev_id = (int(identity[0]) - 1) % args.num_gpu
    image_path = '{}/img'.format(out_full_path)
    flow_x_path = '{}/flow_x'.format(out_full_path)
    flow_y_path = '{}/flow_y'.format(out_full_path)

    cmd = osp.join(args.df_path, 'build/extract_gpu') + \
        ' -f={} -x={} -y={} -i={} -b=20 -t=1 -d={} -s=1 -o={} -w={} -h={}' \
        .format(
            quote(full_path),
            quote(flow_x_path), quote(flow_y_path), quote(image_path),
            dev_id, args.out_format, args.new_width, args.new_height)

    os.system(cmd)
    print('{} {} done'.format(vid_id, vid_name))
    sys.stdout.flush()
    return True
72
73
def run_warp_optical_flow(vid_item, dev_id=0):
    """Run the ``extract_warp_gpu`` executable on one video.

    The executable is looked up at ``<args.df_path>/build/extract_warp_gpu``
    and invoked through the shell. Output paths are placed under
    ``<args.out_dir>/<vid_name>``, where ``vid_name`` is ``vid_path`` cut at
    its first ``'.'``. Settings come from the module-level ``args``.

    Parameters
    ----------
    vid_item : tuple
        ``(full_path, vid_path, vid_id)``: the video file, its path relative
        to the source folder, and its index (used only in the log message).
    dev_id : int, optional
        GPU index to use when this function is called outside a
        multiprocessing worker. Inside a worker the index is derived from
        the worker number modulo ``args.num_gpu``.

    Returns
    -------
    bool
        Always ``True``; the exit status of the command is not checked.
    """
    full_path, vid_path, vid_id = vid_item
    vid_name = vid_path.split('.')[0]
    out_full_path = osp.join(args.out_dir, vid_name)
    try:
        os.mkdir(out_full_path)
    except OSError:
        # Best effort; presumably the folder already exists from a prior run.
        pass

    # ``_identity`` is a private multiprocessing attribute. It is empty in
    # the main process, where indexing it would raise IndexError, so in that
    # case the caller-supplied ``dev_id`` is kept.
    identity = current_process()._identity
    if identity:
        dev_id = (int(identity[0]) - 1) % args.num_gpu
    flow_x_path = '{}/flow_x'.format(out_full_path)
    flow_y_path = '{}/flow_y'.format(out_full_path)

    # The two path components must be joined, not concatenated: with the
    # default df_path './dense_flow', concatenation would produce
    # './dense_flowbuild/extract_warp_gpu'.
    cmd = osp.join(args.df_path, 'build/extract_warp_gpu') + \
        ' -f={} -x={} -y={} -b=20 -t=1 -d={} -s=1 -o={}'.format(
            quote(full_path), quote(flow_x_path), quote(flow_y_path),
            dev_id, args.out_format)

    os.system(cmd)
    print('warp on {} {} done'.format(vid_id, vid_name))
    sys.stdout.flush()
    return True
97
98
def parse_args():
    """Parse the command line and return the resulting namespace.

    All directory-like options (``download_dir``, ``src_dir``, ``out_dir``,
    ``frame_path``, ``anno_dir``, ``out_list_path``) have a leading ``~``
    expanded to the user's home directory before the namespace is returned.

    Returns
    -------
    argparse.Namespace
        The parsed options.
    """
    parser = argparse.ArgumentParser(description='prepare Kinetics400 dataset')
    add = parser.add_argument

    # Locations on disk.
    add('--download_dir', type=str, default='~/.mxnet/datasets/kinetics400')
    add('--src_dir', type=str, default='~/.mxnet/datasets/kinetics400/train')
    add('--out_dir', type=str,
        default='~/.mxnet/datasets/kinetics400/rawframes_train')
    add('--frame_path', type=str,
        default='~/.mxnet/datasets/kinetics400/rawframes_train')
    add('--anno_dir', type=str,
        default='~/.mxnet/datasets/kinetics400/annotations')
    add('--out_list_path', type=str, default='~/.mxnet/datasets/kinetics400')

    # Decoding / optical-flow options.
    add('--level', type=int, choices=[1, 2], default=2)
    add('--num_worker', type=int, default=8)
    add('--flow_type', type=str, default=None,
        choices=[None, 'tvl1', 'warp_tvl1'])
    add('--df_path', type=str, default='./dense_flow',
        help='need dense flow implementation')
    add('--out_format', type=str, default='dir', choices=['dir', 'zip'],
        help='output format')
    add('--ext', type=str, default='mp4', choices=['avi', 'mp4'],
        help='video file extensions')
    add('--new_width', type=int, default=0, help='resize image width')
    add('--new_height', type=int, default=0, help='resize image height')
    add('--num_gpu', type=int, default=8, help='number of GPU')
    add('--resume', action='store_true', default=False,
        help='resume optical flow extraction instead of overwriting')

    # File-list generation options.
    add('--dataset', type=str, choices=['ucf101', 'kinetics400'],
        default='kinetics400')
    add('--rgb_prefix', type=str, default='img_')
    add('--flow_x_prefix', type=str, default='flow_x_')
    add('--flow_y_prefix', type=str, default='flow_y_')
    add('--num_split', type=int, default=1)
    add('--subset', type=str, default='train',
        choices=['train', 'val', 'test'])
    add('--format', type=str, default='rawframes',
        choices=['rawframes', 'videos'])
    add('--shuffle', action='store_true', default=False)
    add('--tiny_dataset', action='store_true', default=False)

    # Which stages to run.
    add('--download', action='store_true', default=False)
    add('--decode_video', action='store_true', default=False)
    add('--build_file_list', action='store_true', default=False)

    parsed = parser.parse_args()

    # Expand '~' in every path option.
    for option in ('download_dir', 'src_dir', 'out_dir',
                   'frame_path', 'anno_dir', 'out_list_path'):
        setattr(parsed, option, os.path.expanduser(getattr(parsed, option)))

    return parsed
139
def decode_video(args):
    """Decode every video under ``args.src_dir`` using a worker pool.

    The output folder (and, for ``level == 2``, one sub-folder per entry of
    ``args.src_dir``) is created first. Videos are then discovered with
    ``glob`` using ``args.ext`` and dispatched to one of the worker functions
    depending on ``args.flow_type``: ``run_optical_flow`` for ``'tvl1'``,
    ``run_warp_optical_flow`` for ``'warp_tvl1'``, ``dump_frames`` otherwise.

    With ``args.resume`` set, a video is skipped when an entry with the same
    relative name (extension removed) already exists under ``args.out_dir``.
    NOTE(review): an existing output folder is treated as done even if a
    previous run was interrupted while filling it.

    Parameters
    ----------
    args : argparse.Namespace
        Options read here: ``src_dir``, ``out_dir``, ``level``, ``ext``,
        ``resume``, ``num_worker`` and ``flow_type``.
    """
    if not osp.isdir(args.out_dir):
        print('Creating folder: {}'.format(args.out_dir))
        os.makedirs(args.out_dir)
    if args.level == 2:
        for classname in os.listdir(args.src_dir):
            new_dir = osp.join(args.out_dir, classname)
            if not osp.isdir(new_dir):
                print('Creating folder: {}'.format(new_dir))
                os.makedirs(new_dir)

    print('Reading videos from folder: ', args.src_dir)
    print('Extension of videos: ', args.ext)
    if args.level == 2:
        fullpath_list = glob.glob(args.src_dir + '/*/*.' + args.ext)
        done_fullpath_list = glob.glob(args.out_dir + '/*/*')
    elif args.level == 1:
        fullpath_list = glob.glob(args.src_dir + '/*.' + args.ext)
        done_fullpath_list = glob.glob(args.out_dir + '/*')
    print('Total number of videos found: ', len(fullpath_list))

    def rel_name(path):
        # Last `level` path components: 'video.ext' or 'class/video.ext'.
        return '/'.join(path.split('/')[-args.level:])

    if args.resume:
        # Source paths (with extension, under src_dir) never equal output
        # paths (no extension, under out_dir), so compare relative names.
        # The name is cut at its first '.', the same rule the worker
        # functions use to name their output folders.
        done_names = set(rel_name(p) for p in done_fullpath_list)
        fullpath_list = [p for p in fullpath_list
                         if rel_name(p).split('.')[0] not in done_names]
        print('Resuming. number of videos to be done: ', len(fullpath_list))

    vid_list = [rel_name(p) for p in fullpath_list]

    if args.flow_type == 'tvl1':
        worker = run_optical_flow
    elif args.flow_type == 'warp_tvl1':
        worker = run_warp_optical_flow
    else:
        worker = dump_frames

    pool = Pool(args.num_worker)
    try:
        pool.map(worker, zip(
            fullpath_list, vid_list, range(len(vid_list))))
    finally:
        # Release the worker processes once the batch is finished.
        pool.close()
        pool.join()
183
def parse_ucf101_splits(args):
    """Read the three UCF101 train/test split files from ``args.anno_dir``.

    ``classInd.txt`` supplies ``<index> <class name>`` pairs; indices are
    shifted to start at 0. Each line of ``trainlistNN.txt`` /
    ``testlistNN.txt`` (NN = 01..03) starts with ``<class>/<video>.<ext>``.
    Blank lines in any of these files are ignored.

    Parameters
    ----------
    args : argparse.Namespace
        Options read here: ``anno_dir`` and ``level`` (number of trailing
        path components kept in the video id).

    Returns
    -------
    list of tuple
        Three ``(train_list, test_list)`` pairs; every list holds
        ``(video_id, label)`` tuples.
    """
    level = args.level

    with open(os.path.join(args.anno_dir, 'classInd.txt')) as f:
        class_ind = [line.split() for line in f if line.strip()]
    class_mapping = {x[1]: int(x[0]) - 1 for x in class_ind}

    def line2rec(line):
        items = line.strip().split(' ')
        vid = items[0].split('.')[0]
        vid = '/'.join(vid.split('/')[-level:])
        # The label comes from the class folder, i.e. the first component.
        label = class_mapping[items[0].split('/')[0]]
        return vid, label

    def read_list(filename):
        # Context manager so the handle is closed deterministically.
        with open(os.path.join(args.anno_dir, filename)) as f:
            return [line2rec(line) for line in f if line.strip()]

    splits = []
    for i in range(1, 4):
        train_list = read_list('trainlist{:02d}.txt'.format(i))
        test_list = read_list('testlist{:02d}.txt'.format(i))
        splits.append((train_list, test_list))
    return splits
205
def parse_kinetics_splits(args):
    """Read the Kinetics train/val/test CSV annotations.

    ``kinetics_train.csv``, ``kinetics_val.csv`` and ``kinetics_test.csv``
    are read from ``args.anno_dir``; the first line of each file is skipped
    as a header and blank rows are ignored. Class indices are assigned by
    sorting the distinct labels of the training file (quotes removed, spaces
    replaced by underscores).

    Parameters
    ----------
    args : argparse.Namespace
        Options read here: ``anno_dir`` and ``level``. With ``level == 2``
        train/val ids are prefixed with ``<label>/``.

    Returns
    -------
    tuple
        A 1-tuple holding ``(train_list, val_list, test_list)``. Every list
        contains ``(video_id, label)`` tuples; test labels are ``-1``.

    Raises
    ------
    KeyError
        If a validation row uses a label absent from the training file.
    """
    level = args.level

    def read_rows(filename):
        # Load one CSV fully and close the handle before returning.
        with open(os.path.join(args.anno_dir, filename)) as f:
            reader = csv.reader(f)
            next(reader, None)  # skip the header line
            return [row for row in reader if row]

    def convert_label(s):
        return s.replace('"', '').replace(' ', '_')

    # The training rows are read once and reused for both the class mapping
    # and the training list.
    train_rows = read_rows('kinetics_train.csv')
    labels_sorted = sorted({convert_label(row[0]) for row in train_rows})
    class_mapping = {label: i for i, label in enumerate(labels_sorted)}

    def list2rec(x, test=False):
        if test:
            # Test rows have no label column: id, start, end come first.
            vid = '{}_{:06d}_{:06d}'.format(x[0], int(x[1]), int(x[2]))
            label = -1  # label unknown
            return vid, label
        vid = '{}_{:06d}_{:06d}'.format(x[1], int(x[2]), int(x[3]))
        if level == 2:
            vid = '{}/{}'.format(convert_label(x[0]), vid)
        else:
            assert level == 1
        label = class_mapping[convert_label(x[0])]
        return vid, label

    train_list = [list2rec(x) for x in train_rows]
    val_list = [list2rec(x) for x in read_rows('kinetics_val.csv')]
    test_list = [list2rec(x, test=True)
                 for x in read_rows('kinetics_test.csv')]

    return ((train_list, val_list, test_list), )
248
def parse_directory(path, key_func=lambda x: x[-11:],
                    rgb_prefix='img_',
                    flow_x_prefix='flow_x_',
                    flow_y_prefix='flow_y_',
                    level=1):
    """Count the extracted frames in every video folder below ``path``.

    Parameters
    ----------
    path : str
        Root folder holding the frame folders.
    key_func : callable
        Maps a frame-folder path to the key used in the result; the default
        keeps the last 11 characters of the path.
    rgb_prefix, flow_x_prefix, flow_y_prefix : str
        File-name prefixes identifying RGB, flow-x and flow-y images.
    level : int
        1 when frame folders sit directly under ``path``, 2 when they are
        one folder deeper.

    Returns
    -------
    dict
        ``key -> (folder_path, rgb_count, flow_count)``.

    Raises
    ------
    ValueError
        If ``level`` is neither 1 nor 2, or a folder holds a different
        number of flow-x and flow-y images.
    """
    print('parse frames under folder {}'.format(path))
    if level == 1:
        pattern = os.path.join(path, '*')
    elif level == 2:
        pattern = os.path.join(path, '*', '*')
    else:
        raise ValueError('level can be only 1 or 2')
    frame_folders = glob.glob(pattern)

    prefixes = (rgb_prefix, flow_x_prefix, flow_y_prefix)

    # Tally RGB and flow images folder by folder.
    frame_dict = {}
    for idx, folder in enumerate(frame_folders):
        entries = os.listdir(folder)
        rgb_cnt, x_cnt, y_cnt = [
            len(fnmatch.filter(entries, prefix + '*')) for prefix in prefixes]
        key = key_func(folder)

        if x_cnt != y_cnt:
            raise ValueError(
                'x and y direction have different number '
                'of flow images. video: ' + folder)
        if idx % 200 == 0:
            print('{} videos parsed'.format(idx))

        frame_dict[key] = (folder, rgb_cnt, x_cnt)

    print('frame folder analysis done')
    return frame_dict
289
def build_split_list(split, frame_info, shuffle=False):
    """Turn the annotation sets of one split into file-list lines.

    Every set in ``split`` is processed, so a split may hold two sets
    (train, test) or more (e.g. train, val, test). For a two-set split the
    result has the shape ``((train_rgb, test_rgb), (train_flow, test_flow))``.

    Parameters
    ----------
    split : sequence
        Sets of ``(video_id, label)`` records.
    frame_info : dict
        ``video_id -> (path, rgb_count, flow_count)``. Records whose id is
        not a key are skipped.
    shuffle : bool, optional
        Shuffle each produced list; RGB and flow lists are shuffled
        independently of each other.

    Returns
    -------
    tuple
        ``(rgb_lists, flow_lists)``: two tuples with one list of lines per
        input set, in input order. A line is ``'<id> <count> <label>\\n'``
        when the RGB count is positive, ``'<id> <label>\\n'`` otherwise.
    """

    def build_set_list(set_list):
        rgb_list, flow_list = [], []
        for item in set_list:
            vid, label = item[0], item[1]
            if vid not in frame_info:
                continue
            info = frame_info[vid]
            if info[1] > 0:
                rgb_list.append('{} {} {}\n'.format(vid, info[1], label))
                flow_list.append('{} {} {}\n'.format(vid, info[2], label))
            else:
                # No frame count available: emit id and label only.
                rgb_list.append('{} {}\n'.format(vid, label))
                flow_list.append('{} {}\n'.format(vid, label))
        if shuffle:
            random.shuffle(rgb_list)
            random.shuffle(flow_list)
        return rgb_list, flow_list

    # Process every set, not only the first two, so that a third (test)
    # set is not silently dropped.
    per_set = [build_set_list(set_list) for set_list in split]
    rgb_lists = tuple(rgb for rgb, _ in per_set)
    flow_lists = tuple(flow for _, flow in per_set)
    return rgb_lists, flow_lists
318
def build_file_list(args):
    """Write the annotation list file(s) for the selected dataset.

    Frame or video information is collected from ``args.frame_path``, the
    dataset splits are parsed from the annotation files, and the resulting
    RGB lists are written into ``args.out_list_path``:

    * several splits: ``<dataset>_train_split_<i>_<format>.txt`` and
      ``<dataset>_val_split_<i>_<format>.txt`` for every split ``i``;
    * one split: ``<dataset>_<subset>_list_<format>.txt`` for the subset
      chosen by ``args.subset``.

    Parameters
    ----------
    args : argparse.Namespace
        Options read here: ``level``, ``format``, ``frame_path``,
        ``rgb_prefix``, ``flow_x_prefix``, ``flow_y_prefix``, ``dataset``,
        ``num_split``, ``out_list_path``, ``shuffle`` and ``subset``.

    Raises
    ------
    AssertionError
        If the number of parsed splits differs from ``args.num_split``.
    ValueError
        If the requested subset does not exist in the parsed split.
    """
    if args.level == 2:
        def key_func(x): return '/'.join(x.split('/')[-2:])
    else:
        def key_func(x): return x.split('/')[-1]

    if args.format == 'rawframes':
        frame_info = parse_directory(args.frame_path,
                                     key_func=key_func,
                                     rgb_prefix=args.rgb_prefix,
                                     flow_x_prefix=args.flow_x_prefix,
                                     flow_y_prefix=args.flow_y_prefix,
                                     level=args.level)
    else:
        if args.level == 1:
            video_list = glob.glob(osp.join(args.frame_path, '*'))
        elif args.level == 2:
            video_list = glob.glob(osp.join(args.frame_path, '*', '*'))
        # Strip the extension from the path *relative* to frame_path.
        # Cutting the absolute path at its first '.' would truncate any
        # frame_path that itself contains a dot (such as '~/.mxnet/...').
        frame_info = {
            osp.relpath(x, args.frame_path).split('.')[0]: (x, -1, -1)
            for x in video_list}

    if args.dataset == 'ucf101':
        split_tp = parse_ucf101_splits(args)
    elif args.dataset == 'kinetics400':
        split_tp = parse_kinetics_splits(args)
    assert len(split_tp) == args.num_split, (
        '--num_split is {} but dataset {} provides {} split(s)'.format(
            args.num_split, args.dataset, len(split_tp)))

    out_path = args.out_list_path
    if len(split_tp) > 1:
        for i, split in enumerate(split_tp):
            lists = build_split_list(split, frame_info,
                                     shuffle=args.shuffle)
            filename = '{}_train_split_{}_{}.txt'.format(args.dataset,
                                                         i + 1, args.format)
            with open(osp.join(out_path, filename), 'w') as f:
                f.writelines(lists[0][0])
            filename = '{}_val_split_{}_{}.txt'.format(args.dataset,
                                                       i + 1, args.format)
            with open(osp.join(out_path, filename), 'w') as f:
                f.writelines(lists[0][1])
    else:
        subsets = split_tp[0]
        ind = {'train': 0, 'val': 1, 'test': 2}[args.subset]
        if ind >= len(subsets):
            raise ValueError(
                'subset {!r} is not available for dataset {}'.format(
                    args.subset, args.dataset))
        # Only the requested subset is handed over, in first position, so
        # its lines are always found at index 0 of the returned RGB lists
        # (this also covers 'test', the third subset).
        lists = build_split_list((subsets[ind], []), frame_info,
                                 shuffle=args.shuffle)
        filename = '{}_{}_list_{}.txt'.format(args.dataset,
                                              args.subset,
                                              args.format)
        with open(osp.join(out_path, filename), 'w') as f:
            f.writelines(lists[0][0])
374
def download_kinetics400(args):
    """Run the download stage: annotations first, then the video step.

    ``args`` is passed unchanged to ``download_kinetics400_anno`` and
    ``download_kinetics400_videos``.
    """

    print('Start downloading Kinetics400 annotation files.')
    download_kinetics400_anno(args)
    print('Download complete.')

    download_kinetics400_videos(args)
382
def download_kinetics400_anno(args):
    """Download and unpack the Kinetics400 annotation archives.

    The train, val and test zip files are fetched with ``wget`` into
    ``args.download_dir`` (created if missing) and then extracted in place.

    Parameters
    ----------
    args : argparse.Namespace
        Only ``download_dir`` is read.

    Raises
    ------
    OSError
        If the ``wget`` executable cannot be started.
    FileNotFoundError, zipfile.BadZipFile
        If an archive is missing or unreadable after the download step.
    """
    # Function-scope import keeps this block self-contained.
    import subprocess

    target_dir = args.download_dir
    if not os.path.exists(target_dir):
        os.makedirs(target_dir)

    # Plain URLs only; the download command is added below.
    urls = (
        'https://deepmind.com/documents/66/kinetics_train.zip',
        'https://deepmind.com/documents/65/kinetics_val.zip',
        'https://deepmind.com/documents/81/kinetics_test.zip',
    )

    for url in urls:
        # An argument list (no shell) keeps a target_dir containing spaces
        # or shell metacharacters intact.
        status = subprocess.call(['wget', '-P', target_dir, url])
        if status != 0:
            print('[Warning] wget exited with status {} for {}'.format(
                status, url))

    for url in urls:
        archive = os.path.join(target_dir, url.rsplit('/', 1)[-1])
        with zipfile.ZipFile(archive) as zf:
            zf.extractall(path=target_dir)
405
def download_kinetics400_videos(args):
    """Print where to find the official Kinetics video crawler.

    Nothing is downloaded here and ``args`` is not used.
    """

    # The backslash continues the string literal onto the next line, so that
    # line's leading spaces are part of the printed message.
    print('Please refer to the official Kinetics crawler for downloading the videos \
        at https://github.com/activitynet/ActivityNet/tree/master/Crawler/Kinetics. ')
410
if __name__ == '__main__':
    # ``args`` is bound at module level on purpose: the worker functions
    # (dump_frames, run_optical_flow, run_warp_optical_flow) read it as a
    # global rather than taking it as a parameter.
    args = parse_args()

    # Stages run in this fixed order, each only when its flag is set.
    stages = (
        ('download', 'Downloading Kinetics400 dataset.', download_kinetics400),
        ('decode_video', 'Decoding videos to frames.', decode_video),
        ('build_file_list', 'Generating training files.', build_file_list),
    )
    for flag, banner, stage in stages:
        if getattr(args, flag):
            print(banner)
            stage(args)
426