1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3
4from __future__ import print_function, division
5import json
6import argparse
7from os import walk
8from os import path as osp
9from zipfile import ZipFile
10from gluoncv.utils import download, makedirs
11
12
def parse_args():
    """Parse the command-line options of this script.

    Returns
    -------
    argparse.Namespace
        Namespace with ``download_dir`` (str, default
        ``'~/.mxnet/datasets/'``) and ``no_download`` (bool, ``False``
        unless ``--no-download`` is given).
    """
    cli = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description='Initialize Market1501 dataset.',
        epilog='Example: python market1501.py',
    )
    # Where the archive is stored and unpacked.
    cli.add_argument(
        '--download-dir',
        type=str,
        default='~/.mxnet/datasets/',
        help='dataset directory on disk',
    )
    # Opt-out switch for fetching the archive.
    cli.add_argument(
        '--no-download',
        action='store_true',
        help='disable automatic download if set',
    )
    return cli.parse_args()
22
23
def extract(fpath, exdir):
    """Unpack every member of the zip archive ``fpath`` into ``exdir``.

    Parameters
    ----------
    fpath : str
        Path of the zip archive to read.
    exdir : str
        Directory the archive members are extracted into.
    """
    print("Extracting zip file")
    with ZipFile(fpath, 'r') as archive:
        archive.extractall(exdir)
    print("Extracting Done")
29
30
def make_list(exdir):
    """Write the training label list ``train.txt`` into ``exdir``.

    Every file ending in ``.jpg`` found under ``<exdir>/bounding_box_train``
    (sub-directories included) is parsed by splitting its name on ``'_'``:
    the first field is the person id and the second character of the second
    field is the camera id.  One line ``<file name> <label> <camera id>`` is
    written per image, where ``label`` is the zero-based position of the
    person id among the sorted distinct person ids.

    Parameters
    ----------
    exdir : str
        Directory that contains ``bounding_box_train`` and receives
        ``train.txt``.

    Raises
    ------
    IndexError
        If a ``.jpg`` file name has no second ``'_'``-separated field, or
        that field is shorter than two characters.
    """
    train_dir = osp.join(exdir, "bounding_box_train")
    images_by_pid = {}
    for _, _, files in walk(train_dir, topdown=False):
        for name in files:
            # Test the suffix, not a substring, so that names such as
            # "x.jpg.bak" are not mistaken for images.
            if not name.endswith('.jpg'):
                continue
            fields = name.split('_')
            pid = fields[0]
            pcam = fields[1][1]
            images_by_pid.setdefault(pid, []).append((name, pcam))

    with open(osp.join(exdir, 'train.txt'), 'w') as f:
        # Sort ids and file names: directory listing order (and dict order on
        # older interpreters) is arbitrary, and the labels must be the same
        # on every machine and every run.
        for label, pid in enumerate(sorted(images_by_pid)):
            for name, pcam in sorted(images_by_pid[pid]):
                f.write(name + " " + str(label) + " " + pcam + "\n")
    print("Make Label List Done")
49
50
def main():
    """Download (unless disabled), unpack and index the Market-1501 dataset.

    The archive is stored in, and extracted under, the directory given by
    ``--download-dir``.  After extraction the training label list is built
    with ``make_list``.

    Raises
    ------
    ValueError
        If ``--no-download`` is set and neither the archive nor the
        extracted dataset directory exists.
    """
    args = parse_args()
    skip_download = args.no_download

    dataset = "Market-1501-v15.09.15"
    url = "http://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/dataset/{name}.zip".format(name=dataset)

    base_dir = osp.expanduser(args.download_dir)
    makedirs(base_dir)
    archive = osp.join(base_dir, dataset + '.zip')
    target = osp.join(base_dir, dataset)

    if skip_download:
        # Without a download there must already be something to work with.
        if not (osp.exists(archive) or osp.isdir(target)):
            raise ValueError(('{} dataset archive not found, make sure it is present.'
                              ' Or you should not disable "--no-download" to grab it'.format(archive)))
    else:
        # Download by default
        print('Downloading dataset')
        download(url, archive, overwrite=False)
        print('Dataset downloaded')

    # Nothing left to do when downloading is disabled and the dataset has
    # already been extracted.
    if skip_download and osp.isdir(target):
        return

    # Extract dataset if fresh copy downloaded or existing archive is yet to be extracted
    extract(archive, base_dir)
    make_list(target)
71
72
# Run the dataset preparation only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
75