1#!/usr/bin/env python3
2#
3#
4# Collects CI artifacts from S3 storage, downloading them
5# to a local directory.
6#
7# The artifacts' folder in the S3 bucket must have the following token
8# format:
9#  <token>-[<value>]__   (repeat)
10#
11# Recognized tokens (unrecognized tokens are ignored):
12#  p       - project (e.g., "confluent-kafka-python")
13#  bld     - builder (e.g., "travis")
14#  plat    - platform ("osx", "linux", ..)
15#  arch    - arch ("x64", ..)
16#  tag     - git tag
17#  sha     - git sha
18#  bid     - builder's build-id
19#  bldtype - Release, Debug (appveyor)
20#
21# Example:
22#   p-confluent-kafka-python__bld-travis__plat-linux__tag-__sha-112130ce297656ea1c39e7c94c99286f95133a24__bid-271588764__/confluent_kafka-0.11.0-cp35-cp35m-manylinux1_x86_64.whl
23
24
25import re
26import os
27import argparse
28import boto3
29
30s3_bucket = 'librdkafka-ci-packages'
31dry_run = False
32
33class Artifact (object):
34    def __init__(self, arts, path, info=None):
35        self.path = path
36        # Remove unexpanded AppVeyor $(..) tokens from filename
37        self.fname = re.sub(r'\$\([^\)]+\)', '', os.path.basename(path))
38        slpath = os.path.join(os.path.dirname(path), self.fname)
39        if os.path.isfile(slpath):
40            # Already points to local file in correct location
41            self.lpath = slpath
42        else:
43            # Prepare download location in dlpath
44            self.lpath = os.path.join(arts.dlpath, slpath)
45
46        if info is None:
47            self.info = dict()
48        else:
49            # Assign the map and convert all keys to lower case
50            self.info = {k.lower(): v for k, v in info.items()}
51            # Rename values, e.g., 'plat':'linux' to 'plat':'debian'
52            for k,v in self.info.items():
53                rdict = packaging.rename_vals.get(k, None)
54                if rdict is not None:
55                    self.info[k] = rdict.get(v, v)
56
57        # Score value for sorting
58        self.score = 0
59
60        # AppVeyor symbol builds are of less value
61        if self.fname.find('.symbols.') != -1:
62            self.score -= 10
63
64        self.arts = arts
65        arts.artifacts.append(self)
66
67
68    def __repr__(self):
69        return self.path
70
71    def __lt__ (self, other):
72        return self.score < other.score
73
74    def download(self):
75        """ Download artifact from S3 and store in local directory .lpath.
76            If the artifact is already downloaded nothing is done. """
77        if os.path.isfile(self.lpath) and os.path.getsize(self.lpath) > 0:
78            return
79        print('Downloading %s -> %s' % (self.path, self.lpath))
80        if dry_run:
81            return
82        ldir = os.path.dirname(self.lpath)
83        if not os.path.isdir(ldir):
84            os.makedirs(ldir, 0o755)
85        self.arts.s3_bucket.download_file(self.path, self.lpath)
86
87
88class Artifacts (object):
89    def __init__(self, match, dlpath):
90        super(Artifacts, self).__init__()
91        self.match = match
92        self.artifacts = list()
93        # Download directory (make sure it ends with a path separator)
94        if not dlpath.endswith(os.path.sep):
95            dlpath = os.path.join(dlpath, '')
96        self.dlpath = dlpath
97        if not os.path.isdir(self.dlpath):
98            if not dry_run:
99                os.makedirs(self.dlpath, 0o755)
100
101    def collect_single(self, path, req_tag=True):
102        """ Collect single artifact, be it in S3 or locally.
103        :param: path string: S3 or local (relative) path
104        :param: req_tag bool: Require tag to match.
105        """
106
107        print('?  %s' % path)
108
109        # For local files, strip download path.
110        # Also ignore any parent directories.
111        if path.startswith(self.dlpath):
112            folder = os.path.basename(os.path.dirname(path[len(self.dlpath):]))
113        else:
114            folder = os.path.basename(os.path.dirname(path))
115
116        # The folder contains the tokens needed to perform
117        # matching of project, gitref, etc.
118        rinfo = re.findall(r'(?P<tag>[^-]+)-(?P<val>.*?)__', folder)
119        if rinfo is None or len(rinfo) == 0:
120            print('Incorrect folder/file name format for %s' % folder)
121            return None
122
123        info = dict(rinfo)
124
125        # Ignore AppVeyor Debug builds
126        if info.get('bldtype', '').lower() == 'debug':
127            print('Ignoring debug artifact %s' % folder)
128            return None
129
130        tag = info.get('tag', None)
131        if tag is not None and (len(tag) == 0 or tag.startswith('$(')):
132            # AppVeyor doesn't substite $(APPVEYOR_REPO_TAG_NAME)
133            # with an empty value when not set, it leaves that token
134            # in the string - so translate that to no tag.
135            del info['tag']
136
137        # Match tag or sha to gitref
138        unmatched = list()
139        for m,v in self.match.items():
140            if m not in info or info[m] != v:
141                unmatched.append(m)
142
143        # Make sure all matches were satisfied, unless this is a
144        # common artifact.
145        if info.get('p', '') != 'common' and len(unmatched) > 0:
146            print(info)
147            print('%s: %s did not match %s' % (info.get('p', None), folder, unmatched))
148            return None
149
150        return Artifact(self, path, info)
151
152
153    def collect_s3(self):
154        """ Collect and download build-artifacts from S3 based on git reference """
155        print('Collecting artifacts matching %s from S3 bucket %s' % (self.match, s3_bucket))
156        self.s3 = boto3.resource('s3')
157        self.s3_bucket = self.s3.Bucket(s3_bucket)
158        self.s3_client = boto3.client('s3')
159        for item in self.s3_client.list_objects(Bucket=s3_bucket, Prefix='librdkafka/').get('Contents'):
160            self.collect_single(item.get('Key'))
161
162        for a in self.artifacts:
163            a.download()
164
165    def collect_local(self, path, req_tag=True):
166        """ Collect artifacts from a local directory possibly previously
167        collected from s3 """
168        for f in [os.path.join(dp, f) for dp, dn, filenames in os.walk(path) for f in filenames]:
169            if not os.path.isfile(f):
170                continue
171            self.collect_single(f, req_tag)
172
173
174