1#!/usr/bin/python
2# -*- coding: utf-8 -*-
3
4# Copyright (c) 2014, Chris Schmidt <chris.schmidt () contrastsecurity.com>
5#
6# Built using https://github.com/hamnis/useful-scripts/blob/master/python/download-maven-artifact
7# as a reference and starting point.
8#
9# GNU General Public License v3.0+ (see COPYING or https://www.gnu.org/licenses/gpl-3.0.txt)
10from __future__ import absolute_import, division, print_function
11__metaclass__ = type
12
13ANSIBLE_METADATA = {'metadata_version': '1.1',
14                    'status': ['preview'],
15                    'supported_by': 'community'}
16
17DOCUMENTATION = '''
18---
19module: maven_artifact
20short_description: Downloads an Artifact from a Maven Repository
21version_added: "2.0"
22description:
23    - Downloads an artifact from a maven repository given the maven coordinates provided to the module.
24    - Can retrieve snapshots or release versions of the artifact and will resolve the latest available
      version if one is not specified.
26author: "Chris Schmidt (@chrisisbeef)"
27requirements:
28    - lxml
    - boto3 if using an S3 repository (s3://...)
30options:
31    group_id:
32        description:
33            - The Maven groupId coordinate
34        required: true
35    artifact_id:
36        description:
37            - The maven artifactId coordinate
38        required: true
39    version:
40        description:
41            - The maven version coordinate
42        default: latest
43    classifier:
44        description:
45            - The maven classifier coordinate
46    extension:
47        description:
48            - The maven type/extension coordinate
49        default: jar
50    repository_url:
51        description:
52            - The URL of the Maven Repository to download from.
53            - Use s3://... if the repository is hosted on Amazon S3, added in version 2.2.
54            - Use file://... if the repository is local, added in version 2.6
55        default: http://repo1.maven.org/maven2
56    username:
57        description:
            - The username to authenticate as to the Maven Repository. Use the AWS access key ID if the repository is hosted on S3
59        aliases: [ "aws_secret_key" ]
60    password:
61        description:
            - The password to authenticate with to the Maven Repository. Use the AWS secret access key if the repository is hosted on S3
63        aliases: [ "aws_secret_access_key" ]
64    headers:
65        description:
66            - Add custom HTTP headers to a request in hash/dict format.
67        type: dict
68        version_added: "2.8"
69    dest:
70        description:
71            - The path where the artifact should be written to
72            - If file mode or ownerships are specified and destination path already exists, they affect the downloaded file
73        required: true
74    state:
75        description:
76            - The desired state of the artifact
77        default: present
78        choices: [present,absent]
79    timeout:
80        description:
81            - Specifies a timeout in seconds for the connection attempt
82        default: 10
83        version_added: "2.3"
84    validate_certs:
85        description:
86            - If C(no), SSL certificates will not be validated. This should only be set to C(no) when no other option exists.
87        type: bool
88        default: 'yes'
89        version_added: "1.9.3"
90    keep_name:
91        description:
92            - If C(yes), the downloaded artifact's name is preserved, i.e the version number remains part of it.
93            - This option only has effect when C(dest) is a directory and C(version) is set to C(latest).
94        type: bool
95        default: 'no'
96        version_added: "2.4"
97    verify_checksum:
98        description:
99            - If C(never), the md5 checksum will never be downloaded and verified.
100            - If C(download), the md5 checksum will be downloaded and verified only after artifact download. This is the default.
101            - If C(change), the md5 checksum will be downloaded and verified if the destination already exist,
102              to verify if they are identical. This was the behaviour before 2.6. Since it downloads the md5 before (maybe)
103              downloading the artifact, and since some repository software, when acting as a proxy/cache, return a 404 error
104              if the artifact has not been cached yet, it may fail unexpectedly.
105              If you still need it, you should consider using C(always) instead - if you deal with a checksum, it is better to
106              use it to verify integrity after download.
107            - C(always) combines C(download) and C(change).
108        required: false
109        default: 'download'
110        choices: ['never', 'download', 'change', 'always']
111        version_added: "2.6"
112extends_documentation_fragment:
113    - files
114'''
115
116EXAMPLES = '''
117# Download the latest version of the JUnit framework artifact from Maven Central
118- maven_artifact:
119    group_id: junit
120    artifact_id: junit
121    dest: /tmp/junit-latest.jar
122
123# Download JUnit 4.11 from Maven Central
124- maven_artifact:
125    group_id: junit
126    artifact_id: junit
127    version: 4.11
128    dest: /tmp/junit-4.11.jar
129
130# Download an artifact from a private repository requiring authentication
131- maven_artifact:
132    group_id: com.company
133    artifact_id: library-name
134    repository_url: 'https://repo.company.com/maven'
135    username: user
136    password: pass
137    dest: /tmp/library-name-latest.jar
138
139# Download a WAR File to the Tomcat webapps directory to be deployed
140- maven_artifact:
141    group_id: com.company
142    artifact_id: web-app
143    extension: war
144    repository_url: 'https://repo.company.com/maven'
145    dest: /var/lib/tomcat7/webapps/web-app.war
146
147# Keep a downloaded artifact's name, i.e. retain the version
148- maven_artifact:
149    version: latest
150    artifact_id: spring-core
151    group_id: org.springframework
152    dest: /tmp/
153    keep_name: yes
154
155# Download the latest version of the JUnit framework artifact from Maven local
156- maven_artifact:
157    group_id: junit
158    artifact_id: junit
159    dest: /tmp/junit-latest.jar
160    repository_url: "file://{{ lookup('env','HOME') }}/.m2/repository"
161'''
162
163import hashlib
164import os
165import posixpath
166import shutil
167import io
168import tempfile
169import traceback
170
171LXML_ETREE_IMP_ERR = None
172try:
173    from lxml import etree
174    HAS_LXML_ETREE = True
175except ImportError:
176    LXML_ETREE_IMP_ERR = traceback.format_exc()
177    HAS_LXML_ETREE = False
178
179BOTO_IMP_ERR = None
180try:
181    import boto3
182    HAS_BOTO = True
183except ImportError:
184    BOTO_IMP_ERR = traceback.format_exc()
185    HAS_BOTO = False
186
187from ansible.module_utils.basic import AnsibleModule, missing_required_lib
188from ansible.module_utils.six.moves.urllib.parse import urlparse
189from ansible.module_utils.urls import fetch_url
190from ansible.module_utils._text import to_bytes, to_native, to_text
191
192
def split_pre_existing_dir(dirname):
    '''
    Split a path into the deepest ancestor that already exists and the components below it.

    Returns a ``(pre_existing_dir, new_directory_list)`` tuple. ``pre_existing_dir`` is
    ``None`` when the walk reaches a point where ``os.path.split`` can no longer shorten
    the path without having found an existing ancestor.
    '''
    parent, leaf = os.path.split(dirname)

    # The parent is already on disk: only the last component is new.
    if os.path.exists(to_bytes(parent, errors='surrogate_or_strict')):
        return parent, [leaf]

    # os.path.split() made no progress, so there is nothing further up to inspect.
    if parent == dirname:
        return None, [parent]

    # Otherwise keep climbing, then record this level's component on the way back down.
    existing_root, pending = split_pre_existing_dir(parent)
    pending.append(leaf)
    return existing_root, pending
208
209
def adjust_recursive_directory_permissions(pre_existing_dir, new_directory_list, module, directory_args, changed):
    '''
    Apply the requested file attributes to every directory named in ``new_directory_list``.

    The components are consumed from the front of ``new_directory_list`` (the list is
    emptied in place) and joined one after another onto ``pre_existing_dir``. For each
    resulting path, ``directory_args['path']`` is updated and
    ``module.set_fs_attributes_if_different`` is called. Returns the accumulated
    ``changed`` value.
    '''
    current_dir = pre_existing_dir
    while new_directory_list:
        component = new_directory_list.pop(0)
        # With no existing ancestor, the first component is itself the starting path.
        current_dir = os.path.join(current_dir, component) if current_dir else component
        directory_args['path'] = current_dir
        changed = module.set_fs_attributes_if_different(directory_args, changed)
    return changed
224
225
class Artifact(object):
    '''
    Maven coordinates of a single artifact: group id, artifact id, version,
    optional classifier and extension.
    '''

    def __init__(self, group_id, artifact_id, version, classifier='', extension='jar'):
        '''
        Store the coordinates.

        Raises ValueError when ``group_id`` or ``artifact_id`` is empty.
        A falsy ``classifier`` is stored as ``''`` and a falsy ``extension`` as ``"jar"``.
        '''
        if not group_id:
            raise ValueError("group_id must be set")
        if not artifact_id:
            raise ValueError("artifact_id must be set")

        self.group_id = group_id
        self.artifact_id = artifact_id
        self.version = version
        # parse() passes None when the coordinate string has no classifier; keep the
        # attribute a string so concatenation and equality checks behave like ''.
        self.classifier = classifier or ''
        self.extension = extension or "jar"

    def is_snapshot(self):
        '''Return True when a version is set and it ends with "SNAPSHOT".'''
        return bool(self.version) and self.version.endswith("SNAPSHOT")

    def path(self, with_version=True):
        '''
        Return the repository-relative directory of the artifact
        (group id with dots turned into slashes, then the artifact id,
        then the version when ``with_version`` is true and a version is set).
        '''
        base = posixpath.join(self.group_id.replace(".", "/"), self.artifact_id)
        if with_version and self.version:
            base = posixpath.join(base, self.version)
        return base

    def _generate_filename(self):
        '''Return the version-less default file name: artifact[-classifier].extension.'''
        # Test the classifier *before* concatenating so a missing one cannot raise.
        if self.classifier:
            return self.artifact_id + "-" + self.classifier + "." + self.extension
        return self.artifact_id + "." + self.extension

    def get_filename(self, filename=None):
        '''
        Return the file name to write to.

        With no ``filename`` the default name is returned; when ``filename`` is an
        existing directory the default name is appended to it; otherwise ``filename``
        is returned unchanged.
        '''
        if not filename:
            filename = self._generate_filename()
        elif os.path.isdir(filename):
            filename = os.path.join(filename, self._generate_filename())
        return filename

    def __str__(self):
        '''Render the coordinates as group:artifact[:extension[:classifier]]:version.'''
        if self.classifier:
            return "%s:%s:%s:%s:%s" % (self.group_id, self.artifact_id, self.extension, self.classifier, self.version)
        if self.extension != "jar":
            return "%s:%s:%s:%s" % (self.group_id, self.artifact_id, self.extension, self.version)
        return "%s:%s:%s" % (self.group_id, self.artifact_id, self.version)

    @staticmethod
    def parse(input):
        '''
        Build an Artifact from a colon-separated coordinate string.

        The first two parts are the group and artifact ids and the last part is the
        version. A 4-part string carries the extension in third position; a 5-part
        string carries extension then classifier. Returns None for fewer than 3 parts.
        '''
        parts = input.split(":")
        count = len(parts)
        if count < 3:
            return None

        extension = parts[2] if count in (4, 5) else None
        classifier = parts[3] if count == 5 else None
        return Artifact(parts[0], parts[1], parts[-1], classifier, extension)
290
291
class MavenDownloader:
    '''
    Resolves artifact URLs inside a Maven repository and downloads them.

    The repository is addressed by ``base``; when ``local`` is true the URLs are
    treated as paths on the local filesystem instead of being fetched.
    '''

    def __init__(self, module, base="http://repo1.maven.org/maven2", local=False, headers=None):
        '''
        :param module: the AnsibleModule; its ``params``, ``ansible_version`` are read here and in _request.
        :param base: repository root URL; trailing slashes are removed.
        :param local: True when the repository is a local directory (file:// URL).
        :param headers: optional extra headers handed to fetch_url.
        '''
        self.module = module
        self.base = base.rstrip("/")
        self.local = local
        self.headers = headers
        self.user_agent = "Ansible {0} maven_artifact".format(self.module.ansible_version)
        # Cache for find_latest_version_available(); filled on the first successful lookup.
        self.latest_version_found = None
        self.metadata_file_name = "maven-metadata-local.xml" if local else "maven-metadata.xml"

    def find_latest_version_available(self, artifact):
        '''
        Return the last ``<version>`` listed in the artifact's metadata file.

        The first value found is cached on the instance and returned for every later
        call. Returns None when the metadata lists no version.
        '''
        if self.latest_version_found:
            return self.latest_version_found
        path = "/%s/%s" % (artifact.path(False), self.metadata_file_name)
        content = self._getContent(self.base + path, "Failed to retrieve the maven metadata file: " + path)
        xml = etree.fromstring(content)
        versions = xml.xpath("/metadata/versioning/versions/version[last()]/text()")
        if versions:
            self.latest_version_found = versions[0]
            return versions[0]
        return None

    def find_uri_for_artifact(self, artifact):
        '''
        Return the URL of the artifact file.

        A version of "latest" is replaced *on the artifact itself* by the resolved
        version. For snapshots in a non-local repository the version-level metadata
        is read to find the unique (timestamped) file version.
        '''
        if artifact.version == "latest":
            artifact.version = self.find_latest_version_available(artifact)

        if artifact.is_snapshot():
            if self.local:
                return self._uri_for_artifact(artifact, artifact.version)
            path = "/%s/%s" % (artifact.path(), self.metadata_file_name)
            content = self._getContent(self.base + path, "Failed to retrieve the maven metadata file: " + path)
            xml = etree.fromstring(content)

            # Prefer an explicit snapshotVersion entry matching classifier and extension.
            for snapshotArtifact in xml.xpath("/metadata/versioning/snapshotVersions/snapshotVersion"):
                classifier = snapshotArtifact.xpath("classifier/text()")
                artifact_classifier = classifier[0] if classifier else ''
                extension = snapshotArtifact.xpath("extension/text()")
                artifact_extension = extension[0] if extension else ''
                if artifact_classifier == artifact.classifier and artifact_extension == artifact.extension:
                    return self._uri_for_artifact(artifact, snapshotArtifact.xpath("value/text()")[0])

            # Otherwise build the unique version from the snapshot timestamp and build number.
            timestamp_xmlpath = xml.xpath("/metadata/versioning/snapshot/timestamp/text()")
            if timestamp_xmlpath:
                timestamp = timestamp_xmlpath[0]
                build_number = xml.xpath("/metadata/versioning/snapshot/buildNumber/text()")[0]
                return self._uri_for_artifact(artifact, artifact.version.replace("SNAPSHOT", timestamp + "-" + build_number))

        return self._uri_for_artifact(artifact, artifact.version)

    def _uri_for_artifact(self, artifact, version=None):
        '''
        Join base URL, artifact directory and file name.

        ``version`` is only honoured for snapshots (where it is mandatory, otherwise
        ValueError is raised); for releases the artifact's own version is used.
        '''
        if artifact.is_snapshot() and not version:
            raise ValueError("Expected uniqueversion for snapshot artifact " + str(artifact))
        elif not artifact.is_snapshot():
            version = artifact.version

        file_name = artifact.artifact_id + "-" + version
        if artifact.classifier:
            file_name += "-" + artifact.classifier
        file_name += "." + artifact.extension
        return posixpath.join(self.base, artifact.path(), file_name)

    # for small files, directly get the full content
    def _getContent(self, url, failmsg, force=True):
        '''
        Return the whole body behind ``url`` as bytes.

        When the content cannot be obtained, raises ValueError if ``force`` is true
        and returns None otherwise.
        '''
        if self.local:
            parsed_url = urlparse(url)
            if os.path.isfile(parsed_url.path):
                with io.open(parsed_url.path, 'rb') as f:
                    return f.read()
            if force:
                raise ValueError(failmsg + " because can not find file: " + url)
            return None
        response = self._request(url, failmsg, force)
        if response:
            return response.read()
        return None

    # only for HTTP request
    def _request(self, url, failmsg, force=True):
        '''
        Fetch ``url`` through fetch_url and return the response object on HTTP 200.

        s3:// URLs are first turned into a presigned HTTP URL with boto3, using the
        module's username/password as the key pair. On any other status, raises
        ValueError if ``force`` is true and returns None otherwise.
        '''
        url_to_use = url
        parsed_url = urlparse(url)

        if parsed_url.scheme == 's3':
            bucket_name = parsed_url.netloc
            key_name = parsed_url.path[1:]
            client = boto3.client('s3', aws_access_key_id=self.module.params.get('username', ''), aws_secret_access_key=self.module.params.get('password', ''))
            url_to_use = client.generate_presigned_url('get_object', Params={'Bucket': bucket_name, 'Key': key_name}, ExpiresIn=10)

        req_timeout = self.module.params.get('timeout')

        # Hack to add parameters in the way that fetch_url expects
        self.module.params['url_username'] = self.module.params.get('username', '')
        self.module.params['url_password'] = self.module.params.get('password', '')
        self.module.params['http_agent'] = self.user_agent

        response, info = fetch_url(self.module, url_to_use, timeout=req_timeout, headers=self.headers)
        if info['status'] == 200:
            return response
        if force:
            raise ValueError(failmsg + " because of " + info['msg'] + " for URL " + url_to_use)
        return None

    def download(self, tmpdir, artifact, verify_download, filename=None):
        '''
        Download (or copy, for a local repository) the artifact into a temp file in
        ``tmpdir`` and move it to ``artifact.get_filename(filename)``.

        Returns None on success or an error message string when the local file is
        missing or, with ``verify_download`` set, the MD5 check fails. The temp file
        is removed on every failure path.
        '''
        if not artifact.version or artifact.version == "latest":
            artifact = Artifact(artifact.group_id, artifact.artifact_id, self.find_latest_version_available(artifact),
                                artifact.classifier, artifact.extension)
        url = self.find_uri_for_artifact(artifact)
        tempfd, tempname = tempfile.mkstemp(dir=tmpdir)

        try:
            # copy to temp file
            if self.local:
                # copy2() works on the path, so the descriptor from mkstemp() is not needed.
                os.close(tempfd)
                local_path = urlparse(url).path
                if not os.path.isfile(local_path):
                    os.remove(tempname)
                    return "Can not find local file: " + local_path
                shutil.copy2(local_path, tempname)
            else:
                # Wrap the descriptor first so it is closed even if the request fails.
                with os.fdopen(tempfd, 'wb') as f:
                    response = self._request(url, "Failed to download artifact " + str(artifact))
                    shutil.copyfileobj(response, f)

            if verify_download:
                invalid_md5 = self.is_invalid_md5(tempname, url)
                if invalid_md5:
                    # if verify_change was set, the previous file would be deleted
                    os.remove(tempname)
                    return invalid_md5
        except Exception:
            if os.path.exists(tempname):
                os.remove(tempname)
            raise

        # all good, now copy temp file to target
        shutil.move(tempname, artifact.get_filename(filename))
        return None

    def is_invalid_md5(self, file, remote_url):
        '''
        Compare the MD5 of ``file`` with the repository's checksum for ``remote_url``.

        For a local repository the checksum is computed from the repository file;
        otherwise ``remote_url + '.md5'`` is fetched. Returns None when the checksums
        match (ignoring letter case) and an error message string otherwise.
        '''
        if not os.path.exists(file):
            return "Path does not exist: " + file

        local_md5 = self._local_md5(file)
        if self.local:
            remote_md5 = self._local_md5(urlparse(remote_url).path)
        else:
            content = self._getContent(remote_url + '.md5', "Failed to retrieve MD5", False)
            # Check before decoding: to_text() would turn a None body into the text 'None'.
            if not content:
                return "Cannot find md5 from " + remote_url
            try:
                remote_md5 = to_text(content, errors='strict')
            except UnicodeError as e:
                return "Cannot retrieve a valid md5 from %s: %s" % (remote_url, to_native(e))

        # The .md5 file may hold only the digest or "digest filename"; keep the first token.
        tokens = remote_md5.split(None)
        if tokens:
            remote_md5 = tokens[0]

        if local_md5.lower() == remote_md5.lower():
            return None
        return "Checksum does not match: we computed " + local_md5 + " but the repository states " + remote_md5

    def _local_md5(self, file):
        '''Return the hex MD5 digest of ``file``, read in 8 KiB chunks.'''
        md5 = hashlib.md5()
        with io.open(file, 'rb') as f:
            for chunk in iter(lambda: f.read(8192), b''):
                md5.update(chunk)
        return md5.hexdigest()
460
461
def main():
    '''
    Module entry point.

    Reads the module parameters, works out the destination file (creating the
    destination directory when ``dest`` ends with a path separator), downloads the
    artifact unless a file is already there (and, with verify_checksum ``change`` or
    ``always``, its MD5 matches the repository), applies the common file attributes
    and exits through ``exit_json`` / ``fail_json``.
    '''
    module = AnsibleModule(
        argument_spec=dict(
            group_id=dict(required=True),
            artifact_id=dict(required=True),
            version=dict(default="latest"),
            classifier=dict(default=''),
            extension=dict(default='jar'),
            repository_url=dict(default=None),
            username=dict(default=None, aliases=['aws_secret_key']),
            password=dict(default=None, no_log=True, aliases=['aws_secret_access_key']),
            headers=dict(type='dict'),
            state=dict(default="present", choices=["present", "absent"]),  # TODO - Implement a "latest" state
            timeout=dict(default=10, type='int'),
            dest=dict(type="path", required=True),
            validate_certs=dict(required=False, default=True, type='bool'),
            keep_name=dict(required=False, default=False, type='bool'),
            verify_checksum=dict(required=False, default='download', choices=['never', 'download', 'change', 'always'])
        ),
        add_file_common_args=True
    )

    if not HAS_LXML_ETREE:
        module.fail_json(msg=missing_required_lib('lxml'), exception=LXML_ETREE_IMP_ERR)

    repository_url = module.params["repository_url"]
    if not repository_url:
        repository_url = "http://repo1.maven.org/maven2"
    try:
        parsed_url = urlparse(repository_url)
    except AttributeError as e:
        module.fail_json(msg='url parsing went wrong %s' % e)

    local = parsed_url.scheme == "file"

    if parsed_url.scheme == 's3' and not HAS_BOTO:
        module.fail_json(msg=missing_required_lib('boto3', reason='when using s3:// repository URLs'),
                         exception=BOTO_IMP_ERR)

    group_id = module.params["group_id"]
    artifact_id = module.params["artifact_id"]
    version = module.params["version"]
    classifier = module.params["classifier"]
    extension = module.params["extension"]
    headers = module.params['headers']
    # NOTE(review): state is only echoed back in the result; nothing below branches on it.
    state = module.params["state"]
    dest = module.params["dest"]
    b_dest = to_bytes(dest, errors='surrogate_or_strict')
    keep_name = module.params["keep_name"]
    verify_checksum = module.params["verify_checksum"]
    verify_download = verify_checksum in ['download', 'always']
    verify_change = verify_checksum in ['change', 'always']

    downloader = MavenDownloader(module, repository_url, local, headers)

    try:
        artifact = Artifact(group_id, artifact_id, version, classifier, extension)
    except ValueError as e:
        module.fail_json(msg=e.args[0])

    changed = False
    prev_state = "absent"

    # A trailing separator means "dest is a directory": create it if needed and apply
    # the requested attributes to every directory level that had to be created.
    if dest.endswith(os.sep) and not os.path.exists(b_dest):
        (pre_existing_dir, new_directory_list) = split_pre_existing_dir(dest)
        os.makedirs(b_dest)
        directory_args = module.load_file_common_arguments(module.params)
        # .get() keeps this working when the parameter is absent from module.params.
        directory_args['mode'] = module.params.get("directory_mode")
        changed = adjust_recursive_directory_permissions(pre_existing_dir, new_directory_list, module, directory_args, changed)

    if os.path.isdir(b_dest):
        version_part = version
        if keep_name and version == 'latest':
            # The metadata lookup raises ValueError on failure; report it as a module failure.
            try:
                version_part = downloader.find_latest_version_available(artifact)
            except ValueError as e:
                module.fail_json(msg=e.args[0])

        if classifier:
            dest = posixpath.join(dest, "%s-%s-%s.%s" % (artifact_id, version_part, classifier, extension))
        else:
            dest = posixpath.join(dest, "%s-%s.%s" % (artifact_id, version_part, extension))
        b_dest = to_bytes(dest, errors='surrogate_or_strict')

    if os.path.lexists(b_dest):
        if not verify_change:
            prev_state = "present"
        else:
            # Resolving the URL may need the repository metadata and raise ValueError.
            try:
                checksum_error = downloader.is_invalid_md5(dest, downloader.find_uri_for_artifact(artifact))
            except ValueError as e:
                module.fail_json(msg=e.args[0])
            else:
                if not checksum_error:
                    prev_state = "present"

    if prev_state == "absent":
        try:
            download_error = downloader.download(module.tmpdir, artifact, verify_download, b_dest)
            if download_error is None:
                changed = True
            else:
                module.fail_json(msg="Cannot retrieve the artifact to destination: " + download_error)
        except ValueError as e:
            module.fail_json(msg=e.args[0])

    module.params['dest'] = dest
    file_args = module.load_file_common_arguments(module.params)
    changed = module.set_fs_attributes_if_different(file_args, changed)
    if changed:
        module.exit_json(state=state, dest=dest, group_id=group_id, artifact_id=artifact_id, version=version, classifier=classifier,
                         extension=extension, repository_url=repository_url, changed=changed)
    else:
        module.exit_json(state=state, dest=dest, changed=changed)
571
572if __name__ == '__main__':
573    main()
574