1#!/usr/bin/python 2# -*- coding: utf-8 -*- 3 4# Copyright (c) 2014, Chris Schmidt <chris.schmidt () contrastsecurity.com> 5# 6# Built using https://github.com/hamnis/useful-scripts/blob/master/python/download-maven-artifact 7# as a reference and starting point. 8# 9# GNU General Public License v3.0+ (see COPYING or https://www.gnu.org/licenses/gpl-3.0.txt) 10from __future__ import absolute_import, division, print_function 11__metaclass__ = type 12 13ANSIBLE_METADATA = {'metadata_version': '1.1', 14 'status': ['preview'], 15 'supported_by': 'community'} 16 17DOCUMENTATION = ''' 18--- 19module: maven_artifact 20short_description: Downloads an Artifact from a Maven Repository 21version_added: "2.0" 22description: 23 - Downloads an artifact from a maven repository given the maven coordinates provided to the module. 24 - Can retrieve snapshots or release versions of the artifact and will resolve the latest available 25 version if one is not available. 26author: "Chris Schmidt (@chrisisbeef)" 27requirements: 28 - lxml 29 - boto if using a S3 repository (s3://...) 30options: 31 group_id: 32 description: 33 - The Maven groupId coordinate 34 required: true 35 artifact_id: 36 description: 37 - The maven artifactId coordinate 38 required: true 39 version: 40 description: 41 - The maven version coordinate 42 default: latest 43 classifier: 44 description: 45 - The maven classifier coordinate 46 extension: 47 description: 48 - The maven type/extension coordinate 49 default: jar 50 repository_url: 51 description: 52 - The URL of the Maven Repository to download from. 53 - Use s3://... if the repository is hosted on Amazon S3, added in version 2.2. 54 - Use file://... if the repository is local, added in version 2.6 55 default: http://repo1.maven.org/maven2 56 username: 57 description: 58 - The username to authenticate as to the Maven Repository. Use AWS secret key of the repository is hosted on S3 59 aliases: [ "aws_secret_key" ] 60 password: 61 description: 62 - The password to authenticate with to the Maven Repository. Use AWS secret access key of the repository is hosted on S3 63 aliases: [ "aws_secret_access_key" ] 64 headers: 65 description: 66 - Add custom HTTP headers to a request in hash/dict format. 67 type: dict 68 version_added: "2.8" 69 dest: 70 description: 71 - The path where the artifact should be written to 72 - If file mode or ownerships are specified and destination path already exists, they affect the downloaded file 73 required: true 74 state: 75 description: 76 - The desired state of the artifact 77 default: present 78 choices: [present,absent] 79 timeout: 80 description: 81 - Specifies a timeout in seconds for the connection attempt 82 default: 10 83 version_added: "2.3" 84 validate_certs: 85 description: 86 - If C(no), SSL certificates will not be validated. This should only be set to C(no) when no other option exists. 87 type: bool 88 default: 'yes' 89 version_added: "1.9.3" 90 keep_name: 91 description: 92 - If C(yes), the downloaded artifact's name is preserved, i.e the version number remains part of it. 93 - This option only has effect when C(dest) is a directory and C(version) is set to C(latest). 94 type: bool 95 default: 'no' 96 version_added: "2.4" 97 verify_checksum: 98 description: 99 - If C(never), the md5 checksum will never be downloaded and verified. 100 - If C(download), the md5 checksum will be downloaded and verified only after artifact download. This is the default. 101 - If C(change), the md5 checksum will be downloaded and verified if the destination already exist, 102 to verify if they are identical. This was the behaviour before 2.6. Since it downloads the md5 before (maybe) 103 downloading the artifact, and since some repository software, when acting as a proxy/cache, return a 404 error 104 if the artifact has not been cached yet, it may fail unexpectedly. 105 If you still need it, you should consider using C(always) instead - if you deal with a checksum, it is better to 106 use it to verify integrity after download. 107 - C(always) combines C(download) and C(change). 108 required: false 109 default: 'download' 110 choices: ['never', 'download', 'change', 'always'] 111 version_added: "2.6" 112extends_documentation_fragment: 113 - files 114''' 115 116EXAMPLES = ''' 117# Download the latest version of the JUnit framework artifact from Maven Central 118- maven_artifact: 119 group_id: junit 120 artifact_id: junit 121 dest: /tmp/junit-latest.jar 122 123# Download JUnit 4.11 from Maven Central 124- maven_artifact: 125 group_id: junit 126 artifact_id: junit 127 version: 4.11 128 dest: /tmp/junit-4.11.jar 129 130# Download an artifact from a private repository requiring authentication 131- maven_artifact: 132 group_id: com.company 133 artifact_id: library-name 134 repository_url: 'https://repo.company.com/maven' 135 username: user 136 password: pass 137 dest: /tmp/library-name-latest.jar 138 139# Download a WAR File to the Tomcat webapps directory to be deployed 140- maven_artifact: 141 group_id: com.company 142 artifact_id: web-app 143 extension: war 144 repository_url: 'https://repo.company.com/maven' 145 dest: /var/lib/tomcat7/webapps/web-app.war 146 147# Keep a downloaded artifact's name, i.e. retain the version 148- maven_artifact: 149 version: latest 150 artifact_id: spring-core 151 group_id: org.springframework 152 dest: /tmp/ 153 keep_name: yes 154 155# Download the latest version of the JUnit framework artifact from Maven local 156- maven_artifact: 157 group_id: junit 158 artifact_id: junit 159 dest: /tmp/junit-latest.jar 160 repository_url: "file://{{ lookup('env','HOME') }}/.m2/repository" 161''' 162 163import hashlib 164import os 165import posixpath 166import shutil 167import io 168import tempfile 169import traceback 170 171LXML_ETREE_IMP_ERR = None 172try: 173 from lxml import etree 174 HAS_LXML_ETREE = True 175except ImportError: 176 LXML_ETREE_IMP_ERR = traceback.format_exc() 177 HAS_LXML_ETREE = False 178 179BOTO_IMP_ERR = None 180try: 181 import boto3 182 HAS_BOTO = True 183except ImportError: 184 BOTO_IMP_ERR = traceback.format_exc() 185 HAS_BOTO = False 186 187from ansible.module_utils.basic import AnsibleModule, missing_required_lib 188from ansible.module_utils.six.moves.urllib.parse import urlparse 189from ansible.module_utils.urls import fetch_url 190from ansible.module_utils._text import to_bytes, to_native, to_text 191 192 193def split_pre_existing_dir(dirname): 194 ''' 195 Return the first pre-existing directory and a list of the new directories that will be created. 196 ''' 197 head, tail = os.path.split(dirname) 198 b_head = to_bytes(head, errors='surrogate_or_strict') 199 if not os.path.exists(b_head): 200 if head == dirname: 201 return None, [head] 202 else: 203 (pre_existing_dir, new_directory_list) = split_pre_existing_dir(head) 204 else: 205 return head, [tail] 206 new_directory_list.append(tail) 207 return pre_existing_dir, new_directory_list 208 209 210def adjust_recursive_directory_permissions(pre_existing_dir, new_directory_list, module, directory_args, changed): 211 ''' 212 Walk the new directories list and make sure that permissions are as we would expect 213 ''' 214 if new_directory_list: 215 first_sub_dir = new_directory_list.pop(0) 216 if not pre_existing_dir: 217 working_dir = first_sub_dir 218 else: 219 working_dir = os.path.join(pre_existing_dir, first_sub_dir) 220 directory_args['path'] = working_dir 221 changed = module.set_fs_attributes_if_different(directory_args, changed) 222 changed = adjust_recursive_directory_permissions(working_dir, new_directory_list, module, directory_args, changed) 223 return changed 224 225 226class Artifact(object): 227 def __init__(self, group_id, artifact_id, version, classifier='', extension='jar'): 228 if not group_id: 229 raise ValueError("group_id must be set") 230 if not artifact_id: 231 raise ValueError("artifact_id must be set") 232 233 self.group_id = group_id 234 self.artifact_id = artifact_id 235 self.version = version 236 self.classifier = classifier 237 238 if not extension: 239 self.extension = "jar" 240 else: 241 self.extension = extension 242 243 def is_snapshot(self): 244 return self.version and self.version.endswith("SNAPSHOT") 245 246 def path(self, with_version=True): 247 base = posixpath.join(self.group_id.replace(".", "/"), self.artifact_id) 248 if with_version and self.version: 249 base = posixpath.join(base, self.version) 250 return base 251 252 def _generate_filename(self): 253 filename = self.artifact_id + "-" + self.classifier + "." + self.extension 254 if not self.classifier: 255 filename = self.artifact_id + "." + self.extension 256 return filename 257 258 def get_filename(self, filename=None): 259 if not filename: 260 filename = self._generate_filename() 261 elif os.path.isdir(filename): 262 filename = os.path.join(filename, self._generate_filename()) 263 return filename 264 265 def __str__(self): 266 result = "%s:%s:%s" % (self.group_id, self.artifact_id, self.version) 267 if self.classifier: 268 result = "%s:%s:%s:%s:%s" % (self.group_id, self.artifact_id, self.extension, self.classifier, self.version) 269 elif self.extension != "jar": 270 result = "%s:%s:%s:%s" % (self.group_id, self.artifact_id, self.extension, self.version) 271 return result 272 273 @staticmethod 274 def parse(input): 275 parts = input.split(":") 276 if len(parts) >= 3: 277 g = parts[0] 278 a = parts[1] 279 v = parts[len(parts) - 1] 280 t = None 281 c = None 282 if len(parts) == 4: 283 t = parts[2] 284 if len(parts) == 5: 285 t = parts[2] 286 c = parts[3] 287 return Artifact(g, a, v, c, t) 288 else: 289 return None 290 291 292class MavenDownloader: 293 def __init__(self, module, base="http://repo1.maven.org/maven2", local=False, headers=None): 294 self.module = module 295 if base.endswith("/"): 296 base = base.rstrip("/") 297 self.base = base 298 self.local = local 299 self.headers = headers 300 self.user_agent = "Ansible {0} maven_artifact".format(self.module.ansible_version) 301 self.latest_version_found = None 302 self.metadata_file_name = "maven-metadata-local.xml" if local else "maven-metadata.xml" 303 304 def find_latest_version_available(self, artifact): 305 if self.latest_version_found: 306 return self.latest_version_found 307 path = "/%s/%s" % (artifact.path(False), self.metadata_file_name) 308 content = self._getContent(self.base + path, "Failed to retrieve the maven metadata file: " + path) 309 xml = etree.fromstring(content) 310 v = xml.xpath("/metadata/versioning/versions/version[last()]/text()") 311 if v: 312 self.latest_version_found = v[0] 313 return v[0] 314 315 def find_uri_for_artifact(self, artifact): 316 if artifact.version == "latest": 317 artifact.version = self.find_latest_version_available(artifact) 318 319 if artifact.is_snapshot(): 320 if self.local: 321 return self._uri_for_artifact(artifact, artifact.version) 322 path = "/%s/%s" % (artifact.path(), self.metadata_file_name) 323 content = self._getContent(self.base + path, "Failed to retrieve the maven metadata file: " + path) 324 xml = etree.fromstring(content) 325 326 for snapshotArtifact in xml.xpath("/metadata/versioning/snapshotVersions/snapshotVersion"): 327 classifier = snapshotArtifact.xpath("classifier/text()") 328 artifact_classifier = classifier[0] if classifier else '' 329 extension = snapshotArtifact.xpath("extension/text()") 330 artifact_extension = extension[0] if extension else '' 331 if artifact_classifier == artifact.classifier and artifact_extension == artifact.extension: 332 return self._uri_for_artifact(artifact, snapshotArtifact.xpath("value/text()")[0]) 333 timestamp_xmlpath = xml.xpath("/metadata/versioning/snapshot/timestamp/text()") 334 if timestamp_xmlpath: 335 timestamp = timestamp_xmlpath[0] 336 build_number = xml.xpath("/metadata/versioning/snapshot/buildNumber/text()")[0] 337 return self._uri_for_artifact(artifact, artifact.version.replace("SNAPSHOT", timestamp + "-" + build_number)) 338 339 return self._uri_for_artifact(artifact, artifact.version) 340 341 def _uri_for_artifact(self, artifact, version=None): 342 if artifact.is_snapshot() and not version: 343 raise ValueError("Expected uniqueversion for snapshot artifact " + str(artifact)) 344 elif not artifact.is_snapshot(): 345 version = artifact.version 346 if artifact.classifier: 347 return posixpath.join(self.base, artifact.path(), artifact.artifact_id + "-" + version + "-" + artifact.classifier + "." + artifact.extension) 348 349 return posixpath.join(self.base, artifact.path(), artifact.artifact_id + "-" + version + "." + artifact.extension) 350 351 # for small files, directly get the full content 352 def _getContent(self, url, failmsg, force=True): 353 if self.local: 354 parsed_url = urlparse(url) 355 if os.path.isfile(parsed_url.path): 356 with io.open(parsed_url.path, 'rb') as f: 357 return f.read() 358 if force: 359 raise ValueError(failmsg + " because can not find file: " + url) 360 return None 361 response = self._request(url, failmsg, force) 362 if response: 363 return response.read() 364 return None 365 366 # only for HTTP request 367 def _request(self, url, failmsg, force=True): 368 url_to_use = url 369 parsed_url = urlparse(url) 370 371 if parsed_url.scheme == 's3': 372 parsed_url = urlparse(url) 373 bucket_name = parsed_url.netloc 374 key_name = parsed_url.path[1:] 375 client = boto3.client('s3', aws_access_key_id=self.module.params.get('username', ''), aws_secret_access_key=self.module.params.get('password', '')) 376 url_to_use = client.generate_presigned_url('get_object', Params={'Bucket': bucket_name, 'Key': key_name}, ExpiresIn=10) 377 378 req_timeout = self.module.params.get('timeout') 379 380 # Hack to add parameters in the way that fetch_url expects 381 self.module.params['url_username'] = self.module.params.get('username', '') 382 self.module.params['url_password'] = self.module.params.get('password', '') 383 self.module.params['http_agent'] = self.user_agent 384 385 response, info = fetch_url(self.module, url_to_use, timeout=req_timeout, headers=self.headers) 386 if info['status'] == 200: 387 return response 388 if force: 389 raise ValueError(failmsg + " because of " + info['msg'] + "for URL " + url_to_use) 390 return None 391 392 def download(self, tmpdir, artifact, verify_download, filename=None): 393 if not artifact.version or artifact.version == "latest": 394 artifact = Artifact(artifact.group_id, artifact.artifact_id, self.find_latest_version_available(artifact), 395 artifact.classifier, artifact.extension) 396 url = self.find_uri_for_artifact(artifact) 397 tempfd, tempname = tempfile.mkstemp(dir=tmpdir) 398 399 try: 400 # copy to temp file 401 if self.local: 402 parsed_url = urlparse(url) 403 if os.path.isfile(parsed_url.path): 404 shutil.copy2(parsed_url.path, tempname) 405 else: 406 return "Can not find local file: " + parsed_url.path 407 else: 408 response = self._request(url, "Failed to download artifact " + str(artifact)) 409 with os.fdopen(tempfd, 'wb') as f: 410 shutil.copyfileobj(response, f) 411 412 if verify_download: 413 invalid_md5 = self.is_invalid_md5(tempname, url) 414 if invalid_md5: 415 # if verify_change was set, the previous file would be deleted 416 os.remove(tempname) 417 return invalid_md5 418 except Exception as e: 419 os.remove(tempname) 420 raise e 421 422 # all good, now copy temp file to target 423 shutil.move(tempname, artifact.get_filename(filename)) 424 return None 425 426 def is_invalid_md5(self, file, remote_url): 427 if os.path.exists(file): 428 local_md5 = self._local_md5(file) 429 if self.local: 430 parsed_url = urlparse(remote_url) 431 remote_md5 = self._local_md5(parsed_url.path) 432 else: 433 try: 434 remote_md5 = to_text(self._getContent(remote_url + '.md5', "Failed to retrieve MD5", False), errors='strict') 435 except UnicodeError as e: 436 return "Cannot retrieve a valid md5 from %s: %s" % (remote_url, to_native(e)) 437 if(not remote_md5): 438 return "Cannot find md5 from " + remote_url 439 try: 440 # Check if remote md5 only contains md5 or md5 + filename 441 _remote_md5 = remote_md5.split(None)[0] 442 remote_md5 = _remote_md5 443 # remote_md5 is empty so we continue and keep original md5 string 444 # This should not happen since we check for remote_md5 before 445 except IndexError as e: 446 pass 447 if local_md5 == remote_md5: 448 return None 449 else: 450 return "Checksum does not match: we computed " + local_md5 + "but the repository states " + remote_md5 451 452 return "Path does not exist: " + file 453 454 def _local_md5(self, file): 455 md5 = hashlib.md5() 456 with io.open(file, 'rb') as f: 457 for chunk in iter(lambda: f.read(8192), b''): 458 md5.update(chunk) 459 return md5.hexdigest() 460 461 462def main(): 463 module = AnsibleModule( 464 argument_spec=dict( 465 group_id=dict(required=True), 466 artifact_id=dict(required=True), 467 version=dict(default="latest"), 468 classifier=dict(default=''), 469 extension=dict(default='jar'), 470 repository_url=dict(default=None), 471 username=dict(default=None, aliases=['aws_secret_key']), 472 password=dict(default=None, no_log=True, aliases=['aws_secret_access_key']), 473 headers=dict(type='dict'), 474 state=dict(default="present", choices=["present", "absent"]), # TODO - Implement a "latest" state 475 timeout=dict(default=10, type='int'), 476 dest=dict(type="path", required=True), 477 validate_certs=dict(required=False, default=True, type='bool'), 478 keep_name=dict(required=False, default=False, type='bool'), 479 verify_checksum=dict(required=False, default='download', choices=['never', 'download', 'change', 'always']) 480 ), 481 add_file_common_args=True 482 ) 483 484 if not HAS_LXML_ETREE: 485 module.fail_json(msg=missing_required_lib('lxml'), exception=LXML_ETREE_IMP_ERR) 486 487 repository_url = module.params["repository_url"] 488 if not repository_url: 489 repository_url = "http://repo1.maven.org/maven2" 490 try: 491 parsed_url = urlparse(repository_url) 492 except AttributeError as e: 493 module.fail_json(msg='url parsing went wrong %s' % e) 494 495 local = parsed_url.scheme == "file" 496 497 if parsed_url.scheme == 's3' and not HAS_BOTO: 498 module.fail_json(msg=missing_required_lib('boto3', reason='when using s3:// repository URLs'), 499 exception=BOTO_IMP_ERR) 500 501 group_id = module.params["group_id"] 502 artifact_id = module.params["artifact_id"] 503 version = module.params["version"] 504 classifier = module.params["classifier"] 505 extension = module.params["extension"] 506 headers = module.params['headers'] 507 state = module.params["state"] 508 dest = module.params["dest"] 509 b_dest = to_bytes(dest, errors='surrogate_or_strict') 510 keep_name = module.params["keep_name"] 511 verify_checksum = module.params["verify_checksum"] 512 verify_download = verify_checksum in ['download', 'always'] 513 verify_change = verify_checksum in ['change', 'always'] 514 515 downloader = MavenDownloader(module, repository_url, local, headers) 516 517 try: 518 artifact = Artifact(group_id, artifact_id, version, classifier, extension) 519 except ValueError as e: 520 module.fail_json(msg=e.args[0]) 521 522 changed = False 523 prev_state = "absent" 524 525 if dest.endswith(os.sep): 526 b_dest = to_bytes(dest, errors='surrogate_or_strict') 527 if not os.path.exists(b_dest): 528 (pre_existing_dir, new_directory_list) = split_pre_existing_dir(dest) 529 os.makedirs(b_dest) 530 directory_args = module.load_file_common_arguments(module.params) 531 directory_mode = module.params["directory_mode"] 532 if directory_mode is not None: 533 directory_args['mode'] = directory_mode 534 else: 535 directory_args['mode'] = None 536 changed = adjust_recursive_directory_permissions(pre_existing_dir, new_directory_list, module, directory_args, changed) 537 538 if os.path.isdir(b_dest): 539 version_part = version 540 if keep_name and version == 'latest': 541 version_part = downloader.find_latest_version_available(artifact) 542 543 if classifier: 544 dest = posixpath.join(dest, "%s-%s-%s.%s" % (artifact_id, version_part, classifier, extension)) 545 else: 546 dest = posixpath.join(dest, "%s-%s.%s" % (artifact_id, version_part, extension)) 547 b_dest = to_bytes(dest, errors='surrogate_or_strict') 548 549 if os.path.lexists(b_dest) and ((not verify_change) or not downloader.is_invalid_md5(dest, downloader.find_uri_for_artifact(artifact))): 550 prev_state = "present" 551 552 if prev_state == "absent": 553 try: 554 download_error = downloader.download(module.tmpdir, artifact, verify_download, b_dest) 555 if download_error is None: 556 changed = True 557 else: 558 module.fail_json(msg="Cannot retrieve the artifact to destination: " + download_error) 559 except ValueError as e: 560 module.fail_json(msg=e.args[0]) 561 562 module.params['dest'] = dest 563 file_args = module.load_file_common_arguments(module.params) 564 changed = module.set_fs_attributes_if_different(file_args, changed) 565 if changed: 566 module.exit_json(state=state, dest=dest, group_id=group_id, artifact_id=artifact_id, version=version, classifier=classifier, 567 extension=extension, repository_url=repository_url, changed=changed) 568 else: 569 module.exit_json(state=state, dest=dest, changed=changed) 570 571 572if __name__ == '__main__': 573 main() 574