#!/usr/local/bin/python3.8
# -*- coding: utf-8 -*-

# Copyright: (c) 2016, Ben Doherty <bendohmv@gmail.com>
# Sponsored by Oomph, Inc. http://www.oomphinc.com
# Copyright: (c) 2017, Ansible Project
# GNU General Public License v3.0+ (see COPYING or https://www.gnu.org/licenses/gpl-3.0.txt)

from __future__ import absolute_import, division, print_function
__metaclass__ = type

DOCUMENTATION = r'''
---
module: archive
short_description: Creates a compressed archive of one or more files or trees
extends_documentation_fragment: files
description:
    - Creates or extends an archive.
    - The source and archive are on the remote host, and the archive I(is not) copied to the local host.
    - Source files can be deleted after archival by specifying I(remove=True).
options:
  path:
    description:
      - Remote absolute path, glob, or list of paths or globs for the file or files to compress or archive.
    type: list
    elements: path
    required: true
  format:
    description:
      - The type of compression to use.
      - Support for xz was added in Ansible 2.5.
    type: str
    choices: [ bz2, gz, tar, xz, zip ]
    default: gz
  dest:
    description:
      - The file name of the destination archive. The parent directory must exist on the remote host.
      - This is required when C(path) refers to multiple files by either specifying a glob, a directory or multiple paths in a list.
      - If the destination archive already exists, it will be truncated and overwritten.
    type: path
  exclude_path:
    description:
      - Remote absolute path, glob, or list of paths or globs for the file or files to exclude from I(path) list and glob expansion.
      - Use I(exclusion_patterns) to instead exclude files or subdirectories below any of the paths from the I(path) list.
    type: list
    elements: path
    default: []
  exclusion_patterns:
    description:
      - Glob style patterns to exclude files or directories from the resulting archive.
      - This differs from I(exclude_path) which applies only to the source paths from I(path).
    type: list
    elements: path
    version_added: 3.2.0
  force_archive:
    description:
      - Allows you to force the module to treat this as an archive even if only a single file is specified.
      - By default when a single file is specified it is compressed only (not archived).
      - Enable this if you want to use M(ansible.builtin.unarchive) on an archive of a single file created with this module.
    type: bool
    default: false
  remove:
    description:
      - Remove any added source files and trees after adding to archive.
    type: bool
    default: no
notes:
    - Requires tarfile, zipfile, gzip and bzip2 packages on target host.
    - Requires lzma or backports.lzma if using xz format.
    - Can produce I(gzip), I(bzip2), I(lzma) and I(zip) compressed files or archives.
seealso:
- module: ansible.builtin.unarchive
author:
- Ben Doherty (@bendoh)
'''

EXAMPLES = r'''
- name: Compress directory /path/to/foo/ into /path/to/foo.tgz
  community.general.archive:
    path: /path/to/foo
    dest: /path/to/foo.tgz

- name: Compress regular file /path/to/foo into /path/to/foo.gz and remove it
  community.general.archive:
    path: /path/to/foo
    remove: yes

- name: Create a zip archive of /path/to/foo
  community.general.archive:
    path: /path/to/foo
    format: zip

- name: Create a bz2 archive of multiple files, rooted at /path
  community.general.archive:
    path:
    - /path/to/foo
    - /path/wong/foo
    dest: /path/file.tar.bz2
    format: bz2

- name: Create a bz2 archive of a globbed path, while excluding specific dirnames
  community.general.archive:
    path:
    - /path/to/foo/*
    dest: /path/file.tar.bz2
    exclude_path:
    - /path/to/foo/bar
    - /path/to/foo/baz
    format: bz2

- name: Create a bz2 archive of a globbed path, while excluding a glob of dirnames
  community.general.archive:
    path:
    - /path/to/foo/*
    dest: /path/file.tar.bz2
    exclude_path:
    - /path/to/foo/ba*
    format: bz2

- name: Use gzip to compress a single archive (i.e don't archive it first with tar)
  community.general.archive:
    path: /path/to/foo/single.file
    dest: /path/file.gz
    format: gz

- name: Create a tar.gz archive of a single file.
  community.general.archive:
    path: /path/to/foo/single.file
    dest: /path/file.tar.gz
    format: gz
    force_archive: true
'''

RETURN = r'''
state:
    description:
        The state of the input C(path).
    type: str
    returned: always
dest_state:
    description:
      - The state of the I(dest) file.
      - C(absent) when the file does not exist.
      - C(archive) when the file is an archive.
      - C(compress) when the file is compressed, but not an archive.
      - C(incomplete) when the file is an archive, but some files under I(path) were not found.
    type: str
    returned: success
    version_added: 3.4.0
missing:
    description: Any files that were missing from the source.
    type: list
    returned: success
archived:
    description: Any files that were compressed or added to the archive.
    type: list
    returned: success
arcroot:
    description: The archive root.
    type: str
    returned: always
expanded_paths:
    description: The list of matching paths from paths argument.
    type: list
    returned: always
expanded_exclude_paths:
    description: The list of matching exclude paths from the exclude_path argument.
    type: list
    returned: always
'''

import abc
import bz2
import glob
import gzip
import io
import os
import re
import shutil
import tarfile
import zipfile
from fnmatch import fnmatch
from sys import version_info
from traceback import format_exc

from ansible.module_utils.basic import AnsibleModule, missing_required_lib
from ansible.module_utils.common.text.converters import to_bytes, to_native
from ansible.module_utils import six


LZMA_IMP_ERR = None
if six.PY3:
    try:
        import lzma
        HAS_LZMA = True
    except ImportError:
        LZMA_IMP_ERR = format_exc()
        HAS_LZMA = False
else:
    try:
        from backports import lzma
        HAS_LZMA = True
    except ImportError:
        LZMA_IMP_ERR = format_exc()
        HAS_LZMA = False

# True on Python 2.7 and on every later interpreter (including Python 3);
# only False on interpreters older than 2.7.
PY27 = version_info[0:2] >= (2, 7)

STATE_ABSENT = 'absent'
STATE_ARCHIVED = 'archive'
STATE_COMPRESSED = 'compress'
STATE_INCOMPLETE = 'incomplete'


def common_path(paths):
    """Return the deepest directory shared by the parents of all ``paths``.

    ``os.path.commonprefix`` compares character by character, so each parent
    directory is first given a trailing separator and the raw prefix is then
    trimmed back to a directory boundary with ``os.path.dirname``.  The result
    carries a trailing separator and has the same type (bytes or text) as the
    first element of ``paths``.
    """
    empty = b'' if paths and isinstance(paths[0], six.binary_type) else ''

    return os.path.join(
        os.path.dirname(os.path.commonprefix([os.path.join(os.path.dirname(p), empty) for p in paths])), empty
    )


def expand_paths(paths):
    """Convert ``paths`` to bytes and expand the ones that look like globs.

    A path is treated as a glob when it contains ``*`` or ``?``.

    Returns a tuple ``(expanded, is_globby)`` where ``expanded`` is the list of
    byte paths (glob matches replace the pattern that produced them) and
    ``is_globby`` is True when at least one input contained a glob character.
    """
    expanded_path = []
    is_globby = False
    for path in paths:
        b_path = _to_bytes(path)
        if b'*' in b_path or b'?' in b_path:
            e_paths = glob.glob(b_path)
            is_globby = True
        else:
            e_paths = [b_path]
        expanded_path.extend(e_paths)
    return expanded_path, is_globby


def legacy_filter(path, exclusion_patterns):
    """Return True when ``path`` matches any of ``exclusion_patterns``."""
    return matches_exclusion_patterns(path, exclusion_patterns)


def matches_exclusion_patterns(path, exclusion_patterns):
    """Return True when ``path`` matches any fnmatch-style pattern given."""
    return any(fnmatch(path, p) for p in exclusion_patterns)


def is_archive(path):
    """Return a match object when the basename of the byte path ``path`` ends
    in a known archive extension (case-insensitive), otherwise None."""
    return re.search(br'\.(tar|tar\.(gz|bz2|xz)|tgz|tbz2|zip)$', os.path.basename(path), re.IGNORECASE)


def strip_prefix(prefix, string):
    """Return ``string`` without a leading ``prefix``; unchanged if absent."""
    return string[len(prefix):] if string.startswith(prefix) else string


def _to_bytes(s):
    """Convert ``s`` to bytes using the ``surrogate_or_strict`` error handler."""
    return to_bytes(s, errors='surrogate_or_strict')


def _to_native(s):
    """Convert ``s`` to the native string type using ``surrogate_or_strict``."""
    return to_native(s, errors='surrogate_or_strict')


def _to_native_ascii(s):
    """Convert ``s`` to the native string type with the ASCII codec and the
    ``surrogate_or_strict`` error handler."""
    return to_native(s, errors='surrogate_or_strict', encoding='ascii')


@six.add_metaclass(abc.ABCMeta)
class Archive(object):
    """Shared state and workflow for building an archive or compressed file.

    Subclasses supply the container specific operations ``open``, ``close``,
    ``contains`` and ``_add``.
    """

    def __init__(self, module):
        """Read the module parameters and resolve what has to be archived.

        Fails the module when no source path remains after exclusion, or when
        an archive has to be produced but no ``dest`` was given.
        """
        self.module = module

        self.destination = _to_bytes(module.params['dest']) if module.params['dest'] else None
        self.exclusion_patterns = module.params['exclusion_patterns'] or []
        self.format = module.params['format']
        self.must_archive = module.params['force_archive']
        self.remove = module.params['remove']

        self.changed = False
        self.destination_state = STATE_ABSENT
        self.errors = []
        self.file = None
        self.successes = []
        self.targets = []
        self.not_found = []

        paths = module.params['path']
        self.expanded_paths, has_globs = expand_paths(paths)
        self.expanded_exclude_paths = expand_paths(module.params['exclude_path'])[0]

        # Sorted so that self.paths[0] and the processing order are deterministic.
        self.paths = sorted(set(self.expanded_paths) - set(self.expanded_exclude_paths))

        if not self.paths:
            module.fail_json(
                path=', '.join(paths),
                expanded_paths=_to_native(b', '.join(self.expanded_paths)),
                expanded_exclude_paths=_to_native(b', '.join(self.expanded_exclude_paths)),
                msg='Error, no source paths were found'
            )

        self.root = common_path(self.paths)

        # Globs, directories and multiple paths always require a real archive.
        if not self.must_archive:
            self.must_archive = any([has_globs, os.path.isdir(self.paths[0]), len(self.paths) > 1])

        # A single compressed file defaults to "<source>.<format>".
        if not self.destination and not self.must_archive:
            self.destination = b'%s.%s' % (self.paths[0], _to_bytes(self.format))

        if self.must_archive and not self.destination:
            module.fail_json(
                dest=_to_native(self.destination),
                path=', '.join(paths),
                msg='Error, must specify "dest" when archiving multiple files or trees'
            )

        if self.remove:
            self._check_removal_safety()

        self.original_size = self.destination_size()

    def add(self, path, archive_name):
        """Add ``path`` to the open archive under ``archive_name``.

        The path is recorded in ``self.successes`` only when the archive
        reports that it contains the entry afterwards (excluded entries are
        therefore not recorded).  Any exception is collected in
        ``self.errors`` instead of being raised.
        """
        try:
            self._add(_to_native_ascii(path), _to_native(archive_name))
            if self.contains(_to_native(archive_name)):
                self.successes.append(path)
        except Exception as e:
            self.errors.append('%s: %s' % (_to_native_ascii(path), _to_native(e)))

    def add_single_target(self, path):
        """Archive (zip/tar) or merely compress (gz/bz2/xz) a single file.

        Fails the module when the compressed file cannot be written.
        """
        if self.format in ('zip', 'tar'):
            self.open()
            self.add(path, strip_prefix(self.root, path))
            self.close()
            self.destination_state = STATE_ARCHIVED
        else:
            try:
                f_out = self._open_compressed_file(_to_native_ascii(self.destination), 'wb')
                # try/finally rather than ``with`` on f_out: the output handle must be
                # closed even when reading the source fails, and this form does not
                # depend on the compressed file object being a context manager.
                try:
                    with open(path, 'rb') as f_in:
                        shutil.copyfileobj(f_in, f_out)
                finally:
                    f_out.close()
                self.successes.append(path)
                self.destination_state = STATE_COMPRESSED
            except (IOError, OSError) as e:
                self.module.fail_json(
                    path=_to_native(path),
                    dest=_to_native(self.destination),
                    msg='Unable to write to compressed file: %s' % _to_native(e), exception=format_exc()
                )

    def add_targets(self):
        """Add every found target to the archive, walking directories.

        Fails the module when walking/adding raises, or when individual
        entries were recorded in ``self.errors``.
        """
        self.open()
        try:
            for target in self.targets:
                if os.path.isdir(target):
                    for directory_path, directory_names, file_names in os.walk(target, topdown=True):
                        for directory_name in directory_names:
                            full_path = os.path.join(directory_path, directory_name)
                            self.add(full_path, strip_prefix(self.root, full_path))

                        for file_name in file_names:
                            full_path = os.path.join(directory_path, file_name)
                            self.add(full_path, strip_prefix(self.root, full_path))
                else:
                    self.add(target, strip_prefix(self.root, target))
        except Exception as e:
            if self.format in ('zip', 'tar'):
                archive_format = self.format
            else:
                archive_format = 'tar.' + self.format
            self.module.fail_json(
                msg='Error when writing %s archive at %s: %s' % (
                    archive_format, _to_native(self.destination), _to_native(e)
                ),
                exception=format_exc()
            )
        self.close()

        if self.errors:
            self.module.fail_json(
                msg='Errors when writing archive at %s: %s' % (_to_native(self.destination), '; '.join(self.errors))
            )

    def compare_with_original(self):
        """Mark the result as changed when the destination size differs from
        the size recorded at construction time."""
        self.changed |= self.original_size != self.destination_size()

    def destination_exists(self):
        """Return True when a destination is set and exists on disk."""
        return bool(self.destination and os.path.exists(self.destination))

    def destination_readable(self):
        """Return True when a destination is set and is readable."""
        return bool(self.destination and os.access(self.destination, os.R_OK))

    def destination_size(self):
        """Return the size of the destination in bytes, or 0 when it does not exist."""
        return os.path.getsize(self.destination) if self.destination_exists() else 0

    def find_targets(self):
        """Split ``self.paths`` into existing ``targets`` and ``not_found``.

        ``os.path.lexists`` is used, so broken symlinks count as existing.
        """
        for path in self.paths:
            if not os.path.lexists(path):
                self.not_found.append(path)
            else:
                self.targets.append(path)

    def has_targets(self):
        """Return True when at least one source path exists."""
        return bool(self.targets)

    def has_unfound_targets(self):
        """Return True when at least one source path was missing."""
        return bool(self.not_found)

    def remove_single_target(self, path):
        """Delete the file ``path``; fail the module when that is not possible."""
        try:
            os.remove(path)
        except OSError as e:
            self.module.fail_json(
                path=_to_native(path),
                msg='Unable to remove source file: %s' % _to_native(e), exception=format_exc()
            )

    def remove_targets(self):
        """Delete everything that was archived, then any source directories
        that are still present.  Fails the module when a deletion fails."""
        for path in self.successes:
            if os.path.exists(path):
                try:
                    if os.path.isdir(path):
                        shutil.rmtree(path)
                    else:
                        os.remove(path)
                except OSError:
                    self.errors.append(_to_native(path))
        # Second pass: top-level source directories themselves are not
        # necessarily part of self.successes, so remove what is left of them.
        for path in self.paths:
            try:
                if os.path.isdir(path):
                    shutil.rmtree(path)
            except OSError:
                self.errors.append(_to_native(path))

        if self.errors:
            self.module.fail_json(
                dest=_to_native(self.destination), msg='Error deleting some source files: ', files=self.errors
            )

    def update_permissions(self):
        """Apply the common file arguments (owner, mode, ...) to the destination."""
        try:
            file_args = self.module.load_file_common_arguments(self.module.params, path=self.destination)
        except TypeError:
            # The path argument is only supported in Ansible-base 2.10+. Fall back to
            # pre-2.10 behavior for older Ansible versions.
            self.module.params['path'] = self.destination
            file_args = self.module.load_file_common_arguments(self.module.params)

        self.changed = self.module.set_fs_attributes_if_different(file_args, self.changed)

    @property
    def result(self):
        """Return the dictionary of values handed to ``module.exit_json``."""
        return {
            'archived': [_to_native(p) for p in self.successes],
            'dest': _to_native(self.destination),
            'dest_state': self.destination_state,
            'changed': self.changed,
            'arcroot': _to_native(self.root),
            'missing': [_to_native(p) for p in self.not_found],
            'expanded_paths': [_to_native(p) for p in self.expanded_paths],
            'expanded_exclude_paths': [_to_native(p) for p in self.expanded_exclude_paths],
        }

    def _check_removal_safety(self):
        """Fail the module when the destination lies inside a source directory,
        because removing the sources would then delete the new archive."""
        for path in self.paths:
            if os.path.isdir(path) and self.destination.startswith(os.path.join(path, b'')):
                self.module.fail_json(
                    # Converted like every other fail_json argument in this
                    # module; self.paths holds bytes.
                    path=_to_native(b', '.join(self.paths)),
                    msg='Error, created archive can not be contained in source paths when remove=true'
                )

    def _open_compressed_file(self, path, mode):
        """Open ``path`` with the compressor matching ``self.format``.

        Fails the module for formats other than gz, bz2 and xz.
        """
        f = None
        if self.format == 'gz':
            f = gzip.open(path, mode)
        elif self.format == 'bz2':
            f = bz2.BZ2File(path, mode)
        elif self.format == 'xz':
            f = lzma.LZMAFile(path, mode)
        else:
            self.module.fail_json(msg="%s is not a valid format" % self.format)

        return f

    @abc.abstractmethod
    def close(self):
        """Finish and close the archive opened by ``open``."""
        pass

    @abc.abstractmethod
    def contains(self, name):
        """Return True when the open archive has an entry called ``name``."""
        pass

    @abc.abstractmethod
    def open(self):
        """Open the destination archive for writing and store it in ``self.file``."""
        pass

    @abc.abstractmethod
    def _add(self, path, archive_name):
        """Write ``path`` into the open archive as ``archive_name``."""
        pass


class ZipArchive(Archive):
    """Archive implementation backed by ``zipfile.ZipFile``."""

    def __init__(self, module):
        super(ZipArchive, self).__init__(module)

    def close(self):
        """Close the zip file."""
        self.file.close()

    def contains(self, name):
        """Return True when the zip file has a member called ``name``."""
        try:
            self.file.getinfo(name)
        except KeyError:
            return False
        return True

    def open(self):
        """Create the destination zip file (deflate compression, zip64 allowed)."""
        self.file = zipfile.ZipFile(_to_native_ascii(self.destination), 'w', zipfile.ZIP_DEFLATED, True)

    def _add(self, path, archive_name):
        """Write ``path`` as ``archive_name`` unless the source path is excluded."""
        if not legacy_filter(path, self.exclusion_patterns):
            self.file.write(path, archive_name)


class TarArchive(Archive):
    """Archive implementation backed by ``tarfile`` (plain, gz, bz2 or xz)."""

    def __init__(self, module):
        super(TarArchive, self).__init__(module)
        # In-memory buffer that holds the uncompressed tar when format is xz.
        self.fileIO = None

    def close(self):
        """Close the tar file; for xz, compress the in-memory tar to the destination."""
        self.file.close()
        if self.format == 'xz':
            with lzma.open(_to_native(self.destination), 'wb') as f:
                f.write(self.fileIO.getvalue())
            self.fileIO.close()

    def contains(self, name):
        """Return True when the tar file has a member called ``name``."""
        try:
            self.file.getmember(name)
        except KeyError:
            return False
        return True

    def open(self):
        """Open the destination tar file for writing according to ``self.format``.

        Fails the module for formats other than gz, bz2, xz and tar.
        """
        if self.format in ('gz', 'bz2'):
            self.file = tarfile.open(_to_native_ascii(self.destination), 'w|' + self.format)
        # python3 tarfile module allows xz format but for python2 we have to create the tarfile
        # in memory and then compress it with lzma.
        elif self.format == 'xz':
            self.fileIO = io.BytesIO()
            self.file = tarfile.open(fileobj=self.fileIO, mode='w')
        elif self.format == 'tar':
            self.file = tarfile.open(_to_native_ascii(self.destination), 'w')
        else:
            self.module.fail_json(msg="%s is not a valid archive format" % self.format)

    def _add(self, path, archive_name):
        """Add ``path`` (non-recursively) as ``archive_name``, honouring the
        exclusion patterns through whichever hook ``TarFile.add`` offers."""
        def py27_filter(tarinfo):
            # Matched against the name inside the archive; None drops the entry.
            return None if matches_exclusion_patterns(tarinfo.name, self.exclusion_patterns) else tarinfo

        def py26_filter(path):
            return legacy_filter(path, self.exclusion_patterns)

        if PY27:
            self.file.add(path, archive_name, recursive=False, filter=py27_filter)
        else:
            self.file.add(path, archive_name, recursive=False, exclude=py26_filter)


def get_archive(module):
    """Return a ``ZipArchive`` for the zip format and a ``TarArchive`` otherwise."""
    if module.params['format'] == 'zip':
        return ZipArchive(module)
    else:
        return TarArchive(module)


def main():
    """Module entry point: parse arguments, build the archive, report the result."""
    module = AnsibleModule(
        argument_spec=dict(
            path=dict(type='list', elements='path', required=True),
            format=dict(type='str', default='gz', choices=['bz2', 'gz', 'tar', 'xz', 'zip']),
            dest=dict(type='path'),
            exclude_path=dict(type='list', elements='path', default=[]),
            exclusion_patterns=dict(type='list', elements='path'),
            force_archive=dict(type='bool', default=False),
            remove=dict(type='bool', default=False),
        ),
        add_file_common_args=True,
        supports_check_mode=True,
    )

    if not HAS_LZMA and module.params['format'] == 'xz':
        module.fail_json(
            msg=missing_required_lib("lzma or backports.lzma", reason="when using xz format"), exception=LZMA_IMP_ERR
        )

    check_mode = module.check_mode

    archive = get_archive(module)
    archive.find_targets()

    if not archive.has_targets():
        # Nothing to add: only report the state of an already existing destination.
        if archive.destination_exists():
            archive.destination_state = STATE_ARCHIVED if is_archive(archive.destination) else STATE_COMPRESSED
    elif archive.must_archive:
        if check_mode:
            archive.changed = True
        else:
            archive.add_targets()
            archive.destination_state = STATE_INCOMPLETE if archive.has_unfound_targets() else STATE_ARCHIVED
            archive.compare_with_original()
            if archive.remove:
                archive.remove_targets()
    else:
        if check_mode:
            if not archive.destination_exists():
                archive.changed = True
        else:
            path = archive.paths[0]
            archive.add_single_target(path)
            archive.compare_with_original()
            if archive.remove:
                archive.remove_single_target(path)

    if archive.destination_exists():
        archive.update_permissions()

    module.exit_json(**archive.result)


if __name__ == '__main__':
    main()