1#!/usr/local/bin/python3.8
2# -*- coding: utf-8 -*-
3
4# Copyright: (c) 2016, Ben Doherty <bendohmv@gmail.com>
5# Sponsored by Oomph, Inc. http://www.oomphinc.com
6# Copyright: (c) 2017, Ansible Project
7# GNU General Public License v3.0+ (see COPYING or https://www.gnu.org/licenses/gpl-3.0.txt)
8
9from __future__ import absolute_import, division, print_function
10__metaclass__ = type
11
12DOCUMENTATION = r'''
13---
14module: archive
15short_description: Creates a compressed archive of one or more files or trees
16extends_documentation_fragment: files
17description:
18    - Creates or extends an archive.
19    - The source and archive are on the remote host, and the archive I(is not) copied to the local host.
20    - Source files can be deleted after archival by specifying I(remove=True).
21options:
22  path:
23    description:
24      - Remote absolute path, glob, or list of paths or globs for the file or files to compress or archive.
25    type: list
26    elements: path
27    required: true
28  format:
29    description:
30      - The type of compression to use.
31      - Support for xz was added in Ansible 2.5.
32    type: str
33    choices: [ bz2, gz, tar, xz, zip ]
34    default: gz
35  dest:
36    description:
      - The file name of the destination archive. The parent directory must exist on the remote host.
38      - This is required when C(path) refers to multiple files by either specifying a glob, a directory or multiple paths in a list.
39      - If the destination archive already exists, it will be truncated and overwritten.
40    type: path
41  exclude_path:
42    description:
43      - Remote absolute path, glob, or list of paths or globs for the file or files to exclude from I(path) list and glob expansion.
44      - Use I(exclusion_patterns) to instead exclude files or subdirectories below any of the paths from the I(path) list.
45    type: list
46    elements: path
47    default: []
48  exclusion_patterns:
49    description:
50      - Glob style patterns to exclude files or directories from the resulting archive.
51      - This differs from I(exclude_path) which applies only to the source paths from I(path).
52    type: list
53    elements: path
54    version_added: 3.2.0
55  force_archive:
56    description:
57      - Allows you to force the module to treat this as an archive even if only a single file is specified.
58      - By default when a single file is specified it is compressed only (not archived).
59      - Enable this if you want to use M(ansible.builtin.unarchive) on an archive of a single file created with this module.
60    type: bool
61    default: false
62  remove:
63    description:
64      - Remove any added source files and trees after adding to archive.
65    type: bool
66    default: no
67notes:
68    - Requires tarfile, zipfile, gzip and bzip2 packages on target host.
69    - Requires lzma or backports.lzma if using xz format.
70    - Can produce I(gzip), I(bzip2), I(lzma) and I(zip) compressed files or archives.
71seealso:
72- module: ansible.builtin.unarchive
73author:
74- Ben Doherty (@bendoh)
75'''
76
77EXAMPLES = r'''
78- name: Compress directory /path/to/foo/ into /path/to/foo.tgz
79  community.general.archive:
80    path: /path/to/foo
81    dest: /path/to/foo.tgz
82
83- name: Compress regular file /path/to/foo into /path/to/foo.gz and remove it
84  community.general.archive:
85    path: /path/to/foo
86    remove: yes
87
88- name: Create a zip archive of /path/to/foo
89  community.general.archive:
90    path: /path/to/foo
91    format: zip
92
93- name: Create a bz2 archive of multiple files, rooted at /path
94  community.general.archive:
95    path:
96    - /path/to/foo
97    - /path/wong/foo
98    dest: /path/file.tar.bz2
99    format: bz2
100
101- name: Create a bz2 archive of a globbed path, while excluding specific dirnames
102  community.general.archive:
103    path:
104    - /path/to/foo/*
105    dest: /path/file.tar.bz2
106    exclude_path:
107    - /path/to/foo/bar
108    - /path/to/foo/baz
109    format: bz2
110
111- name: Create a bz2 archive of a globbed path, while excluding a glob of dirnames
112  community.general.archive:
113    path:
114    - /path/to/foo/*
115    dest: /path/file.tar.bz2
116    exclude_path:
117    - /path/to/foo/ba*
118    format: bz2
119
- name: Use gzip to compress a single file (i.e. don't archive it first with tar)
121  community.general.archive:
122    path: /path/to/foo/single.file
123    dest: /path/file.gz
124    format: gz
125
126- name: Create a tar.gz archive of a single file.
127  community.general.archive:
128    path: /path/to/foo/single.file
129    dest: /path/file.tar.gz
130    format: gz
131    force_archive: true
132'''
133
134RETURN = r'''
135state:
136    description:
137        The state of the input C(path).
138    type: str
139    returned: always
140dest_state:
141    description:
142      - The state of the I(dest) file.
143      - C(absent) when the file does not exist.
144      - C(archive) when the file is an archive.
145      - C(compress) when the file is compressed, but not an archive.
146      - C(incomplete) when the file is an archive, but some files under I(path) were not found.
147    type: str
148    returned: success
149    version_added: 3.4.0
150missing:
151    description: Any files that were missing from the source.
152    type: list
153    returned: success
154archived:
155    description: Any files that were compressed or added to the archive.
156    type: list
157    returned: success
158arcroot:
159    description: The archive root.
160    type: str
161    returned: always
162expanded_paths:
163    description: The list of matching paths from paths argument.
164    type: list
165    returned: always
166expanded_exclude_paths:
167    description: The list of matching exclude paths from the exclude_path argument.
168    type: list
169    returned: always
170'''
171
172import abc
173import bz2
174import glob
175import gzip
176import io
177import os
178import re
179import shutil
180import tarfile
181import zipfile
182from fnmatch import fnmatch
183from sys import version_info
184from traceback import format_exc
185
186from ansible.module_utils.basic import AnsibleModule, missing_required_lib
187from ansible.module_utils.common.text.converters import to_bytes, to_native
188from ansible.module_utils import six
189
190
# Traceback text of a failed lzma import, or None when the import worked.
# main() passes it to fail_json as ``exception`` when the xz format is requested.
LZMA_IMP_ERR = None
if six.PY3:
    # Python 3: lzma is imported from the standard library.
    try:
        import lzma
        HAS_LZMA = True
    except ImportError:
        LZMA_IMP_ERR = format_exc()
        HAS_LZMA = False
else:
    # Python 2: lzma is only available through the ``backports`` package.
    try:
        from backports import lzma
        HAS_LZMA = True
    except ImportError:
        LZMA_IMP_ERR = format_exc()
        HAS_LZMA = False

# True on Python 2.7 and on every later version (Python 3 included).
# TarArchive._add uses it to choose between tarfile's ``filter=`` and ``exclude=`` arguments.
PY27 = version_info[0:2] >= (2, 7)

# Values reported as ``dest_state`` in the module result.
STATE_ABSENT = 'absent'
STATE_ARCHIVED = 'archive'
STATE_COMPRESSED = 'compress'
STATE_INCOMPLETE = 'incomplete'
213
214
def common_path(paths):
    """Return the deepest directory shared by every entry of ``paths``.

    The result ends with a path separator and has the same type (bytes or
    text) as the first entry; an empty ``paths`` yields an empty text string.
    """
    if paths and isinstance(paths[0], six.binary_type):
        empty = b''
    else:
        empty = ''

    # Joining with an empty component appends a trailing separator to each
    # parent directory.
    parent_dirs = [os.path.join(os.path.dirname(p), empty) for p in paths]

    # commonprefix() compares character by character, so the dirname() below
    # trims any partial directory name left at the end of the shared prefix.
    shared_prefix = os.path.commonprefix(parent_dirs)
    return os.path.join(os.path.dirname(shared_prefix), empty)
221
222
def expand_paths(paths):
    """Expand glob patterns in ``paths`` and report whether any were present.

    Every entry is converted to bytes first. Entries containing ``*`` or ``?``
    are replaced by their glob matches (possibly none); all other entries are
    kept as-is, whether or not they exist.

    :returns: tuple ``(expanded_paths, is_globby)`` where ``expanded_paths`` is
        a list of bytes paths and ``is_globby`` is True if at least one entry
        was treated as a glob.
    """
    matched = []
    saw_glob = False
    for raw_path in paths:
        b_path = _to_bytes(raw_path)
        if b'*' not in b_path and b'?' not in b_path:
            # Literal path: keep it without touching the filesystem.
            matched.append(b_path)
            continue
        saw_glob = True
        matched.extend(glob.glob(b_path))
    return matched, saw_glob
235
236
def legacy_filter(path, exclusion_patterns):
    """Return True when ``path`` matches any pattern in ``exclusion_patterns``.

    Thin alias of ``matches_exclusion_patterns``; it is the callback used by
    ``ZipArchive._add`` and by the ``exclude=`` code path of ``TarArchive._add``.
    """
    return matches_exclusion_patterns(path, exclusion_patterns)
239
240
def matches_exclusion_patterns(path, exclusion_patterns):
    """Return True if ``path`` matches at least one fnmatch-style pattern.

    :param path: the name to test.
    :param exclusion_patterns: iterable of glob-style patterns.
    :returns: True on the first matching pattern, otherwise False.
    """
    for pattern in exclusion_patterns:
        if fnmatch(path, pattern):
            return True
    return False
243
244
def is_archive(path):
    """Tell whether the file name of ``path`` carries an archive extension.

    :param path: bytes path; only its basename is inspected.
    :returns: the regular-expression match object (truthy) when the name ends
        in a tar or zip style extension, matched case-insensitively; otherwise
        None.
    """
    file_name = os.path.basename(path)
    archive_suffix = re.compile(br'\.(tar|tar\.(gz|bz2|xz)|tgz|tbz2|zip)$', re.IGNORECASE)
    return archive_suffix.search(file_name)
247
248
def strip_prefix(prefix, string):
    """Return ``string`` without a leading ``prefix``.

    When ``string`` does not start with ``prefix`` it is returned unchanged.
    """
    if string.startswith(prefix):
        return string[len(prefix):]
    return string
251
252
def _to_bytes(s):
    """Convert ``s`` with ``to_bytes`` using the ``surrogate_or_strict`` error handler."""
    return to_bytes(s, errors='surrogate_or_strict')
255
256
def _to_native(s):
    """Convert ``s`` with ``to_native`` using the ``surrogate_or_strict`` error handler."""
    return to_native(s, errors='surrogate_or_strict')
259
260
def _to_native_ascii(s):
    """Convert ``s`` with ``to_native`` using ASCII encoding and the ``surrogate_or_strict`` error handler.

    Used for filesystem paths handed to the archive and compression libraries.
    """
    return to_native(s, errors='surrogate_or_strict', encoding='ascii')
263
264
@six.add_metaclass(abc.ABCMeta)
class Archive(object):
    """Format-independent part of the archiving workflow.

    Concrete subclasses provide ``open``, ``close``, ``contains`` and ``_add``
    for a specific container format. Source paths and the destination are kept
    as bytes and converted to native strings only when reported to the module.
    """

    def __init__(self, module):
        """Resolve the module parameters into the set of paths to archive.

        Expands globs in ``path`` and ``exclude_path``, decides whether the
        result must be a real archive or may be a plain compressed file,
        derives a default destination for the single-file case, and fails the
        module when no source path remains or a required ``dest`` is missing.

        :param module: the module object whose ``params`` and ``fail_json`` are used.
        """
        self.module = module

        self.destination = _to_bytes(module.params['dest']) if module.params['dest'] else None
        self.exclusion_patterns = module.params['exclusion_patterns'] or []
        self.format = module.params['format']
        self.must_archive = module.params['force_archive']
        self.remove = module.params['remove']

        self.changed = False
        self.destination_state = STATE_ABSENT
        self.errors = []
        self.file = None
        self.successes = []
        self.targets = []
        self.not_found = []

        paths = module.params['path']
        self.expanded_paths, has_globs = expand_paths(paths)
        self.expanded_exclude_paths = expand_paths(module.params['exclude_path'])[0]

        # Excluded paths are dropped from the expanded source list; sorting
        # gives the remaining paths a deterministic order.
        self.paths = sorted(set(self.expanded_paths) - set(self.expanded_exclude_paths))

        if not self.paths:
            module.fail_json(
                path=', '.join(paths),
                expanded_paths=_to_native(b', '.join(self.expanded_paths)),
                expanded_exclude_paths=_to_native(b', '.join(self.expanded_exclude_paths)),
                msg='Error, no source paths were found'
            )

        self.root = common_path(self.paths)

        # A glob, a directory source or more than one path always requires a
        # real archive, even when force_archive was not requested.
        if not self.must_archive:
            self.must_archive = any([has_globs, os.path.isdir(self.paths[0]), len(self.paths) > 1])

        # Single compressed file without an explicit dest: "<source>.<format>".
        if not self.destination and not self.must_archive:
            self.destination = b'%s.%s' % (self.paths[0], _to_bytes(self.format))

        if self.must_archive and not self.destination:
            module.fail_json(
                dest=_to_native(self.destination),
                path=', '.join(paths),
                msg='Error, must specify "dest" when archiving multiple files or trees'
            )

        if self.remove:
            self._check_removal_safety()

        # Remembered so compare_with_original() can detect a size change.
        self.original_size = self.destination_size()

    def add(self, path, archive_name):
        """Add one filesystem entry to the open archive.

        ``path`` is appended to ``self.successes`` when the archive reports
        that it contains ``archive_name`` afterwards. Any exception is turned
        into a message appended to ``self.errors`` instead of propagating.

        :param path: bytes path of the entry on disk.
        :param archive_name: name of the entry inside the archive.
        """
        try:
            self._add(_to_native_ascii(path), _to_native(archive_name))
            if self.contains(_to_native(archive_name)):
                self.successes.append(path)
        except Exception as e:
            self.errors.append('%s: %s' % (_to_native_ascii(path), _to_native(e)))

    def add_single_target(self, path):
        """Write a single source file to the destination.

        For the ``zip`` and ``tar`` formats the file is stored in an archive;
        for the other formats its content is compressed directly, without a
        container. Fails the module when the file cannot be written.

        :param path: bytes path of the source file.
        """
        if self.format in ('zip', 'tar'):
            self.open()
            self.add(path, strip_prefix(self.root, path))
            self.close()

            # add() only records failures; report them here (as add_targets()
            # does) so a failed write is not reported as success and the
            # caller does not go on to remove the source file.
            if self.errors:
                self.module.fail_json(
                    msg='Errors when writing archive at %s: %s' % (_to_native(self.destination), '; '.join(self.errors))
                )

            self.destination_state = STATE_ARCHIVED
        else:
            try:
                f_out = self._open_compressed_file(_to_native_ascii(self.destination), 'wb')
                try:
                    with open(path, 'rb') as f_in:
                        shutil.copyfileobj(f_in, f_out)
                finally:
                    # Close the output handle even when the copy fails.
                    f_out.close()
                self.successes.append(path)
                self.destination_state = STATE_COMPRESSED
            except (IOError, OSError) as e:
                self.module.fail_json(
                    path=_to_native(path),
                    dest=_to_native(self.destination),
                    msg='Unable to write to compressed file: %s' % _to_native(e), exception=format_exc()
                )

    def add_targets(self):
        """Add every entry of ``self.targets`` to a newly opened archive.

        Directories are walked and each sub-directory and file is added
        individually, named relative to ``self.root``. Fails the module if
        walking raises or if any individual add recorded an error.
        """
        self.open()
        try:
            for target in self.targets:
                if os.path.isdir(target):
                    for directory_path, directory_names, file_names in os.walk(target, topdown=True):
                        for directory_name in directory_names:
                            full_path = os.path.join(directory_path, directory_name)
                            self.add(full_path, strip_prefix(self.root, full_path))

                        for file_name in file_names:
                            full_path = os.path.join(directory_path, file_name)
                            self.add(full_path, strip_prefix(self.root, full_path))
                else:
                    self.add(target, strip_prefix(self.root, target))
        except Exception as e:
            # Compressed formats are reported as "tar.<format>" in the message.
            if self.format in ('zip', 'tar'):
                archive_format = self.format
            else:
                archive_format = 'tar.' + self.format
            self.module.fail_json(
                msg='Error when writing %s archive at %s: %s' % (
                    archive_format, _to_native(self.destination), _to_native(e)
                ),
                exception=format_exc()
            )
        self.close()

        if self.errors:
            self.module.fail_json(
                msg='Errors when writing archive at %s: %s' % (_to_native(self.destination), '; '.join(self.errors))
            )

    def compare_with_original(self):
        """Mark the result as changed when the destination size differs from the size recorded in ``__init__``."""
        self.changed |= self.original_size != self.destination_size()

    def destination_exists(self):
        """Return a truthy value when a destination is set and exists on disk."""
        return self.destination and os.path.exists(self.destination)

    def destination_readable(self):
        """Return a truthy value when a destination is set and is readable."""
        return self.destination and os.access(self.destination, os.R_OK)

    def destination_size(self):
        """Return the size of the destination in bytes, or 0 when it does not exist."""
        return os.path.getsize(self.destination) if self.destination_exists() else 0

    def find_targets(self):
        """Split ``self.paths`` into existing ``targets`` and ``not_found`` entries.

        ``os.path.lexists`` is used, so a broken symlink counts as existing.
        """
        for path in self.paths:
            if not os.path.lexists(path):
                self.not_found.append(path)
            else:
                self.targets.append(path)

    def has_targets(self):
        """Return True when at least one source path exists."""
        return bool(self.targets)

    def has_unfound_targets(self):
        """Return True when at least one source path was not found."""
        return bool(self.not_found)

    def remove_single_target(self, path):
        """Delete the source file ``path``; fail the module on ``OSError``."""
        try:
            os.remove(path)
        except OSError as e:
            self.module.fail_json(
                path=_to_native(path),
                msg='Unable to remove source file: %s' % _to_native(e), exception=format_exc()
            )

    def remove_targets(self):
        """Delete everything that was archived and fail the module on any error.

        First removes each successfully added entry that still exists, then
        removes any source path that is still a directory.
        """
        for path in self.successes:
            if os.path.exists(path):
                try:
                    if os.path.isdir(path):
                        shutil.rmtree(path)
                    else:
                        os.remove(path)
                except OSError:
                    self.errors.append(_to_native(path))
        for path in self.paths:
            try:
                if os.path.isdir(path):
                    shutil.rmtree(path)
            except OSError:
                self.errors.append(_to_native(path))

        if self.errors:
            self.module.fail_json(
                dest=_to_native(self.destination), msg='Error deleting some source files: ', files=self.errors
            )

    def update_permissions(self):
        """Apply the common file arguments to the destination and update ``self.changed``."""
        try:
            file_args = self.module.load_file_common_arguments(self.module.params, path=self.destination)
        except TypeError:
            # The path argument is only supported in Ansible-base 2.10+. Fall back to
            # pre-2.10 behavior for older Ansible versions.
            self.module.params['path'] = self.destination
            file_args = self.module.load_file_common_arguments(self.module.params)

        self.changed = self.module.set_fs_attributes_if_different(file_args, self.changed)

    @property
    def result(self):
        """Dictionary of return values for the module, with all paths as native strings."""
        return {
            'archived': [_to_native(p) for p in self.successes],
            'dest': _to_native(self.destination),
            'dest_state': self.destination_state,
            'changed': self.changed,
            'arcroot': _to_native(self.root),
            'missing': [_to_native(p) for p in self.not_found],
            'expanded_paths': [_to_native(p) for p in self.expanded_paths],
            'expanded_exclude_paths': [_to_native(p) for p in self.expanded_exclude_paths],
        }

    def _check_removal_safety(self):
        """Fail the module when the destination lies inside a source directory.

        Only called when ``remove`` is set: removing such a source directory
        afterwards would delete the archive that was just created.
        """
        for path in self.paths:
            # os.path.join(path, b'') appends a separator so that only true
            # descendants of the directory match, not siblings sharing a prefix.
            if os.path.isdir(path) and self.destination.startswith(os.path.join(path, b'')):
                self.module.fail_json(
                    # Converted to a native string like every other value
                    # passed to fail_json by this class.
                    path=_to_native(b', '.join(self.paths)),
                    msg='Error, created archive can not be contained in source paths when remove=true'
                )

    def _open_compressed_file(self, path, mode):
        """Open ``path`` with the compression library matching ``self.format``.

        :param path: native-string path of the compressed file.
        :param mode: file mode passed to the compression library.
        :returns: the opened file object for ``gz``, ``bz2`` or ``xz``; for any
            other format ``fail_json`` is called.
        """
        f = None
        if self.format == 'gz':
            f = gzip.open(path, mode)
        elif self.format == 'bz2':
            f = bz2.BZ2File(path, mode)
        elif self.format == 'xz':
            f = lzma.LZMAFile(path, mode)
        else:
            self.module.fail_json(msg="%s is not a valid format" % self.format)

        return f

    @abc.abstractmethod
    def close(self):
        """Finish writing and close the archive opened by ``open``."""
        pass

    @abc.abstractmethod
    def contains(self, name):
        """Return True when the open archive has an entry called ``name``."""
        pass

    @abc.abstractmethod
    def open(self):
        """Open the destination archive for writing and store the handle in ``self.file``."""
        pass

    @abc.abstractmethod
    def _add(self, path, archive_name):
        """Add the entry at ``path`` to the open archive as ``archive_name``."""
        pass
496
497
class ZipArchive(Archive):
    """Archive implementation that writes a deflate-compressed zip file."""

    def open(self):
        """Create the destination zip file for writing, with ZIP64 extensions allowed."""
        destination = _to_native_ascii(self.destination)
        self.file = zipfile.ZipFile(
            destination,
            mode='w',
            compression=zipfile.ZIP_DEFLATED,
            allowZip64=True,
        )

    def close(self):
        """Close the zip file."""
        self.file.close()

    def contains(self, name):
        """Return True when the zip file has an entry called ``name``."""
        try:
            self.file.getinfo(name)
            return True
        except KeyError:
            # getinfo() raises KeyError for names that are not in the archive.
            return False

    def _add(self, path, archive_name):
        """Write ``path`` into the zip file as ``archive_name`` unless ``path`` is excluded."""
        if legacy_filter(path, self.exclusion_patterns):
            return
        self.file.write(path, archive_name)
518
519
class TarArchive(Archive):
    """Archive implementation for plain, gzip, bzip2 and xz compressed tar files."""

    def __init__(self, module):
        super(TarArchive, self).__init__(module)
        # In-memory buffer holding the uncompressed tar; only used for xz.
        self.fileIO = None

    def open(self):
        """Open the tar file for writing according to ``self.format``."""
        destination_format = self.format
        if destination_format == 'xz':
            # python3 tarfile module allows xz format but for python2 we have to create the tarfile
            # in memory and then compress it with lzma.
            self.fileIO = io.BytesIO()
            self.file = tarfile.open(fileobj=self.fileIO, mode='w')
        elif destination_format == 'tar':
            self.file = tarfile.open(_to_native_ascii(self.destination), 'w')
        elif destination_format in ('gz', 'bz2'):
            self.file = tarfile.open(_to_native_ascii(self.destination), 'w|' + destination_format)
        else:
            self.module.fail_json(msg="%s is not a valid archive format" % self.format)

    def close(self):
        """Close the tar file; for xz, compress the in-memory tar into the destination."""
        self.file.close()
        if self.format != 'xz':
            return

        with lzma.open(_to_native(self.destination), 'wb') as compressed:
            compressed.write(self.fileIO.getvalue())
        self.fileIO.close()

    def contains(self, name):
        """Return True when the tar file has a member called ``name``."""
        try:
            self.file.getmember(name)
            return True
        except KeyError:
            # getmember() raises KeyError for names that are not in the archive.
            return False

    def _add(self, path, archive_name):
        """Add ``path`` as ``archive_name`` without recursing, honouring the exclusion patterns.

        With ``PY27`` set, the patterns are matched against the member name
        through tarfile's ``filter`` callback; otherwise they are matched
        through the ``exclude`` callback.
        """
        patterns = self.exclusion_patterns

        if PY27:
            def keep_unless_excluded(tarinfo):
                # Returning None tells tarfile to leave the member out.
                if matches_exclusion_patterns(tarinfo.name, patterns):
                    return None
                return tarinfo

            self.file.add(path, archive_name, recursive=False, filter=keep_unless_excluded)
        else:
            def is_excluded(candidate):
                return legacy_filter(candidate, patterns)

            self.file.add(path, archive_name, recursive=False, exclude=is_excluded)
563
564
def get_archive(module):
    """Build the Archive implementation matching the requested format.

    :param module: the module object; only ``params['format']`` is read here.
    :returns: a ``ZipArchive`` for the ``zip`` format, a ``TarArchive`` for
        every other format.
    """
    archive_class = ZipArchive if module.params['format'] == 'zip' else TarArchive
    return archive_class(module)
570
571
def main():
    """Module entry point: parse arguments, build the archive and report the result.

    Three cases are handled: no source path exists (only the state of an
    existing destination is reported), the sources must be archived, or a
    single file is written on its own. In check mode nothing is written and
    only ``changed`` is set.
    """
    argument_spec = dict(
        path=dict(type='list', elements='path', required=True),
        format=dict(type='str', default='gz', choices=['bz2', 'gz', 'tar', 'xz', 'zip']),
        dest=dict(type='path'),
        exclude_path=dict(type='list', elements='path', default=[]),
        exclusion_patterns=dict(type='list', elements='path'),
        force_archive=dict(type='bool', default=False),
        remove=dict(type='bool', default=False),
    )
    module = AnsibleModule(
        argument_spec=argument_spec,
        add_file_common_args=True,
        supports_check_mode=True,
    )

    if module.params['format'] == 'xz' and not HAS_LZMA:
        module.fail_json(
            msg=missing_required_lib("lzma or backports.lzma", reason="when using xz format"), exception=LZMA_IMP_ERR
        )

    dry_run = module.check_mode

    archive = get_archive(module)
    archive.find_targets()

    if not archive.has_targets():
        # Nothing to add: only describe a destination that is already there.
        if archive.destination_exists():
            if is_archive(archive.destination):
                archive.destination_state = STATE_ARCHIVED
            else:
                archive.destination_state = STATE_COMPRESSED
    elif archive.must_archive:
        if dry_run:
            archive.changed = True
        else:
            archive.add_targets()
            if archive.has_unfound_targets():
                archive.destination_state = STATE_INCOMPLETE
            else:
                archive.destination_state = STATE_ARCHIVED
            archive.compare_with_original()
            if archive.remove:
                archive.remove_targets()
    else:
        # Exactly one regular file and no forced archive.
        if not dry_run:
            source = archive.paths[0]
            archive.add_single_target(source)
            archive.compare_with_original()
            if archive.remove:
                archive.remove_single_target(source)
        elif not archive.destination_exists():
            archive.changed = True

    if archive.destination_exists():
        archive.update_permissions()

    module.exit_json(**archive.result)
624
625
# Run the module only when this file is executed as a script, not when it is imported.
if __name__ == '__main__':
    main()
628