1"""Utility functions for copying and archiving files and directory trees.
2
3XXX The functions here don't copy the resource fork or other metadata on Mac.
4
5"""
6
7import os
8import sys
9import stat
10from os.path import abspath
11import fnmatch
12import collections
13import errno
14
15try:
16    from pwd import getpwnam
17except ImportError:
18    getpwnam = None
19
20try:
21    from grp import getgrnam
22except ImportError:
23    getgrnam = None
24
# Names exported by a star-import of this module.  ignore_patterns is
# included because it is the public helper meant to be passed as the
# ``ignore`` argument of copytree().
__all__ = ["copyfileobj", "copyfile", "copymode", "copystat", "copy", "copy2",
           "copytree", "move", "rmtree", "Error", "SpecialFileError",
           "ExecError", "make_archive", "get_archive_formats",
           "register_archive_format", "unregister_archive_format",
           "ignore_patterns"]
29
class Error(EnvironmentError):
    """Raised by the copy and move helpers when an operation fails."""
32
class SpecialFileError(EnvironmentError):
    """Raised when an operation (e.g. copying) is attempted on a special
    file, such as a named pipe, that does not support it."""
36
class ExecError(EnvironmentError):
    """Raised when an external command could not be executed."""
39
# WindowsError is not defined on every platform.  Where the name is
# missing, bind it to None so later code can check
# "WindowsError is not None" before using it in an isinstance() test.
try:
    WindowsError
except NameError:
    WindowsError = None
44
def copyfileobj(fsrc, fdst, length=16*1024):
    """Copy the contents of file-like object fsrc to file-like object fdst.

    Data is transferred in chunks of at most 'length' units per read; the
    copy stops at the first empty read.
    """
    chunk = fsrc.read(length)
    while chunk:
        fdst.write(chunk)
        chunk = fsrc.read(length)
52
53def _samefile(src, dst):
54    # Macintosh, Unix.
55    if hasattr(os.path, 'samefile'):
56        try:
57            return os.path.samefile(src, dst)
58        except OSError:
59            return False
60
61    # All other platforms: check for same pathname.
62    return (os.path.normcase(os.path.abspath(src)) ==
63            os.path.normcase(os.path.abspath(dst)))
64
def copyfile(src, dst):
    """Copy the data of the file named src to the file named dst.

    Raises Error when both names refer to the same file, and
    SpecialFileError when either one is a named pipe.
    """
    if _samefile(src, dst):
        raise Error("`%s` and `%s` are the same file" % (src, dst))

    for path in (src, dst):
        try:
            file_mode = os.stat(path).st_mode
        except OSError:
            # File most likely does not exist
            continue
        # XXX What about other special files? (sockets, devices...)
        if stat.S_ISFIFO(file_mode):
            raise SpecialFileError("`%s` is a named pipe" % path)

    with open(src, 'rb') as fsrc, open(dst, 'wb') as fdst:
        copyfileobj(fsrc, fdst)
84
def copymode(src, dst):
    """Copy the permission bits of src onto dst.

    Does nothing on platforms where os.chmod is unavailable.
    """
    if not hasattr(os, 'chmod'):
        return
    permissions = stat.S_IMODE(os.stat(src).st_mode)
    os.chmod(dst, permissions)
91
def copystat(src, dst):
    """Copy all stat info (mode bits, atime, mtime, flags) from src to dst.

    Each piece is copied only when the platform offers the matching os
    function.  A failure to set the file flags is ignored when the error
    says the operation is not supported (EOPNOTSUPP or ENOTSUP); any
    other OSError is propagated.
    """
    st = os.stat(src)
    mode = stat.S_IMODE(st.st_mode)
    if hasattr(os, 'utime'):
        os.utime(dst, (st.st_atime, st.st_mtime))
    if hasattr(os, 'chmod'):
        os.chmod(dst, mode)
    if hasattr(os, 'chflags') and hasattr(st, 'st_flags'):
        try:
            os.chflags(dst, st.st_flags)
        except OSError as why:
            # Some filesystems cannot store flags at all; that is not
            # worth failing the whole copy for.
            for err in 'EOPNOTSUPP', 'ENOTSUP':
                if hasattr(errno, err) and why.errno == getattr(errno, err):
                    break
            else:
                raise
107
def copy(src, dst):
    """Copy file data and permission bits, like "cp src dst".

    When dst names an existing directory, the file is copied into it
    under the base name of src.

    """
    target = dst
    if os.path.isdir(target):
        target = os.path.join(target, os.path.basename(src))
    copyfile(src, target)
    copymode(src, target)
118
def copy2(src, dst):
    """Copy file data and all stat info, like "cp -p src dst".

    When dst names an existing directory, the file is copied into it
    under the base name of src.

    """
    target = dst
    if os.path.isdir(target):
        target = os.path.join(target, os.path.basename(src))
    copyfile(src, target)
    copystat(src, target)
129
def ignore_patterns(*patterns):
    """Build a callable suitable for the copytree() ignore parameter.

    'patterns' are glob-style patterns; the returned callable yields the
    set of names that match at least one of them."""
    def _ignore_patterns(path, names):
        matched = set()
        for glob in patterns:
            matched.update(fnmatch.filter(names, glob))
        return matched
    return _ignore_patterns
141
def copytree(src, dst, symlinks=False, ignore=None):
    """Recursively copy a directory tree using copy2().

    The destination directory must not already exist.
    If exception(s) occur, an Error is raised with a list of reasons;
    each reason is a (srcname, dstname, message) tuple.

    If the optional symlinks flag is true, symbolic links in the
    source tree result in symbolic links in the destination tree; if
    it is false, the contents of the files pointed to by symbolic
    links are copied.

    The optional ignore argument is a callable. If given, it
    is called with the `src` parameter, which is the directory
    being visited by copytree(), and `names` which is the list of
    `src` contents, as returned by os.listdir():

        callable(src, names) -> ignored_names

    Since copytree() is called recursively, the callable will be
    called once for each directory that is copied. It returns a
    list of names relative to the `src` directory that should
    not be copied.

    XXX Consider this example code rather than the ultimate tool.

    """
    names = os.listdir(src)
    if ignore is not None:
        ignored_names = ignore(src, names)
    else:
        ignored_names = set()

    os.makedirs(dst)
    errors = []
    for name in names:
        if name in ignored_names:
            continue
        srcname = os.path.join(src, name)
        dstname = os.path.join(dst, name)
        try:
            if symlinks and os.path.islink(srcname):
                linkto = os.readlink(srcname)
                os.symlink(linkto, dstname)
            elif os.path.isdir(srcname):
                copytree(srcname, dstname, symlinks, ignore)
            else:
                # Will raise a SpecialFileError for unsupported file types
                copy2(srcname, dstname)
        # catch the Error from the recursive copytree so that we can
        # continue with other files; it must be tested before
        # EnvironmentError because Error is a subclass of it
        except Error as err:
            errors.extend(err.args[0])
        except EnvironmentError as why:
            errors.append((srcname, dstname, str(why)))
    try:
        copystat(src, dst)
    except OSError as why:
        if WindowsError is not None and isinstance(why, WindowsError):
            # Copying file access times may fail on Windows
            pass
        else:
            # append, not extend: the reason must stay one 3-tuple entry
            errors.append((src, dst, str(why)))
    if errors:
        raise Error(errors)
206
def rmtree(path, ignore_errors=False, onerror=None):
    """Recursively delete a directory tree.

    If ignore_errors is set, errors are ignored; otherwise, if onerror
    is set, it is called to handle the error with arguments (func,
    path, exc_info) where func is os.path.islink, os.listdir, os.remove,
    or os.rmdir; path is the argument to that function that caused it to
    fail; and exc_info is a tuple returned by sys.exc_info().  If
    ignore_errors is false and onerror is None, an exception is raised.

    """
    if ignore_errors:
        def onerror(*args):
            pass
    elif onerror is None:
        def onerror(*args):
            # Always invoked from inside an except clause, so the bare
            # raise re-raises the error currently being handled.
            raise
    try:
        if os.path.islink(path):
            # symlinks to directories are forbidden, see bug #1669
            raise OSError("Cannot call rmtree on a symbolic link")
    except OSError:
        onerror(os.path.islink, path, sys.exc_info())
        # can't continue even if onerror hook returns
        return
    names = []
    try:
        names = os.listdir(path)
    except os.error:
        onerror(os.listdir, path, sys.exc_info())
    for name in names:
        fullname = os.path.join(path, name)
        try:
            mode = os.lstat(fullname).st_mode
        except os.error:
            # Unknown type: treat it as a plain file so that os.remove
            # reports the problem through onerror.
            mode = 0
        if stat.S_ISDIR(mode):
            rmtree(fullname, ignore_errors, onerror)
        else:
            try:
                os.remove(fullname)
            except os.error:
                onerror(os.remove, fullname, sys.exc_info())
    try:
        os.rmdir(path)
    except os.error:
        onerror(os.rmdir, path, sys.exc_info())
254
255
256def _basename(path):
257    # A basename() variant which first strips the trailing slash, if present.
258    # Thus we always get the last component of the path, even for directories.
259    return os.path.basename(path.rstrip(os.path.sep))
260
def move(src, dst):
    """Recursively move a file or directory to another location. This is
    similar to the Unix "mv" command.

    If the destination is a directory or a symlink to a directory, the source
    is moved inside the directory. The destination path must not already
    exist; Error is raised if it does.

    If the destination already exists but is not a directory, it may be
    overwritten depending on os.rename() semantics.

    os.rename() is tried first.  If it fails with OSError, src is copied
    to the destination and then removed; Error is raised when a directory
    would be moved into itself.
    A lot more could be done here...  A look at a mv.c shows a lot of
    the issues this implementation glosses over.

    """
    real_dst = dst
    if os.path.isdir(dst):
        if _samefile(src, dst):
            # We might be on a case insensitive filesystem,
            # perform the rename anyway.
            os.rename(src, dst)
            return

        real_dst = os.path.join(dst, _basename(src))
        if os.path.exists(real_dst):
            raise Error("Destination path '%s' already exists" % real_dst)
    try:
        os.rename(src, real_dst)
    except OSError:
        # rename failed: fall back to copy followed by removal
        if os.path.isdir(src):
            if _destinsrc(src, dst):
                raise Error("Cannot move a directory '%s' into itself '%s'."
                            % (src, dst))
            copytree(src, real_dst, symlinks=True)
            rmtree(src)
        else:
            copy2(src, real_dst)
            os.unlink(src)
300
301def _destinsrc(src, dst):
302    src = abspath(src)
303    dst = abspath(dst)
304    if not src.endswith(os.path.sep):
305        src += os.path.sep
306    if not dst.endswith(os.path.sep):
307        dst += os.path.sep
308    return dst.startswith(src)
309
310def _get_gid(name):
311    """Returns a gid, given a group name."""
312    if getgrnam is None or name is None:
313        return None
314    try:
315        result = getgrnam(name)
316    except KeyError:
317        result = None
318    if result is not None:
319        return result[2]
320    return None
321
322def _get_uid(name):
323    """Returns an uid, given a user name."""
324    if getpwnam is None or name is None:
325        return None
326    try:
327        result = getpwnam(name)
328    except KeyError:
329        result = None
330    if result is not None:
331        return result[2]
332    return None
333
def _make_tarball(base_name, base_dir, compress="gzip", verbose=0, dry_run=0,
                  owner=None, group=None, logger=None):
    """Create a (possibly compressed) tar file from all the files under
    'base_dir'.

    'compress' must be "gzip" (the default), "bzip2", or None; any other
    value raises ValueError.

    'owner' and 'group' can be used to define an owner and a group for the
    archive that is being built. If not provided, or if the name cannot be
    resolved to an id, the ownership recorded for each member is left
    unchanged.

    'logger', when not None, receives info() messages about the steps
    taken.  With 'dry_run' true, no directory or archive is created.
    'verbose' is accepted but not used.

    The output tar file will be named 'base_name' +  ".tar", possibly plus
    the appropriate compression extension (".gz", or ".bz2").

    Returns the output filename.
    """
    tar_compression = {'gzip': 'gz', 'bzip2': 'bz2', None: ''}
    compress_ext = {'gzip': '.gz', 'bzip2': '.bz2'}

    if compress is not None and compress not in compress_ext:
        raise ValueError("bad value for 'compress': "
                         "must be None, 'gzip' or 'bzip2'")

    archive_name = base_name + '.tar' + compress_ext.get(compress, '')
    archive_dir = os.path.dirname(archive_name)

    # An empty archive_dir means "current directory": nothing to create,
    # and os.makedirs('') would fail.
    if archive_dir and not os.path.exists(archive_dir):
        if logger is not None:
            logger.info("creating %s", archive_dir)
        if not dry_run:
            os.makedirs(archive_dir)

    # creating the tarball
    import tarfile  # late import so Python build itself doesn't break

    if logger is not None:
        logger.info('Creating tar archive')

    uid = _get_uid(owner)
    gid = _get_gid(group)

    def _set_uid_gid(tarinfo):
        # Override ownership only for the ids that could be resolved.
        if gid is not None:
            tarinfo.gid = gid
            tarinfo.gname = group
        if uid is not None:
            tarinfo.uid = uid
            tarinfo.uname = owner
        return tarinfo

    if not dry_run:
        tar = tarfile.open(archive_name, 'w|%s' % tar_compression[compress])
        try:
            tar.add(base_dir, filter=_set_uid_gid)
        finally:
            tar.close()

    return archive_name
393
def _call_external_zip(base_dir, zip_filename, verbose=False, dry_run=False):
    """Create 'zip_filename' from 'base_dir' by spawning an external "zip".

    The command is run recursively ("-r"), and quietly ("-rq") unless
    'verbose' is true.  'dry_run' is forwarded to distutils' spawn().
    Raises ExecError when spawn() reports a DistutilsExecError.
    """
    # XXX see if we want to keep an external call here
    zipoptions = "-r" if verbose else "-rq"
    from distutils.errors import DistutilsExecError
    from distutils.spawn import spawn
    try:
        spawn(["zip", zipoptions, zip_filename, base_dir], dry_run=dry_run)
    except DistutilsExecError:
        # XXX really should distinguish between "couldn't find
        # external 'zip' command" and "zip failed".
        raise ExecError(("unable to create zip file '%s': "
                         "could neither import the 'zipfile' module nor "
                         "find a standalone zip utility") % zip_filename)
411
412def _make_zipfile(base_name, base_dir, verbose=0, dry_run=0, logger=None):
413    """Create a zip file from all the files under 'base_dir'.
414
415    The output zip file will be named 'base_name' + ".zip".  Uses either the
416    "zipfile" Python module (if available) or the InfoZIP "zip" utility
417    (if installed and found on the default search path).  If neither tool is
418    available, raises ExecError.  Returns the name of the output zip
419    file.
420    """
421    zip_filename = base_name + ".zip"
422    archive_dir = os.path.dirname(base_name)
423
424    if not os.path.exists(archive_dir):
425        if logger is not None:
426            logger.info("creating %s", archive_dir)
427        if not dry_run:
428            os.makedirs(archive_dir)
429
430    # If zipfile module is not available, try spawning an external 'zip'
431    # command.
432    try:
433        import zipfile
434    except ImportError:
435        zipfile = None
436
437    if zipfile is None:
438        _call_external_zip(base_dir, zip_filename, verbose, dry_run)
439    else:
440        if logger is not None:
441            logger.info("creating '%s' and adding '%s' to it",
442                        zip_filename, base_dir)
443
444        if not dry_run:
445            zip = zipfile.ZipFile(zip_filename, "w",
446                                  compression=zipfile.ZIP_DEFLATED)
447
448            for dirpath, dirnames, filenames in os.walk(base_dir):
449                for name in filenames:
450                    path = os.path.normpath(os.path.join(dirpath, name))
451                    if os.path.isfile(path):
452                        zip.write(path, path)
453                        if logger is not None:
454                            logger.info("adding '%s'", path)
455            zip.close()
456
457    return zip_filename
458
# Registry of archive formats, keyed by format name.  Each value is a
# (function, extra_args, description) tuple: 'function' builds the archive,
# 'extra_args' is a list of (keyword, value) pairs that make_archive() passes
# to it as keyword arguments, and 'description' is the text reported by
# get_archive_formats().
_ARCHIVE_FORMATS = {
    'gztar': (_make_tarball, [('compress', 'gzip')], "gzip'ed tar-file"),
    'bztar': (_make_tarball, [('compress', 'bzip2')], "bzip2'ed tar-file"),
    'tar':   (_make_tarball, [('compress', None)], "uncompressed tar file"),
    'zip':   (_make_zipfile, [],"ZIP file")
    }
465
def get_archive_formats():
    """Return the registered archive formats as a sorted list.

    Each element of the returned list is a (name, description) tuple.
    """
    return sorted((name, entry[2])
                  for name, entry in _ARCHIVE_FORMATS.items())
475
def register_archive_format(name, function, extra_args=None, description=''):
    """Registers an archive format.

    name is the name of the format. function is the callable that will be
    used to create archives. If provided, extra_args is a sequence of
    (name, value) tuples that will be passed as arguments to the callable.
    description can be provided to describe the format, and will be returned
    by the get_archive_formats() function.

    Raises TypeError when function is not callable, when extra_args is not
    a tuple or list, or when one of its elements is not a 2-item tuple or
    list.  Registering an existing name replaces the previous entry.
    """
    if extra_args is None:
        extra_args = []
    # The callable() builtin is used rather than collections.Callable,
    # which is not available on every Python version.
    if not callable(function):
        raise TypeError('The %s object is not callable' % function)
    if not isinstance(extra_args, (tuple, list)):
        raise TypeError('extra_args needs to be a sequence')
    for element in extra_args:
        if not isinstance(element, (tuple, list)) or len(element) != 2:
            raise TypeError('extra_args elements are : (arg_name, value)')

    _ARCHIVE_FORMATS[name] = (function, extra_args, description)
496
def unregister_archive_format(name):
    """Remove the archive format 'name' from the registry.

    Raises KeyError when no such format is registered.
    """
    _ARCHIVE_FORMATS.pop(name)
499
def make_archive(base_name, format, root_dir=None, base_dir=None, verbose=0,
                 dry_run=0, owner=None, group=None, logger=None):
    """Create an archive file (eg. zip or tar).

    'base_name' is the name of the file to create, minus any format-specific
    extension; 'format' is the archive format: one of "zip", "tar", "bztar"
    or "gztar", or any name added with register_archive_format().  An
    unknown format raises ValueError.

    'root_dir' is a directory that will be the root directory of the
    archive; ie. we typically chdir into 'root_dir' before creating the
    archive.  'base_dir' is the directory where we start archiving from;
    ie. 'base_dir' will be the common prefix of all files and
    directories in the archive.  'root_dir' and 'base_dir' both default
    to the current directory.  Returns the name of the archive file.

    'owner' and 'group' are used when creating a tar archive. By default,
    uses the current owner and group.

    'logger', when not None, receives debug() messages about directory
    changes and is passed on to the archiving function.
    """
    # Resolve the format before changing directory, so that an unknown
    # format cannot leave the process in 'root_dir'.
    try:
        format_info = _ARCHIVE_FORMATS[format]
    except KeyError:
        raise ValueError("unknown archive format '%s'" % format)

    save_cwd = os.getcwd()
    if root_dir is not None:
        if logger is not None:
            logger.debug("changing into '%s'", root_dir)
        # Make base_name absolute first: it must keep pointing at the same
        # location once the working directory has changed.
        base_name = os.path.abspath(base_name)
        if not dry_run:
            os.chdir(root_dir)

    if base_dir is None:
        base_dir = os.curdir

    kwargs = {'dry_run': dry_run, 'logger': logger}

    func = format_info[0]
    for arg, val in format_info[1]:
        kwargs[arg] = val

    if format != 'zip':
        kwargs['owner'] = owner
        kwargs['group'] = group

    try:
        filename = func(base_name, base_dir, **kwargs)
    finally:
        if root_dir is not None:
            if logger is not None:
                logger.debug("changing back to '%s'", save_cwd)
            os.chdir(save_cwd)

    return filename
553