"""Utility functions for copying and archiving files and directory trees.

XXX The functions here don't copy the resource fork or other metadata on Mac.

"""

import os
import sys
import stat
from os.path import abspath
import fnmatch
import collections
import errno

try:
    from pwd import getpwnam
except ImportError:
    getpwnam = None

try:
    from grp import getgrnam
except ImportError:
    getgrnam = None

__all__ = ["copyfileobj", "copyfile", "copymode", "copystat", "copy", "copy2",
           "copytree", "move", "rmtree", "Error", "SpecialFileError",
           "ExecError", "make_archive", "get_archive_formats",
           "register_archive_format", "unregister_archive_format",
           "ignore_patterns"]


class Error(EnvironmentError):
    """Base error raised by the copy/move helpers in this module."""


class SpecialFileError(EnvironmentError):
    """Raised when trying to do a kind of operation (e.g. copying) which is
    not supported on a special file (e.g. a named pipe)"""


class ExecError(EnvironmentError):
    """Raised when a command could not be executed"""


# WindowsError only exists on Windows; bind it to None elsewhere so that
# later isinstance() checks can be guarded with "WindowsError is not None".
try:
    WindowsError
except NameError:
    WindowsError = None


def copyfileobj(fsrc, fdst, length=16*1024):
    """Copy data from file-like object fsrc to file-like object fdst.

    Data is transferred in chunks of at most 'length' bytes until
    fsrc.read() returns an empty result.
    """
    while 1:
        buf = fsrc.read(length)
        if not buf:
            break
        fdst.write(buf)


def _samefile(src, dst):
    """Return True if src and dst refer to the same file."""
    # Macintosh, Unix.
    if hasattr(os.path, 'samefile'):
        try:
            return os.path.samefile(src, dst)
        except OSError:
            # At least one of the paths could not be stat'ed.
            return False

    # All other platforms: check for same pathname.
    return (os.path.normcase(os.path.abspath(src)) ==
            os.path.normcase(os.path.abspath(dst)))


def copyfile(src, dst):
    """Copy data from src to dst.

    Raises Error if src and dst are the same file, and SpecialFileError
    if either of them is a named pipe.
    """
    if _samefile(src, dst):
        raise Error("`%s` and `%s` are the same file" % (src, dst))

    for fn in [src, dst]:
        try:
            st = os.stat(fn)
        except OSError:
            # File most likely does not exist
            pass
        else:
            # XXX What about other special files? (sockets, devices...)
            if stat.S_ISFIFO(st.st_mode):
                raise SpecialFileError("`%s` is a named pipe" % fn)

    with open(src, 'rb') as fsrc:
        with open(dst, 'wb') as fdst:
            copyfileobj(fsrc, fdst)


def copymode(src, dst):
    """Copy mode bits from src to dst"""
    if hasattr(os, 'chmod'):
        st = os.stat(src)
        mode = stat.S_IMODE(st.st_mode)
        os.chmod(dst, mode)


def copystat(src, dst):
    """Copy all stat info (mode bits, atime, mtime, flags) from src to dst"""
    st = os.stat(src)
    mode = stat.S_IMODE(st.st_mode)
    if hasattr(os, 'utime'):
        os.utime(dst, (st.st_atime, st.st_mtime))
    if hasattr(os, 'chmod'):
        os.chmod(dst, mode)
    if hasattr(os, 'chflags') and hasattr(st, 'st_flags'):
        try:
            os.chflags(dst, st.st_flags)
        except OSError as why:
            # Only "operation not supported" is tolerated; anything else
            # is a genuine failure and is re-raised.
            if (not hasattr(errno, 'EOPNOTSUPP') or
                    why.errno != errno.EOPNOTSUPP):
                raise


def copy(src, dst):
    """Copy data and mode bits ("cp src dst").

    The destination may be a directory.

    """
    if os.path.isdir(dst):
        dst = os.path.join(dst, os.path.basename(src))
    copyfile(src, dst)
    copymode(src, dst)


def copy2(src, dst):
    """Copy data and all stat info ("cp -p src dst").

    The destination may be a directory.

    """
    if os.path.isdir(dst):
        dst = os.path.join(dst, os.path.basename(src))
    copyfile(src, dst)
    copystat(src, dst)


def ignore_patterns(*patterns):
    """Function that can be used as copytree() ignore parameter.

    Patterns is a sequence of glob-style patterns
    that are used to exclude files"""
    def _ignore_patterns(path, names):
        ignored_names = []
        for pattern in patterns:
            ignored_names.extend(fnmatch.filter(names, pattern))
        return set(ignored_names)
    return _ignore_patterns


def copytree(src, dst, symlinks=False, ignore=None):
    """Recursively copy a directory tree using copy2().

    The destination directory must not already exist.
    If exception(s) occur, an Error is raised with a list of reasons.

    If the optional symlinks flag is true, symbolic links in the
    source tree result in symbolic links in the destination tree; if
    it is false, the contents of the files pointed to by symbolic
    links are copied.

    The optional ignore argument is a callable. If given, it
    is called with the `src` parameter, which is the directory
    being visited by copytree(), and `names` which is the list of
    `src` contents, as returned by os.listdir():

        callable(src, names) -> ignored_names

    Since copytree() is called recursively, the callable will be
    called once for each directory that is copied. It returns a
    list of names relative to the `src` directory that should
    not be copied.

    XXX Consider this example code rather than the ultimate tool.

    """
    names = os.listdir(src)
    if ignore is not None:
        ignored_names = ignore(src, names)
    else:
        ignored_names = set()

    os.makedirs(dst)
    errors = []
    for name in names:
        if name in ignored_names:
            continue
        srcname = os.path.join(src, name)
        dstname = os.path.join(dst, name)
        try:
            if symlinks and os.path.islink(srcname):
                linkto = os.readlink(srcname)
                os.symlink(linkto, dstname)
            elif os.path.isdir(srcname):
                copytree(srcname, dstname, symlinks, ignore)
            else:
                # Will raise a SpecialFileError for unsupported file types
                copy2(srcname, dstname)
        # catch the Error from the recursive copytree so that we can
        # continue with other files
        except Error as err:
            errors.extend(err.args[0])
        except EnvironmentError as why:
            errors.append((srcname, dstname, str(why)))
    try:
        copystat(src, dst)
    except OSError as why:
        if WindowsError is not None and isinstance(why, WindowsError):
            # Copying file access times may fail on Windows
            pass
        else:
            # One (src, dst, reason) entry per failure; extend() would
            # flatten the tuple into three unrelated strings.
            errors.append((src, dst, str(why)))
    if errors:
        raise Error(errors)


def rmtree(path, ignore_errors=False, onerror=None):
    """Recursively delete a directory tree.

    If ignore_errors is set, errors are ignored; otherwise, if onerror
    is set, it is called to handle the error with arguments (func,
    path, exc_info) where func is os.listdir, os.remove, or os.rmdir;
    path is the argument to that function that caused it to fail; and
    exc_info is a tuple returned by sys.exc_info().  If ignore_errors
    is false and onerror is None, an exception is raised.

    """
    if ignore_errors:
        def onerror(*args):
            pass
    elif onerror is None:
        def onerror(*args):
            # Always invoked from inside an except block, so a bare
            # raise re-raises the exception currently being handled.
            raise
    try:
        if os.path.islink(path):
            # symlinks to directories are forbidden, see bug #1669
            raise OSError("Cannot call rmtree on a symbolic link")
    except OSError:
        onerror(os.path.islink, path, sys.exc_info())
        # can't continue even if onerror hook returns
        return
    names = []
    try:
        names = os.listdir(path)
    except os.error:
        onerror(os.listdir, path, sys.exc_info())
    for name in names:
        fullname = os.path.join(path, name)
        try:
            mode = os.lstat(fullname).st_mode
        except os.error:
            # Treat an entry that cannot be lstat'ed as a non-directory.
            mode = 0
        if stat.S_ISDIR(mode):
            rmtree(fullname, ignore_errors, onerror)
        else:
            try:
                os.remove(fullname)
            except os.error:
                onerror(os.remove, fullname, sys.exc_info())
    try:
        os.rmdir(path)
    except os.error:
        onerror(os.rmdir, path, sys.exc_info())


def _basename(path):
    # A basename() variant which first strips the trailing slash, if present.
    # Thus we always get the last component of the path, even for directories.
    return os.path.basename(path.rstrip(os.path.sep))


def move(src, dst):
    """Recursively move a file or directory to another location. This is
    similar to the Unix "mv" command.

    If the destination is a directory or a symlink to a directory, the source
    is moved inside the directory. The destination path must not already
    exist.

    If the destination already exists but is not a directory, it may be
    overwritten depending on os.rename() semantics.

    If the destination is on our current filesystem, then rename() is used.
    Otherwise, src is copied to the destination and then removed.
    A lot more could be done here...  A look at a mv.c shows a lot of
    the issues this implementation glosses over.

    """
    real_dst = dst
    if os.path.isdir(dst):
        if _samefile(src, dst):
            # We might be on a case insensitive filesystem,
            # perform the rename anyway.
            os.rename(src, dst)
            return

        real_dst = os.path.join(dst, _basename(src))
        if os.path.exists(real_dst):
            raise Error("Destination path '%s' already exists" % real_dst)
    try:
        os.rename(src, real_dst)
    except OSError:
        # rename() failed; fall back to copy followed by delete.
        if os.path.isdir(src):
            if _destinsrc(src, dst):
                raise Error("Cannot move a directory '%s' into itself '%s'."
                            % (src, dst))
            copytree(src, real_dst, symlinks=True)
            rmtree(src)
        else:
            copy2(src, real_dst)
            os.unlink(src)


def _destinsrc(src, dst):
    """Return True if the absolute path of dst lies at or below src."""
    src = abspath(src)
    dst = abspath(dst)
    # Compare with trailing separators so that "/a/bc" is not considered
    # to be inside "/a/b".
    if not src.endswith(os.path.sep):
        src += os.path.sep
    if not dst.endswith(os.path.sep):
        dst += os.path.sep
    return dst.startswith(src)


def _get_gid(name):
    """Returns a gid, given a group name."""
    if getgrnam is None or name is None:
        return None
    try:
        result = getgrnam(name)
    except KeyError:
        result = None
    if result is not None:
        return result[2]
    return None


def _get_uid(name):
    """Returns an uid, given a user name."""
    if getpwnam is None or name is None:
        return None
    try:
        result = getpwnam(name)
    except KeyError:
        result = None
    if result is not None:
        return result[2]
    return None


def _make_tarball(base_name, base_dir, compress="gzip", verbose=0, dry_run=0,
                  owner=None, group=None, logger=None):
    """Create a (possibly compressed) tar file from all the files under
    'base_dir'.

    'compress' must be "gzip" (the default), "bzip2", or None.

    'owner' and 'group' can be used to define an owner and a group for the
    archive that is being built. If not provided, the current owner and group
    will be used.

    The output tar file will be named 'base_name' +  ".tar", possibly plus
    the appropriate compression extension (".gz", or ".bz2").

    Raises ValueError for an unsupported 'compress' value.

    Returns the output filename.
    """
    tar_compression = {'gzip': 'gz', 'bzip2': 'bz2', None: ''}
    compress_ext = {'gzip': '.gz', 'bzip2': '.bz2'}

    if compress is not None and compress not in compress_ext:
        raise ValueError(
            "bad value for 'compress': must be None, 'gzip' or 'bzip2'")

    archive_name = base_name + '.tar' + compress_ext.get(compress, '')
    archive_dir = os.path.dirname(archive_name)

    # archive_dir is '' for a bare file name: nothing to create in that case
    # (os.makedirs('') would fail).
    if archive_dir and not os.path.exists(archive_dir):
        if logger is not None:
            logger.info("creating %s", archive_dir)
        if not dry_run:
            os.makedirs(archive_dir)

    # creating the tarball
    import tarfile  # late import so Python build itself doesn't break

    if logger is not None:
        logger.info('Creating tar archive')

    uid = _get_uid(owner)
    gid = _get_gid(group)

    def _set_uid_gid(tarinfo):
        # Override ownership only for the ids that could be resolved.
        if gid is not None:
            tarinfo.gid = gid
            tarinfo.gname = group
        if uid is not None:
            tarinfo.uid = uid
            tarinfo.uname = owner
        return tarinfo

    if not dry_run:
        tar = tarfile.open(archive_name, 'w|%s' % tar_compression[compress])
        try:
            tar.add(base_dir, filter=_set_uid_gid)
        finally:
            tar.close()

    return archive_name


def _call_external_zip(base_dir, zip_filename, verbose=False, dry_run=False):
    """Create zip_filename from base_dir by spawning the external 'zip'
    command.  Raises ExecError if the command fails."""
    # XXX see if we want to keep an external call here
    if verbose:
        zipoptions = "-r"
    else:
        zipoptions = "-rq"
    from distutils.errors import DistutilsExecError
    from distutils.spawn import spawn
    try:
        spawn(["zip", zipoptions, zip_filename, base_dir], dry_run=dry_run)
    except DistutilsExecError:
        # XXX really should distinguish between "couldn't find
        # external 'zip' command" and "zip failed".
        raise ExecError(
            ("unable to create zip file '%s': "
             "could neither import the 'zipfile' module nor "
             "find a standalone zip utility") % zip_filename)


def _make_zipfile(base_name, base_dir, verbose=0, dry_run=0, logger=None):
    """Create a zip file from all the files under 'base_dir'.

    The output zip file will be named 'base_name' + ".zip".  Uses either the
    "zipfile" Python module (if available) or the InfoZIP "zip" utility
    (if installed and found on the default search path).  If neither tool is
    available, raises ExecError.  Returns the name of the output zip
    file.
    """
    zip_filename = base_name + ".zip"
    archive_dir = os.path.dirname(base_name)

    # archive_dir is '' for a bare file name: nothing to create in that case
    # (os.makedirs('') would fail).
    if archive_dir and not os.path.exists(archive_dir):
        if logger is not None:
            logger.info("creating %s", archive_dir)
        if not dry_run:
            os.makedirs(archive_dir)

    # If zipfile module is not available, try spawning an external 'zip'
    # command.
    try:
        import zipfile
    except ImportError:
        zipfile = None

    if zipfile is None:
        _call_external_zip(base_dir, zip_filename, verbose, dry_run)
    else:
        if logger is not None:
            logger.info("creating '%s' and adding '%s' to it",
                        zip_filename, base_dir)

        if not dry_run:
            zf = zipfile.ZipFile(zip_filename, "w",
                                 compression=zipfile.ZIP_DEFLATED)
            # Make sure the archive is closed even if adding a file fails.
            try:
                for dirpath, dirnames, filenames in os.walk(base_dir):
                    for name in filenames:
                        path = os.path.normpath(os.path.join(dirpath, name))
                        if os.path.isfile(path):
                            zf.write(path, path)
                            if logger is not None:
                                logger.info("adding '%s'", path)
            finally:
                zf.close()

    return zip_filename


# Maps a format name to (function, [(extra_arg_name, value), ...], description).
_ARCHIVE_FORMATS = {
    'gztar': (_make_tarball, [('compress', 'gzip')], "gzip'ed tar-file"),
    'bztar': (_make_tarball, [('compress', 'bzip2')], "bzip2'ed tar-file"),
    'tar':   (_make_tarball, [('compress', None)], "uncompressed tar file"),
    'zip':   (_make_zipfile, [], "ZIP file"),
    }


def get_archive_formats():
    """Returns a list of supported formats for archiving and unarchiving.

    Each element of the returned sequence is a tuple (name, description)
    """
    formats = [(name, registry[2]) for name, registry in
               _ARCHIVE_FORMATS.items()]
    formats.sort()
    return formats


def register_archive_format(name, function, extra_args=None, description=''):
    """Registers an archive format.

    name is the name of the format. function is the callable that will be
    used to create archives. If provided, extra_args is a sequence of
    (name, value) tuples that will be passed as arguments to the callable.
    description can be provided to describe the format, and will be returned
    by the get_archive_formats() function.

    Raises TypeError if function is not callable or extra_args is malformed.
    """
    if extra_args is None:
        extra_args = []
    # The callable() builtin is used instead of collections.Callable, which
    # is not available on recent Python versions.
    if not callable(function):
        raise TypeError('The %s object is not callable' % function)
    if not isinstance(extra_args, (tuple, list)):
        raise TypeError('extra_args needs to be a sequence')
    for element in extra_args:
        if not isinstance(element, (tuple, list)) or len(element) != 2:
            raise TypeError('extra_args elements are : (arg_name, value)')

    _ARCHIVE_FORMATS[name] = (function, extra_args, description)


def unregister_archive_format(name):
    """Remove the archive format 'name' from the registry.

    Raises KeyError if no such format is registered.
    """
    del _ARCHIVE_FORMATS[name]


def make_archive(base_name, format, root_dir=None, base_dir=None, verbose=0,
                 dry_run=0, owner=None, group=None, logger=None):
    """Create an archive file (eg. zip or tar).

    'base_name' is the name of the file to create, minus any format-specific
    extension; 'format' is the archive format: one of "zip", "tar", "bztar"
    or "gztar".

    'root_dir' is a directory that will be the root directory of the
    archive; ie. we typically chdir into 'root_dir' before creating the
    archive.  'base_dir' is the directory where we start archiving from;
    ie. 'base_dir' will be the common prefix of all files and
    directories in the archive.  'root_dir' and 'base_dir' both default
    to the current directory.  Returns the name of the archive file.

    'owner' and 'group' are used when creating a tar archive. By default,
    uses the current owner and group.

    Raises ValueError if 'format' is not a registered archive format.
    """
    # Validate the format before touching the working directory, so that an
    # unknown format cannot leave the process chdir'ed into root_dir.
    try:
        format_info = _ARCHIVE_FORMATS[format]
    except KeyError:
        raise ValueError("unknown archive format '%s'" % format)

    save_cwd = os.getcwd()
    if root_dir is not None:
        if logger is not None:
            logger.debug("changing into '%s'", root_dir)
        # Resolve base_name now: it is relative to the original cwd.
        base_name = os.path.abspath(base_name)
        if not dry_run:
            os.chdir(root_dir)

    if base_dir is None:
        base_dir = os.curdir

    kwargs = {'dry_run': dry_run, 'logger': logger}

    func = format_info[0]
    for arg, val in format_info[1]:
        kwargs[arg] = val

    if format != 'zip':
        kwargs['owner'] = owner
        kwargs['group'] = group

    try:
        filename = func(base_name, base_dir, **kwargs)
    finally:
        if root_dir is not None:
            if logger is not None:
                logger.debug("changing back to '%s'", save_cwd)
            os.chdir(save_cwd)

    return filename