'''
Make strings safe for use as ASCII filenames, while trying to preserve as much
meaning as possible.
'''

import errno
import os
import shutil
import time
from math import ceil
from contextlib import suppress, closing

from calibre import force_unicode, isbytestring, prints, sanitize_file_name
from calibre.constants import (
    filesystem_encoding, iswindows, preferred_encoding, ismacos
)
from calibre.utils.localization import get_udc
from polyglot.builtins import iteritems, itervalues


def ascii_text(orig):
    '''
    Return a text (str) version of orig produced by the decoder returned by
    get_udc() (presumably a unicode to ASCII transliterator -- confirm against
    calibre.utils.localization).

    If that decoder raises, fall back to a lossy conversion: str input is
    encoded to ASCII with replacement characters and the resulting bytes are
    decoded using preferred_encoding.
    '''
    udc = get_udc()
    try:
        ans = udc.decode(orig)
    except Exception:
        if isinstance(orig, str):
            orig = orig.encode('ascii', 'replace')
        ans = orig.decode(preferred_encoding, 'replace')
    if isinstance(ans, bytes):
        ans = ans.decode('ascii', 'replace')
    return ans


def ascii_filename(orig, substitute='_'):
    '''
    Convert orig into a filename: it is passed through ascii_text(), every
    '?' becomes '_', every control character (code point below 32) becomes
    substitute and the result is finally passed through sanitize_file_name().

    substitute may be bytes, in which case it is decoded using
    filesystem_encoding.
    '''
    if isinstance(substitute, bytes):
        substitute = substitute.decode(filesystem_encoding)
    orig = ascii_text(orig).replace('?', '_')
    ans = ''.join(x if ord(x) >= 32 else substitute for x in orig)
    return sanitize_file_name(ans, substitute=substitute)


def shorten_component(s, by_what):
    '''
    Shorten s by (approximately) by_what items, removing them from the
    middle so that the start and the end of s are preserved.

    s is returned unchanged if it is shorter than by_what or if nothing
    would be left after shortening. Because the kept length is rounded down
    to an even number, one item more than by_what may be removed.
    '''
    l = len(s)
    if l < by_what:
        return s
    l = (l - by_what)//2
    if l <= 0:
        return s
    return s[:l] + s[-l:]


def limit_component(x, limit=254):
    '''
    Shorten the single path component x (str or bytes) until its encoded
    length is no more than limit.

    If x cannot be shortened any further (two items or fewer) it is returned
    as is, even if it is still over the limit.
    '''
    # windows and macs use utf-16 codepoints for length, linux uses arbitrary
    # binary data, but we will assume utf-8
    # NOTE: the length measured below is the number of encoded bytes, and the
    # 'utf-16' codec also emits a two byte BOM, so on windows/macOS this is a
    # conservative estimate.
    filename_encoding_for_length = 'utf-16' if iswindows or ismacos else 'utf-8'

    def encoded_length():
        q = x if isinstance(x, bytes) else x.encode(filename_encoding_for_length)
        return len(q)

    while encoded_length() > limit:
        if len(x) <= 2:
            # shorten_component() cannot make this any smaller
            break
        delta = encoded_length() - limit
        # Never ask for more than len(x) - 2, otherwise shorten_component()
        # returns x unchanged and this loop would never terminate (this
        # happens for long names made up of multi-byte characters).
        x = shorten_component(x, min(max(2, delta // 2), len(x) - 2))

    return x


def shorten_components_to(length, components, more_to_take=0, last_has_extension=True):
    '''
    Shorten the path components so that, joined with os.sep, they are no
    longer than length. Returns the list of shortened components.

    Every component is first limited with limit_component(). The excess is
    then removed from each component in proportion to its length. No
    component is ever made empty. If last_has_extension is True the extension
    of the last component is preserved where possible.

    more_to_take is used internally when recursing to remove a little more on
    every attempt. If length cannot be reached even with every component at
    its minimum, the minimal components are returned.
    '''
    components = [limit_component(cx) for cx in components]
    filepath = os.sep.join(components)
    extra = len(filepath) - (length - more_to_take)
    if extra < 1:
        return components
    deltas = []
    for x in components:
        pct = len(x)/float(len(filepath))
        deltas.append(int(ceil(pct*extra)))
    ans = []
    last_index = len(components) - 1

    for i, x in enumerate(components):
        delta = deltas[i]
        # Compare by position, not identity: the same string object can
        # legitimately appear more than once in components.
        is_last = i == last_index
        if delta > len(x):
            r = x[0] if is_last else ''
        else:
            if last_has_extension and is_last:
                b, e = os.path.splitext(x)
                if e == '.':
                    e = ''
                r = shorten_component(b, delta)+e
                if r.startswith('.'):
                    r = x[0]+r
            else:
                r = shorten_component(x, delta)
        r = r.strip()
        if not r:
            r = x.strip()[0] if x.strip() else 'x'
        ans.append(r)
    # Once extra exceeds len(filepath) every component is already as short as
    # it can get, so trying again cannot help and would recurse forever.
    if len(os.sep.join(ans)) > length and extra <= len(filepath):
        return shorten_components_to(length, components, more_to_take+2, last_has_extension)
    return ans


def find_executable_in_path(name, path=None):
    '''
    Return the absolute path to the first executable called name found in
    the directories listed in path (defaults to the PATH environment
    variable), or None if there is no such executable.

    On windows, unless name already ends with .exe, the extensions .exe, .cmd
    and .bat are tried in turn.
    '''
    if path is None:
        path = os.environ.get('PATH', '')
    exts = '.exe .cmd .bat'.split() if iswindows and not name.endswith('.exe') else ('',)
    path = path.split(os.pathsep)
    for x in path:
        for ext in exts:
            q = os.path.abspath(os.path.join(x, name)) + ext
            if os.access(q, os.X_OK):
                return q
    return None


def is_case_sensitive(path):
    '''
    Return True if the filesystem is case sensitive.

    path must be the path to an existing directory. You must have permission
    to create and delete files in this directory. The results of this test
    apply to the filesystem containing the directory in path.

    On windows no test is performed and False is always returned.
    '''
    case_sensitive = False
    if not iswindows:
        name1, name2 = ('calibre_test_case_sensitivity.txt',
                        'calibre_TesT_CaSe_sensitiVitY.Txt')
        f1, f2 = os.path.join(path, name1), os.path.join(path, name2)
        with suppress(OSError):
            os.remove(f1)
        open(f1, 'w').close()
        case_sensitive = not os.path.exists(f2)
        os.remove(f1)
    return case_sensitive


def case_preserving_open_file(path, mode='wb', mkdir_mode=0o777):
    '''
    Open the file pointed to by path with the specified mode. If any
    directories in path do not exist, they are created. Returns the
    opened file object and the path to the opened file object. This path is
    guaranteed to have the same case as the on disk path. For case insensitive
    filesystems, the returned path may be different from the passed in path.
    The returned path is always unicode and always an absolute path.

    If mode is None, then this function assumes that path points to a directory
    and return the path to the directory as the file object.

    mkdir_mode specifies the mode with which any missing directories in path
    are created.
    '''
    if isbytestring(path):
        path = path.decode(filesystem_encoding)

    path = os.path.abspath(path)

    sep = force_unicode(os.sep, 'ascii')

    if path.endswith(sep):
        path = path[:-1]
    if not path:
        raise ValueError('Path must not point to root')

    components = path.split(sep)
    if not components:
        raise ValueError('Invalid path: %r'%path)

    cpath = sep
    if iswindows:
        # Always upper case the drive letter and add a trailing slash so that
        # the first os.listdir works correctly
        cpath = components[0].upper() + sep

    bdir = path if mode is None else os.path.dirname(path)
    if not os.path.exists(bdir):
        os.makedirs(bdir, mkdir_mode)

    # Walk all the directories in path, putting the on disk case version of
    # the directory into cpath
    dirs = components[1:] if mode is None else components[1:-1]
    for comp in dirs:
        cdir = os.path.join(cpath, comp)
        cl = comp.lower()
        try:
            candidates = [c for c in os.listdir(cpath) if c.lower() == cl]
        except Exception:
            # Dont have permission to do the listdir, assume the case is
            # correct as we have no way to check it.
            pass
        else:
            if len(candidates) == 1:
                cdir = os.path.join(cpath, candidates[0])
            # else: We are on a case sensitive file system so cdir must already
            # be correct
        cpath = cdir

    if mode is None:
        ans = fpath = cpath
    else:
        fname = components[-1]
        ans = lopen(os.path.join(cpath, fname), mode)
        # Ensure file and all its metadata is written to disk so that subsequent
        # listdir() has file name in it. I don't know if this is actually
        # necessary, but given the diversity of platforms, best to be safe.
        ans.flush()
        os.fsync(ans.fileno())

        cl = fname.lower()
        try:
            candidates = [c for c in os.listdir(cpath) if c.lower() == cl]
        except OSError:
            # The containing directory, somehow disappeared?
            candidates = []
        if len(candidates) == 1:
            fpath = os.path.join(cpath, candidates[0])
        else:
            # We are on a case sensitive filesystem
            fpath = os.path.join(cpath, fname)
    return ans, fpath


def windows_get_fileid(path):
    ''' The fileid uniquely identifies actual file contents (it is the same for
    all hardlinks to a file). Similar to inode number on linux.

    Returns None if the file id could not be read (OSError). '''
    from calibre_extensions.winutil import get_file_id
    if isbytestring(path):
        path = path.decode(filesystem_encoding)
    with suppress(OSError):
        return get_file_id(path)
    return None


def samefile_windows(src, dst):
    '''
    Windows implementation of samefile(). Returns True if the two paths are
    the same string (after normalization of case and making them absolute),
    otherwise compares the file ids of the two paths. Returns False if neither
    file id could be read.
    '''
    samestring = (os.path.normcase(os.path.abspath(src)) ==
            os.path.normcase(os.path.abspath(dst)))
    if samestring:
        return True

    a, b = windows_get_fileid(src), windows_get_fileid(dst)
    if a is None and b is None:
        return False
    return a == b


def samefile(src, dst):
    '''
    Check if two paths point to the same actual file on the filesystem. Handles
    symlinks, case insensitivity, mapped drives, etc.

    Returns True iff both paths exist and point to the same file on disk.

    Note: On windows will return True if the two string are identical (up to
    case) even if the file does not exist. This is because I have no way of
    knowing how reliable the GetFileInformationByHandle method is.
    '''
    if iswindows:
        return samefile_windows(src, dst)

    if hasattr(os.path, 'samefile'):
        # Unix
        try:
            return os.path.samefile(src, dst)
        except OSError:
            return False

    # All other platforms: check for same pathname.
    samestring = (os.path.normcase(os.path.abspath(src)) ==
            os.path.normcase(os.path.abspath(dst)))
    return samestring


def windows_get_size(path):
    ''' On windows file sizes are only accurately stored in the actual file,
    not in the directory entry (which could be out of date). So we open the
    file, and get the actual size. '''
    from calibre_extensions import winutil
    if isbytestring(path):
        path = path.decode(filesystem_encoding)
    with closing(winutil.create_file(
        path, 0, winutil.FILE_SHARE_READ | winutil.FILE_SHARE_WRITE | winutil.FILE_SHARE_DELETE,
        winutil.OPEN_EXISTING, 0)
    ) as h:
        return winutil.get_file_size(h)


def windows_hardlink(src, dest):
    '''
    Create a hardlink at dest pointing to src and wait (up to about three
    seconds) for the size of dest to match the size of src.

    Raises an OSError if the sizes still differ afterwards.
    '''
    from calibre_extensions import winutil
    winutil.create_hard_link(dest, src)
    src_size = os.path.getsize(src)
    # We open and close dest, to ensure its directory entry is updated
    # see http://blogs.msdn.com/b/oldnewthing/archive/2011/12/26/10251026.aspx
    for i in range(10):
        # If we are on a network filesystem, we have to wait for some indeterminate time, since
        # network file systems are the best thing since sliced bread
        try:
            if windows_get_size(dest) == src_size:
                return
        except OSError:
            pass
        time.sleep(0.3)

    sz = windows_get_size(dest)
    if sz != src_size:
        msg = 'Creating hardlink from %s to %s failed: %%s' % (src, dest)
        raise OSError(msg % ('hardlink size: %d not the same as source size' % sz))


def windows_fast_hardlink(src, dest):
    '''
    Create a hardlink at dest pointing to src, checking the sizes only once,
    without waiting. Raises an OSError if the sizes differ.
    '''
    from calibre_extensions import winutil
    winutil.create_hard_link(dest, src)
    ssz, dsz = windows_get_size(src), windows_get_size(dest)
    if ssz != dsz:
        msg = 'Creating hardlink from %s to %s failed: %%s' % (src, dest)
        raise OSError(msg % ('hardlink size: %d not the same as source size: %s' % (dsz, ssz)))


def windows_nlinks(path):
    ' Return the result of winutil.nlinks() for path, decoding path first if it is bytes '
    from calibre_extensions import winutil
    if isbytestring(path):
        path = path.decode(filesystem_encoding)
    return winutil.nlinks(path)


class WindowsAtomicFolderMove:

    '''
    Move all the files inside a specified folder in an atomic fashion,
    preventing any other process from locking a file while the operation is
    incomplete. Raises an IOError if another process has locked a file before
    the operation starts. Note that this only operates on the files in the
    folder, not any sub-folders.
    '''

    def __init__(self, path):
        '''
        Open a handle to every file directly inside path. The handles are
        stored in self.handle_map, keyed by the normalized absolute path of
        the file. Does nothing if path does not exist.
        '''
        from collections import defaultdict
        from calibre_extensions import winutil
        self.handle_map = {}

        if isbytestring(path):
            path = path.decode(filesystem_encoding)

        if not os.path.exists(path):
            return

        names = os.listdir(path)
        name_to_fileid = {x:windows_get_fileid(os.path.join(path, x)) for x in names}
        fileid_to_names = defaultdict(set)
        for name, fileid in iteritems(name_to_fileid):
            fileid_to_names[fileid].add(name)

        for x in names:
            f = os.path.normcase(os.path.abspath(os.path.join(path, x)))
            if not os.path.isfile(f):
                continue
            with suppress(OSError):
                # Ensure the file is not read-only
                winutil.set_file_attributes(f, winutil.FILE_ATTRIBUTE_NORMAL)

            try:
                h = winutil.create_file(f, winutil.GENERIC_READ,
                        winutil.FILE_SHARE_DELETE,
                        winutil.OPEN_EXISTING, winutil.FILE_FLAG_SEQUENTIAL_SCAN)
            except OSError as e:
                if e.winerror == winutil.ERROR_SHARING_VIOLATION:
                    # The file could be a hardlink to an already opened file,
                    # in which case we use the same handle for both files
                    fileid = name_to_fileid[x]
                    found = False
                    if fileid is not None:
                        for other in fileid_to_names[fileid]:
                            other = os.path.normcase(os.path.abspath(os.path.join(path, other)))
                            if other in self.handle_map:
                                self.handle_map[f] = self.handle_map[other]
                                found = True
                                break
                    if found:
                        continue

                self.close_handles()
                if e.winerror == winutil.ERROR_SHARING_VIOLATION:
                    err = IOError(errno.EACCES,
                            _('File is open in another process'))
                    err.filename = f
                    raise err
                prints('CreateFile failed for: %r' % f)
                raise
            except BaseException:
                # Release everything we have opened so far before propagating
                # the error, whatever it is.
                self.close_handles()
                prints('CreateFile failed for: %r' % f)
                raise
            self.handle_map[f] = h

    def copy_path_to(self, path, dest):
        '''
        Copy the file at path, which must be one of the files opened by this
        object, to dest. A hardlink is tried first; if that fails with an
        OSError the contents are read from the open handle and written to dest.

        Raises a ValueError if there is no open handle for path.
        '''
        from calibre_extensions import winutil
        handle = None
        for p, h in self.handle_map.items():
            if samefile_windows(path, p):
                handle = h
                break
        if handle is None:
            if os.path.exists(path):
                raise ValueError('The file %r did not exist when this move'
                        ' operation was started'%path)
            else:
                raise ValueError('The file %r does not exist'%path)

        with suppress(OSError):
            windows_hardlink(path, dest)
            return

        winutil.set_file_pointer(handle, 0, winutil.FILE_BEGIN)
        with lopen(dest, 'wb') as f:
            sz = 1024 * 1024
            while True:
                raw = winutil.read_file(handle, sz)
                if not raw:
                    break
                f.write(raw)

    def release_file(self, path):
        ' Release the lock on the file pointed to by path. Will also release the lock on any hardlinks to path '
        key = None
        for p, h in iteritems(self.handle_map):
            if samefile_windows(path, p):
                key = (p, h)
                break
        if key is not None:
            key[1].close()
            # Hardlinks share a single handle, forget all of them
            remove = [f for f, h in iteritems(self.handle_map) if h is key[1]]
            for x in remove:
                self.handle_map.pop(x)

    def close_handles(self):
        ' Close all open handles and forget about them '
        for h in itervalues(self.handle_map):
            h.close()
        self.handle_map = {}

    def delete_originals(self):
        ' Delete all the files this object holds handles to, then close the handles '
        from calibre_extensions import winutil
        for path in self.handle_map:
            winutil.delete_file(path)
        self.close_handles()


def hardlink_file(src, dest):
    ' Create a hardlink at dest that points to src '
    if iswindows:
        windows_hardlink(src, dest)
        return
    os.link(src, dest)


def nlinks_file(path):
    ' Return number of hardlinks to the file '
    if iswindows:
        return windows_nlinks(path)
    return os.stat(path).st_nlink


if iswindows:
    from calibre_extensions.winutil import move_file

    def rename_file(a, b):
        ' Move the file a to b using winutil.move_file(), bytes paths are decoded with os.fsdecode() '
        if isinstance(a, bytes):
            a = os.fsdecode(a)
        if isinstance(b, bytes):
            b = os.fsdecode(b)
        move_file(a, b)


def retry_on_fail(func, *args, count=10, sleep_time=0.2):
    '''
    Call func(*args), retrying up to count times in total if it raises an
    OSError, sleeping for sleep_time seconds between attempts. The OSError
    from the last attempt is propagated. The return value of func is
    discarded.
    '''
    for i in range(count):
        try:
            func(*args)
            break
        except OSError:
            if i > count - 2:
                raise
            # Try the operation repeatedly in case something like a virus
            # scanner has opened one of the files (I love windows)
            time.sleep(sleep_time)


def atomic_rename(oldpath, newpath):
    '''Replace the file newpath with the file oldpath. Can fail if the files
    are on different volumes. If succeeds, guaranteed to be atomic. newpath may
    or may not exist. If it exists, it is replaced. '''
    if iswindows:
        retry_on_fail(rename_file, oldpath, newpath)
    else:
        os.rename(oldpath, newpath)


def remove_dir_if_empty(path, ignore_metadata_caches=False):
    ''' Remove a directory if it is empty or contains only the folder metadata
    caches from different OSes. To delete the folder if it contains only
    metadata caches, set ignore_metadata_caches to True.

    A directory that is not empty is silently left alone, any other error from
    os.rmdir() is propagated. '''
    try:
        os.rmdir(path)
    except OSError as e:
        if e.errno == errno.ENOTEMPTY or len(os.listdir(path)) > 0:
            # Some linux systems appear to raise an EPERM instead of an
            # ENOTEMPTY, see https://bugs.launchpad.net/bugs/1240797
            if ignore_metadata_caches:
                try:
                    found = False
                    for x in os.listdir(path):
                        if x.lower() in {'.ds_store', 'thumbs.db'}:
                            found = True
                            x = os.path.join(path, x)
                            if os.path.isdir(x):
                                shutil.rmtree(x)
                            else:
                                os.remove(x)
                except Exception:  # We could get an error, if, for example, windows has locked Thumbs.db
                    found = False
                if found:
                    # Try once more, now that the caches are gone
                    remove_dir_if_empty(path)
            return
        raise


expanduser = os.path.expanduser


def format_permissions(st_mode):
    '''
    Return a ten character, ls -l style, representation of st_mode, for
    example: drwxr-xr-x. The first character is the file type (? if it is not
    recognized), followed by the read/write/execute permissions for the user,
    the group and others. The setuid, setgid and sticky bits are shown in the
    corresponding execute position.
    '''
    import stat
    for func, letter in (x.split(':') for x in 'REG:- DIR:d BLK:b CHR:c FIFO:p LNK:l SOCK:s'.split()):
        if getattr(stat, 'S_IS' + func)(st_mode):
            break
    else:
        letter = '?'
    rwx = ('---', '--x', '-w-', '-wx', 'r--', 'r-x', 'rw-', 'rwx')
    ans = [letter] + list(rwx[(st_mode >> 6) & 7]) + list(rwx[(st_mode >> 3) & 7]) + list(rwx[(st_mode & 7)])
    if st_mode & stat.S_ISUID:
        ans[3] = 's' if (st_mode & stat.S_IXUSR) else 'S'
    if st_mode & stat.S_ISGID:
        # NOTE(review): 'l' rather than 'S' looks like the mandatory locking
        # marker used by some ls implementations -- confirm it is intended
        ans[6] = 's' if (st_mode & stat.S_IXGRP) else 'l'
    if st_mode & stat.S_ISVTX:
        # The sticky bit is displayed in the execute position of *others*
        ans[9] = 't' if (st_mode & stat.S_IXOTH) else 'T'
    return ''.join(ans)


def copyfile(src, dest):
    ' Copy the contents of src to dest and then, on a best effort basis, its stat information '
    shutil.copyfile(src, dest)
    try:
        shutil.copystat(src, dest)
    except Exception:
        # Failing to copy the metadata is not fatal
        pass


def get_hardlink_function(src, dest):
    '''
    Return a function, callable as func(src, dest), to create hardlinks from
    src to dest, or None if hardlinks cannot be used (only possible on
    windows: different drive letters or the drive does not support hardlinks).
    '''
    if not iswindows:
        return os.link
    from calibre_extensions import winutil
    root = dest[0] + ':\\'
    if src[0].lower() == dest[0].lower() and hasattr(winutil, 'supports_hardlinks') and winutil.supports_hardlinks(root):
        return windows_fast_hardlink
    return None


def copyfile_using_links(path, dest, dest_is_dir=True, filecopyfunc=copyfile):
    '''
    Copy the file at path to dest by creating a hardlink, falling back to
    filecopyfunc(path, dest) if hardlinking is not available or fails.

    If dest_is_dir is True, dest is the directory into which the file is
    placed, keeping its name.
    '''
    path, dest = os.path.abspath(path), os.path.abspath(dest)
    if dest_is_dir:
        dest = os.path.join(dest, os.path.basename(path))
    hardlink = get_hardlink_function(path, dest)
    if hardlink is None:
        filecopyfunc(path, dest)
        return
    try:
        hardlink(path, dest)
    except Exception:
        filecopyfunc(path, dest)


def copytree_using_links(path, dest, dest_is_parent=True, filecopyfunc=copyfile):
    '''
    Recursively copy the directory tree at path to dest. Directories are
    created (already existing ones are fine), files are hardlinked, falling
    back to filecopyfunc(src, dest) if hardlinking is not available or fails.

    If dest_is_parent is True, the tree is created inside dest, keeping the
    name of the top level directory.
    '''
    path, dest = os.path.abspath(path), os.path.abspath(dest)
    if dest_is_parent:
        dest = os.path.join(dest, os.path.basename(path))
    hardlink = get_hardlink_function(path, dest)
    try:
        os.makedirs(dest)
    except OSError as e:
        if e.errno != errno.EEXIST:
            raise
    for dirpath, dirnames, filenames in os.walk(path):
        base = os.path.relpath(dirpath, path)
        dest_base = os.path.join(dest, base)
        for dname in dirnames:
            try:
                os.mkdir(os.path.join(dest_base, dname))
            except OSError as e:
                if e.errno != errno.EEXIST:
                    raise
        for fname in filenames:
            src, df = os.path.join(dirpath, fname), os.path.join(dest_base, fname)
            if hardlink is None:
                filecopyfunc(src, df)
                continue
            try:
                hardlink(src, df)
            except Exception:
                filecopyfunc(src, df)


rmtree = shutil.rmtree


if iswindows:
    long_path_prefix = '\\\\?\\'

    def make_long_path_useable(path):
        ' Add the long path prefix to absolute paths of more than 200 characters, so that windows can use them '
        if len(path) > 200 and os.path.isabs(path) and not path.startswith(long_path_prefix):
            path = long_path_prefix + os.path.normpath(path)
        return path
else:
    def make_long_path_useable(path):
        ' No-op on platforms other than windows: path is returned unchanged '
        return path