1'''
2Make strings safe for use as ASCII filenames, while trying to preserve as much
3meaning as possible.
4'''
5
6import errno
7import os
8import shutil
9import time
10from math import ceil
11from contextlib import suppress, closing
12
13from calibre import force_unicode, isbytestring, prints, sanitize_file_name
14from calibre.constants import (
15    filesystem_encoding, iswindows, preferred_encoding, ismacos
16)
17from calibre.utils.localization import get_udc
18from polyglot.builtins import iteritems, itervalues
19
20
def ascii_text(orig):
    '''
    Convert ``orig`` to text using the decoder returned by get_udc().

    If that decoder raises, fall back to a lossy conversion: a str is first
    encoded to ASCII with replacement, then the bytes are decoded using
    preferred_encoding. A bytes result is decoded as ASCII before returning.
    '''
    decoder = get_udc()
    try:
        result = decoder.decode(orig)
    except Exception:
        raw = orig.encode('ascii', 'replace') if isinstance(orig, str) else orig
        result = raw.decode(preferred_encoding, 'replace')
    if not isinstance(result, bytes):
        return result
    return result.decode('ascii', 'replace')
32
33
def ascii_filename(orig, substitute='_'):
    '''
    Build a file name from ``orig``: run it through ascii_text(), turn every
    '?' into '_', replace characters with a code point below 32 by
    ``substitute`` and finally pass the result to sanitize_file_name().

    A bytes ``substitute`` is decoded with filesystem_encoding first.
    '''
    if isinstance(substitute, bytes):
        substitute = substitute.decode(filesystem_encoding)
    text = ascii_text(orig).replace('?', '_')
    cleaned = []
    for ch in text:
        cleaned.append(substitute if ord(ch) < 32 else ch)
    return sanitize_file_name(''.join(cleaned), substitute=substitute)
40
41
def shorten_component(s, by_what):
    '''
    Shorten ``s`` by cutting out its middle, keeping an equally sized head
    and tail of ``(len(s) - by_what) // 2`` items each.

    ``s`` is returned unchanged when it is shorter than ``by_what`` or when
    the head/tail size would not be positive.
    '''
    total = len(s)
    if total >= by_what:
        half = (total - by_what) // 2
        if half > 0:
            return s[:half] + s[-half:]
    return s
50
51
def limit_component(x, limit=254):
    '''
    Shorten the single path component ``x`` (str or bytes) until its encoded
    length is at most ``limit``, removing characters from its middle with
    shorten_component().

    The loop is guaranteed to terminate. If the component cannot be shortened
    any further (it is down to two characters or fewer) it is returned as is,
    even if it is still longer than ``limit``.
    '''
    # Windows and macOS use UTF-16 code units for length, Linux uses arbitrary
    # binary data, but we will assume UTF-8.
    # NOTE(review): len() of the 'utf-16' encoded form counts bytes and
    # includes a byte order mark, so this is stricter than ``limit`` UTF-16
    # code units -- confirm that is intended.
    filename_encoding_for_length = 'utf-16' if iswindows or ismacos else 'utf-8'

    def encoded_length(value):
        q = value if isinstance(value, bytes) else value.encode(filename_encoding_for_length)
        return len(q)

    while True:
        delta = encoded_length(x) - limit
        if delta <= 0:
            break
        by_what = max(2, delta // 2)
        if by_what > len(x) - 2:
            # shorten_component() returns its input unchanged when asked to
            # remove this much (for example a long run of characters that
            # each encode to four bytes), which previously made this loop
            # spin forever. Halve the component instead.
            by_what = len(x) // 2
        shortened = shorten_component(x, by_what)
        if len(shortened) >= len(x):
            # No further progress is possible
            break
        x = shortened

    return x
66
67
def shorten_components_to(length, components, more_to_take=0, last_has_extension=True):
    '''
    Shorten the path ``components`` so that, joined with os.sep, they are at
    most ``length`` characters long.

    Every component is first passed through limit_component(). If the joined
    path is still too long, each component gives up a number of characters
    proportional to its share of the path.

    :param length: Maximum length of the joined path.
    :param components: The path components, in order.
    :param more_to_take: Extra characters to remove; increased by two on each
        retry when a pass did not shorten the path enough.
    :param last_has_extension: If True the extension of the last component is
        preserved and only its base name is shortened.
    :return: A list of the (possibly shortened) components.
    '''
    components = [limit_component(cx) for cx in components]
    filepath = os.sep.join(components)
    extra = len(filepath) - (length - more_to_take)
    if extra < 1:
        return components
    total = float(len(filepath))
    deltas = [int(ceil(len(x) / total * extra)) for x in components]
    # Identify the last component by position. An identity test against
    # components[-1] also matches earlier components that happen to be the
    # very same string object.
    last_index = len(components) - 1
    ans = []

    for i, (x, delta) in enumerate(zip(components, deltas)):
        is_last = i == last_index
        if delta > len(x):
            r = x[0] if is_last else ''
        else:
            if last_has_extension and is_last:
                b, e = os.path.splitext(x)
                if e == '.':
                    e = ''
                r = shorten_component(b, delta)+e
                if r.startswith('.'):
                    # Do not turn the file into a dot file
                    r = x[0]+r
            else:
                r = shorten_component(x, delta)
            r = r.strip()
            if not r:
                r = x.strip()[0] if x.strip() else 'x'
        ans.append(r)
    if len(os.sep.join(ans)) > length:
        # Propagate last_has_extension, otherwise the retry silently falls
        # back to the default of True.
        return shorten_components_to(length, components, more_to_take+2, last_has_extension)
    return ans
101
102
def find_executable_in_path(name, path=None):
    '''
    Search the folders listed in ``path`` (the PATH environment variable when
    ``path`` is None) for ``name`` and return the absolute path of the first
    match for which os.access(..., os.X_OK) is true, or None if there is no
    match.

    On Windows, unless ``name`` already ends with .exe, the suffixes .exe,
    .cmd and .bat are tried in that order.
    '''
    search_path = os.environ.get('PATH', '') if path is None else path
    if iswindows and not name.endswith('.exe'):
        suffixes = ['.exe', '.cmd', '.bat']
    else:
        suffixes = ('',)
    for folder in search_path.split(os.pathsep):
        base = os.path.abspath(os.path.join(folder, name))
        for suffix in suffixes:
            candidate = base + suffix
            if os.access(candidate, os.X_OK):
                return candidate
    return None
113
114
def is_case_sensitive(path):
    '''
    Return True if the filesystem is case sensitive.

    path must be the path to an existing directory. You must have permission
    to create and delete files in this directory. The results of this test
    apply to the filesystem containing the directory in path.

    On Windows no test is performed and False is returned.
    '''
    if iswindows:
        return False
    probe = os.path.join(path, 'calibre_test_case_sensitivity.txt')
    variant = os.path.join(path, 'calibre_TesT_CaSe_sensitiVitY.Txt')
    # Get rid of any probe file left behind by an earlier run
    try:
        os.remove(probe)
    except OSError:
        pass
    open(probe, 'w').close()
    # If the differently cased name resolves, the filesystem ignores case
    sensitive = not os.path.exists(variant)
    os.remove(probe)
    return sensitive
134
135
def case_preserving_open_file(path, mode='wb', mkdir_mode=0o777):
    '''
    Open the file pointed to by path with the specified mode. If any
    directories in path do not exist, they are created. Returns the
    opened file object and the path to the opened file object. This path is
    guaranteed to have the same case as the on disk path. For case insensitive
    filesystems, the returned path may be different from the passed in path.
    The returned path is always unicode and always an absolute path.

    If mode is None, then this function assumes that path points to a directory
    and returns the path to the directory as the file object.

    mkdir_mode specifies the mode with which any missing directories in path
    are created.

    Raises ValueError if path points to the root of the filesystem. If
    flushing the newly opened file to disk fails, the file is closed before
    the error is re-raised.
    '''
    if isbytestring(path):
        path = path.decode(filesystem_encoding)

    path = os.path.abspath(path)

    sep = force_unicode(os.sep, 'ascii')

    if path.endswith(sep):
        path = path[:-1]
    if not path:
        raise ValueError('Path must not point to root')

    components = path.split(sep)
    if not components:
        raise ValueError('Invalid path: %r'%path)

    cpath = sep
    if iswindows:
        # Always upper case the drive letter and add a trailing slash so that
        # the first os.listdir works correctly
        cpath = components[0].upper() + sep

    bdir = path if mode is None else os.path.dirname(path)
    if not os.path.exists(bdir):
        os.makedirs(bdir, mkdir_mode)

    # Walk all the directories in path, putting the on disk case version of
    # the directory into cpath
    dirs = components[1:] if mode is None else components[1:-1]
    for comp in dirs:
        cdir = os.path.join(cpath, comp)
        cl = comp.lower()
        try:
            candidates = [c for c in os.listdir(cpath) if c.lower() == cl]
        except Exception:
            # Don't have permission to do the listdir, assume the case is
            # correct as we have no way to check it. Only Exception is caught
            # so that KeyboardInterrupt and SystemExit still propagate.
            pass
        else:
            if len(candidates) == 1:
                cdir = os.path.join(cpath, candidates[0])
            # else: We are on a case sensitive file system so cdir must already
            # be correct
        cpath = cdir

    if mode is None:
        ans = fpath = cpath
    else:
        fname = components[-1]
        ans = lopen(os.path.join(cpath, fname), mode)
        # Ensure file and all its metadata is written to disk so that subsequent
        # listdir() has file name in it. I don't know if this is actually
        # necessary, but given the diversity of platforms, best to be safe.
        try:
            ans.flush()
            os.fsync(ans.fileno())
        except Exception:
            # Do not leak the open file if it cannot be synced
            ans.close()
            raise

        cl = fname.lower()
        try:
            candidates = [c for c in os.listdir(cpath) if c.lower() == cl]
        except OSError:
            # The containing directory, somehow disappeared?
            candidates = []
        if len(candidates) == 1:
            fpath = os.path.join(cpath, candidates[0])
        else:
            # We are on a case sensitive filesystem
            fpath = os.path.join(cpath, fname)
    return ans, fpath
219
220
def windows_get_fileid(path):
    ''' The fileid uniquely identifies actual file contents (it is the same for
    all hardlinks to a file). Similar to inode number on linux. Returns None
    if looking up the id fails with an OSError. '''
    from calibre_extensions.winutil import get_file_id
    target = path.decode(filesystem_encoding) if isbytestring(path) else path
    try:
        return get_file_id(target)
    except OSError:
        return None
229
230
def samefile_windows(src, dst):
    '''
    Return True if ``src`` and ``dst`` have the same normalized absolute
    path, or if windows_get_fileid() reports the same id for both. Return
    False when no id could be obtained for either path.
    '''
    def canonical(p):
        return os.path.normcase(os.path.abspath(p))

    if canonical(src) == canonical(dst):
        return True

    src_id = windows_get_fileid(src)
    dst_id = windows_get_fileid(dst)
    if src_id is None and dst_id is None:
        return False
    return src_id == dst_id
241
242
def samefile(src, dst):
    '''
    Check if two paths point to the same actual file on the filesystem. Handles
    symlinks, case insensitivity, mapped drives, etc.

    Returns True iff both paths exist and point to the same file on disk.

    Note: On windows will return True if the two strings are identical (up to
    case) even if the file does not exist. This is because I have no way of
    knowing how reliable the GetFileInformationByHandle method is.
    '''
    if iswindows:
        return samefile_windows(src, dst)

    if not hasattr(os.path, 'samefile'):
        # No native implementation available: compare normalized path names
        normalized_src = os.path.normcase(os.path.abspath(src))
        normalized_dst = os.path.normcase(os.path.abspath(dst))
        return normalized_src == normalized_dst

    # Unix
    try:
        return os.path.samefile(src, dst)
    except OSError:
        return False
268
269
def windows_get_size(path):
    ''' On windows file sizes are only accurately stored in the actual file,
    not in the directory entry (which could be out of date). So we open the
    file, and get the actual size. '''
    from calibre_extensions import winutil
    if isbytestring(path):
        path = path.decode(filesystem_encoding)
    # Allow every kind of sharing so that opening the file does not get in
    # the way of other users of it
    share_all = winutil.FILE_SHARE_READ | winutil.FILE_SHARE_WRITE | winutil.FILE_SHARE_DELETE
    handle = winutil.create_file(path, 0, share_all, winutil.OPEN_EXISTING, 0)
    with closing(handle):
        return winutil.get_file_size(handle)
282
283
def windows_hardlink(src, dest):
    '''
    Create ``dest`` as a hardlink to ``src`` and wait until the size reported
    for ``dest`` matches the size of ``src``. Raises OSError if the sizes
    still differ after all retries.
    '''
    from calibre_extensions import winutil
    winutil.create_hard_link(dest, src)
    expected = os.path.getsize(src)
    # We open and close dest, to ensure its directory entry is updated
    # see http://blogs.msdn.com/b/oldnewthing/archive/2011/12/26/10251026.aspx
    for _ in range(10):
        # If we are on a network filesystem, we have to wait for some indeterminate time, since
        # network file systems are the best thing since sliced bread
        with suppress(OSError):
            if windows_get_size(dest) == expected:
                return
        time.sleep(0.3)

    actual = windows_get_size(dest)
    if actual == expected:
        return
    prefix = 'Creating hardlink from %s to %s failed: %%s' % (src, dest)
    raise OSError(prefix % ('hardlink size: %d not the same as source size' % actual))
304
305
def windows_fast_hardlink(src, dest):
    '''
    Create ``dest`` as a hardlink to ``src`` and check once, without any
    retries, that both report the same size. Raises OSError if they differ.
    '''
    from calibre_extensions import winutil
    winutil.create_hard_link(dest, src)
    src_size = windows_get_size(src)
    dest_size = windows_get_size(dest)
    if src_size == dest_size:
        return
    prefix = 'Creating hardlink from %s to %s failed: %%s' % (src, dest)
    raise OSError(prefix % ('hardlink size: %d not the same as source size: %s' % (dest_size, src_size)))
313
314
def windows_nlinks(path):
    ' Return the value of winutil.nlinks() for path, decoding bytes paths first '
    from calibre_extensions import winutil
    target = path.decode(filesystem_encoding) if isbytestring(path) else path
    return winutil.nlinks(target)
320
321
class WindowsAtomicFolderMove:

    '''
    Move all the files inside a specified folder in an atomic fashion,
    preventing any other process from locking a file while the operation is
    incomplete. Raises an IOError if another process has locked a file before
    the operation starts. Note that this only operates on the files in the
    folder, not any sub-folders.

    Open handles are kept in handle_map, keyed by the normalized absolute
    path of each file.
    '''

    def __init__(self, path):
        from collections import defaultdict
        from calibre_extensions import winutil
        self.handle_map = {}

        if isbytestring(path):
            path = path.decode(filesystem_encoding)

        if not os.path.exists(path):
            return

        def full_key(entry):
            # The form in which paths are stored as keys of handle_map
            return os.path.normcase(os.path.abspath(os.path.join(path, entry)))

        names = os.listdir(path)
        name_to_fileid = {}
        fileid_to_names = defaultdict(set)
        for entry in names:
            fid = windows_get_fileid(os.path.join(path, entry))
            name_to_fileid[entry] = fid
            fileid_to_names[fid].add(entry)

        for entry in names:
            f = full_key(entry)
            if not os.path.isfile(f):
                continue
            with suppress(OSError):
                # Ensure the file is not read-only
                winutil.set_file_attributes(f, winutil.FILE_ATTRIBUTE_NORMAL)

            try:
                h = winutil.create_file(
                    f, winutil.GENERIC_READ, winutil.FILE_SHARE_DELETE,
                    winutil.OPEN_EXISTING, winutil.FILE_FLAG_SEQUENTIAL_SCAN)
            except OSError as e:
                sharing_violation = e.winerror == winutil.ERROR_SHARING_VIOLATION
                if sharing_violation:
                    # The file could be a hardlink to an already opened file,
                    # in which case we use the same handle for both files
                    fileid = name_to_fileid[entry]
                    reused = False
                    if fileid is not None:
                        for sibling in fileid_to_names[fileid]:
                            sibling_key = full_key(sibling)
                            if sibling_key in self.handle_map:
                                self.handle_map[f] = self.handle_map[sibling_key]
                                reused = True
                                break
                    if reused:
                        continue

                self.close_handles()
                if sharing_violation:
                    err = IOError(errno.EACCES,
                            _('File is open in another process'))
                    err.filename = f
                    raise err
                prints('CreateFile failed for: %r' % f)
                raise
            except:
                self.close_handles()
                prints('CreateFile failed for: %r' % f)
                raise
            else:
                self.handle_map[f] = h

    def copy_path_to(self, path, dest):
        ' Copy the locked file path to dest, hardlinking if possible and otherwise reading it through its open handle '
        from calibre_extensions import winutil
        handle = next(
            (h for p, h in self.handle_map.items() if samefile_windows(path, p)),
            None)
        if handle is None:
            if os.path.exists(path):
                raise ValueError('The file %r did not exist when this move'
                        ' operation was started'%path)
            raise ValueError('The file %r does not exist'%path)

        try:
            windows_hardlink(path, dest)
        except OSError:
            # Fall through to copying the data via the open handle
            pass
        else:
            return

        winutil.set_file_pointer(handle, 0, winutil.FILE_BEGIN)
        chunk_size = 1024 * 1024
        with lopen(dest, 'wb') as f:
            while True:
                raw = winutil.read_file(handle, chunk_size)
                if not raw:
                    break
                f.write(raw)

    def release_file(self, path):
        ' Release the lock on the file pointed to by path. Will also release the lock on any hardlinks to path '
        for p, h in iteritems(self.handle_map):
            if samefile_windows(path, p):
                break
        else:
            # path is not one of the locked files
            return
        h.close()
        stale = [f for f, other in iteritems(self.handle_map) if other is h]
        for f in stale:
            self.handle_map.pop(f)

    def close_handles(self):
        ' Close every open handle and forget all locked files '
        for handle in itervalues(self.handle_map):
            handle.close()
        self.handle_map = {}

    def delete_originals(self):
        ' Delete every locked file and then close all handles '
        from calibre_extensions import winutil
        for locked_path in self.handle_map:
            winutil.delete_file(locked_path)
        self.close_handles()
441
442
def hardlink_file(src, dest):
    ' Create dest as a hardlink to src, using windows_hardlink() on Windows and os.link() elsewhere '
    if not iswindows:
        os.link(src, dest)
        return
    windows_hardlink(src, dest)
448
449
def nlinks_file(path):
    ' Return number of hardlinks to the file '
    return windows_nlinks(path) if iswindows else os.stat(path).st_nlink
455
456
if iswindows:
    from calibre_extensions.winutil import move_file

    def rename_file(a, b):
        ' Move a to b with winutil.move_file(), decoding bytes paths with os.fsdecode() first '
        source = os.fsdecode(a) if isinstance(a, bytes) else a
        target = os.fsdecode(b) if isinstance(b, bytes) else b
        move_file(source, target)
466
467
def retry_on_fail(func, *args, count=10, sleep_time=0.2):
    '''
    Call ``func(*args)`` up to ``count`` times, until it completes without
    raising OSError. ``sleep_time`` seconds are waited between attempts. The
    OSError from the last attempt is propagated; other exceptions are never
    caught. The return value of ``func`` is discarded.
    '''
    final_attempt = count - 1
    for attempt in range(count):
        try:
            func(*args)
        except OSError:
            if attempt >= final_attempt:
                raise
            # Try the operation repeatedly in case something like a virus
            # scanner has opened one of the files (I love windows)
            time.sleep(sleep_time)
        else:
            return
479
480
def atomic_rename(oldpath, newpath):
    '''Replace the file newpath with the file oldpath. Can fail if the files
    are on different volumes. If succeeds, guaranteed to be atomic. newpath may
    or may not exist. If it exists, it is replaced. On windows the rename is
    retried via retry_on_fail(). '''
    if not iswindows:
        os.rename(oldpath, newpath)
        return
    retry_on_fail(rename_file, oldpath, newpath)
489
490
def remove_dir_if_empty(path, ignore_metadata_caches=False):
    ''' Remove a directory if it is empty or contains only the folder metadata
    caches from different OSes. To delete the folder if it contains only
    metadata caches, set ignore_metadata_caches to True. A directory that is
    not empty is silently left in place.'''
    try:
        os.rmdir(path)
    except OSError as err:
        # Some linux systems appear to raise an EPERM instead of an
        # ENOTEMPTY, see https://bugs.launchpad.net/bugs/1240797
        # so also check the directory contents directly
        if err.errno != errno.ENOTEMPTY and len(os.listdir(path)) <= 0:
            raise
    else:
        return

    if not ignore_metadata_caches:
        return

    cache_names = {'.ds_store', 'thumbs.db'}
    removed_any = False
    try:
        for entry in os.listdir(path):
            if entry.lower() not in cache_names:
                continue
            removed_any = True
            target = os.path.join(path, entry)
            if os.path.isdir(target):
                shutil.rmtree(target)
            else:
                os.remove(target)
    except Exception:  # We could get an error, if, for example, windows has locked Thumbs.db
        removed_any = False
    if removed_any:
        remove_dir_if_empty(path)
519
520
# Module-level alias for os.path.expanduser
expanduser = os.path.expanduser
522
523
def format_permissions(st_mode):
    '''
    Render ``st_mode`` as a ten character, ls style permission string such
    as ``-rw-r--r--``.

    The first character is the file type (``?`` if it is not recognized). It
    is followed by the rwx triplets for user, group and other. The setuid,
    setgid and sticky bits replace the corresponding execute position.
    '''
    import stat
    for func, letter in (x.split(':') for x in 'REG:- DIR:d BLK:b CHR:c FIFO:p LNK:l SOCK:s'.split()):
        if getattr(stat, 'S_IS' + func)(st_mode):
            break
    else:
        letter = '?'
    rwx = ('---', '--x', '-w-', '-wx', 'r--', 'r-x', 'rw-', 'rwx')
    ans = [letter] + list(rwx[(st_mode >> 6) & 7]) + list(rwx[(st_mode >> 3) & 7]) + list(rwx[(st_mode & 7)])
    if st_mode & stat.S_ISUID:
        ans[3] = 's' if (st_mode & stat.S_IXUSR) else 'S'
    if st_mode & stat.S_ISGID:
        ans[6] = 's' if (st_mode & stat.S_IXGRP) else 'l'
    if st_mode & stat.S_ISVTX:
        # The sticky bit is shown in the execute position of "other", so it
        # must be combined with the execute bit of other, not of the user
        ans[9] = 't' if (st_mode & stat.S_IXOTH) else 'T'
    return ''.join(ans)
540
541
def copyfile(src, dest):
    ' Copy the contents of src to dest, then try to copy its stat information as well, ignoring any failure to do so '
    shutil.copyfile(src, dest)
    with suppress(Exception):
        shutil.copystat(src, dest)
548
549
def get_hardlink_function(src, dest):
    '''
    Return a callable for hardlinking src to dest: os.link when not on
    Windows. On Windows, windows_fast_hardlink is returned when both paths
    start with the same drive letter and winutil reports that the drive
    supports hardlinks, otherwise None.
    '''
    if not iswindows:
        return os.link
    from calibre_extensions import winutil
    drive_root = dest[0] + ':\\'
    same_drive = src[0].lower() == dest[0].lower()
    if same_drive and hasattr(winutil, 'supports_hardlinks') and winutil.supports_hardlinks(drive_root):
        return windows_fast_hardlink
    return None
557
558
def copyfile_using_links(path, dest, dest_is_dir=True, filecopyfunc=copyfile):
    '''
    Hardlink the file path to dest, falling back to filecopyfunc(path, dest)
    if hardlinking fails for any reason. When dest_is_dir is True the file is
    created inside dest under its own base name.
    '''
    path = os.path.abspath(path)
    dest = os.path.abspath(dest)
    target = os.path.join(dest, os.path.basename(path)) if dest_is_dir else dest
    link = get_hardlink_function(path, target)
    try:
        link(path, target)
    except Exception:
        filecopyfunc(path, target)
568
569
def copytree_using_links(path, dest, dest_is_parent=True, filecopyfunc=copyfile):
    '''
    Recreate the directory tree at path under dest, hardlinking every file
    and falling back to filecopyfunc(src, dest) for files that cannot be
    hardlinked. Directories that already exist are reused. When
    dest_is_parent is True the tree is created inside dest under the base
    name of path.
    '''
    path, dest = os.path.abspath(path), os.path.abspath(dest)
    if dest_is_parent:
        dest = os.path.join(dest, os.path.basename(path))
    hardlink = get_hardlink_function(path, dest)

    def ensure_dir(create, target):
        # Create target with the given function, tolerating only EEXIST
        try:
            create(target)
        except OSError as e:
            if e.errno != errno.EEXIST:
                raise

    ensure_dir(os.makedirs, dest)
    for dirpath, dirnames, filenames in os.walk(path):
        dest_base = os.path.join(dest, os.path.relpath(dirpath, path))
        for dname in dirnames:
            ensure_dir(os.mkdir, os.path.join(dest_base, dname))
        for fname in filenames:
            src = os.path.join(dirpath, fname)
            df = os.path.join(dest_base, fname)
            try:
                hardlink(src, df)
            except Exception:
                filecopyfunc(src, df)
595
596
# Module-level alias for shutil.rmtree
rmtree = shutil.rmtree
598
599
if iswindows:
    long_path_prefix = '\\\\?\\'

    def make_long_path_useable(path):
        ' Return path with long_path_prefix added if it is absolute, longer than 200 characters and not already prefixed '
        if len(path) <= 200:
            return path
        if not os.path.isabs(path) or path.startswith(long_path_prefix):
            return path
        return long_path_prefix + os.path.normpath(path)
else:
    def make_long_path_useable(path):
        ' Return path unchanged, no special handling is done on this platform '
        return path
610