1"""Common operations on Posix pathnames.
2
3Instead of importing this module directly, import os and refer to
4this module as os.path.  The "os.path" name is an alias for this
5module on Posix systems; on other systems (e.g. Mac, Windows),
6os.path provides the same operations in a manner specific to that
7platform, and is an alias to another module (e.g. macpath, ntpath).
8
9Some of this can actually be useful on non-Posix systems too, e.g.
10for manipulation of the pathname component of URLs.
11"""
12
# Strings representing various path-related bits and pieces.
# These are primarily for export; internally, they are hardcoded
# (the functions below spell out '/', '.', '..' and their bytes
# counterparts directly instead of reading these names).
# Should be set before imports for resolving cyclic dependency.
curdir = '.'                # name referring to the current directory
pardir = '..'               # name referring to the parent directory
extsep = '.'                # separates a base name from its extension
sep = '/'                   # separates pathname components
pathsep = ':'               # separates entries of a search path (see defpath)
defpath = '/bin:/usr/bin'   # default search path, entries joined by pathsep
altsep = None               # no alternative component separator is defined
devnull = '/dev/null'       # path of the null device
24
25import os
26import sys
27import stat
28import genericpath
29from genericpath import *
30
# Names exported by a star import of this module.  Some entries (for
# example "commonprefix", "getsize", "exists", "isdir", "samefile") are
# not defined by any code visible in this file; presumably they are
# provided by the "from genericpath import *" above -- confirm against
# genericpath.
__all__ = ["normcase","isabs","join","splitdrive","split","splitext",
           "basename","dirname","commonprefix","getsize","getmtime",
           "getatime","getctime","islink","exists","lexists","isdir","isfile",
           "ismount", "expanduser","expandvars","normpath","abspath",
           "samefile","sameopenfile","samestat",
           "curdir","pardir","sep","pathsep","defpath","altsep","extsep",
           "devnull","realpath","supports_unicode_filenames","relpath",
           "commonpath"]
39
40
def _get_sep(path):
    """Return the '/' separator in the same flavour as *path*.

    A bytes argument yields b'/'; any other argument yields the str '/'.
    """
    return b'/' if isinstance(path, bytes) else '/'
46
47# Normalize the case of a pathname.  Trivial in Posix, string.lower on Mac.
48# On MS-DOS this may also turn slashes into backslashes; however, other
49# normalizations (such as optimizing '../' away) are not allowed
50# (another function should be defined to do that).
51
def normcase(s):
    """Normalize case of pathname.  Has no effect under Posix.

    *s* may be a str, a bytes object or a path-like object; the value
    produced by os.fspath(s) is returned unchanged.

    Raises TypeError (from os.fspath) for any other kind of argument.
    """
    # os.fspath() returns only str or bytes and raises TypeError itself for
    # everything else, so a second type check here could never trigger.
    return os.fspath(s)
59
60
61# Return whether a path is absolute.
62# Trivial in Posix, harder on the Mac or MS-DOS.
63
def isabs(s):
    """Return True when the path begins with a slash, i.e. is absolute."""
    path = os.fspath(s)
    return path.startswith(_get_sep(path))
69
70
71# Join pathnames.
72# Ignore the previous parts if a part is absolute.
73# Insert a '/' unless the first part is empty or already ends in '/'.
74
def join(a, *p):
    """Concatenate pathname components, adding '/' between them as needed.

    A component that is itself absolute throws away everything joined so
    far.  If the last component is empty the result ends with a separator.
    Mixing str and bytes components raises TypeError.
    """
    a = os.fspath(a)
    slash = _get_sep(a)
    result = a
    try:
        if not p:
            # Issue 23780: with no further components nothing below would
            # touch the first one, so force a str/bytes compatibility check.
            result[:0] + slash
        for part in map(os.fspath, p):
            if part.startswith(slash):
                # An absolute component replaces what was built so far.
                result = part
            elif result and not result.endswith(slash):
                result += slash + part
            else:
                # Nothing before it, or a separator is already in place.
                result += part
    except (TypeError, AttributeError, BytesWarning):
        # Let the shared checker raise a clearer error for bad or mixed
        # argument types; otherwise propagate the original exception.
        genericpath._check_arg_types('join', a, *p)
        raise
    return result
97
98
# Split a path into head (everything up to the last '/') and tail (the
# rest).  If the path ends in '/', tail will be empty.  If there is no
# '/' in the path, head will be empty.
# Trailing '/'es are stripped from head unless it is the root.
103
def split(p):
    """Split a pathname into the pair (head, tail).

    tail is whatever follows the final slash and head is everything
    before it.  Either part may be empty.
    """
    p = os.fspath(p)
    slash = _get_sep(p)
    cut = p.rfind(slash) + 1
    head = p[:cut]
    tail = p[cut:]
    # Drop trailing slashes from head, except when head consists of nothing
    # but slashes (stripping would then leave it empty, so keep it as is).
    head = head.rstrip(slash) or head
    return head, tail
114
115
# Split a path into root and extension.
# The extension is everything starting at the last dot in the last
# pathname component; the root is everything before that.
# It is always true that root + ext == p.
120
def splitext(p):
    # Delegates to genericpath._splitext with the POSIX separator, no
    # alternative separator, and '.' as the extension separator, each in
    # the same type (str or bytes) as the path.
    p = os.fspath(p)
    slash, dot = (b'/', b'.') if isinstance(p, bytes) else ('/', '.')
    return genericpath._splitext(p, slash, None, dot)
# The docstring is shared with the generic implementation.
splitext.__doc__ = genericpath._splitext.__doc__
131
132# Split a pathname into a drive specification and the rest of the
133# path.  Useful on DOS/Windows/NT; on Unix, the drive is always empty.
134
def splitdrive(p):
    """Split a pathname into a (drive, path) pair.

    On Posix the drive is always empty; it is returned as an empty value
    of the same type (str or bytes) as the path.
    """
    path = os.fspath(p)
    no_drive = path[:0]
    return no_drive, path
140
141
142# Return the tail (basename) part of a path, same as split(path)[1].
143
def basename(p):
    """Return the last component of a pathname (text after the final '/')."""
    path = os.fspath(p)
    tail_start = path.rfind(_get_sep(path)) + 1
    return path[tail_start:]
150
151
152# Return the head (dirname) part of a path, same as split(path)[0].
153
def dirname(p):
    """Return the directory part of a pathname (text before the final '/')."""
    path = os.fspath(p)
    slash = _get_sep(path)
    head = path[:path.rfind(slash) + 1]
    # Trailing slashes are removed unless head is made up solely of
    # slashes, in which case stripping would empty it and it is kept.
    return head.rstrip(slash) or head
163
164
165# Is a path a symbolic link?
166# This will always return false on systems where os.lstat doesn't exist.
167
def islink(path):
    """Test whether a path is a symbolic link.

    Returns False when the path cannot be examined: it does not exist or
    is inaccessible (OSError), it cannot be represented at the OS level,
    e.g. it contains an embedded NUL (ValueError), or os.lstat is not
    available on this system (AttributeError).
    """
    try:
        st = os.lstat(path)
    except (OSError, ValueError, AttributeError):
        # A predicate should answer "no" for an unusable path, not raise.
        return False
    return stat.S_ISLNK(st.st_mode)
175
176# Being true for dangling symbolic links is also useful.
177
def lexists(path):
    """Test whether a path exists.  Returns True for broken symbolic links.

    Returns False when os.lstat fails with OSError, and also when the path
    cannot be represented at the OS level (for instance it contains an
    embedded NUL), which os.lstat reports as ValueError.
    """
    try:
        os.lstat(path)
    except (OSError, ValueError):
        # A path that cannot even be passed to the OS cannot exist.
        return False
    return True
185
186
187# Is a path a mount point?
188# (Does this work for all UNIXes?  Is it even guaranteed to work by Posix?)
189
def ismount(path):
    """Test whether a path is a mount point.

    *path* may be a str, bytes or path-like object.  Returns False when the
    path (or its parent) cannot be lstat()ed, when the path cannot be
    represented at the OS level (e.g. embedded NUL), or when it is a
    symbolic link.  Otherwise returns True if path/.. lives on a different
    device than path, or if path/.. is the very same i-node as path.
    """
    try:
        s1 = os.lstat(path)
    except (OSError, ValueError):
        # It doesn't exist (or is not a usable path) -- so not a mount point.
        return False
    # A symlink can never be a mount point
    if stat.S_ISLNK(s1.st_mode):
        return False

    # Convert path-like objects first so that the '..' literal chosen below
    # has the same type (str or bytes) as the underlying path.
    path = os.fspath(path)
    if isinstance(path, bytes):
        parent = join(path, b'..')
    else:
        parent = join(path, '..')
    parent = realpath(parent)
    try:
        s2 = os.lstat(parent)
    except (OSError, ValueError):
        return False

    if s1.st_dev != s2.st_dev:
        return True     # path/.. is on a different device than path
    if s1.st_ino == s2.st_ino:
        return True     # path/.. is the same i-node as path
    return False
221
222
223# Expand paths beginning with '~' or '~user'.
224# '~' means $HOME; '~user' means that user's home directory.
225# If the path doesn't begin with '~', or if the user or $HOME is unknown,
226# the path is returned unchanged (leaving error reporting to whatever
227# function is called with the expanded path as argument).
228# See also module 'glob' for expansion of *, ? and [...] in pathnames.
229# (A function should also be defined to do full *sh-style environment
230# variable expansion.)
231
def expanduser(path):
    """Expand ~ and ~user constructions.  If user or $HOME is unknown,
    do nothing.

    *path* may be str, bytes or a path-like object; the result has the same
    type as the underlying path.  A leading '~' is replaced by $HOME (or,
    when HOME is unset, by the current user's password-database home
    directory); a leading '~user' is replaced by that user's home
    directory.  The path is returned unchanged when it does not start with
    '~', when the user is not in the password database, or when the pwd
    module is not available.
    """
    path = os.fspath(path)
    if isinstance(path, bytes):
        tilde = b'~'
    else:
        tilde = '~'
    if not path.startswith(tilde):
        return path
    sep = _get_sep(path)
    # i marks the end of the '~' or '~user' prefix.
    i = path.find(sep, 1)
    if i < 0:
        i = len(path)
    if i == 1:
        if 'HOME' not in os.environ:
            try:
                import pwd
            except ImportError:
                # No password database on this platform: nothing to expand.
                return path
            try:
                userhome = pwd.getpwuid(os.getuid()).pw_dir
            except KeyError:
                # bpo-10496: if the current user identifier doesn't exist in the
                # password database, return the path unchanged
                return path
        else:
            userhome = os.environ['HOME']
    else:
        try:
            import pwd
        except ImportError:
            # No password database on this platform: nothing to expand.
            return path
        name = path[1:i]
        if isinstance(name, bytes):
            # Decode with the filesystem encoding so that non-ASCII bytes
            # user names are looked up instead of raising
            # UnicodeDecodeError.
            name = os.fsdecode(name)
        try:
            pwent = pwd.getpwnam(name)
        except KeyError:
            # bpo-10496: if the user name from the path doesn't exist in the
            # password database, return the path unchanged
            return path
        userhome = pwent.pw_dir
    if isinstance(path, bytes):
        userhome = os.fsencode(userhome)
    # Avoid a doubled slash when the home directory ends with one; if the
    # whole result collapses to nothing the home directory was the root.
    userhome = userhome.rstrip(sep)
    return (userhome + path[i:]) or sep
276
277
278# Expand paths containing shell variable substitutions.
279# This expands the forms $variable and ${variable} only.
280# Non-existent variables are left unchanged.
281
# Lazily compiled patterns for str and bytes paths respectively; they are
# filled in by expandvars() the first time each flavour is needed.
_varprog = None
_varprogb = None

def expandvars(path):
    """Substitute shell variables written as $var or ${var} in a path.

    Variables that are not set in the environment are left as they are.
    The result has the same type (str or bytes) as the underlying path.
    """
    global _varprog, _varprogb
    path = os.fspath(path)
    if isinstance(path, bytes):
        if b'$' not in path:
            return path
        if not _varprogb:
            import re
            _varprogb = re.compile(br'\$(\w+|\{[^}]*\})', re.ASCII)
        find = _varprogb.search
        lbrace, rbrace = b'{', b'}'
        # None when os has no 'environb' attribute; handled in the loop.
        env = getattr(os, 'environb', None)
    else:
        if '$' not in path:
            return path
        if not _varprog:
            import re
            _varprog = re.compile(r'\$(\w+|\{[^}]*\})', re.ASCII)
        find = _varprog.search
        lbrace, rbrace = '{', '}'
        env = os.environ

    pos = 0
    match = find(path, pos)
    while match:
        begin, stop = match.span(0)
        key = match.group(1)
        if key.startswith(lbrace) and key.endswith(rbrace):
            key = key[1:-1]
        try:
            if env is None:
                # bytes path without os.environb: go through the str
                # environment and convert on the way in and out.
                replacement = os.fsencode(os.environ[os.fsdecode(key)])
            else:
                replacement = env[key]
        except KeyError:
            # Unknown variable: leave it in place and search after it.
            pos = stop
        else:
            remainder = path[stop:]
            path = path[:begin] + replacement
            # Resume after the inserted value so it is not expanded again.
            pos = len(path)
            path += remainder
        match = find(path, pos)
    return path
332
333
334# Normalize a path, e.g. A//B, A/./B and A/foo/../B all become A/B.
335# It should be understood that this may change the meaning of the path
336# if it contains symbolic links!
337
def normpath(path):
    """Normalize a path: collapse repeated slashes and resolve '.' and
    '..' components textually (symbolic links are not consulted)."""
    path = os.fspath(path)
    if isinstance(path, bytes):
        sep, empty, dot, dotdot = b'/', b'', b'.', b'..'
    else:
        sep, empty, dot, dotdot = '/', '', '.', '..'
    if path == empty:
        return dot

    # Number of leading slashes to keep.  POSIX allows one or two initial
    # slashes, but treats three or more as a single slash.
    if not path.startswith(sep):
        leading = 0
    elif path.startswith(sep * 2) and not path.startswith(sep * 3):
        leading = 2
    else:
        leading = 1

    kept = []
    for part in path.split(sep):
        if part == empty or part == dot:
            continue
        if part != dotdot:
            kept.append(part)
        elif kept:
            if kept[-1] == dotdot:
                # Already climbing above the start: keep stacking '..'.
                kept.append(part)
            else:
                # '..' cancels the preceding real component.
                kept.pop()
        elif not leading:
            # A relative path may begin with '..'; at the root it is dropped.
            kept.append(part)

    result = sep * leading + sep.join(kept)
    return result or dot
374
375
def abspath(path):
    """Return a normalized, absolute version of the path.

    A relative path is joined onto the current working directory (fetched
    as bytes for a bytes path) before being normalized.
    """
    path = os.fspath(path)
    if isabs(path):
        return normpath(path)
    cwd = os.getcwdb() if isinstance(path, bytes) else os.getcwd()
    return normpath(join(cwd, path))
386
387
388# Return a canonical path (i.e. the absolute location of a file on the
389# filesystem).
390
def realpath(filename):
    """Return the canonical form of *filename*: an absolute path with the
    symbolic links met along the way resolved."""
    filename = os.fspath(filename)
    # Resolution starts from an empty prefix of the same type (str or
    # bytes) as the file name; the success flag is not needed here.
    no_prefix = filename[:0]
    resolved, _ = _joinrealpath(no_prefix, filename, {})
    return abspath(resolved)
397
398# Join two paths, normalizing and eliminating any symbolic links
399# encountered in the second path.
def _joinrealpath(path, rest, seen):
    """Resolve *rest* relative to *path*, following symbolic links.

    path -- the part resolved so far (str or bytes; may be empty).
    rest -- the part still to be processed; if it is absolute, resolution
            restarts from the root and *path* is discarded.
    seen -- dict mapping each symlink path met so far to its resolved
            target, or to None while that link is still being resolved.
            It is updated in place and handed on to recursive calls.

    Returns a tuple (resolved_path, ok).  ok is False when a symlink loop
    was detected; resolved_path is then the part handled so far joined
    with the unprocessed remainder.
    """
    # Pick literals matching the type of path.
    if isinstance(path, bytes):
        sep = b'/'
        curdir = b'.'
        pardir = b'..'
    else:
        sep = '/'
        curdir = '.'
        pardir = '..'

    if isabs(rest):
        # Drop the leading slash and start over from the root.
        rest = rest[1:]
        path = sep

    # Consume rest one component at a time.
    while rest:
        name, _, rest = rest.partition(sep)
        if not name or name == curdir:
            # current dir
            continue
        if name == pardir:
            # parent dir
            if path:
                path, name = split(path)
                if name == pardir:
                    # path already ended in '..': put it back and add one more
                    path = join(path, pardir, pardir)
            else:
                path = pardir
            continue
        newpath = join(path, name)
        if not islink(newpath):
            # Ordinary component: accept it and move on.
            path = newpath
            continue
        # Resolve the symbolic link
        if newpath in seen:
            # Already seen this path
            path = seen[newpath]
            if path is not None:
                # use cached value
                continue
            # The symlink is not resolved, so we must have a symlink loop.
            # Return already resolved part + rest of the path unchanged.
            return join(newpath, rest), False
        seen[newpath] = None # not resolved symlink
        # Resolve the link target relative to the directory holding the link.
        path, ok = _joinrealpath(path, os.readlink(newpath), seen)
        if not ok:
            # A loop was found further down; propagate the failure.
            return join(path, rest), False
        seen[newpath] = path # resolved symlink

    return path, True
449
450
# True only when sys.platform is 'darwin'; False on every other platform.
supports_unicode_filenames = (sys.platform == 'darwin')
452
def relpath(path, start=None):
    """Return a relative version of a path.

    path  -- the target path (str, bytes or path-like); must not be empty.
    start -- the directory the result is relative to; defaults to the
             current directory.

    Both paths are made absolute and normalized first; the result climbs
    out of *start* with '..' components as needed and then descends to
    *path*.  The current-directory name is returned when both are the same.

    Raises ValueError if *path* is empty (including a path-like object
    whose underlying path is empty) and TypeError if str and bytes
    arguments are mixed.
    """
    if path:
        # A truthy path-like object may still wrap an empty path, so the
        # emptiness test below has to run on the converted value.
        path = os.fspath(path)
    if not path:
        raise ValueError("no path specified")

    if isinstance(path, bytes):
        curdir = b'.'
        sep = b'/'
        pardir = b'..'
    else:
        curdir = '.'
        sep = '/'
        pardir = '..'

    if start is None:
        start = curdir
    else:
        start = os.fspath(start)

    try:
        start_list = [x for x in abspath(start).split(sep) if x]
        path_list = [x for x in abspath(path).split(sep) if x]
        # Work out how much of the filepath is shared by start and path.
        i = len(commonprefix([start_list, path_list]))

        # One '..' per component of start below the shared part, followed
        # by what remains of path.
        rel_list = [pardir] * (len(start_list)-i) + path_list[i:]
        if not rel_list:
            return curdir
        return join(*rel_list)
    except (TypeError, AttributeError, BytesWarning, DeprecationWarning):
        # Let the shared checker raise a clearer error for bad or mixed
        # argument types; otherwise propagate the original exception.
        genericpath._check_arg_types('relpath', path, start)
        raise
487
488
489# Return the longest common sub-path of the sequence of paths given as input.
490# The paths are not normalized before comparing them (this is the
491# responsibility of the caller). Any trailing separator is stripped from the
492# returned path.
493
def commonpath(paths):
    """Given a sequence of path names, returns the longest common sub-path.

    *paths* may be any iterable of str, bytes or path-like objects.  The
    paths are compared component by component without being normalized,
    except that empty and '.' components are ignored.  The result carries
    no trailing separator.

    Raises ValueError if *paths* is empty or mixes absolute and relative
    paths, and TypeError if it mixes str and bytes.
    """
    if not paths:
        raise ValueError('commonpath() arg is an empty sequence')

    paths = tuple(map(os.fspath, paths))
    # An iterator or generator is always truthy, so emptiness can only be
    # detected reliably after it has been materialised.
    if not paths:
        raise ValueError('commonpath() arg is an empty sequence')

    if isinstance(paths[0], bytes):
        sep = b'/'
        curdir = b'.'
    else:
        sep = '/'
        curdir = '.'

    try:
        split_paths = [path.split(sep) for path in paths]

        # Unpacking a one-element set succeeds only when every path agrees
        # on being absolute or relative.
        try:
            absolute, = {p[:1] == sep for p in paths}
        except ValueError:
            raise ValueError("Can't mix absolute and relative paths") from None

        split_paths = [[c for c in s if c and c != curdir] for s in split_paths]
        # The smallest and largest component lists bracket all the others,
        # so their shared prefix is shared by every path.
        s1 = min(split_paths)
        s2 = max(split_paths)
        common = s1
        for i, c in enumerate(s1):
            if c != s2[i]:
                common = s1[:i]
                break

        prefix = sep if absolute else sep[:0]
        return prefix + sep.join(common)
    except (TypeError, AttributeError):
        # Let the shared checker raise a clearer error for bad or mixed
        # argument types; otherwise propagate the original exception.
        genericpath._check_arg_types('commonpath', *paths)
        raise
530