1"""Common operations on Posix pathnames.
2
3Instead of importing this module directly, import os and refer to
4this module as os.path.  The "os.path" name is an alias for this
5module on Posix systems; on other systems (e.g. Windows),
6os.path provides the same operations in a manner specific to that
7platform, and is an alias to another module (e.g. ntpath).
8
9Some of this can actually be useful on non-Posix systems too, e.g.
10for manipulation of the pathname component of URLs.
11"""
12
13# Strings representing various path-related bits and pieces.
14# These are primarily for export; internally, they are hardcoded.
15# Should be set before imports for resolving cyclic dependency.
curdir = '.'                # name of the current-directory component
pardir = '..'               # name of the parent-directory component
extsep = '.'                # separates a file's base name from its extension
sep = '/'                   # separates pathname components
pathsep = ':'               # separates the entries of a search path (see defpath)
defpath = '/bin:/usr/bin'   # default search path, entries joined with pathsep
altsep = None               # no alternative component separator is defined here
devnull = '/dev/null'       # path of the null device
24
25import os
26import sys
27import stat
28import genericpath
29from genericpath import *
30
# Public names of this module (what a star-import of it exports).  Several
# entries -- for example commonprefix, getsize, exists, isdir and samefile --
# are not defined in this file; they are presumably supplied by the
# "from genericpath import *" above.
__all__ = ["normcase","isabs","join","splitdrive","split","splitext",
           "basename","dirname","commonprefix","getsize","getmtime",
           "getatime","getctime","islink","exists","lexists","isdir","isfile",
           "ismount", "expanduser","expandvars","normpath","abspath",
           "samefile","sameopenfile","samestat",
           "curdir","pardir","sep","pathsep","defpath","altsep","extsep",
           "devnull","realpath","supports_unicode_filenames","relpath",
           "commonpath"]
39
40
def _get_sep(path):
    """Return the '/' separator with the same type (bytes or str) as *path*.

    Anything that is not a bytes instance gets the str separator.
    """
    return b'/' if isinstance(path, bytes) else '/'
46
47# Normalize the case of a pathname.  Trivial in Posix, string.lower on Mac.
48# On MS-DOS this may also turn slashes into backslashes; however, other
49# normalizations (such as optimizing '../' away) are not allowed
50# (another function should be defined to do that).
51
def normcase(s):
    """Return the pathname unchanged apart from os.fspath() conversion.

    No case normalization is performed under Posix.
    """
    pathname = os.fspath(s)
    return pathname
55
56
57# Return whether a path is absolute.
58# Trivial in Posix, harder on the Mac or MS-DOS.
59
def isabs(s):
    """Return True if the pathname starts with a slash, i.e. is absolute."""
    pathname = os.fspath(s)
    return pathname.startswith(_get_sep(pathname))
65
66
67# Join pathnames.
68# Ignore the previous parts if a part is absolute.
69# Insert a '/' unless the first part is empty or already ends in '/'.
70
def join(a, *p):
    """Concatenate pathname components, putting a '/' between them as needed.

    A component that is itself absolute throws away everything joined so
    far.  If the last component is empty, the result ends with a separator.
    Mixing str and bytes components raises TypeError.
    """
    a = os.fspath(a)
    slash = _get_sep(a)
    joined = a
    try:
        if not p:
            # 23780: with no further components, still provoke a type error
            # when `a` is neither str nor bytes compatible with the separator.
            joined[:0] + slash
        for component in p:
            component = os.fspath(component)
            if component.startswith(slash):
                # Absolute component: restart from it.
                joined = component
            elif joined and not joined.endswith(slash):
                joined += slash + component
            else:
                # Nothing joined yet, or a separator is already in place.
                joined += component
    except (TypeError, AttributeError, BytesWarning):
        # Let the shared checker raise a clearer message when it can;
        # otherwise propagate the original exception.
        genericpath._check_arg_types('join', a, *p)
        raise
    return joined
93
94
95# Split a path in head (everything up to the last '/') and tail (the
96# rest).  If the path ends in '/', tail will be empty.  If there is no
# '/' in the path, head will be empty.
98# Trailing '/'es are stripped from head unless it is the root.
99
def split(p):
    """Split a pathname into the pair "(head, tail)".

    "tail" is whatever follows the last slash and "head" is everything
    before it; either one may be empty.
    """
    pathname = os.fspath(p)
    slash = _get_sep(pathname)
    cut = pathname.rfind(slash) + 1
    head = pathname[:cut]
    # Strip trailing slashes from head, except when head consists of
    # nothing but slashes (rstrip would then yield an empty value).
    head = head.rstrip(slash) or head
    return head, pathname[cut:]
110
111
112# Split a path in root and extension.
113# The extension is everything starting at the last dot in the last
114# pathname component; the root is everything before that.
115# It is always true that root + ext == p.
116
def splitext(p):
    # Pass separator and extension-separator literals whose type matches
    # the path; there is no alternative separator on Posix (None).
    p = os.fspath(p)
    if isinstance(p, bytes):
        return genericpath._splitext(p, b'/', None, b'.')
    return genericpath._splitext(p, '/', None, '.')
# The docstring is shared with the generic implementation.
splitext.__doc__ = genericpath._splitext.__doc__
127
128# Split a pathname into a drive specification and the rest of the
129# path.  Useful on DOS/Windows/NT; on Unix, the drive is always empty.
130
def splitdrive(p):
    """Split a pathname into a (drive, path) pair.

    On Posix the drive is always empty; it is returned as an empty value of
    the same type as the path.
    """
    pathname = os.fspath(p)
    no_drive = pathname[:0]
    return no_drive, pathname
136
137
138# Return the tail (basename) part of a path, same as split(path)[1].
139
def basename(p):
    """Return the last component of a pathname (what follows the final slash)."""
    pathname = os.fspath(p)
    # rpartition yields the whole pathname as the last item when there is
    # no slash at all, matching a slice that starts at index 0.
    _, _, tail = pathname.rpartition(_get_sep(pathname))
    return tail
146
147
148# Return the head (dirname) part of a path, same as split(path)[0].
149
def dirname(p):
    """Return the directory part of a pathname (what precedes the final slash)."""
    pathname = os.fspath(p)
    slash = _get_sep(pathname)
    head = pathname[:pathname.rfind(slash) + 1]
    # An empty head, or one made only of slashes (the root), is kept as is.
    if not head or head == slash * len(head):
        return head
    return head.rstrip(slash)
159
160
161# Is a path a symbolic link?
162# This will always return false on systems where os.lstat doesn't exist.
163
def islink(path):
    """Return True if *path* refers to a symbolic link, False otherwise.

    False is also returned when os.lstat() fails or is unavailable.
    """
    try:
        info = os.lstat(path)
    except (OSError, ValueError, AttributeError):
        return False
    else:
        return stat.S_ISLNK(info.st_mode)
171
172# Being true for dangling symbolic links is also useful.
173
def lexists(path):
    """Return True if *path* exists, without following a final symbolic link.

    A broken (dangling) symbolic link therefore counts as existing.
    """
    try:
        os.lstat(path)
        return True
    except (OSError, ValueError):
        return False
181
182
183# Is a path a mount point?
184# (Does this work for all UNIXes?  Is it even guaranteed to work by Posix?)
185
def ismount(path):
    """Test whether a path is a mount point.

    *path* may be a str, bytes or path-like object.  Returns False when the
    path (or its resolved parent) cannot be lstat()-ed, or when the path is
    a symbolic link.  Otherwise the path is reported as a mount point when
    its parent directory is on a different device, or when the parent is
    the very same i-node (as for the root directory).
    """
    try:
        s1 = os.lstat(path)
    except (OSError, ValueError):
        # It doesn't exist -- so not a mount point. :-)
        return False
    # A symlink can never be a mount point
    if stat.S_ISLNK(s1.st_mode):
        return False

    # Convert path-like objects first so the bytes/str decision below is
    # made on the real pathname; otherwise a path-like yielding bytes would
    # be joined with a str '..' and raise TypeError.
    path = os.fspath(path)
    dotdot = b'..' if isinstance(path, bytes) else '..'
    parent = realpath(join(path, dotdot))
    try:
        s2 = os.lstat(parent)
    except (OSError, ValueError):
        return False

    # Mount point if path/.. is on a different device than path, or if
    # path/.. is the same i-node as path.
    return s1.st_dev != s2.st_dev or s1.st_ino == s2.st_ino
217
218
219# Expand paths beginning with '~' or '~user'.
220# '~' means $HOME; '~user' means that user's home directory.
221# If the path doesn't begin with '~', or if the user or $HOME is unknown,
222# the path is returned unchanged (leaving error reporting to whatever
223# function is called with the expanded path as argument).
224# See also module 'glob' for expansion of *, ? and [...] in pathnames.
225# (A function should also be defined to do full *sh-style environment
226# variable expansion.)
227
def expanduser(path):
    """Expand ~ and ~user constructions.  If user or $HOME is unknown,
    do nothing.

    *path* may be str, bytes or a path-like object; the result has the same
    str/bytes type as the converted path.  The path is returned unchanged
    when it does not start with '~', when the user cannot be found in the
    password database, or when the pwd module cannot be imported.
    """
    path = os.fspath(path)
    tilde = b'~' if isinstance(path, bytes) else '~'
    if not path.startswith(tilde):
        return path
    sep = _get_sep(path)
    # i marks the end of the "~" / "~user" prefix.
    i = path.find(sep, 1)
    if i < 0:
        i = len(path)
    if i == 1:
        # Plain "~": prefer $HOME, fall back to the password database.
        if 'HOME' not in os.environ:
            try:
                import pwd
            except ImportError:
                # pwd module unavailable, return path unchanged
                return path
            try:
                userhome = pwd.getpwuid(os.getuid()).pw_dir
            except KeyError:
                # bpo-10496: if the current user identifier doesn't exist in the
                # password database, return the path unchanged
                return path
        else:
            userhome = os.environ['HOME']
    else:
        # "~user": look the named user up in the password database.
        try:
            import pwd
        except ImportError:
            # pwd module unavailable, return path unchanged
            return path
        name = path[1:i]
        if isinstance(name, bytes):
            # Decode with the filesystem encoding so that non-ASCII user
            # names do not raise UnicodeDecodeError.
            name = os.fsdecode(name)
        try:
            pwent = pwd.getpwnam(name)
        except KeyError:
            # bpo-10496: if the user name from the path doesn't exist in the
            # password database, return the path unchanged
            return path
        userhome = pwent.pw_dir
    # if no user home, return the path unchanged on VxWorks
    if userhome is None and sys.platform == "vxworks":
        return path
    if isinstance(path, bytes):
        userhome = os.fsencode(userhome)
    # Drop trailing slashes from the home directory; if nothing is left
    # (home is the root and the path was just "~"), return the root.
    userhome = userhome.rstrip(sep)
    return (userhome + path[i:]) or sep
275
276
277# Expand paths containing shell variable substitutions.
278# This expands the forms $variable and ${variable} only.
279# Non-existent variables are left unchanged.
280
# Lazily compiled patterns for str and bytes paths respectively.
_varprog = None
_varprogb = None

def expandvars(path):
    """Substitute $var and ${var} references with environment values.

    References to variables that are not set are left in place.  The
    result has the same str/bytes type as the converted path.
    """
    global _varprog, _varprogb
    path = os.fspath(path)
    if isinstance(path, bytes):
        if b'$' not in path:
            return path
        if not _varprogb:
            import re
            _varprogb = re.compile(br'\$(\w+|\{[^}]*\})', re.ASCII)
        search, start, end = _varprogb.search, b'{', b'}'
        # May be None when os has no bytes environment; handled below.
        environ = getattr(os, 'environb', None)
    else:
        if '$' not in path:
            return path
        if not _varprog:
            import re
            _varprog = re.compile(r'\$(\w+|\{[^}]*\})', re.ASCII)
        search, start, end = _varprog.search, '{', '}'
        environ = os.environ

    pos = 0
    while (match := search(path, pos)) is not None:
        begin, finish = match.span(0)
        name = match.group(1)
        if name.startswith(start) and name.endswith(end):
            # ${name} form: drop the braces.
            name = name[1:-1]
        try:
            if environ is None:
                value = os.fsencode(os.environ[os.fsdecode(name)])
            else:
                value = environ[name]
        except KeyError:
            # Unknown variable: leave the reference and search after it.
            pos = finish
            continue
        # Splice the value in and resume right after it, so the inserted
        # text is never scanned for further references.
        expanded = path[:begin] + value
        pos = len(expanded)
        path = expanded + path[finish:]
    return path
331
332
333# Normalize a path, e.g. A//B, A/./B and A/foo/../B all become A/B.
334# It should be understood that this may change the meaning of the path
335# if it contains symbolic links!
336
def normpath(path):
    """Return a normalized pathname: no duplicate slashes, no '.' components
    and no 'name/..' pairs.  An empty result is returned as '.'."""
    path = os.fspath(path)
    if isinstance(path, bytes):
        sep, dot, dotdot = b'/', b'.', b'..'
    else:
        sep, dot, dotdot = '/', '.', '..'
    if not path:
        return dot

    # POSIX allows one or two initial slashes, but treats three or more
    # as single slash.
    # (see http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap04.html#tag_04_13)
    if not path.startswith(sep):
        leading = 0
    elif path.startswith(sep * 2) and not path.startswith(sep * 3):
        leading = 2
    else:
        leading = 1

    kept = []
    for part in path.split(sep):
        if not part or part == dot:
            continue
        if part != dotdot:
            kept.append(part)
        elif kept:
            # '..' cancels the previous component unless that is itself
            # an uncancellable '..'.
            if kept[-1] == dotdot:
                kept.append(part)
            else:
                kept.pop()
        elif not leading:
            # Leading '..' of a relative path must be preserved.
            kept.append(part)
        # Otherwise: '..' directly under the root is dropped.

    result = sep * leading + sep.join(kept)
    return result or dot
374
375
def abspath(path):
    """Return a normalized, absolute version of the pathname.

    A relative pathname is interpreted against the current working
    directory (fetched as bytes when the pathname is bytes).
    """
    path = os.fspath(path)
    if isabs(path):
        return normpath(path)
    cwd = os.getcwdb() if isinstance(path, bytes) else os.getcwd()
    return normpath(join(cwd, path))
386
387
388# Return a canonical path (i.e. the absolute location of a file on the
389# filesystem).
390
def realpath(filename, *, strict=False):
    """Return the canonical path of *filename* with every symbolic link met
    along the way resolved.  *strict* is passed on to the link resolver."""
    filename = os.fspath(filename)
    # Start from an empty prefix of the same type (str or bytes) as the
    # filename, with a fresh cache of visited links.
    empty_prefix = filename[:0]
    resolved, _ = _joinrealpath(empty_prefix, filename, strict, {})
    return abspath(resolved)
397
398# Join two paths, normalizing and eliminating any symbolic links
399# encountered in the second path.
def _joinrealpath(path, rest, strict, seen):
    """Resolve *rest* relative to *path*, following symbolic links.

    path   -- prefix processed so far (str or bytes); it is replaced by the
              root separator when *rest* is absolute.
    rest   -- the part of the pathname that still has to be processed.
    strict -- if true, an OSError from os.lstat() is re-raised and a symlink
              loop is reported by calling os.stat() on the link (expected to
              raise); if false, a component whose lstat() fails is treated
              as not being a link.
    seen   -- dict mapping each symlink path visited so far to its resolved
              target, or to None while that link is still being resolved.

    Returns a tuple (resolved_path, ok).  ok is False when a symlink loop
    was hit in non-strict mode; the unprocessed remainder is then appended
    to the result unchanged.
    """
    # Literals of the same type as the path; these shadow the module-level
    # str constants of the same names.
    if isinstance(path, bytes):
        sep = b'/'
        curdir = b'.'
        pardir = b'..'
    else:
        sep = '/'
        curdir = '.'
        pardir = '..'

    if isabs(rest):
        # Absolute remainder: drop its leading slash and restart at the root.
        rest = rest[1:]
        path = sep

    while rest:
        # Consume one component at a time from the front of rest.
        name, _, rest = rest.partition(sep)
        if not name or name == curdir:
            # current dir
            continue
        if name == pardir:
            # parent dir
            if path:
                path, name = split(path)
                if name == pardir:
                    # The prefix itself ended in '..', which cannot be
                    # collapsed: put it back and add one more '..'.
                    path = join(path, pardir, pardir)
            else:
                path = pardir
            continue
        newpath = join(path, name)
        try:
            st = os.lstat(newpath)
        except OSError:
            if strict:
                raise
            is_link = False
        else:
            is_link = stat.S_ISLNK(st.st_mode)
        if not is_link:
            path = newpath
            continue
        # Resolve the symbolic link
        if newpath in seen:
            # Already seen this path
            path = seen[newpath]
            if path is not None:
                # use cached value
                continue
            # The symlink is not resolved, so we must have a symlink loop.
            if strict:
                # Raise OSError(errno.ELOOP)
                os.stat(newpath)
            else:
                # Return already resolved part + rest of the path unchanged.
                return join(newpath, rest), False
        seen[newpath] = None # not resolved symlink
        # Recurse on the link target, resolved relative to the directory
        # that contains the link.
        path, ok = _joinrealpath(path, os.readlink(newpath), strict, seen)
        if not ok:
            # A loop was found further down: give up and pass it upwards.
            return join(path, rest), False
        seen[newpath] = path # resolved symlink

    return path, True
461
462
# True only when sys.platform is 'darwin'; False on every other platform.
supports_unicode_filenames = (sys.platform == 'darwin')
464
def relpath(path, start=None):
    """Return *path* expressed relative to *start*.

    *start* defaults to the current directory.  Both are made absolute
    first, so the result depends on the working directory when either one
    is relative.  Raises ValueError if *path* is empty.
    """

    if not path:
        raise ValueError("no path specified")

    path = os.fspath(path)
    if isinstance(path, bytes):
        curdir, sep, pardir = b'.', b'/', b'..'
    else:
        curdir, sep, pardir = '.', '/', '..'

    start = curdir if start is None else os.fspath(start)

    try:
        # Non-empty components of both absolute paths.
        start_parts = list(filter(None, abspath(start).split(sep)))
        path_parts = list(filter(None, abspath(path).split(sep)))
        # Number of leading components the two paths have in common.
        shared = len(commonprefix([start_parts, path_parts]))

        # Climb out of what is left of start, then descend into path.
        climb = [pardir] * (len(start_parts) - shared)
        relative = climb + path_parts[shared:]
        return join(*relative) if relative else curdir
    except (TypeError, AttributeError, BytesWarning, DeprecationWarning):
        # Let the shared checker raise a clearer message when it can;
        # otherwise propagate the original exception.
        genericpath._check_arg_types('relpath', path, start)
        raise
499
500
501# Return the longest common sub-path of the sequence of paths given as input.
502# The paths are not normalized before comparing them (this is the
503# responsibility of the caller). Any trailing separator is stripped from the
504# returned path.
505
def commonpath(paths):
    """Given an iterable of path names, returns the longest common sub-path.

    The paths are compared component by component and are not normalized
    beyond dropping empty and '.' components.  The result carries no
    trailing separator.

    Raises ValueError if *paths* is empty or mixes absolute and relative
    pathnames, and TypeError if str and bytes pathnames are mixed.
    """

    # Materialize before testing for emptiness so that an exhausted
    # iterator or generator raises ValueError rather than leaking an
    # IndexError.  "paths or ()" keeps the historical ValueError for any
    # falsy argument.
    paths = tuple(map(os.fspath, paths or ()))
    if not paths:
        raise ValueError('commonpath() arg is an empty sequence')

    if isinstance(paths[0], bytes):
        sep = b'/'
        curdir = b'.'
    else:
        sep = '/'
        curdir = '.'

    try:
        split_paths = [path.split(sep) for path in paths]

        # All paths must agree on being absolute or relative; unpacking a
        # set with more than one element raises ValueError.
        try:
            absolute, = {p[:1] == sep for p in paths}
        except ValueError:
            raise ValueError("Can't mix absolute and relative paths") from None

        split_paths = [[c for c in s if c and c != curdir] for s in split_paths]
        # The common prefix of all paths equals the common prefix of the
        # lexicographically smallest and largest component lists.
        s1 = min(split_paths)
        s2 = max(split_paths)
        common = s1
        for i, c in enumerate(s1):
            if c != s2[i]:
                common = s1[:i]
                break

        prefix = sep if absolute else sep[:0]
        return prefix + sep.join(common)
    except (TypeError, AttributeError):
        # Let the shared checker raise a clearer message when it can;
        # otherwise propagate the original exception.
        genericpath._check_arg_types('commonpath', *paths)
        raise
542