1"""Common operations on Posix pathnames.
2
3Instead of importing this module directly, import os and refer to
4this module as os.path.  The "os.path" name is an alias for this
5module on Posix systems; on other systems (e.g. Windows),
6os.path provides the same operations in a manner specific to that
7platform, and is an alias to another module (e.g. ntpath).
8
9Some of this can actually be useful on non-Posix systems too, e.g.
10for manipulation of the pathname component of URLs.
11"""
12
13# Strings representing various path-related bits and pieces.
14# These are primarily for export; internally, they are hardcoded.
15# Should be set before imports for resolving cyclic dependency.
curdir = '.'  # name referring to the current directory
pardir = '..'  # name referring to the parent directory
extsep = '.'  # separates a base filename from its extension
sep = '/'  # separates pathname components
pathsep = ':'  # separates entries in search-path lists
defpath = '/bin:/usr/bin'  # default search path, entries joined by pathsep
altsep = None  # no alternative component separator on Posix
devnull = '/dev/null'  # path of the null device
24
25import os
26import sys
27import stat
28import genericpath
29from genericpath import *
30
# Public names exported by a star-import of this module.  Several entries
# (commonprefix, getsize, exists, samefile, ...) are not defined in this
# file; presumably they are supplied by "from genericpath import *" above.
__all__ = ["normcase","isabs","join","splitdrive","split","splitext",
           "basename","dirname","commonprefix","getsize","getmtime",
           "getatime","getctime","islink","exists","lexists","isdir","isfile",
           "ismount", "expanduser","expandvars","normpath","abspath",
           "samefile","sameopenfile","samestat",
           "curdir","pardir","sep","pathsep","defpath","altsep","extsep",
           "devnull","realpath","supports_unicode_filenames","relpath",
           "commonpath"]
39
40
41def _get_sep(path):
42    if isinstance(path, bytes):
43        return b'/'
44    else:
45        return '/'
46
47# Normalize the case of a pathname.  Trivial in Posix, string.lower on Mac.
48# On MS-DOS this may also turn slashes into backslashes; however, other
49# normalizations (such as optimizing '../' away) are not allowed
50# (another function should be defined to do that).
51
def normcase(s):
    """Normalize the case of a pathname.

    On Posix the text of the path is left untouched; the argument is only
    passed through os.fspath(), so path-like objects come back as the str
    or bytes they represent.
    """
    plain = os.fspath(s)
    return plain
55
56
57# Return whether a path is absolute.
58# Trivial in Posix, harder on the Mac or MS-DOS.
59
def isabs(s):
    """Test whether a path is absolute, i.e. whether it begins with a slash."""
    s = os.fspath(s)
    # The slash must have the same type (str or bytes) as the path itself.
    root = b'/' if isinstance(s, bytes) else '/'
    return s.startswith(root)
65
66
67# Join pathnames.
68# Ignore the previous parts if a part is absolute.
69# Insert a '/' unless the first part is empty or already ends in '/'.
70
def join(a, *p):
    """Join two or more pathname components, inserting '/' as needed.

    An absolute component discards everything joined before it.  A slash
    is inserted between components unless the path built so far is empty
    or already ends with one, so an empty last component yields a result
    that ends with a separator.
    """
    a = os.fspath(a)
    slash = b'/' if isinstance(a, bytes) else '/'
    result = a
    try:
        if not p:
            # bpo-23780: with no further components, still make sure the
            # first argument is a compatible string type.
            result[:0] + slash
        for part in map(os.fspath, p):
            if part.startswith(slash):
                # An absolute component restarts the path.
                result = part
                continue
            if result and not result.endswith(slash):
                result += slash
            result += part
    except (TypeError, AttributeError, BytesWarning):
        # Give a clearer error for unsupported or mixed argument types
        # when possible; otherwise re-raise the original exception.
        genericpath._check_arg_types('join', a, *p)
        raise
    return result
93
94
# Split a path into head (everything up to the last '/') and tail (the
# rest).  If the path ends in '/', tail will be empty.  If there is no
# '/' in the path, head will be empty.
# Trailing '/'es are stripped from head unless it is the root.
99
def split(p):
    """Split a pathname into the tuple "(head, tail)".

    "tail" is everything after the final slash and "head" is everything
    before it, with trailing slashes removed unless head consists only of
    slashes.  Either part may be empty.
    """
    p = os.fspath(p)
    slash = b'/' if isinstance(p, bytes) else '/'
    cut = p.rfind(slash) + 1
    head = p[:cut]
    tail = p[cut:]
    # rstrip() yields an empty value exactly when head is empty or made
    # only of slashes; in both cases head is kept as it is.
    trimmed = head.rstrip(slash)
    return (trimmed or head), tail
110
111
# Split a path into root and extension.
# The extension is everything starting at the last dot in the last
# pathname component; the root is everything before that.
# It is always true that root + ext == p.
116
def splitext(p):
    p = os.fspath(p)
    # Pick the separator and the extension marker in the path's own type
    # and delegate the actual splitting to the shared generic helper.
    is_bytes = isinstance(p, bytes)
    slash = b'/' if is_bytes else '/'
    dot = b'.' if is_bytes else '.'
    return genericpath._splitext(p, slash, None, dot)
# The docstring is shared with the generic implementation.
splitext.__doc__ = genericpath._splitext.__doc__
127
128# Split a pathname into a drive specification and the rest of the
129# path.  Useful on DOS/Windows/NT; on Unix, the drive is always empty.
130
def splitdrive(p):
    """Split a pathname into a (drive, path) pair.

    On Posix the drive is always empty: the first item is an empty value
    of the same type as the path and the second item is the whole path.
    """
    p = os.fspath(p)
    no_drive = p[:0]
    return no_drive, p
136
137
138# Return the tail (basename) part of a path, same as split(path)[1].
139
def basename(p):
    """Return the final component of a pathname.

    This is everything after the last slash; the result is empty when the
    path ends with a slash.
    """
    p = os.fspath(p)
    slash = b'/' if isinstance(p, bytes) else '/'
    # rpartition() returns the whole path as its last item when no slash
    # is present.
    return p.rpartition(slash)[2]
146
147
148# Return the head (dirname) part of a path, same as split(path)[0].
149
def dirname(p):
    """Return the directory component of a pathname.

    This is everything up to the last slash, with trailing slashes removed
    unless the result consists only of slashes.
    """
    p = os.fspath(p)
    slash = b'/' if isinstance(p, bytes) else '/'
    head = p[:p.rfind(slash) + 1]
    # An empty rstrip() result means head is empty or all slashes; keep
    # head unchanged in that case.
    return head.rstrip(slash) or head
159
160
161# Is a path a symbolic link?
162# This will always return false on systems where os.lstat doesn't exist.
163
def islink(path):
    """Test whether a path is a symbolic link.

    Returns False when the path cannot be lstat'ed (OSError or ValueError)
    or when os.lstat is not available (AttributeError).
    """
    try:
        info = os.lstat(path)
    except (OSError, ValueError, AttributeError):
        return False
    else:
        return stat.S_ISLNK(info.st_mode)
171
172# Being true for dangling symbolic links is also useful.
173
def lexists(path):
    """Test whether a path exists, without following symbolic links.

    Because the check uses os.lstat(), a broken symbolic link still counts
    as existing.  Returns False when lstat fails with OSError or ValueError.
    """
    try:
        os.lstat(path)
    except (OSError, ValueError):
        found = False
    else:
        found = True
    return found
181
182
183# Is a path a mount point?
184# (Does this work for all UNIXes?  Is it even guaranteed to work by Posix?)
185
def ismount(path):
    """Test whether a path is a mount point.

    A path is reported as a mount point when its parent ("path/..",
    resolved with realpath) lives on a different device, or when the
    parent is the very same i-node as the path.  Symbolic links and paths
    that cannot be lstat'ed are never mount points.
    """
    try:
        own = os.lstat(path)
    except (OSError, ValueError):
        # It doesn't exist -- so not a mount point.
        return False
    if stat.S_ISLNK(own.st_mode):
        # A symlink can never be a mount point.
        return False

    updir = b'..' if isinstance(path, bytes) else '..'
    parent = realpath(join(path, updir))
    try:
        above = os.lstat(parent)
    except (OSError, ValueError):
        return False

    # Different device: path/.. is on another filesystem than path.
    # Same i-node: path/.. is path itself.
    return own.st_dev != above.st_dev or own.st_ino == above.st_ino
217
218
219# Expand paths beginning with '~' or '~user'.
220# '~' means $HOME; '~user' means that user's home directory.
221# If the path doesn't begin with '~', or if the user or $HOME is unknown,
222# the path is returned unchanged (leaving error reporting to whatever
223# function is called with the expanded path as argument).
224# See also module 'glob' for expansion of *, ? and [...] in pathnames.
225# (A function should also be defined to do full *sh-style environment
226# variable expansion.)
227
def expanduser(path):
    """Expand an initial ~ or ~user component to a home directory.

    A bare '~' is replaced by $HOME when that variable is set, otherwise
    by the current user's home directory from the password database.
    '~user' is replaced by that user's home directory.  The result has the
    same type (str or bytes) as the argument.

    The path is returned unchanged when it does not start with a tilde,
    when the user is unknown to the password database, or when the pwd
    module is not available.
    """
    path = os.fspath(path)
    if isinstance(path, bytes):
        tilde = b'~'
        root = b'/'
    else:
        tilde = '~'
        root = '/'
    if not path.startswith(tilde):
        return path
    # i is the index just past the leading '~' or '~user' component.
    i = path.find(root, 1)
    if i < 0:
        i = len(path)
    if i == 1:
        if 'HOME' in os.environ:
            userhome = os.environ['HOME']
        else:
            try:
                import pwd
            except ImportError:
                # No password database on this platform: do nothing.
                return path
            try:
                userhome = pwd.getpwuid(os.getuid()).pw_dir
            except KeyError:
                # bpo-10496: if the current user identifier doesn't exist in the
                # password database, return the path unchanged
                return path
    else:
        try:
            import pwd
        except ImportError:
            # No password database on this platform: do nothing.
            return path
        name = path[1:i]
        if isinstance(name, bytes):
            # Decode with the filesystem encoding so that non-ASCII bytes
            # lead to a failed lookup rather than a UnicodeDecodeError.
            name = os.fsdecode(name)
        try:
            pwent = pwd.getpwnam(name)
        except KeyError:
            # bpo-10496: if the user name from the path doesn't exist in the
            # password database, return the path unchanged
            return path
        userhome = pwent.pw_dir
    if isinstance(path, bytes):
        userhome = os.fsencode(userhome)
    # Drop trailing slashes from the home directory; if nothing at all is
    # left (home is '/', nothing follows the tilde) the result is the root.
    userhome = userhome.rstrip(root)
    return (userhome + path[i:]) or root
272
273
274# Expand paths containing shell variable substitutions.
275# This expands the forms $variable and ${variable} only.
276# Non-existent variables are left unchanged.
277
# Lazily compiled patterns for str and bytes paths respectively; they are
# created on first use by expandvars().
_varprog = None
_varprogb = None

def expandvars(path):
    """Expand shell variables of the forms $var and ${var}.

    Variables that are not set in the environment are left unchanged.
    Substituted values are not scanned again for further variables.
    """
    global _varprog, _varprogb
    path = os.fspath(path)
    if isinstance(path, bytes):
        if b'$' not in path:
            return path
        if not _varprogb:
            import re
            _varprogb = re.compile(br'\$(\w+|\{[^}]*\})', re.ASCII)
        find = _varprogb.search
        lbrace, rbrace = b'{', b'}'
        # os.environb may be missing; None selects the decode/encode
        # fallback through os.environ below.
        env = getattr(os, 'environb', None)
    else:
        if '$' not in path:
            return path
        if not _varprog:
            import re
            _varprog = re.compile(r'\$(\w+|\{[^}]*\})', re.ASCII)
        find = _varprog.search
        lbrace, rbrace = '{', '}'
        env = os.environ

    pos = 0
    while True:
        match = find(path, pos)
        if match is None:
            return path
        begin, stop = match.span(0)
        var = match.group(1)
        if var.startswith(lbrace) and var.endswith(rbrace):
            var = var[1:-1]
        try:
            if env is None:
                value = os.fsencode(os.environ[os.fsdecode(var)])
            else:
                value = env[var]
        except KeyError:
            # Unknown variable: leave the text alone and search after it.
            pos = stop
            continue
        expanded = path[:begin] + value
        # Resume after the substituted value so it is not expanded again.
        pos = len(expanded)
        path = expanded + path[stop:]
328
329
330# Normalize a path, e.g. A//B, A/./B and A/foo/../B all become A/B.
331# It should be understood that this may change the meaning of the path
332# if it contains symbolic links!
333
def normpath(path):
    """Normalize a path, eliminating double slashes, '.' and '..' parts.

    The work is purely textual, so the meaning of a path that contains
    symbolic links may change.  An empty path normalizes to '.'.
    """
    path = os.fspath(path)
    if isinstance(path, bytes):
        sep, dot, dotdot = b'/', b'.', b'..'
    else:
        sep, dot, dotdot = '/', '.', '..'
    if not path:
        return dot

    # POSIX allows one or two initial slashes, but treats three or more
    # as single slash.
    if not path.startswith(sep):
        lead = 0
    elif path.startswith(sep*2) and not path.startswith(sep*3):
        lead = 2
    else:
        lead = 1

    stack = []
    for piece in path.split(sep):
        if not piece or piece == dot:
            continue
        if piece == dotdot:
            if stack and stack[-1] != dotdot:
                # '..' cancels the preceding ordinary component.
                stack.pop()
                continue
            if lead and not stack:
                # '..' directly under the root is dropped.
                continue
        stack.append(piece)

    result = sep.join(stack)
    if lead:
        result = sep*lead + result
    return result or dot
370
371
def abspath(path):
    """Return a normalized, absolute version of a path.

    A relative path is joined onto the current working directory (fetched
    as bytes for a bytes path) before being normalized.
    """
    path = os.fspath(path)
    if isabs(path):
        return normpath(path)
    base = os.getcwdb() if isinstance(path, bytes) else os.getcwd()
    return normpath(join(base, path))
382
383
384# Return a canonical path (i.e. the absolute location of a file on the
385# filesystem).
386
def realpath(filename):
    """Return the canonical path of the specified filename, eliminating any
symbolic links encountered in the path."""
    filename = os.fspath(filename)
    # Start from an empty path of the same type as filename; the success
    # flag returned alongside the resolved path is not needed here.
    resolved, _ = _joinrealpath(filename[:0], filename, {})
    return abspath(resolved)
393
394# Join two paths, normalizing and eliminating any symbolic links
395# encountered in the second path.
396def _joinrealpath(path, rest, seen):
397    if isinstance(path, bytes):
398        sep = b'/'
399        curdir = b'.'
400        pardir = b'..'
401    else:
402        sep = '/'
403        curdir = '.'
404        pardir = '..'
405
406    if isabs(rest):
407        rest = rest[1:]
408        path = sep
409
410    while rest:
411        name, _, rest = rest.partition(sep)
412        if not name or name == curdir:
413            # current dir
414            continue
415        if name == pardir:
416            # parent dir
417            if path:
418                path, name = split(path)
419                if name == pardir:
420                    path = join(path, pardir, pardir)
421            else:
422                path = pardir
423            continue
424        newpath = join(path, name)
425        if not islink(newpath):
426            path = newpath
427            continue
428        # Resolve the symbolic link
429        if newpath in seen:
430            # Already seen this path
431            path = seen[newpath]
432            if path is not None:
433                # use cached value
434                continue
435            # The symlink is not resolved, so we must have a symlink loop.
436            # Return already resolved part + rest of the path unchanged.
437            return join(newpath, rest), False
438        seen[newpath] = None # not resolved symlink
439        path, ok = _joinrealpath(path, os.readlink(newpath), seen)
440        if not ok:
441            return join(path, rest), False
442        seen[newpath] = path # resolved symlink
443
444    return path, True
445
446
# True only when sys.platform is 'darwin' (macOS); False everywhere else.
supports_unicode_filenames = (sys.platform == 'darwin')
448
def relpath(path, start=None):
    """Return a relative version of a path.

    The result leads from *start* (the current directory by default) to
    *path*.  Both are made absolute first; no filesystem entries are
    examined.  Raises ValueError when *path* is empty.
    """
    if not path:
        raise ValueError("no path specified")

    path = os.fspath(path)
    if isinstance(path, bytes):
        here, slash, up = b'.', b'/', b'..'
    else:
        here, slash, up = '.', '/', '..'

    start = here if start is None else os.fspath(start)

    try:
        origin = list(filter(None, abspath(start).split(slash)))
        target = list(filter(None, abspath(path).split(slash)))
        # Number of leading components shared by start and path.
        shared = len(commonprefix([origin, target]))

        # Climb out of what is left of start, then descend into path.
        steps = [up] * (len(origin) - shared) + target[shared:]
        if not steps:
            return here
        return join(*steps)
    except (TypeError, AttributeError, BytesWarning, DeprecationWarning):
        # Report mixed or unsupported argument types with a clearer error
        # when possible; otherwise re-raise the original exception.
        genericpath._check_arg_types('relpath', path, start)
        raise
483
484
485# Return the longest common sub-path of the sequence of paths given as input.
486# The paths are not normalized before comparing them (this is the
487# responsibility of the caller). Any trailing separator is stripped from the
488# returned path.
489
def commonpath(paths):
    """Given an iterable of path names, return the longest common sub-path.

    The paths are compared component by component and are not normalized
    first, apart from empty and '.' components being ignored.  Any
    trailing separator is stripped from the result.  *paths* may be any
    iterable, including a one-shot iterator or generator.

    Raises ValueError if *paths* is empty or mixes absolute and relative
    paths, and TypeError if the items are of mixed or unsupported types.
    """
    # Keep rejecting a falsy argument (e.g. an empty list) up front.
    if not paths:
        raise ValueError('commonpath() arg is an empty sequence')

    # Materialize the input so that iterators and generators can be
    # inspected several times, then check again: an exhausted iterator is
    # truthy but yields no items.
    paths = tuple(map(os.fspath, paths))
    if not paths:
        raise ValueError('commonpath() arg is an empty sequence')

    if isinstance(paths[0], bytes):
        sep = b'/'
        curdir = b'.'
    else:
        sep = '/'
        curdir = '.'

    try:
        split_paths = [path.split(sep) for path in paths]

        try:
            # Unpacking succeeds only if all paths agree on absoluteness.
            absolute, = {p[:1] == sep for p in paths}
        except ValueError:
            raise ValueError("Can't mix absolute and relative paths") from None

        split_paths = [[c for c in s if c and c != curdir] for s in split_paths]
        # The common prefix of the smallest and largest component lists
        # is common to every list in between.
        s1 = min(split_paths)
        s2 = max(split_paths)
        common = s1
        for i, c in enumerate(s1):
            if c != s2[i]:
                common = s1[:i]
                break

        prefix = sep if absolute else sep[:0]
        return prefix + sep.join(common)
    except (TypeError, AttributeError):
        # Report mixed or unsupported argument types with a clearer error
        # when possible; otherwise re-raise the original exception.
        genericpath._check_arg_types('commonpath', *paths)
        raise
526