1import collections
2import functools
3import os
4import re
5import struct
6import sys
7import warnings
8from typing import IO, Dict, Iterator, NamedTuple, Optional, Tuple
9
10
11# Python does not provide platform information at sufficient granularity to
12# identify the architecture of the running executable in some cases, so we
13# determine it dynamically by reading the information from the running
14# process. This only applies on Linux, which uses the ELF format.
class _ELFFileHeader:
    """
    The fixed-size header found at the start of an ELF file.

    Constructing an instance reads the header fields, in order, from an
    already-open binary file object and stores each one as an attribute
    named after the corresponding ELF header field.
    """

    # https://en.wikipedia.org/wiki/Executable_and_Linkable_Format#File_header
    class _InvalidELFFileHeader(ValueError):
        """
        An invalid ELF file header was found.
        """

    # The bytes 0x7F 'E' 'L' 'F', compared against a big-endian uint32 read.
    ELF_MAGIC_NUMBER = 0x7F454C46
    # Accepted values of e_ident_class.
    ELFCLASS32 = 1
    ELFCLASS64 = 2
    # Accepted values of e_ident_data (LSB selects little-endian decoding).
    ELFDATA2LSB = 1
    ELFDATA2MSB = 2
    # Values compared against e_machine.
    EM_386 = 3
    EM_S390 = 22
    EM_ARM = 40
    EM_X86_64 = 62
    # Masks / values applied to e_flags when e_machine is EM_ARM.
    EF_ARM_ABIMASK = 0xFF000000
    EF_ARM_ABI_VER5 = 0x05000000
    EF_ARM_ABI_FLOAT_HARD = 0x00000400

    def __init__(self, file: IO[bytes]) -> None:
        """
        Parse the header from ``file``, starting at its current position.

        :param file: binary file object positioned at the start of the header.
        :raises _InvalidELFFileHeader: if the magic number, class or data
            encoding is not recognised, or a field cannot be fully read.
        """
        bad_header = _ELFFileHeader._InvalidELFFileHeader

        def read_int(layout: str) -> int:
            # Read exactly as many bytes as ``layout`` describes and decode
            # them as one integer; a short read surfaces as struct.error.
            try:
                raw = file.read(struct.calcsize(layout))
                fields: Tuple[int, ...] = struct.unpack(layout, raw)
            except struct.error:
                raise bad_header()
            return fields[0]

        # --- e_ident: byte-order independent identification bytes ---------
        self.e_ident_magic = read_int(">I")
        if self.e_ident_magic != self.ELF_MAGIC_NUMBER:
            raise bad_header()

        self.e_ident_class = read_int("B")
        if self.e_ident_class not in {self.ELFCLASS32, self.ELFCLASS64}:
            raise bad_header()

        self.e_ident_data = read_int("B")
        if self.e_ident_data not in {self.ELFDATA2LSB, self.ELFDATA2MSB}:
            raise bad_header()

        self.e_ident_version = read_int("B")
        self.e_ident_osabi = read_int("B")
        self.e_ident_abiversion = read_int("B")
        self.e_ident_pad = file.read(7)

        # --- remaining fields: decoded using the declared byte order ------
        endian = "<" if self.e_ident_data == self.ELFDATA2LSB else ">"
        half = endian + "H"
        word = endian + "I"
        # Entry point and table offsets are 4 bytes wide in 32-bit files and
        # 8 bytes wide in 64-bit files.
        address = word if self.e_ident_class == self.ELFCLASS32 else endian + "Q"

        self.e_type = read_int(half)
        self.e_machine = read_int(half)
        self.e_version = read_int(word)
        self.e_entry = read_int(address)
        self.e_phoff = read_int(address)
        self.e_shoff = read_int(address)
        self.e_flags = read_int(word)
        self.e_ehsize = read_int(half)
        self.e_phentsize = read_int(half)
        self.e_phnum = read_int(half)
        self.e_shentsize = read_int(half)
        self.e_shnum = read_int(half)
        self.e_shstrndx = read_int(half)
74
75
def _get_elf_header() -> Optional[_ELFFileHeader]:
    """
    Parse the ELF header of the running interpreter (``sys.executable``).

    :return: the parsed header, or ``None`` when the executable cannot be
        opened or does not start with a valid ELF header.
    """
    try:
        with open(sys.executable, "rb") as executable:
            return _ELFFileHeader(executable)
    except (OSError, TypeError, _ELFFileHeader._InvalidELFFileHeader):
        # Unreadable file, unusable sys.executable value, or not an ELF file.
        return None
83
84
def _is_linux_armhf() -> bool:
    """
    Report whether the running executable is a 32-bit little-endian ARM
    binary built for the version 5 EABI with the hard-float ABI flag set.

    Returns ``False`` when the ELF header cannot be read.
    """
    # hard-float ABI can be detected from the ELF header of the running
    # process
    # https://static.docs.arm.com/ihi0044/g/aaelf32.pdf
    header = _get_elf_header()
    if header is None:
        return False

    abi_version = header.e_flags & header.EF_ARM_ABIMASK
    float_abi = header.e_flags & header.EF_ARM_ABI_FLOAT_HARD
    return (
        header.e_ident_class == header.ELFCLASS32
        and header.e_ident_data == header.ELFDATA2LSB
        and header.e_machine == header.EM_ARM
        and abi_version == header.EF_ARM_ABI_VER5
        and float_abi == header.EF_ARM_ABI_FLOAT_HARD
    )
102
103
def _is_linux_i686() -> bool:
    """
    Report whether the running executable is a 32-bit little-endian x86
    (``EM_386``) binary.

    Returns ``False`` when the ELF header cannot be read.
    """
    header = _get_elf_header()
    if header is None:
        return False
    return (
        header.e_ident_class == header.ELFCLASS32
        and header.e_ident_data == header.ELFDATA2LSB
        and header.e_machine == header.EM_386
    )
112
113
def _have_compatible_abi(arch: str) -> bool:
    """
    Decide whether ``arch`` is an architecture for which manylinux tags
    may be generated on this interpreter.

    ``armv7l`` and ``i686`` are confirmed against the ELF header of the
    running executable; the other listed architectures are accepted as-is
    and anything else is rejected.
    """
    if arch in {"x86_64", "aarch64", "ppc64", "ppc64le", "s390x"}:
        return True
    if arch == "armv7l":
        return _is_linux_armhf()
    if arch == "i686":
        return _is_linux_i686()
    return False
120
121
# Maps a glibc major version to the last minor version released in that
# major series. If glibc ever changes its major version, we need this to
# build the complete list of all versions below the current one.
# That has not happened yet, so for now every major version falls back to a
# guessed last minor version of 50 (good enough for testing). Once a real
# value is known, record it in this dictionary.
_LAST_GLIBC_MINOR: Dict[int, int] = collections.defaultdict(lambda: 50)
128
129
# A glibc version as a (major, minor) pair, e.g. (2, 17). Being a tuple, it
# compares and hashes like the plain tuple of the same two integers.
_GLibCVersion = NamedTuple("_GLibCVersion", [("major", int), ("minor", int)])
133
134
def _glibc_version_string_confstr() -> Optional[str]:
    """
    Primary implementation of glibc_version_string using os.confstr.

    :return: the version portion of the ``CS_GNU_LIBC_VERSION`` value
        (e.g. ``"2.17"``), or ``None`` if it is unavailable or malformed.
    """
    # os.confstr is quite a bit faster than ctypes.DLL. It's also less likely
    # to be broken or missing. This strategy is used in the standard library
    # platform module.
    # https://github.com/python/cpython/blob/fcf1d003bf4f0100c/Lib/platform.py#L175-L183
    try:
        # os.confstr("CS_GNU_LIBC_VERSION") returns a string like "glibc 2.17".
        raw_value = os.confstr("CS_GNU_LIBC_VERSION")
    except (AttributeError, OSError, ValueError):
        # os.confstr() or CS_GNU_LIBC_VERSION not available...
        return None
    if raw_value is None:
        return None

    # Exactly two whitespace-separated fields are expected: name and version.
    fields = raw_value.split()
    if len(fields) != 2:
        return None
    return fields[1]
152
153
def _glibc_version_string_ctypes() -> Optional[str]:
    """
    Fallback implementation of glibc_version_string using ctypes.

    :return: the string reported by ``gnu_get_libc_version()`` (e.g.
        ``"2.5"``), or ``None`` if ctypes is unavailable, the process
        namespace cannot be opened, or the symbol does not exist.
    """
    try:
        import ctypes
    except ImportError:
        return None

    # ctypes.CDLL(None) internally calls dlopen(NULL), and as the dlopen
    # manpage says, "If filename is NULL, then the returned handle is for the
    # main program". This way we can let the linker do the work to figure out
    # which libc our process is actually using.
    #
    # The executable may not be dynamically linked at all (this can occur
    # when using musl libc, for example). dlopen() then fails and ctypes
    # raises OSError. At least with musl no errno is set on that OSError, and
    # its single message argument comes from libc itself, so it is not
    # portable to match on. Either way, a failed dlopen() means we cannot
    # proceed, so we give up on this strategy.
    try:
        main_program = ctypes.CDLL(None)
    except OSError:
        return None

    # A missing symbol means we are not linked against glibc.
    get_version = getattr(main_program, "gnu_get_libc_version", None)
    if get_version is None:
        return None

    # Call gnu_get_libc_version, which returns a string like "2.5"
    get_version.restype = ctypes.c_char_p
    reported = get_version()
    # py2 / py3 compatibility: the result may not already be a str, in which
    # case it is decoded as ASCII.
    if isinstance(reported, str):
        return reported
    return reported.decode("ascii")
196
197
def _glibc_version_string() -> Optional[str]:
    """Returns glibc version string, or None if not using glibc."""
    # Prefer the os.confstr strategy; only fall back to ctypes when it
    # yields nothing usable (None or an empty string).
    version = _glibc_version_string_confstr()
    if not version:
        version = _glibc_version_string_ctypes()
    return version
201
202
def _parse_glibc_version(version_str: str) -> Tuple[int, int]:
    """Parse a glibc version string into ``(major, minor)``.

    Only the leading ``major.minor`` digits are read, via a regexp rather
    than str.split, so that any trailing junk is discarded -- this happens
    in patched/forked versions of glibc (e.g. Linaro's version of glibc
    uses version strings like "2.20-2014.11"). See gh-3588.

    If the string does not start with ``major.minor``, a RuntimeWarning is
    issued and ``(-1, -1)`` is returned.
    """
    parsed = re.match(r"(?P<major>[0-9]+)\.(?P<minor>[0-9]+)", version_str)
    if parsed is not None:
        return int(parsed.group("major")), int(parsed.group("minor"))

    warnings.warn(
        "Expected glibc version with 2 components major.minor,"
        " got: %s" % version_str,
        RuntimeWarning,
    )
    return -1, -1
220
221
@functools.lru_cache()
def _get_glibc_version() -> Tuple[int, int]:
    """
    Return the glibc version of the running process as ``(major, minor)``.

    Yields ``(-1, -1)`` when no glibc version string is available. The
    result is cached after the first call.
    """
    raw_version = _glibc_version_string()
    return (-1, -1) if raw_version is None else _parse_glibc_version(raw_version)
228
229
230# From PEP 513, PEP 600
def _is_compatible(name: str, arch: str, version: _GLibCVersion) -> bool:
    """
    Decide whether tags built against glibc ``version`` may be used here.

    The system glibc must be at least ``version``. Beyond that, an optional
    ``_manylinux`` module may override the answer, either through a
    ``manylinux_compatible(major, minor, arch)`` function or through the
    legacy ``manylinux1_compatible`` / ``manylinux2010_compatible`` /
    ``manylinux2014_compatible`` attributes.

    :param name: tag name; accepted but not consulted by this function.
    :param arch: architecture string forwarded to ``manylinux_compatible``.
    :param version: glibc version the tag corresponds to.
    """
    if _get_glibc_version() < version:
        return False

    # Check for presence of _manylinux module.
    try:
        import _manylinux  # noqa
    except ImportError:
        return True

    if hasattr(_manylinux, "manylinux_compatible"):
        verdict = _manylinux.manylinux_compatible(version[0], version[1], arch)
        # A None verdict means the hook has no opinion: stay compatible.
        return True if verdict is None else bool(verdict)

    # Legacy per-policy attributes, each tied to one specific glibc version.
    legacy_flags = (
        (_GLibCVersion(2, 5), "manylinux1_compatible"),
        (_GLibCVersion(2, 12), "manylinux2010_compatible"),
        (_GLibCVersion(2, 17), "manylinux2014_compatible"),
    )
    for legacy_version, flag_name in legacy_flags:
        if version == legacy_version and hasattr(_manylinux, flag_name):
            return bool(getattr(_manylinux, flag_name))
    return True
255
256
# Maps the (major, minor) glibc version behind each legacy manylinux policy
# to that policy's tag name. platform_tags() uses it to emit the legacy tag
# in addition to the "manylinux_<major>_<minor>" tag for the same version.
_LEGACY_MANYLINUX_MAP: Dict[Tuple[int, int], str] = {
    # CentOS 7 w/ glibc 2.17 (PEP 599)
    (2, 17): "manylinux2014",
    # CentOS 6 w/ glibc 2.12 (PEP 571)
    (2, 12): "manylinux2010",
    # CentOS 5 w/ glibc 2.5 (PEP 513)
    (2, 5): "manylinux1",
}
265
266
def platform_tags(linux: str, arch: str) -> Iterator[str]:
    """
    Yield the manylinux platform tags usable by the running interpreter.

    Tags are produced from the newest glibc version (the one in use) down
    to the oldest supported one. Each tag is ``linux`` with the substring
    ``"linux"`` replaced by the manylinux tag name; a legacy tag
    (manylinux1 / manylinux2010 / manylinux2014) follows its matching
    ``manylinux_<major>_<minor>`` tag. Nothing is yielded when ``arch`` has
    no compatible ABI.

    :param linux: the plain linux platform string to derive tags from.
    :param arch: the architecture name to check and pass to compatibility
        checks.
    """
    if not _have_compatible_abi(arch):
        return

    # The bound is exclusive. x86_64 and i686 are supported back to glibc
    # (2, 5); every other architecture only back to (2, 17).
    if arch in {"x86_64", "i686"}:
        floor = _GLibCVersion(2, 4)
    else:
        floor = _GLibCVersion(2, 16)

    running = _GLibCVersion(*_get_glibc_version())

    # We can assume compatibility across glibc major versions.
    # https://sourceware.org/bugzilla/show_bug.cgi?id=24636
    #
    # One "ceiling" per major version: the running version itself, then the
    # last known minor of every older major series down to major 2. Walking
    # each ceiling downwards gives the canonical list of all glibc versions
    # from the running one down to the floor.
    ceilings = [running] + [
        _GLibCVersion(older_major, _LAST_GLIBC_MINOR[older_major])
        for older_major in range(running.major - 1, 1, -1)
    ]

    for ceiling in ceilings:
        # Within the floor's major series stop just above the floor; for
        # other glibc major versions the oldest supported is (x, 0).
        stop_minor = floor.minor if ceiling.major == floor.major else -1
        for minor in range(ceiling.minor, stop_minor, -1):
            candidate = _GLibCVersion(ceiling.major, minor)
            modern_tag = "manylinux_{}_{}".format(*candidate)
            if _is_compatible(modern_tag, arch, candidate):
                yield linux.replace("linux", modern_tag)

            # Handle the legacy manylinux1, manylinux2010, manylinux2014 tags.
            legacy_tag = _LEGACY_MANYLINUX_MAP.get(candidate)
            if legacy_tag is not None and _is_compatible(
                legacy_tag, arch, candidate
            ):
                yield linux.replace("linux", legacy_tag)
302