import collections
import functools
import os
import re
import struct
import sys
import warnings
from typing import IO, Dict, Iterator, NamedTuple, Optional, Tuple


# Python does not provide platform information at sufficient granularity to
# identify the architecture of the running executable in some cases, so we
# determine it dynamically by reading the information from the running
# process. This only applies on Linux, which uses the ELF format.
class _ELFFileHeader:
    """Fields of an ELF file header, read sequentially from a binary stream.

    Each header field is stored as an instance attribute (``e_ident_*``,
    ``e_type``, ``e_machine``, ``e_flags``, ...). Construction raises
    ``_ELFFileHeader._InvalidELFFileHeader`` when the stream is too short,
    the magic number does not match, or the class / data-encoding bytes hold
    a value other than the ones listed below.
    """

    # https://en.wikipedia.org/wiki/Executable_and_Linkable_Format#File_header
    class _InvalidELFFileHeader(ValueError):
        """
        An invalid ELF file header was found.
        """

    # The bytes b"\x7fELF" interpreted as a big-endian 32-bit integer.
    ELF_MAGIC_NUMBER = 0x7F454C46
    # Accepted e_ident_class values; ELFCLASS32 selects 4-byte address
    # fields, ELFCLASS64 selects 8-byte ones (see __init__).
    ELFCLASS32 = 1
    ELFCLASS64 = 2
    # Accepted e_ident_data values; ELFDATA2LSB selects little-endian
    # decoding of the remaining fields, ELFDATA2MSB big-endian.
    ELFDATA2LSB = 1
    ELFDATA2MSB = 2
    # e_machine values.
    EM_386 = 3
    EM_S390 = 22
    EM_ARM = 40
    EM_X86_64 = 62
    # Masks / values applied to e_flags when e_machine is EM_ARM.
    EF_ARM_ABIMASK = 0xFF000000
    EF_ARM_ABI_VER5 = 0x05000000
    EF_ARM_ABI_FLOAT_HARD = 0x00000400

    def __init__(self, file: IO[bytes]) -> None:
        """Parse the header from *file*, which must be positioned at its start.

        Raises:
            _ELFFileHeader._InvalidELFFileHeader: if the data read from
                *file* is not a well-formed ELF header.
        """

        def unpack(fmt: str) -> int:
            """Read exactly one ``struct`` value of format *fmt* from *file*."""
            try:
                data = file.read(struct.calcsize(fmt))
                result: Tuple[int, ...] = struct.unpack(fmt, data)
            except struct.error as err:
                # A short read leaves ``data`` with the wrong length; keep
                # the low-level error attached as the cause.
                raise _ELFFileHeader._InvalidELFFileHeader() from err
            return result[0]

        self.e_ident_magic = unpack(">I")
        if self.e_ident_magic != self.ELF_MAGIC_NUMBER:
            raise _ELFFileHeader._InvalidELFFileHeader()
        self.e_ident_class = unpack("B")
        if self.e_ident_class not in {self.ELFCLASS32, self.ELFCLASS64}:
            raise _ELFFileHeader._InvalidELFFileHeader()
        self.e_ident_data = unpack("B")
        if self.e_ident_data not in {self.ELFDATA2LSB, self.ELFDATA2MSB}:
            raise _ELFFileHeader._InvalidELFFileHeader()
        self.e_ident_version = unpack("B")
        self.e_ident_osabi = unpack("B")
        self.e_ident_abiversion = unpack("B")
        self.e_ident_pad = file.read(7)

        # Everything after e_ident is decoded with the byte order announced
        # by e_ident_data and the address width announced by e_ident_class.
        byte_order = "<" if self.e_ident_data == self.ELFDATA2LSB else ">"
        format_h = byte_order + "H"
        format_i = byte_order + "I"
        format_q = byte_order + "Q"
        format_p = format_i if self.e_ident_class == self.ELFCLASS32 else format_q

        self.e_type = unpack(format_h)
        self.e_machine = unpack(format_h)
        self.e_version = unpack(format_i)
        self.e_entry = unpack(format_p)
        self.e_phoff = unpack(format_p)
        self.e_shoff = unpack(format_p)
        self.e_flags = unpack(format_i)
        self.e_ehsize = unpack(format_h)
        self.e_phentsize = unpack(format_h)
        self.e_phnum = unpack(format_h)
        self.e_shentsize = unpack(format_h)
        self.e_shnum = unpack(format_h)
        self.e_shstrndx = unpack(format_h)


def _get_elf_header() -> Optional[_ELFFileHeader]:
    """Return the parsed ELF header of ``sys.executable``.

    Returns ``None`` when the executable cannot be opened or read
    (``OSError``), when ``open`` rejects the value of ``sys.executable``
    (``TypeError``), or when the file does not start with a valid ELF header.
    """
    try:
        with open(sys.executable, "rb") as f:
            return _ELFFileHeader(f)
    except (OSError, TypeError, _ELFFileHeader._InvalidELFFileHeader):
        return None


def _is_linux_armhf() -> bool:
    """Return True if the running executable is 32-bit ARM, EABI v5, hard-float."""
    # hard-float ABI can be detected from the ELF header of the running
    # process
    # https://static.docs.arm.com/ihi0044/g/aaelf32.pdf
    elf_header = _get_elf_header()
    if elf_header is None:
        return False
    flags = elf_header.e_flags
    return (
        elf_header.e_ident_class == elf_header.ELFCLASS32
        and elf_header.e_ident_data == elf_header.ELFDATA2LSB
        and elf_header.e_machine == elf_header.EM_ARM
        and (flags & elf_header.EF_ARM_ABIMASK) == elf_header.EF_ARM_ABI_VER5
        and (flags & elf_header.EF_ARM_ABI_FLOAT_HARD)
        == elf_header.EF_ARM_ABI_FLOAT_HARD
    )


def _is_linux_i686() -> bool:
    """Return True if the running executable is a 32-bit little-endian x86 ELF."""
    elf_header = _get_elf_header()
    if elf_header is None:
        return False
    return (
        elf_header.e_ident_class == elf_header.ELFCLASS32
        and elf_header.e_ident_data == elf_header.ELFDATA2LSB
        and elf_header.e_machine == elf_header.EM_386
    )


def _have_compatible_abi(arch: str) -> bool:
    """Return True if manylinux tags may be generated for *arch*.

    ``armv7l`` and ``i686`` are confirmed against the ELF header of the
    running executable; the remaining supported architectures are accepted
    by name alone, and any other value is rejected.
    """
    if arch == "armv7l":
        return _is_linux_armhf()
    if arch == "i686":
        return _is_linux_i686()
    return arch in {"x86_64", "aarch64", "ppc64", "ppc64le", "s390x"}


# If glibc ever changes its major version, we need to know what the last
# minor version was, so we can build the complete list of all versions.
# For now, guess what the highest minor version might be, assume it will
# be 50 for testing. Once this actually happens, update the dictionary
# with the actual value.
_LAST_GLIBC_MINOR: Dict[int, int] = collections.defaultdict(lambda: 50)


class _GLibCVersion(NamedTuple):
    """A glibc version as a ``(major, minor)`` pair."""

    major: int
    minor: int


def _glibc_version_string_confstr() -> Optional[str]:
    """
    Primary implementation of glibc_version_string using os.confstr.

    Returns the version part of the configuration string (e.g. ``"2.17"``),
    or ``None`` if it is unavailable or not made of exactly two
    whitespace-separated parts.
    """
    # os.confstr is quite a bit faster than ctypes.DLL. It's also less likely
    # to be broken or missing. This strategy is used in the standard library
    # platform module.
    # https://github.com/python/cpython/blob/fcf1d003bf4f0100c/Lib/platform.py#L175-L183
    try:
        # os.confstr("CS_GNU_LIBC_VERSION") returns a string like "glibc 2.17".
        version_string = os.confstr("CS_GNU_LIBC_VERSION")
        if version_string is None:
            # The configuration value is not defined.
            return None
        _, version = version_string.split()
    except (AttributeError, OSError, ValueError):
        # os.confstr() or CS_GNU_LIBC_VERSION not available (or a bad value)...
        return None
    return version


def _glibc_version_string_ctypes() -> Optional[str]:
    """
    Fallback implementation of glibc_version_string using ctypes.

    Returns the string reported by ``gnu_get_libc_version()`` (like
    ``"2.5"``), or ``None`` if ctypes is unavailable, the process namespace
    cannot be opened, the symbol is missing, or the call yields no value.
    """
    try:
        import ctypes
    except ImportError:
        return None

    # ctypes.CDLL(None) internally calls dlopen(NULL), and as the dlopen
    # manpage says, "If filename is NULL, then the returned handle is for the
    # main program". This way we can let the linker do the work to figure out
    # which libc our process is actually using.
    #
    # We must also handle the special case where the executable is not a
    # dynamically linked executable. This can occur when using musl libc,
    # for example. In this situation, dlopen() will error, leading to an
    # OSError. Interestingly, at least in the case of musl, there is no
    # errno set on the OSError. The single string argument used to construct
    # OSError comes from libc itself and is therefore not portable to
    # hard code here. In any case, failure to call dlopen() means we
    # cannot proceed, so we bail on our attempt.
    try:
        process_namespace = ctypes.CDLL(None)
    except OSError:
        return None

    try:
        gnu_get_libc_version = process_namespace.gnu_get_libc_version
    except AttributeError:
        # Symbol doesn't exist -> therefore, we are not linked to
        # glibc.
        return None

    # Call gnu_get_libc_version, which returns a string like "2.5"
    gnu_get_libc_version.restype = ctypes.c_char_p
    raw_version = gnu_get_libc_version()
    if raw_version is None:
        # ctypes maps a NULL char pointer to None; there is nothing to decode.
        return None
    if isinstance(raw_version, str):
        # Already text (the original py2 / py3 compatible code allowed this).
        return raw_version
    version_str: str = raw_version.decode("ascii")
    return version_str


def _glibc_version_string() -> Optional[str]:
    """Returns glibc version string, or None if not using glibc."""
    return _glibc_version_string_confstr() or _glibc_version_string_ctypes()


def _parse_glibc_version(version_str: str) -> Tuple[int, int]:
    """Parse glibc version.

    We use a regexp instead of str.split because we want to discard any
    random junk that might come after the minor version -- this might happen
    in patched/forked versions of glibc (e.g. Linaro's version of glibc
    uses version strings like "2.20-2014.11"). See gh-3588.

    Returns ``(major, minor)``; if *version_str* does not start with
    ``<digits>.<digits>`` a ``RuntimeWarning`` is issued and ``(-1, -1)`` is
    returned.
    """
    m = re.match(r"(?P<major>[0-9]+)\.(?P<minor>[0-9]+)", version_str)
    if not m:
        warnings.warn(
            "Expected glibc version with 2 components major.minor,"
            f" got: {version_str}",
            RuntimeWarning,
        )
        return -1, -1
    return int(m.group("major")), int(m.group("minor"))


@functools.lru_cache()
def _get_glibc_version() -> Tuple[int, int]:
    """Return the running glibc version as ``(major, minor)``.

    ``(-1, -1)`` is returned when no glibc version string could be obtained
    or parsed. The result is cached after the first call.
    """
    version_str = _glibc_version_string()
    if version_str is None:
        return (-1, -1)
    return _parse_glibc_version(version_str)


# From PEP 513, PEP 600
def _is_compatible(name: str, arch: str, version: _GLibCVersion) -> bool:
    """Return True if tags built against glibc *version* are usable here.

    The running glibc must be at least *version*. When a ``_manylinux``
    module is importable it can override the answer: first through its
    ``manylinux_compatible(major, minor, arch)`` function (a ``None`` result
    means "no opinion"), otherwise through the legacy
    ``manylinux1_compatible`` / ``manylinux2010_compatible`` /
    ``manylinux2014_compatible`` attributes for glibc 2.5 / 2.12 / 2.17.

    *name* is accepted for interface compatibility and is not consulted.
    """
    sys_glibc = _get_glibc_version()
    if sys_glibc < version:
        return False
    # Check for presence of _manylinux module.
    try:
        import _manylinux  # noqa
    except ImportError:
        return True
    if hasattr(_manylinux, "manylinux_compatible"):
        result = _manylinux.manylinux_compatible(version[0], version[1], arch)
        if result is not None:
            return bool(result)
        return True
    # Legacy per-policy override attributes, keyed by the glibc version the
    # policy corresponds to.
    legacy_attribute = {
        _GLibCVersion(2, 5): "manylinux1_compatible",
        _GLibCVersion(2, 12): "manylinux2010_compatible",
        _GLibCVersion(2, 17): "manylinux2014_compatible",
    }.get(version)
    if legacy_attribute is not None and hasattr(_manylinux, legacy_attribute):
        return bool(getattr(_manylinux, legacy_attribute))
    return True


_LEGACY_MANYLINUX_MAP = {
    # CentOS 7 w/ glibc 2.17 (PEP 599)
    (2, 17): "manylinux2014",
    # CentOS 6 w/ glibc 2.12 (PEP 571)
    (2, 12): "manylinux2010",
    # CentOS 5 w/ glibc 2.5 (PEP 513)
    (2, 5): "manylinux1",
}


def platform_tags(linux: str, arch: str) -> Iterator[str]:
    """Yield the manylinux platform tags compatible with the running system.

    Tags are produced from the current glibc version downwards. Each tag is
    *linux* with the substring ``"linux"`` replaced by ``manylinux_X_Y`` (and,
    for glibc 2.17 / 2.12 / 2.5, additionally by the legacy ``manylinux2014``
    / ``manylinux2010`` / ``manylinux1`` alias). Nothing is yielded when
    *arch* does not have a compatible ABI.
    """
    if not _have_compatible_abi(arch):
        return
    # Oldest glibc to be supported regardless of architecture is (2, 17).
    too_old_glibc2 = _GLibCVersion(2, 16)
    if arch in {"x86_64", "i686"}:
        # On x86/i686 also oldest glibc to be supported is (2, 5).
        too_old_glibc2 = _GLibCVersion(2, 4)
    current_glibc = _GLibCVersion(*_get_glibc_version())
    glibc_max_list = [current_glibc]
    # We can assume compatibility across glibc major versions.
    # https://sourceware.org/bugzilla/show_bug.cgi?id=24636
    #
    # Build a list of maximum glibc versions so that we can
    # output the canonical list of all glibc from current_glibc
    # down to too_old_glibc2, including all intermediary versions.
    for glibc_major in range(current_glibc.major - 1, 1, -1):
        glibc_minor = _LAST_GLIBC_MINOR[glibc_major]
        glibc_max_list.append(_GLibCVersion(glibc_major, glibc_minor))
    for glibc_max in glibc_max_list:
        if glibc_max.major == too_old_glibc2.major:
            min_minor = too_old_glibc2.minor
        else:
            # For other glibc major versions oldest supported is (x, 0).
            min_minor = -1
        for glibc_minor in range(glibc_max.minor, min_minor, -1):
            glibc_version = _GLibCVersion(glibc_max.major, glibc_minor)
            tag = f"manylinux_{glibc_version.major}_{glibc_version.minor}"
            if _is_compatible(tag, arch, glibc_version):
                yield linux.replace("linux", tag)
            # Handle the legacy manylinux1, manylinux2010, manylinux2014 tags.
            if glibc_version in _LEGACY_MANYLINUX_MAP:
                legacy_tag = _LEGACY_MANYLINUX_MAP[glibc_version]
                if _is_compatible(legacy_tag, arch, glibc_version):
                    yield linux.replace("linux", legacy_tag)