1""" 2Lib/ctypes.util.find_library() support for AIX 3Similar approach as done for Darwin support by using separate files 4but unlike Darwin - no extension such as ctypes.macholib.* 5 6dlopen() is an interface to AIX initAndLoad() - primary documentation at: 7https://www.ibm.com/support/knowledgecenter/en/ssw_aix_61/com.ibm.aix.basetrf1/dlopen.htm 8https://www.ibm.com/support/knowledgecenter/en/ssw_aix_61/com.ibm.aix.basetrf1/load.htm 9 10AIX supports two styles for dlopen(): svr4 (System V Release 4) which is common on posix 11platforms, but also a BSD style - aka SVR3. 12 13From AIX 5.3 Difference Addendum (December 2004) 142.9 SVR4 linking affinity 15Nowadays, there are two major object file formats used by the operating systems: 16XCOFF: The COFF enhanced by IBM and others. The original COFF (Common 17Object File Format) was the base of SVR3 and BSD 4.2 systems. 18ELF: Executable and Linking Format that was developed by AT&T and is a 19base for SVR4 UNIX. 20 21While the shared library content is identical on AIX - one is located as a filepath name 22(svr4 style) and the other is located as a member of an archive (and the archive 23is located as a filepath name). 24 25The key difference arises when supporting multiple abi formats (i.e., 32 and 64 bit). 26For svr4 either only one ABI is supported, or there are two directories, or there 27are different file names. The most common solution for multiple ABI is multiple 28directories. 29 30For the XCOFF (aka AIX) style - one directory (one archive file) is sufficient 31as multiple shared libraries can be in the archive - even sharing the same name. 32In documentation the archive is also referred to as the "base" and the shared 33library object is referred to as the "member". 34 35For dlopen() on AIX (read initAndLoad()) the calls are similar. 36Default activity occurs when no path information is provided. When path 37information is provided dlopen() does not search any other directories. 38 39For SVR4 - the shared library name is the name of the file expected: libFOO.so 40For AIX - the shared library is expressed as base(member). The search is for the 41base (e.g., libFOO.a) and once the base is found the shared library - identified by 42member (e.g., libFOO.so, or shr.o) is located and loaded. 43 44The mode bit RTLD_MEMBER tells initAndLoad() that it needs to use the AIX (SVR3) 45naming style. 46""" 47__author__ = "Michael Felt <aixtools@felt.demon.nl>" 48 49import re 50from os import environ, path 51from sys import executable 52from ctypes import c_void_p, sizeof 53from subprocess import Popen, PIPE, DEVNULL 54 55# Executable bit size - 32 or 64 56# Used to filter the search in an archive by size, e.g., -X64 57AIX_ABI = sizeof(c_void_p) * 8 58 59 60from sys import maxsize 61def _last_version(libnames, sep): 62 def _num_version(libname): 63 # "libxyz.so.MAJOR.MINOR" => [MAJOR, MINOR] 64 parts = libname.split(sep) 65 nums = [] 66 try: 67 while parts: 68 nums.insert(0, int(parts.pop())) 69 except ValueError: 70 pass 71 return nums or [maxsize] 72 return max(reversed(libnames), key=_num_version) 73 74def get_ld_header(p): 75 # "nested-function, but placed at module level 76 ld_header = None 77 for line in p.stdout: 78 if line.startswith(('/', './', '../')): 79 ld_header = line 80 elif "INDEX" in line: 81 return ld_header.rstrip('\n') 82 return None 83 84def get_ld_header_info(p): 85 # "nested-function, but placed at module level 86 # as an ld_header was found, return known paths, archives and members 87 # these lines start with a digit 88 info = [] 89 for line in p.stdout: 90 if re.match("[0-9]", line): 91 info.append(line) 92 else: 93 # blank line (separator), consume line and end for loop 94 break 95 return info 96 97def get_ld_headers(file): 98 """ 99 Parse the header of the loader section of executable and archives 100 This function calls /usr/bin/dump -H as a subprocess 101 and returns a list of (ld_header, ld_header_info) tuples. 102 """ 103 # get_ld_headers parsing: 104 # 1. Find a line that starts with /, ./, or ../ - set as ld_header 105 # 2. If "INDEX" in occurs in a following line - return ld_header 106 # 3. get info (lines starting with [0-9]) 107 ldr_headers = [] 108 p = Popen(["/usr/bin/dump", f"-X{AIX_ABI}", "-H", file], 109 universal_newlines=True, stdout=PIPE, stderr=DEVNULL) 110 # be sure to read to the end-of-file - getting all entries 111 while True: 112 ld_header = get_ld_header(p) 113 if ld_header: 114 ldr_headers.append((ld_header, get_ld_header_info(p))) 115 else: 116 break 117 p.stdout.close() 118 p.wait() 119 return ldr_headers 120 121def get_shared(ld_headers): 122 """ 123 extract the shareable objects from ld_headers 124 character "[" is used to strip off the path information. 125 Note: the "[" and "]" characters that are part of dump -H output 126 are not removed here. 127 """ 128 shared = [] 129 for (line, _) in ld_headers: 130 # potential member lines contain "[" 131 # otherwise, no processing needed 132 if "[" in line: 133 # Strip off trailing colon (:) 134 shared.append(line[line.index("["):-1]) 135 return shared 136 137def get_one_match(expr, lines): 138 """ 139 Must be only one match, otherwise result is None. 140 When there is a match, strip leading "[" and trailing "]" 141 """ 142 # member names in the ld_headers output are between square brackets 143 expr = rf'\[({expr})\]' 144 matches = list(filter(None, (re.search(expr, line) for line in lines))) 145 if len(matches) == 1: 146 return matches[0].group(1) 147 else: 148 return None 149 150# additional processing to deal with AIX legacy names for 64-bit members 151def get_legacy(members): 152 """ 153 This routine provides historical aka legacy naming schemes started 154 in AIX4 shared library support for library members names. 155 e.g., in /usr/lib/libc.a the member name shr.o for 32-bit binary and 156 shr_64.o for 64-bit binary. 157 """ 158 if AIX_ABI == 64: 159 # AIX 64-bit member is one of shr64.o, shr_64.o, or shr4_64.o 160 expr = r'shr4?_?64\.o' 161 member = get_one_match(expr, members) 162 if member: 163 return member 164 else: 165 # 32-bit legacy names - both shr.o and shr4.o exist. 166 # shr.o is the preferred name so we look for shr.o first 167 # i.e., shr4.o is returned only when shr.o does not exist 168 for name in ['shr.o', 'shr4.o']: 169 member = get_one_match(re.escape(name), members) 170 if member: 171 return member 172 return None 173 174def get_version(name, members): 175 """ 176 Sort list of members and return highest numbered version - if it exists. 177 This function is called when an unversioned libFOO.a(libFOO.so) has 178 not been found. 179 180 Versioning for the member name is expected to follow 181 GNU LIBTOOL conventions: the highest version (x, then X.y, then X.Y.z) 182 * find [libFoo.so.X] 183 * find [libFoo.so.X.Y] 184 * find [libFoo.so.X.Y.Z] 185 186 Before the GNU convention became the standard scheme regardless of 187 binary size AIX packagers used GNU convention "as-is" for 32-bit 188 archive members but used an "distinguishing" name for 64-bit members. 189 This scheme inserted either 64 or _64 between libFOO and .so 190 - generally libFOO_64.so, but occasionally libFOO64.so 191 """ 192 # the expression ending for versions must start as 193 # '.so.[0-9]', i.e., *.so.[at least one digit] 194 # while multiple, more specific expressions could be specified 195 # to search for .so.X, .so.X.Y and .so.X.Y.Z 196 # after the first required 'dot' digit 197 # any combination of additional 'dot' digits pairs are accepted 198 # anything more than libFOO.so.digits.digits.digits 199 # should be seen as a member name outside normal expectations 200 exprs = [rf'lib{name}\.so\.[0-9]+[0-9.]*', 201 rf'lib{name}_?64\.so\.[0-9]+[0-9.]*'] 202 for expr in exprs: 203 versions = [] 204 for line in members: 205 m = re.search(expr, line) 206 if m: 207 versions.append(m.group(0)) 208 if versions: 209 return _last_version(versions, '.') 210 return None 211 212def get_member(name, members): 213 """ 214 Return an archive member matching the request in name. 215 Name is the library name without any prefix like lib, suffix like .so, 216 or version number. 217 Given a list of members find and return the most appropriate result 218 Priority is given to generic libXXX.so, then a versioned libXXX.so.a.b.c 219 and finally, legacy AIX naming scheme. 220 """ 221 # look first for a generic match - prepend lib and append .so 222 expr = rf'lib{name}\.so' 223 member = get_one_match(expr, members) 224 if member: 225 return member 226 elif AIX_ABI == 64: 227 expr = rf'lib{name}64\.so' 228 member = get_one_match(expr, members) 229 if member: 230 return member 231 # since an exact match with .so as suffix was not found 232 # look for a versioned name 233 # If a versioned name is not found, look for AIX legacy member name 234 member = get_version(name, members) 235 if member: 236 return member 237 else: 238 return get_legacy(members) 239 240def get_libpaths(): 241 """ 242 On AIX, the buildtime searchpath is stored in the executable. 243 as "loader header information". 244 The command /usr/bin/dump -H extracts this info. 245 Prefix searched libraries with LD_LIBRARY_PATH (preferred), 246 or LIBPATH if defined. These paths are appended to the paths 247 to libraries the python executable is linked with. 248 This mimics AIX dlopen() behavior. 249 """ 250 libpaths = environ.get("LD_LIBRARY_PATH") 251 if libpaths is None: 252 libpaths = environ.get("LIBPATH") 253 if libpaths is None: 254 libpaths = [] 255 else: 256 libpaths = libpaths.split(":") 257 objects = get_ld_headers(executable) 258 for (_, lines) in objects: 259 for line in lines: 260 # the second (optional) argument is PATH if it includes a / 261 path = line.split()[1] 262 if "/" in path: 263 libpaths.extend(path.split(":")) 264 return libpaths 265 266def find_shared(paths, name): 267 """ 268 paths is a list of directories to search for an archive. 269 name is the abbreviated name given to find_library(). 270 Process: search "paths" for archive, and if an archive is found 271 return the result of get_member(). 272 If an archive is not found then return None 273 """ 274 for dir in paths: 275 # /lib is a symbolic link to /usr/lib, skip it 276 if dir == "/lib": 277 continue 278 # "lib" is prefixed to emulate compiler name resolution, 279 # e.g., -lc to libc 280 base = f'lib{name}.a' 281 archive = path.join(dir, base) 282 if path.exists(archive): 283 members = get_shared(get_ld_headers(archive)) 284 member = get_member(re.escape(name), members) 285 if member is not None: 286 return (base, member) 287 else: 288 return (None, None) 289 return (None, None) 290 291def find_library(name): 292 """AIX implementation of ctypes.util.find_library() 293 Find an archive member that will dlopen(). If not available, 294 also search for a file (or link) with a .so suffix. 295 296 AIX supports two types of schemes that can be used with dlopen(). 297 The so-called SystemV Release4 (svr4) format is commonly suffixed 298 with .so while the (default) AIX scheme has the library (archive) 299 ending with the suffix .a 300 As an archive has multiple members (e.g., 32-bit and 64-bit) in one file 301 the argument passed to dlopen must include both the library and 302 the member names in a single string. 303 304 find_library() looks first for an archive (.a) with a suitable member. 305 If no archive+member pair is found, look for a .so file. 306 """ 307 308 libpaths = get_libpaths() 309 (base, member) = find_shared(libpaths, name) 310 if base is not None: 311 return f"{base}({member})" 312 313 # To get here, a member in an archive has not been found 314 # In other words, either: 315 # a) a .a file was not found 316 # b) a .a file did not have a suitable member 317 # So, look for a .so file 318 # Check libpaths for .so file 319 # Note, the installation must prepare a link from a .so 320 # to a versioned file 321 # This is common practice by GNU libtool on other platforms 322 soname = f"lib{name}.so" 323 for dir in libpaths: 324 # /lib is a symbolic link to /usr/lib, skip it 325 if dir == "/lib": 326 continue 327 shlib = path.join(dir, soname) 328 if path.exists(shlib): 329 return soname 330 # if we are here, we have not found anything plausible 331 return None 332