1"""
2Lib/ctypes.util.find_library() support for AIX
3Similar approach as done for Darwin support by using separate files
4but unlike Darwin - no extension such as ctypes.macholib.*
5
6dlopen() is an interface to AIX initAndLoad() - primary documentation at:
7https://www.ibm.com/support/knowledgecenter/en/ssw_aix_61/com.ibm.aix.basetrf1/dlopen.htm
8https://www.ibm.com/support/knowledgecenter/en/ssw_aix_61/com.ibm.aix.basetrf1/load.htm
9
AIX supports two styles for dlopen(): svr4 (System V Release 4), which is common on
POSIX platforms, and a BSD style - aka SVR3.
12
13From AIX 5.3 Difference Addendum (December 2004)
142.9 SVR4 linking affinity
15Nowadays, there are two major object file formats used by the operating systems:
16XCOFF: The COFF enhanced by IBM and others. The original COFF (Common
17Object File Format) was the base of SVR3 and BSD 4.2 systems.
18ELF:   Executable and Linking Format that was developed by AT&T and is a
19base for SVR4 UNIX.
20
21While the shared library content is identical on AIX - one is located as a filepath name
22(svr4 style) and the other is located as a member of an archive (and the archive
23is located as a filepath name).
24
25The key difference arises when supporting multiple abi formats (i.e., 32 and 64 bit).
26For svr4 either only one ABI is supported, or there are two directories, or there
27are different file names. The most common solution for multiple ABI is multiple
28directories.
29
30For the XCOFF (aka AIX) style - one directory (one archive file) is sufficient
31as multiple shared libraries can be in the archive - even sharing the same name.
32In documentation the archive is also referred to as the "base" and the shared
33library object is referred to as the "member".
34
35For dlopen() on AIX (read initAndLoad()) the calls are similar.
36Default activity occurs when no path information is provided. When path
37information is provided dlopen() does not search any other directories.
38
39For SVR4 - the shared library name is the name of the file expected: libFOO.so
40For AIX - the shared library is expressed as base(member). The search is for the
41base (e.g., libFOO.a) and once the base is found the shared library - identified by
42member (e.g., libFOO.so, or shr.o) is located and loaded.
43
44The mode bit RTLD_MEMBER tells initAndLoad() that it needs to use the AIX (SVR3)
45naming style.
46"""
47__author__ = "Michael Felt <aixtools@felt.demon.nl>"
48
49import re
50from os import environ, path
51from sys import executable
52from ctypes import c_void_p, sizeof
53from subprocess import Popen, PIPE, DEVNULL
54
# Pointer width of the running interpreter in bits - 32 or 64.
# Passed to /usr/bin/dump as the object mode (e.g., -X64) and used to
# choose between 32-bit and 64-bit archive member names.
AIX_ABI = 8 * sizeof(c_void_p)
58
59
60from sys import maxsize
def _last_version(libnames, sep):
    """
    Return the entry of libnames carrying the highest trailing version.

    Each name is split on sep and the trailing run of integer fields is
    compared as a list, e.g., "libxyz.so.MAJOR.MINOR" => [MAJOR, MINOR].
    A name without any trailing numeric field sorts above every versioned
    name. When several names compare equal the last one in libnames wins.
    """
    def _version_key(candidate):
        trailing = []
        # walk the fields right-to-left until the first non-integer one
        for field in reversed(candidate.split(sep)):
            try:
                trailing.append(int(field))
            except ValueError:
                break
        trailing.reverse()
        return trailing or [maxsize]
    return max(reversed(libnames), key=_version_key)
73
def get_ld_header(p):
    """
    Scan p.stdout for the next ld_header and return it.

    p is an object whose stdout attribute yields text lines, such as the
    Popen object created in get_ld_headers(). Lines are consumed up to and
    including the first line containing "INDEX" that follows a candidate
    header line (a line starting with /, ./ or ../).

    Returns the most recent candidate header line with trailing newlines
    removed, or None when the output ends before such an "INDEX" line.
    """
    # "nested" function, but placed at module level
    ld_header = None
    for line in p.stdout:
        if line.startswith(('/', './', '../')):
            ld_header = line
        elif "INDEX" in line and ld_header is not None:
            # An "INDEX" line seen before any header line has nothing to
            # report; skipping it avoids calling rstrip() on None.
            return ld_header.rstrip('\n')
    return None
83
def get_ld_header_info(p):
    """
    Collect the info lines that follow an ld_header in p.stdout.

    Consecutive lines starting with a digit are gathered (unmodified) into
    the returned list. The first line that does not start with a digit -
    normally the blank separator line - is consumed and ends the scan.
    """
    # "nested" function, but placed at module level
    collected = []
    for entry in p.stdout:
        if re.match("[0-9]", entry) is None:
            # separator reached: this line is dropped, the rest is left
            # in p.stdout for the next caller
            break
        collected.append(entry)
    return collected
96
def get_ld_headers(file):
    """
    Parse the header of the loader section of executable and archives
    This function calls /usr/bin/dump -H as a subprocess
    and returns a list of (ld_header, ld_header_info) tuples.

    file is the path handed to dump; the object mode (-X32 or -X64)
    follows AIX_ABI. Error output of dump is discarded.
    """
    # get_ld_headers parsing:
    # 1. Find a line that starts with /, ./, or ../ - set as ld_header
    # 2. If "INDEX" occurs in a following line - return ld_header
    # 3. get info (lines starting with [0-9])
    ldr_headers = []
    # The context manager closes the pipe and waits for the child even
    # when parsing raises, so no descriptor or zombie process is left.
    with Popen(["/usr/bin/dump", f"-X{AIX_ABI}", "-H", file],
               universal_newlines=True, stdout=PIPE, stderr=DEVNULL) as p:
        # be sure to read to the end-of-file - getting all entries
        while True:
            ld_header = get_ld_header(p)
            if not ld_header:
                break
            ldr_headers.append((ld_header, get_ld_header_info(p)))
    return ldr_headers
120
def get_shared(ld_headers):
    """
    Return the "[member]" part of every ld_header that names a member.

    ld_headers is a list of (ld_header, ld_header_info) tuples; only the
    ld_header is examined. Headers without "[" are skipped. For the rest,
    everything before the first "[" (the path information) and the final
    character (the trailing colon) are dropped.
    Note: the "[" and "]" characters that are part of dump -H output
    are kept in the result.
    """
    return [header[header.index("["):-1]
            for header, _ in ld_headers
            if "[" in header]
136
def get_one_match(expr, lines):
    """
    Return the text matched by expr when exactly one line matches it.

    expr is a regular expression for a member name; it is searched for
    between literal "[" and "]", as member names appear in the ld_headers
    output. The result excludes those brackets. Zero matching lines, or
    more than one, give None.
    """
    bracketed = rf'\[({expr})\]'
    hits = [hit for hit in (re.search(bracketed, line) for line in lines)
            if hit]
    return hits[0].group(1) if len(hits) == 1 else None
149
# additional processing to deal with AIX legacy member names (32-bit and 64-bit)
def get_legacy(members):
    """
    Return the member that follows the historical AIX naming scheme.

    The scheme dates from AIX4 shared library support, e.g., in
    /usr/lib/libc.a the member shr.o is the 32-bit binary and shr_64.o
    the 64-bit binary. Which names are tried depends on AIX_ABI.
    Returns None when no legacy name matches exactly one member.
    """
    if AIX_ABI == 64:
        # AIX 64-bit member is one of shr64.o, shr_64.o, or shr4_64.o
        candidates = [r'shr4?_?64\.o']
    else:
        # 32-bit legacy names - both shr.o and shr4.o exist.
        # shr.o is the preferred name, so it is tried first and
        # shr4.o is returned only when shr.o does not match
        candidates = [re.escape(legacy) for legacy in ('shr.o', 'shr4.o')]
    for pattern in candidates:
        found = get_one_match(pattern, members)
        if found:
            return found
    return None
173
def get_version(name, members):
    """
    Return the highest numbered version of lib<name>.so among members,
    or None when no versioned member exists.
    This function is called when an unversioned libFOO.a(libFOO.so) has
    not been found.

    name is inserted into a regular expression as given, so the caller
    is expected to have escaped it.

    Versioning for the member name is expected to follow
    GNU LIBTOOL conventions: the highest version (x, then X.y, then X.Y.z)
     * find [libFoo.so.X]
     * find [libFoo.so.X.Y]
     * find [libFoo.so.X.Y.Z]

    Before the GNU convention became the standard scheme regardless of
    binary size, AIX packagers used the GNU convention "as-is" for 32-bit
    archive members but used a "distinguishing" name for 64-bit members.
    This scheme inserted either 64 or _64 between libFOO and .so
    - generally libFOO_64.so, but occasionally libFOO64.so
    """
    # A version suffix must start with '.so.' followed by at least one
    # digit; after that any mix of digits and dots is accepted rather
    # than spelling out .so.X, .so.X.Y and .so.X.Y.Z separately.
    # The plain GNU name is tried before the 64-bit "distinguishing" one.
    patterns = (rf'lib{name}\.so\.[0-9]+[0-9.]*',
                rf'lib{name}_?64\.so\.[0-9]+[0-9.]*')
    for pattern in patterns:
        hits = (re.search(pattern, member) for member in members)
        versions = [hit.group(0) for hit in hits if hit]
        if versions:
            return _last_version(versions, '.')
    return None
211
def get_member(name, members):
    """
    Return an archive member matching the request in name.
    Name is the library name without any prefix like lib, suffix like .so,
    or version number.
    Given a list of members, find and return the most appropriate result.
    Priority is given to generic libXXX.so (plus libXXX64.so when AIX_ABI
    is 64), then a versioned libXXX.so.a.b.c and finally, the legacy AIX
    naming scheme. Returns None when nothing matches.
    """
    # generic names first - prepend lib and append .so
    generic = [rf'lib{name}\.so']
    if AIX_ABI == 64:
        generic.append(rf'lib{name}64\.so')
    for pattern in generic:
        found = get_one_match(pattern, members)
        if found:
            return found
    # no exact .so match: try a versioned name, and fall back to the
    # AIX legacy member name when that fails as well
    return get_version(name, members) or get_legacy(members)
239
def get_libpaths():
    """
    Return the list of directories to search for libraries.

    On AIX, the buildtime searchpath is stored in the executable
    as "loader header information".
    The command /usr/bin/dump -H extracts this info.
    Directories from LD_LIBRARY_PATH (preferred), or from LIBPATH when
    LD_LIBRARY_PATH is not set, come first. The paths recorded in the
    python executable are appended after them.
    This mimics AIX dlopen() behavior.
    """
    libpaths = environ.get("LD_LIBRARY_PATH")
    if libpaths is None:
        libpaths = environ.get("LIBPATH")
    libpaths = [] if libpaths is None else libpaths.split(":")
    for _, lines in get_ld_headers(executable):
        for line in lines:
            fields = line.split()
            # the second (optional) field is PATH if it includes a /
            # a line holding only the index has no second field: skip it
            if len(fields) > 1 and "/" in fields[1]:
                libpaths.extend(fields[1].split(":"))
    return libpaths
265
def find_shared(paths, name):
    """
    Search the directories in paths for the archive lib<name>.a.

    paths is a list of directories to search for an archive.
    name is the abbreviated name given to find_library().
    The first archive that exists decides the result: (base, member) when
    get_member() finds a suitable member in it, otherwise (None, None).
    Later directories are not examined once an archive has been found.
    If no archive is found at all the result is (None, None).
    """
    # "lib" is prefixed to emulate compiler name resolution,
    # e.g., -lc to libc
    base = f'lib{name}.a'
    for directory in paths:
        # /lib is a symbolic link to /usr/lib, skip it
        if directory == "/lib":
            continue
        archive = path.join(directory, base)
        if not path.exists(archive):
            continue
        members = get_shared(get_ld_headers(archive))
        member = get_member(re.escape(name), members)
        return (base, member) if member is not None else (None, None)
    return (None, None)
290
def find_library(name):
    """AIX implementation of ctypes.util.find_library()
    Find an archive member that will dlopen(). If not available,
    also search for a file (or link) with a .so suffix.

    AIX supports two types of schemes that can be used with dlopen().
    The so-called SystemV Release4 (svr4) format is commonly suffixed
    with .so while the (default) AIX scheme has the library (archive)
    ending with the suffix .a
    As an archive has multiple members (e.g., 32-bit and 64-bit) in one file
    the argument passed to dlopen must include both the library and
    the member names in a single string.

    find_library() looks first for an archive (.a) with a suitable member
    and returns it as "base(member)".
    If no archive+member pair is found, it looks for a lib<name>.so file
    and returns that file name without its directory.
    Returns None when neither is found.
    """
    libpaths = get_libpaths()
    base, member = find_shared(libpaths, name)
    if base is not None:
        return f"{base}({member})"

    # No member in an archive was found: either there is no .a file,
    # or the .a file has no suitable member. Fall back to a .so file
    # in libpaths.
    # Note, the installation must prepare a link from a .so
    # to a versioned file
    # This is common practice by GNU libtool on other platforms
    soname = f"lib{name}.so"
    # /lib is a symbolic link to /usr/lib, so it is left out
    candidates = (path.join(directory, soname)
                  for directory in libpaths if directory != "/lib")
    if any(path.exists(shlib) for shlib in candidates):
        return soname
    # nothing plausible was found
    return None
332