1#!/usr/bin/env python3
2#
3# This file is part of the MicroPython project, http://micropython.org/
4#
5# The MIT License (MIT)
6#
7# Copyright (c) 2019 Damien P. George
8#
9# Permission is hereby granted, free of charge, to any person obtaining a copy
10# of this software and associated documentation files (the "Software"), to deal
11# in the Software without restriction, including without limitation the rights
12# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
13# copies of the Software, and to permit persons to whom the Software is
14# furnished to do so, subject to the following conditions:
15#
16# The above copyright notice and this permission notice shall be included in
17# all copies or substantial portions of the Software.
18#
19# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
22# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
24# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
25# THE SOFTWARE.
26
27"""
28Link .o files to .mpy
29"""
30
31import sys, os, struct, re
32from elftools.elf import elffile
33
34sys.path.append(os.path.dirname(__file__) + "/../py")
35import makeqstrdata as qstrutil
36
# MicroPython constants
# Version number written as the second byte of the .mpy header by build_mpy().
MPY_VERSION = 5
# Native architecture codes.  ARCH_DATA shifts each of these left by 2 bits to
# form the upper part of the corresponding architecture's mpy_feature value.
MP_NATIVE_ARCH_X86 = 1
MP_NATIVE_ARCH_X64 = 2
MP_NATIVE_ARCH_ARMV7M = 5
MP_NATIVE_ARCH_ARMV7EMSP = 7
MP_NATIVE_ARCH_ARMV7EMDP = 8
MP_NATIVE_ARCH_XTENSA = 9
MP_NATIVE_ARCH_XTENSAWIN = 10
# Code kinds.  build_mpy() stores (MP_CODE_NATIVE_VIPER - MP_CODE_BYTECODE) in
# the low 2 bits of the kind/len field that precedes the machine code.
MP_CODE_BYTECODE = 2
MP_CODE_NATIVE_VIPER = 4
# Scope flag bits.
# NOTE(review): presumably OR'd into the scope_flags field written by
# build_mpy() -- confirm against the remainder of that function.
MP_SCOPE_FLAG_VIPERRELOC = 0x10
MP_SCOPE_FLAG_VIPERRODATA = 0x20
MP_SCOPE_FLAG_VIPERBSS = 0x40
# Feature flag bits OR'd into the low 2 bits of mpy_feature in ARCH_DATA.
MICROPY_OPT_CACHE_MAP_LOOKUP_IN_BYTECODE = 1
MICROPY_PY_BUILTINS_STR_UNICODE = 2
# Written as the fourth and fifth bytes of the .mpy header by build_mpy().
MP_SMALL_INT_BITS = 31
QSTR_WINDOW_SIZE = 32
55
# ELF constants
# Relocation type numbers, named R_<arch>_<type>.  Every architecture has its
# own numbering, so the same value appears under several names; the relocation
# handlers (do_relocation_text / do_relocation_data) therefore always pair a
# type test with a check of env.arch.name.
R_386_32 = 1
R_X86_64_64 = 1
R_XTENSA_32 = 1
R_386_PC32 = 2
R_X86_64_PC32 = 2
R_ARM_ABS32 = 2
R_386_GOT32 = 3
R_ARM_REL32 = 3
R_386_PLT32 = 4
R_X86_64_PLT32 = 4
R_XTENSA_PLT = 6
R_386_GOTOFF = 9
R_386_GOTPC = 10
R_ARM_THM_CALL = 10
R_XTENSA_DIFF32 = 19
R_XTENSA_SLOT0_OP = 20
R_ARM_BASE_PREL = 25  # aka R_ARM_GOTPC
R_ARM_GOT_BREL = 26  # aka R_ARM_GOT32
R_ARM_THM_JUMP24 = 30
# The remaining entries are listed out of numeric order.
R_X86_64_GOTPCREL = 9
R_X86_64_REX_GOTPCRELX = 42
R_386_GOT32X = 43
79
80################################################################################
81# Architecture configuration
82
83
def asm_jump_x86(entry):
    """Return the 5-byte jump placed at the start of the text for x86/x64.

    The result is the opcode byte 0xE9 followed by ``entry - 5`` packed as a
    little-endian unsigned 32-bit value.  Subtracting 5 (the length of the
    emitted bytes) makes the displacement relative to the end of the jump.
    """
    opcode = 0xE9
    displacement = entry - 5
    return struct.pack("<BI", opcode, displacement)
86
87
def asm_jump_arm(entry):
    """Return the 4-byte jump placed at the start of the text for ARM targets.

    The branch offset is ``entry - 4``.  Two little-endian halfwords are
    always returned:

    * offset fits in a signed 12-bit value: ``0xE000 | imm11`` followed by a
      zero halfword;
    * otherwise: ``0xF000 | (offset >> 12 & 0x7FF)`` followed by
      ``0xB800 | imm11``.

    In both cases ``imm11`` is ``(offset >> 1) & 0x7FF``.
    """
    offset = entry - 4
    imm11 = (offset >> 1) & 0x07FF

    # offset >> 11 is 0 or -1 exactly when -2048 <= offset < 2048.
    if -0x800 <= offset < 0x800:
        first, second = 0xE000 | imm11, 0
    else:
        # Use large jump
        first = 0xF000 | ((offset >> 12) & 0x07FF)
        second = 0xB800 | imm11
    return struct.pack("<HH", first, second)
99
100
def asm_jump_xtensa(entry):
    """Return the 3-byte jump placed at the start of the text for xtensa.

    The 24-bit instruction word is ``(entry - 4) << 6 | 6``; it is emitted
    little-endian as its low byte followed by the remaining bits packed as an
    unsigned 16-bit value.
    """
    insn = ((entry - 4) << 6) | 6
    low_byte = insn & 0xFF
    high_half = insn >> 8
    return struct.pack("<BH", low_byte, high_half)
105
106
class ArchData:
    """Per-architecture parameters used while linking.

    Every constructor argument is stored unchanged under the same attribute
    name.  ``separate_rodata`` is derived: it is True only when ``name`` is
    "EM_XTENSA" and ``qstr_entry_size`` is 4.
    """

    def __init__(self, name, mpy_feature, qstr_entry_size, word_size, arch_got, asm_jump):
        # ELF machine name; compared against the object file in check_arch().
        self.name = name
        # Feature byte written into the .mpy header.
        self.mpy_feature = mpy_feature

        # Sizes, in bytes, of a qstr value table entry and of a machine word.
        self.word_size = word_size
        self.qstr_entry_size = qstr_entry_size

        # Relocation types that require a GOT entry.
        self.arch_got = arch_got
        # Callable producing the entry jump bytes for a given offset.
        self.asm_jump = asm_jump

        is_xtensa = name == "EM_XTENSA"
        self.separate_rodata = is_xtensa and qstr_entry_size == 4
116
117
# Supported target architectures, keyed by the name given to LinkEnv(arch).
# The positional ArchData arguments are, in order:
#   ELF machine name, mpy_feature byte, qstr entry size (bytes),
#   word size (bytes), relocation types that need a GOT entry, and the
#   function that assembles the entry jump.
# mpy_feature is the native arch code shifted left by 2, OR'd with the
# feature flag bits enabled for that target.
ARCH_DATA = {
    "x86": ArchData(
        "EM_386",
        MP_NATIVE_ARCH_X86 << 2
        | MICROPY_PY_BUILTINS_STR_UNICODE
        | MICROPY_OPT_CACHE_MAP_LOOKUP_IN_BYTECODE,
        2,
        4,
        (R_386_PC32, R_386_GOT32, R_386_GOT32X),
        asm_jump_x86,
    ),
    "x64": ArchData(
        "EM_X86_64",
        MP_NATIVE_ARCH_X64 << 2
        | MICROPY_PY_BUILTINS_STR_UNICODE
        | MICROPY_OPT_CACHE_MAP_LOOKUP_IN_BYTECODE,
        2,
        8,
        (R_X86_64_GOTPCREL, R_X86_64_REX_GOTPCRELX),
        asm_jump_x86,
    ),
    # The three ARM variants differ only in their native arch code.
    "armv7m": ArchData(
        "EM_ARM",
        MP_NATIVE_ARCH_ARMV7M << 2 | MICROPY_PY_BUILTINS_STR_UNICODE,
        2,
        4,
        (R_ARM_GOT_BREL,),
        asm_jump_arm,
    ),
    "armv7emsp": ArchData(
        "EM_ARM",
        MP_NATIVE_ARCH_ARMV7EMSP << 2 | MICROPY_PY_BUILTINS_STR_UNICODE,
        2,
        4,
        (R_ARM_GOT_BREL,),
        asm_jump_arm,
    ),
    "armv7emdp": ArchData(
        "EM_ARM",
        MP_NATIVE_ARCH_ARMV7EMDP << 2 | MICROPY_PY_BUILTINS_STR_UNICODE,
        2,
        4,
        (R_ARM_GOT_BREL,),
        asm_jump_arm,
    ),
    "xtensa": ArchData(
        "EM_XTENSA",
        MP_NATIVE_ARCH_XTENSA << 2 | MICROPY_PY_BUILTINS_STR_UNICODE,
        2,
        4,
        (R_XTENSA_32, R_XTENSA_PLT),
        asm_jump_xtensa,
    ),
    # The only entry with a 4-byte qstr entry size, and therefore the only one
    # for which ArchData sets separate_rodata.
    "xtensawin": ArchData(
        "EM_XTENSA",
        MP_NATIVE_ARCH_XTENSAWIN << 2 | MICROPY_PY_BUILTINS_STR_UNICODE,
        4,
        4,
        (R_XTENSA_32, R_XTENSA_PLT),
        asm_jump_xtensa,
    ),
}
180
181################################################################################
182# Helper functions
183
184
def align_to(value, align):
    """Round ``value`` up to the nearest multiple of ``align``.

    ``align`` values of 0 and 1 both mean "no alignment constraint" (this is
    how ELF encodes it in ``sh_addralign``), so ``value`` is returned
    unchanged for them.  The previous bit-mask form returned 0 for every
    ``value`` when ``align`` was 0, which would have placed such a section at
    address 0.

    Ceiling division is used instead of masking so that the result is also
    correct when ``align`` is not a power of two; for power-of-two alignments
    the result is identical to the mask-based computation.
    """
    if align <= 1:
        return value
    return (value + align - 1) // align * align
187
188
def unpack_u24le(data, offset):
    """Read an unsigned little-endian 24-bit integer from ``data``.

    The three bytes at ``offset``, ``offset + 1`` and ``offset + 2`` are
    combined, lowest byte first.
    """
    low = data[offset]
    mid = data[offset + 1]
    high = data[offset + 2]
    return low | (mid << 8) | (high << 16)
191
192
def pack_u24le(data, offset, value):
    """Store the low 24 bits of ``value`` into ``data`` in little-endian order.

    Bytes ``offset`` .. ``offset + 2`` of the mutable buffer ``data`` are
    overwritten in place; any bits of ``value`` above bit 23 are discarded.
    """
    for index in range(3):
        data[offset + index] = (value >> (8 * index)) & 0xFF
197
198
def xxd(text):
    """Print a hex dump of ``text``, 16 bytes per output line.

    Each line starts with the 8-digit hex offset and a colon, followed by up
    to four words.  A word is the little-endian integer value of (up to) four
    bytes, shown as 8 hex digits; words starting past the end of ``text`` are
    omitted.
    """
    total = len(text)
    for row_start in range(0, total, 16):
        words = [
            " {:08x}".format(int.from_bytes(text[pos : pos + 4], "little"))
            for pos in range(row_start, row_start + 16, 4)
            if pos < total
        ]
        print("{:08x}:".format(row_start) + "".join(words))
208
209
# Log verbosity levels.  log() prints a message only when the message's level
# is <= log_level, so messages with smaller level numbers are enabled first as
# log_level is raised.
LOG_LEVEL_1 = 1
LOG_LEVEL_2 = 2
LOG_LEVEL_3 = 3
# Current verbosity; by default only LOG_LEVEL_1 messages are printed.
log_level = LOG_LEVEL_1
215
216
def log(level, msg):
    """Print ``msg`` unless ``level`` exceeds the module-level ``log_level``."""
    if level > log_level:
        return
    print(msg)
220
221
222################################################################################
223# Qstr extraction
224
225
def extract_qstrs(source_files):
    """Scan source files for qstr references.

    Two kinds of reference are collected from the text of each file:

    * ``MP_OBJ_NEW_QSTR(MP_QSTR_xxx)`` -- the inner ``MP_QSTR_xxx`` name is
      recorded as a qstr *object*;
    * any other ``MP_QSTR_xxx`` -- recorded as a qstr *value*.

    Returns a tuple ``(static_qstrs, qstr_vals, qstr_objs)`` where
    ``static_qstrs`` is the list of ``MP_QSTR_``-prefixed static qstr names
    built from ``qstrutil.static_qstr_list``, ``qstr_vals`` is the set of
    value names with the static ones removed, and ``qstr_objs`` is the set of
    object names.
    """
    obj_pattern = r"MP_OBJ_NEW_QSTR\((MP_QSTR_[A-Za-z0-9_]*)\)"
    val_pattern = r"MP_QSTR_[A-Za-z0-9_]*"

    def read_qstrs(path):
        # Collect the qstr values and objects referenced in a single file.
        found_vals = set()
        found_objs = set()
        with open(path) as src:
            for text in src:
                # Repeatedly cut the first match out of the line until nothing
                # is left to find.  Object references take priority over
                # plain values on every pass.
                while text:
                    hit = re.search(obj_pattern, text)
                    if hit:
                        found_objs.add(hit.group(1))
                    else:
                        hit = re.search(val_pattern, text)
                        if not hit:
                            break
                        found_vals.add(hit.group())
                    start, end = hit.span()
                    text = text[:start] + text[end:]
        return found_vals, found_objs

    static_qstrs = ["MP_QSTR_" + qstrutil.qstr_escape(q) for q in qstrutil.static_qstr_list]

    qstr_vals = set()
    qstr_objs = set()
    for path in source_files:
        file_vals, file_objs = read_qstrs(path)
        qstr_vals |= file_vals
        qstr_objs |= file_objs

    # Static qstrs are already known to the runtime, so drop them from the values.
    qstr_vals.difference_update(static_qstrs)

    return static_qstrs, qstr_vals, qstr_objs
258
259
260################################################################################
261# Linker
262
263
class LinkError(Exception):
    """Raised when the input object files cannot be linked."""
266
267
class Section:
    """A named block of bytes to be placed in the output, with its relocations.

    ``addr`` starts at 0 and ``reloc`` starts as an empty list; both are
    filled in later by the linker.
    """

    def __init__(self, name, data, alignment, filename=None):
        self.name = name
        self.filename = filename
        self.data = data
        self.alignment = alignment
        # Relocation entries that apply to this section.
        self.reloc = []
        # Offset of this section within the buffer it is placed in.
        self.addr = 0

    @staticmethod
    def from_elfsec(elfsec, filename):
        """Create a Section from an ELF section read from ``filename``.

        The ELF section must not have a load address already assigned.
        """
        assert elfsec.header.sh_addr == 0
        return Section(
            name=elfsec.name,
            data=elfsec.data(),
            alignment=elfsec.data_alignment,
            filename=filename,
        )
281
282
class GOTEntry:
    """One slot of the global offset table.

    ``offset`` is None until the table is laid out.  The ``is*`` predicates
    read ``sec_name``, which is not set by the constructor; it has to be
    assigned (populate_got() does so) before any of them is called.
    """

    def __init__(self, name, sym, link_addr=0):
        self.name = name
        self.sym = sym
        self.link_addr = link_addr
        self.offset = None

    def isexternal(self):
        """Return True if the target section name starts with ".external"."""
        return self.sec_name.startswith(".external")

    def istext(self):
        """Return True if the target section name starts with ".text"."""
        return self.sec_name.startswith(".text")

    def isrodata(self):
        """Return True if the target section is ".rodata*" or ".data.rel.ro*"."""
        section = self.sec_name
        return section.startswith(".rodata") or section.startswith(".data.rel.ro")

    def isbss(self):
        """Return True if the target section name starts with ".bss"."""
        return self.sec_name.startswith(".bss")
301
302
class LiteralEntry:
    """A literal value together with its byte offset in the literal section."""

    def __init__(self, value, offset):
        self.offset = offset
        self.value = value
307
308
class LinkEnv:
    """Mutable state shared by all stages of a single link.

    ``arch`` must be one of the keys of ARCH_DATA.
    """

    def __init__(self, arch):
        self.arch = ARCH_DATA[arch]
        # Sections in the order in which they will be output.
        self.sections = []
        # Literal sections (xtensa only).
        self.literal_sections = []
        # Symbols that are defined, keyed by name.
        self.known_syms = {}
        # Symbols that still have to be resolved.
        self.unresolved_syms = []
        # Relocations that have to be written to the output .mpy file.
        self.mpy_relocs = []

    def check_arch(self, arch_name):
        """Raise LinkError unless ``arch_name`` matches the selected architecture."""
        if self.arch.name == arch_name:
            return
        raise LinkError("incompatible arch")

    def print_sections(self):
        """Log the address, name and size of every output section at level 2."""
        log(LOG_LEVEL_2, "sections:")
        for section in self.sections:
            line = "  {:08x} {} size={}".format(section.addr, section.name, len(section.data))
            log(LOG_LEVEL_2, line)

    def find_addr(self, name):
        """Return the address of the defined symbol ``name``.

        The address is the symbol's section address plus its ``st_value``.
        Raises LinkError if the symbol is not known.
        """
        if name not in self.known_syms:
            raise LinkError("unknown symbol: {}".format(name))
        symbol = self.known_syms[name]
        return symbol.section.addr + symbol["st_value"]
332
333
def build_got_generic(env):
    """Collect the GOT entries needed by the loaded sections (non-xtensa).

    ``env.got_entries`` is reset to a new dict mapping symbol name to
    GOTEntry.  An entry is created for every relocation whose symbol has
    global binding and whose type is listed in ``env.arch.arch_got``; each
    symbol name gets at most one entry.
    """
    got = env.got_entries = {}
    all_relocs = (r for sec in env.sections for r in sec.reloc)
    for r in all_relocs:
        sym = r.sym
        if sym.entry["st_info"]["bind"] != "STB_GLOBAL":
            continue
        if r["r_info_type"] not in env.arch.arch_got:
            continue
        sym_type = sym.entry["st_info"]["type"]
        assert sym_type in ("STT_NOTYPE", "STT_FUNC", "STT_OBJECT"), sym_type
        assert sym.name
        if sym.name not in got:
            got[sym.name] = GOTEntry(sym.name, sym)
350
351
def build_got_xtensa(env):
    """Build the GOT and literal tables for xtensa from the literal sections.

    Three dicts are (re)created on ``env``:

    * ``got_entries``: GOT entry name -> GOTEntry
    * ``lit_entries``: literal value -> LiteralEntry
    * ``xt_literals``: "<filename>+0x<offset>" -> either a GOT entry name
      (str) for slots that have a relocation, or the raw literal value (int)
      for slots that do not.

    do_relocation_text() later uses ``xt_literals`` to decide, via the type of
    the stored value, whether a literal slot refers to the GOT or to the
    literal table.
    """
    env.got_entries = {}
    env.lit_entries = {}
    env.xt_literals = {}

    # Extract the values from the literal table
    for sec in env.literal_sections:
        assert len(sec.data) % env.arch.word_size == 0

        # Look through literal relocations to find any global pointers that should be GOT entries
        for r in sec.reloc:
            s = r.sym
            s_type = s.entry["st_info"]["type"]
            assert s_type in ("STT_NOTYPE", "STT_FUNC", "STT_OBJECT", "STT_SECTION"), s_type
            assert r["r_info_type"] in env.arch.arch_got
            assert r["r_offset"] % env.arch.word_size == 0
            # This entry is a global pointer
            # "existing" is the 32-bit value currently stored in the literal slot.
            existing = struct.unpack_from("<I", sec.data, r["r_offset"])[0]
            if s_type == "STT_SECTION":
                # Section-relative reference: the slot content is used as the
                # offset into the section and becomes part of the entry name.
                assert r["r_addend"] == 0
                name = "{}+0x{:x}".format(s.section.name, existing)
            else:
                # Symbol reference: the slot must be empty; a non-zero addend
                # is appended to the name.
                # NOTE(review): the addend is only encoded in the name; the
                # link_addr passed to GOTEntry below is "existing" (0 here).
                # populate_got() parses names starting "mp_fun_table+0x" --
                # confirm other symbols never carry a non-zero addend.
                assert existing == 0
                name = s.name
                if r["r_addend"] != 0:
                    name = "{}+0x{:x}".format(name, r["r_addend"])
            # Key identifying this literal slot.
            # NOTE(review): the key uses the file name and offset only, not
            # the section name -- presumably each object file has a single
            # literal section; confirm.
            idx = "{}+0x{:x}".format(sec.filename, r["r_offset"])
            env.xt_literals[idx] = name
            if name in env.got_entries:
                # Deduplicate GOT entries
                continue
            env.got_entries[name] = GOTEntry(name, s, existing)

        # Go through all literal entries finding those that aren't global pointers so must be actual literals
        for i in range(0, len(sec.data), env.arch.word_size):
            idx = "{}+0x{:x}".format(sec.filename, i)
            if idx not in env.xt_literals:
                # This entry is an actual literal
                value = struct.unpack_from("<I", sec.data, i)[0]
                env.xt_literals[idx] = value
                if value in env.lit_entries:
                    # Deduplicate literals
                    continue
                # New literals are appended, so the offset is the current
                # number of entries times the word size.
                env.lit_entries[value] = LiteralEntry(
                    value, len(env.lit_entries) * env.arch.word_size
                )
398
399
def populate_got(env):
    """Lay out the GOT, write it into ``env.full_text`` and emit its relocations.

    Requires ``env.got_entries``, ``env.got_section`` and ``env.full_text`` to
    be set up and section addresses to be assigned.  For every GOT entry this
    sets ``sec_name``, ``link_addr`` and ``offset``, and appends one
    ``(".text", address, dest)`` tuple to ``env.mpy_relocs``.
    """
    # Compute GOT destination addresses
    for got_entry in env.got_entries.values():
        sym = got_entry.sym
        # Undefined symbols that were matched to a definition in another
        # object carry that definition in "resolved".
        if hasattr(sym, "resolved"):
            sym = sym.resolved
        sec = sym.section
        addr = sym["st_value"]
        got_entry.sec_name = sec.name
        # link_addr may already hold an offset given at construction time, so
        # the target address is added to it rather than assigned.
        got_entry.link_addr += sec.addr + addr

    # Get sorted GOT, sorted by external, text, rodata, bss so relocations can be combined
    # The key is 1/2/3/4 for those four kinds (the predicates are booleans and
    # at most one is true) and 0 for anything else.
    got_list = sorted(
        env.got_entries.values(),
        key=lambda g: g.isexternal() + 2 * g.istext() + 3 * g.isrodata() + 4 * g.isbss(),
    )

    # Layout and populate the GOT
    offset = 0
    for got_entry in got_list:
        got_entry.offset = offset
        offset += env.arch.word_size
        # Position of this slot within full_text.
        o = env.got_section.addr + got_entry.offset
        env.full_text[o : o + env.arch.word_size] = got_entry.link_addr.to_bytes(
            env.arch.word_size, "little"
        )

    # Create a relocation for each GOT entry
    for got_entry in got_list:
        # dest is either a section/table name (str) or, for an entry named
        # "mp_fun_table+0x<n>", the word index n // word_size (int).
        if got_entry.name == "mp_fun_table":
            dest = "mp_fun_table"
        elif got_entry.name.startswith("mp_fun_table+0x"):
            dest = int(got_entry.name.split("+")[1], 16) // env.arch.word_size
        elif got_entry.sec_name.startswith(".text"):
            dest = ".text"
        elif got_entry.sec_name.startswith(".rodata"):
            dest = ".rodata"
        elif got_entry.sec_name.startswith(".data.rel.ro"):
            dest = ".data.rel.ro"
        elif got_entry.sec_name.startswith(".bss"):
            dest = ".bss"
        else:
            assert 0, (got_entry.name, got_entry.sec_name)
        env.mpy_relocs.append((".text", env.got_section.addr + got_entry.offset, dest))

    # Print out the final GOT
    log(LOG_LEVEL_2, "GOT: {:08x}".format(env.got_section.addr))
    for g in got_list:
        log(
            LOG_LEVEL_2,
            "  {:08x} {} -> {}+{:08x}".format(g.offset, g.name, g.sec_name, g.link_addr),
        )
452
453
def populate_lit(env):
    """Write every literal value into its slot of the literal section.

    Each entry of ``env.lit_entries`` is stored little-endian, one machine
    word wide, in ``env.full_text`` at the literal section's address plus the
    entry's offset.  The table is logged at level 2.
    """
    base = env.lit_section.addr
    log(LOG_LEVEL_2, "LIT: {:08x}".format(base))
    width = env.arch.word_size
    for entry in env.lit_entries.values():
        log(LOG_LEVEL_2, "  {:08x} = {:08x}".format(entry.offset, entry.value))
        start = base + entry.offset
        env.full_text[start : start + width] = entry.value.to_bytes(width, "little")
461
462
def do_relocation_text(env, text_addr, r):
    """Apply one relocation ``r`` to code/rodata inside ``env.full_text``.

    ``text_addr`` is the address of the section that ``r`` belongs to; the
    relocation's own ``r_offset`` is relative to that section.  Depending on
    the architecture and relocation type, a delta ``reloc`` is computed and
    then merged into the bytes at the relocation site using one of three
    encodings ("le32", "thumb_b" or "xtensa_l32r").

    Raises LinkError for fixed relocations that target rodata (when rodata is
    kept separate), bss or an external symbol.  Unsupported relocation types
    trigger an assertion failure.
    """
    # Extract relevant info about symbol that's being relocated
    s = r.sym
    s_bind = s.entry["st_info"]["bind"]
    s_shndx = s.entry["st_shndx"]
    s_type = s.entry["st_info"]["type"]
    r_offset = r["r_offset"] + text_addr
    r_info_type = r["r_info_type"]
    try:
        # only for RELA sections
        r_addend = r["r_addend"]
    except KeyError:
        r_addend = 0

    # Default relocation type and name for logging
    reloc_type = "le32"
    log_name = None

    # "and" binds tighter than "or": each architecture/type pair below is one
    # alternative, and the last alternative is a local-symbol xtensa
    # R_XTENSA_32 relocation.
    if (
        env.arch.name == "EM_386"
        and r_info_type in (R_386_PC32, R_386_PLT32)
        or env.arch.name == "EM_X86_64"
        and r_info_type in (R_X86_64_PC32, R_X86_64_PLT32)
        or env.arch.name == "EM_ARM"
        and r_info_type in (R_ARM_REL32, R_ARM_THM_CALL, R_ARM_THM_JUMP24)
        or s_bind == "STB_LOCAL"
        and env.arch.name == "EM_XTENSA"
        and r_info_type == R_XTENSA_32  # not GOT
    ):
        # Standard relocation to fixed location within text/rodata
        if hasattr(s, "resolved"):
            s = s.resolved

        sec = s.section

        if env.arch.separate_rodata and sec.name.startswith(".rodata"):
            raise LinkError("fixed relocation to rodata with rodata referenced via GOT")

        if sec.name.startswith(".bss"):
            raise LinkError(
                "{}: fixed relocation to bss (bss variables can't be static)".format(s.filename)
            )

        if sec.name.startswith(".external"):
            raise LinkError(
                "{}: fixed relocation to external symbol: {}".format(s.filename, s.name)
            )

        # Delta from the relocation site to the target.
        addr = sec.addr + s["st_value"]
        reloc = addr - r_offset + r_addend

        if r_info_type in (R_ARM_THM_CALL, R_ARM_THM_JUMP24):
            # Both relocations have the same bit pattern to rewrite:
            #   R_ARM_THM_CALL: bl
            #   R_ARM_THM_JUMP24: b.w
            reloc_type = "thumb_b"

    elif (
        env.arch.name == "EM_386"
        and r_info_type == R_386_GOTPC
        or env.arch.name == "EM_ARM"
        and r_info_type == R_ARM_BASE_PREL
    ):
        # Relocation to GOT address itself
        assert s.name == "_GLOBAL_OFFSET_TABLE_"
        addr = env.got_section.addr
        reloc = addr - r_offset + r_addend

    elif (
        env.arch.name == "EM_386"
        and r_info_type in (R_386_GOT32, R_386_GOT32X)
        or env.arch.name == "EM_ARM"
        and r_info_type == R_ARM_GOT_BREL
    ):
        # Relocation pointing to GOT
        # The value is the entry's offset within the GOT, not an address.
        reloc = addr = env.got_entries[s.name].offset

    elif env.arch.name == "EM_X86_64" and r_info_type in (
        R_X86_64_GOTPCREL,
        R_X86_64_REX_GOTPCRELX,
    ):
        # Relocation pointing to GOT
        # Here the value is relative to the relocation site.
        got_entry = env.got_entries[s.name]
        addr = env.got_section.addr + got_entry.offset
        reloc = addr - r_offset + r_addend

    elif env.arch.name == "EM_386" and r_info_type == R_386_GOTOFF:
        # Relocation relative to GOT
        addr = s.section.addr + s["st_value"]
        reloc = addr - env.got_section.addr + r_addend

    elif env.arch.name == "EM_XTENSA" and r_info_type == R_XTENSA_SLOT0_OP:
        # Relocation pointing to GOT, xtensa specific
        sec = s.section
        if sec.name.startswith(".text"):
            # it looks like R_XTENSA_SLOT0_OP into .text is already correctly relocated
            return
        assert sec.name.startswith(".literal"), sec.name
        # The addend selects the literal slot; env.xt_literals (built by
        # build_got_xtensa) maps it to a GOT entry name (str) or to a literal
        # value (int).
        lit_idx = "{}+0x{:x}".format(sec.filename, r_addend)
        lit_ptr = env.xt_literals[lit_idx]
        if isinstance(lit_ptr, str):
            addr = env.got_section.addr + env.got_entries[lit_ptr].offset
            log_name = "GOT {}".format(lit_ptr)
        else:
            addr = env.lit_section.addr + env.lit_entries[lit_ptr].offset
            log_name = "LIT"
        reloc = addr - r_offset
        reloc_type = "xtensa_l32r"

    elif env.arch.name == "EM_XTENSA" and r_info_type == R_XTENSA_DIFF32:
        if s.section.name.startswith(".text"):
            # it looks like R_XTENSA_DIFF32 into .text is already correctly relocated
            return
        assert 0

    else:
        # Unknown/unsupported relocation
        assert 0, r_info_type

    # Write relocation
    if reloc_type == "le32":
        # Add the delta to the 32-bit little-endian value already at the site,
        # wrapping to 32 bits.
        (existing,) = struct.unpack_from("<I", env.full_text, r_offset)
        struct.pack_into("<I", env.full_text, r_offset, (existing + reloc) & 0xFFFFFFFF)
    elif reloc_type == "thumb_b":
        # The offset is split over the low 11 bits of two halfwords: bits
        # 12..22 in the first, bits 1..11 in the second.
        b_h, b_l = struct.unpack_from("<HH", env.full_text, r_offset)
        existing = (b_h & 0x7FF) << 12 | (b_l & 0x7FF) << 1
        if existing >= 0x400000:  # 2's complement
            existing -= 0x800000
        new = existing + reloc
        # The top 5 bits of each halfword are preserved.
        b_h = (b_h & 0xF800) | (new >> 12) & 0x7FF
        b_l = (b_l & 0xF800) | (new >> 1) & 0x7FF
        struct.pack_into("<HH", env.full_text, r_offset, b_h, b_l)
    elif reloc_type == "xtensa_l32r":
        l32r = unpack_u24le(env.full_text, r_offset)
        assert l32r & 0xF == 1  # RI16 encoded l32r
        l32r_imm16 = l32r >> 8
        # "+" binds tighter than ">>": this is (l32r_imm16 + reloc) >> 2.
        l32r_imm16 = (l32r_imm16 + reloc >> 2) & 0xFFFF
        l32r = l32r & 0xFF | l32r_imm16 << 8
        pack_u24le(env.full_text, r_offset, l32r)
    else:
        assert 0, reloc_type

    # Log information about relocation
    if log_name is None:
        if s_type == "STT_SECTION":
            log_name = s.section.name
        else:
            log_name = s.name
    log(LOG_LEVEL_3, "  {:08x} {} -> {:08x}".format(r_offset, log_name, addr))
612
613
def do_relocation_data(env, text_addr, r):
    """Apply one absolute, word-sized relocation ``r`` inside a data section.

    ``text_addr`` is the address of the section that ``r`` belongs to.  The
    target address is added to the word stored at the relocation site, and a
    ``(base, offset, kind)`` tuple is appended to ``env.mpy_relocs`` so the
    word is adjusted again when the .mpy file is loaded.  The buffer patched
    is ``env.full_rodata`` when the architecture keeps rodata separate,
    otherwise ``env.full_text``.

    Unsupported relocation types or target sections trigger an assertion
    failure.
    """
    s = r.sym
    s_type = s.entry["st_info"]["type"]
    r_offset = r["r_offset"] + text_addr
    r_info_type = r["r_info_type"]
    try:
        # only for RELA sections
        r_addend = r["r_addend"]
    except KeyError:
        r_addend = 0

    if (
        env.arch.name == "EM_386"
        and r_info_type == R_386_32
        or env.arch.name == "EM_X86_64"
        and r_info_type == R_X86_64_64
        or env.arch.name == "EM_ARM"
        and r_info_type == R_ARM_ABS32
        or env.arch.name == "EM_XTENSA"
        and r_info_type == R_XTENSA_32
    ):
        # Relocation in data.rel.ro to internal/external symbol
        # struct_type is only bound for word sizes 4 and 8, which covers every
        # entry of ARCH_DATA.
        if env.arch.word_size == 4:
            struct_type = "<I"
        elif env.arch.word_size == 8:
            struct_type = "<Q"
        sec = s.section
        assert r_offset % env.arch.word_size == 0
        addr = sec.addr + s["st_value"] + r_addend
        if s_type == "STT_SECTION":
            log_name = sec.name
        else:
            log_name = s.name
        log(LOG_LEVEL_3, "  {:08x} -> {} {:08x}".format(r_offset, log_name, addr))
        if env.arch.separate_rodata:
            data = env.full_rodata
        else:
            data = env.full_text
        (existing,) = struct.unpack_from(struct_type, data, r_offset)
        if sec.name.startswith((".text", ".rodata", ".data.rel.ro", ".bss")):
            # Internal target: store the link-time address; "kind" names the
            # section so the loader can add that section's runtime base.
            struct.pack_into(struct_type, data, r_offset, existing + addr)
            kind = sec.name
        elif sec.name == ".external.mp_fun_table":
            # External target: the word is left untouched and "kind" is the
            # symbol's index in mp_fun_table (set by link_objects).
            assert addr == 0
            kind = s.mp_fun_table_offset
        else:
            assert 0, sec.name
        if env.arch.separate_rodata:
            base = ".rodata"
        else:
            base = ".text"
        env.mpy_relocs.append((base, r_offset, kind))

    else:
        # Unknown/unsupported relocation
        assert 0, r_info_type
670
671
def load_object_file(env, felf):
    """Load the ELF object file ``felf`` into the link environment ``env``.

    Non-empty literal/text/rodata/data.rel.ro/bss sections are wrapped in
    Section objects and appended to ``env.literal_sections`` or
    ``env.sections``; their relocations are attached with each relocation's
    symbol resolved from the symbol table.  Defined global symbols are
    recorded in ``env.known_syms`` and undefined global symbols are appended
    to ``env.unresolved_syms``.

    Raises LinkError if the file's architecture does not match, if a
    non-empty ".data*" section (other than ".data.rel.ro*") is present, or if
    a global symbol is defined more than once.
    """
    with open(felf, "rb") as f:
        elf = elffile.ELFFile(f)
        env.check_arch(elf["e_machine"])

        # Get symbol table
        symtab = list(elf.get_section_by_name(".symtab").iter_symbols())

        # Load needed sections from ELF file
        sections_shndx = {}  # maps elf shndx to Section object
        for idx, s in enumerate(elf.iter_sections()):
            if s.header.sh_type in ("SHT_PROGBITS", "SHT_NOBITS"):
                if s.data_size == 0:
                    # Ignore empty sections
                    pass
                elif s.name.startswith((".literal", ".text", ".rodata", ".data.rel.ro", ".bss")):
                    sec = Section.from_elfsec(s, felf)
                    sections_shndx[idx] = sec
                    if s.name.startswith(".literal"):
                        env.literal_sections.append(sec)
                    else:
                        env.sections.append(sec)
                elif s.name.startswith(".data"):
                    # Writable initialised data is not supported.
                    raise LinkError("{}: {} non-empty".format(felf, s.name))
                else:
                    # Ignore section
                    pass
            elif s.header.sh_type in ("SHT_REL", "SHT_RELA"):
                # sh_info of a relocation section is the index of the section
                # it applies to.  Relocations are kept only if that section
                # has already been loaded above.
                # NOTE(review): this presumably relies on relocation sections
                # following their target sections in the file -- confirm.
                shndx = s.header.sh_info
                if shndx in sections_shndx:
                    sec = sections_shndx[shndx]
                    sec.reloc_name = s.name
                    sec.reloc = list(s.iter_relocations())
                    for r in sec.reloc:
                        r.sym = symtab[r["r_info_sym"]]

        # Link symbols to their sections, and update known and unresolved symbols
        for sym in symtab:
            sym.filename = felf
            shndx = sym.entry["st_shndx"]
            if shndx in sections_shndx:
                # Symbol with associated section
                sym.section = sections_shndx[shndx]
                if sym["st_info"]["bind"] == "STB_GLOBAL":
                    # Defined global symbol
                    # Duplicate "__x86.get_pc_thunk.*" definitions are
                    # tolerated; the last one seen wins.
                    if sym.name in env.known_syms and not sym.name.startswith(
                        "__x86.get_pc_thunk."
                    ):
                        raise LinkError("duplicate symbol: {}".format(sym.name))
                    env.known_syms[sym.name] = sym
            elif sym.entry["st_shndx"] == "SHN_UNDEF" and sym["st_info"]["bind"] == "STB_GLOBAL":
                # Undefined global symbol, needs resolving
                env.unresolved_syms.append(sym)
725
726
def link_objects(env, native_qstr_vals_len, native_qstr_objs_len):
    """Link all loaded sections into the final text/rodata/bss images.

    ``native_qstr_vals_len`` and ``native_qstr_objs_len`` are the numbers of
    entries to reserve in the qstr value and qstr object tables.

    The steps are, in order: build the GOT (and xtensa literal) tables, create
    the synthetic GOT/LIT/qstr-table sections, resolve undefined symbols,
    assign section addresses while concatenating section data into
    ``env.full_text`` / ``env.full_rodata`` / ``env.full_bss``, populate the
    GOT and literal tables, and finally apply every relocation.

    Raises LinkError if an undefined symbol cannot be resolved.
    """
    # Build GOT information
    if env.arch.name == "EM_XTENSA":
        build_got_xtensa(env)
    else:
        build_got_generic(env)

    # Create GOT section
    # For xtensa the GOT goes first in the output; otherwise it follows the
    # loaded sections.
    got_size = len(env.got_entries) * env.arch.word_size
    env.got_section = Section("GOT", bytearray(got_size), env.arch.word_size)
    if env.arch.name == "EM_XTENSA":
        env.sections.insert(0, env.got_section)
    else:
        env.sections.append(env.got_section)

    # Create optional literal section
    # It is placed directly after the GOT.
    if env.arch.name == "EM_XTENSA":
        lit_size = len(env.lit_entries) * env.arch.word_size
        env.lit_section = Section("LIT", bytearray(lit_size), env.arch.word_size)
        env.sections.insert(1, env.lit_section)

    # Create section to contain mp_native_qstr_val_table
    env.qstr_val_section = Section(
        ".text.QSTR_VAL",
        bytearray(native_qstr_vals_len * env.arch.qstr_entry_size),
        env.arch.qstr_entry_size,
    )
    env.sections.append(env.qstr_val_section)

    # Create section to contain mp_native_qstr_obj_table
    env.qstr_obj_section = Section(
        ".text.QSTR_OBJ", bytearray(native_qstr_objs_len * env.arch.word_size), env.arch.word_size
    )
    env.sections.append(env.qstr_obj_section)

    # Resolve unknown symbols
    mp_fun_table_sec = Section(".external.mp_fun_table", b"", 0)
    # Maps the name of each supported external object to its index in
    # mp_fun_table; the listed names are numbered consecutively from 67.
    fun_table = {
        key: 67 + idx
        for idx, key in enumerate(
            [
                "mp_type_type",
                "mp_type_str",
                "mp_type_list",
                "mp_type_dict",
                "mp_type_fun_builtin_0",
                "mp_type_fun_builtin_1",
                "mp_type_fun_builtin_2",
                "mp_type_fun_builtin_3",
                "mp_type_fun_builtin_var",
                "mp_stream_read_obj",
                "mp_stream_readinto_obj",
                "mp_stream_unbuffered_readline_obj",
                "mp_stream_write_obj",
            ]
        )
    }
    for sym in env.unresolved_syms:
        assert sym["st_value"] == 0
        if sym.name == "_GLOBAL_OFFSET_TABLE_":
            # Handled directly by the relocation code; no section needed.
            pass
        elif sym.name == "mp_fun_table":
            sym.section = Section(".external", b"", 0)
        elif sym.name == "mp_native_qstr_val_table":
            sym.section = env.qstr_val_section
        elif sym.name == "mp_native_qstr_obj_table":
            sym.section = env.qstr_obj_section
        elif sym.name in env.known_syms:
            # Defined in another object file: remember the definition.
            sym.resolved = env.known_syms[sym.name]
        else:
            if sym.name in fun_table:
                sym.section = mp_fun_table_sec
                sym.mp_fun_table_offset = fun_table[sym.name]
            else:
                raise LinkError("{}: undefined symbol: {}".format(sym.filename, sym.name))

    # Align sections, assign their addresses, and create full_text
    # full_text starts with a placeholder jump that reserves space for the
    # entry jump.
    env.full_text = bytearray(env.arch.asm_jump(8))  # dummy, to be filled in later
    env.full_rodata = bytearray(0)
    env.full_bss = bytearray(0)
    for sec in env.sections:
        # Pick the output buffer: rodata gets its own buffer only when the
        # architecture keeps it separate; bss always does.
        if env.arch.separate_rodata and sec.name.startswith((".rodata", ".data.rel.ro")):
            data = env.full_rodata
        elif sec.name.startswith(".bss"):
            data = env.full_bss
        else:
            data = env.full_text
        # Pad the buffer up to the section's alignment, then append the data;
        # the address is the offset within the chosen buffer.
        sec.addr = align_to(len(data), sec.alignment)
        data.extend(b"\x00" * (sec.addr - len(data)))
        data.extend(sec.data)

    env.print_sections()

    populate_got(env)
    if env.arch.name == "EM_XTENSA":
        populate_lit(env)

    # Fill in relocations
    for sec in env.sections:
        if not sec.reloc:
            continue
        log(
            LOG_LEVEL_3,
            "{}: {} relocations via {}:".format(sec.filename, sec.name, sec.reloc_name),
        )
        for r in sec.reloc:
            if sec.name.startswith((".text", ".rodata")):
                do_relocation_text(env, sec.addr, r)
            elif sec.name.startswith(".data.rel.ro"):
                do_relocation_data(env, sec.addr, r)
            else:
                assert 0, sec.name
839
840
841################################################################################
842# .mpy output
843
844
class MPYOutput:
    """Writer for the binary .mpy output file."""

    def open(self, fname):
        """Open ``fname`` for binary writing and reset the relocation state."""
        self.f = open(fname, "wb")
        self.prev_base = self.prev_offset = -1

    def close(self):
        """Close the output file."""
        self.f.close()

    def write_bytes(self, buf):
        """Write ``buf`` to the output file unchanged."""
        self.f.write(buf)

    def write_uint(self, val):
        """Write ``val`` as a variable-length unsigned integer.

        The value is split into 7-bit groups written most significant group
        first; every byte except the last has its top bit set.
        """
        groups = [val & 0x7F]
        val >>= 7
        while val:
            groups.append(0x80 | (val & 0x7F))
            val >>= 7
        groups.reverse()
        self.write_bytes(bytearray(groups))

    def write_qstr(self, s):
        """Write the qstr ``s``.

        A static qstr is written as a zero byte followed by its 1-based index
        in ``qstrutil.static_qstr_list``; any other qstr is written as its
        ASCII length shifted left by one, followed by the ASCII bytes.
        """
        if s in qstrutil.static_qstr_list:
            static_index = qstrutil.static_qstr_list.index(s) + 1
            self.write_bytes(bytes([0, static_index]))
            return
        encoded = bytes(s, "ascii")
        self.write_uint(len(encoded) << 1)
        self.write_bytes(encoded)

    def write_reloc(self, base, offset, dest, n):
        """Write one relocation record covering ``n`` consecutive words.

        ``base`` is ".text" or ".rodata", ``offset`` is the word offset of the
        first word within that base, and ``dest`` is the numeric destination
        code.  Destinations 0..2 may cover several words (``n`` > 1); other
        destinations must be in 6..127 and cover exactly one word.

        The record is an opcode byte, then the location (only when it does
        not directly continue the previous record), then ``n`` if ``n`` > 1.
        """
        # NOTE(review): prev_base is set to -1 in open() and never updated in
        # this class, so this condition is always True and every record
        # carries an explicit location -- confirm whether that is intended.
        continues_previous = base == self.prev_base and offset == self.prev_offset + 1
        need_offset = not continues_previous
        self.prev_offset = offset + n - 1

        if dest <= 2:
            # Bit 0 of the destination code flags a multi-word record.
            dest = (dest << 1) | (n > 1)
        else:
            assert 6 <= dest <= 127
            assert n == 1

        # Bit 0 of the opcode flags that a location follows.
        opcode = (dest << 1) | need_offset
        assert 0 <= opcode <= 0xFE, opcode
        self.write_bytes(bytes([opcode]))

        if need_offset:
            # The low bit of the location selects text (0) or rodata (1).
            base = {".text": 0, ".rodata": 1}.get(base, base)
            self.write_uint(offset << 1 | base)
        if n > 1:
            self.write_uint(n)
893
894
def build_mpy(env, entry_offset, fmpy, native_qstr_vals, native_qstr_objs):
    """Write the linked image held in env to the file fmpy in .mpy format.

    env: link environment; this function reads arch, full_text, full_rodata,
        full_bss, got_entries, mpy_relocs, qstr_val_section and
        qstr_obj_section from it, and overwrites the start of full_text.
    entry_offset: passed to env.arch.asm_jump(); the returned jump
        instruction replaces the first bytes of env.full_text.
    fmpy: path of the output file.
    native_qstr_vals: qstr names; entry i is linked to slot i of
        env.qstr_val_section (slot size env.arch.qstr_entry_size).
    native_qstr_objs: qstr names; entry i is linked to slot i of
        env.qstr_obj_section (slot size env.arch.word_size).

    The output file is always closed, including when an assertion or write
    fails part-way through.
    """
    # Write jump instruction to start of text
    jump = env.arch.asm_jump(entry_offset)
    env.full_text[: len(jump)] = jump

    log(LOG_LEVEL_1, "arch:         {}".format(env.arch.name))
    log(LOG_LEVEL_1, "text size:    {}".format(len(env.full_text)))
    if len(env.full_rodata):
        log(LOG_LEVEL_1, "rodata size:  {}".format(len(env.full_rodata)))
    log(LOG_LEVEL_1, "bss size:     {}".format(len(env.full_bss)))
    log(LOG_LEVEL_1, "GOT entries:  {}".format(len(env.got_entries)))

    out = MPYOutput()
    out.open(fmpy)
    try:
        # MPY: header
        out.write_bytes(
            bytearray(
                [ord("M"), MPY_VERSION, env.arch.mpy_feature, MP_SMALL_INT_BITS, QSTR_WINDOW_SIZE]
            )
        )

        # MPY: kind/len
        out.write_uint(len(env.full_text) << 2 | (MP_CODE_NATIVE_VIPER - MP_CODE_BYTECODE))

        # MPY: machine code
        out.write_bytes(env.full_text)

        # MPY: n_qstr_link (assumes little endian)
        out.write_uint(len(native_qstr_vals) + len(native_qstr_objs))
        for q, qstr_name in enumerate(native_qstr_vals):
            off = env.qstr_val_section.addr + q * env.arch.qstr_entry_size
            out.write_uint(off << 2)
            out.write_qstr(qstr_name)
        for q, qstr_name in enumerate(native_qstr_objs):
            off = env.qstr_obj_section.addr + q * env.arch.word_size
            out.write_uint(off << 2 | 3)
            out.write_qstr(qstr_name)

        # MPY: scope_flags
        scope_flags = MP_SCOPE_FLAG_VIPERRELOC
        if len(env.full_rodata):
            scope_flags |= MP_SCOPE_FLAG_VIPERRODATA
        if len(env.full_bss):
            scope_flags |= MP_SCOPE_FLAG_VIPERBSS
        out.write_uint(scope_flags)

        # MPY: n_obj
        out.write_uint(0)

        # MPY: n_raw_code
        out.write_uint(0)

        # MPY: rodata and/or bss
        # The *_const_table_idx names are only bound when the matching
        # section is non-empty; bss takes index 2 when rodata is present,
        # otherwise index 1.
        if len(env.full_rodata):
            rodata_const_table_idx = 1
            out.write_uint(len(env.full_rodata))
            out.write_bytes(env.full_rodata)
        if len(env.full_bss):
            bss_const_table_idx = bool(env.full_rodata) + 1
            out.write_uint(len(env.full_bss))

        # MPY: relocation information
        # Runs of relocations with the same kind and base at consecutive word
        # offsets are merged into a single record with a repeat count.
        prev_kind = None
        for base, addr, kind in env.mpy_relocs:
            # Map the symbolic relocation kind to its numeric destination code.
            if isinstance(kind, str) and kind.startswith(".text"):
                kind = 0
            elif kind in (".rodata", ".data.rel.ro"):
                if env.arch.separate_rodata:
                    kind = rodata_const_table_idx
                else:
                    kind = 0
            elif isinstance(kind, str) and kind.startswith(".bss"):
                kind = bss_const_table_idx
            elif kind == "mp_fun_table":
                kind = 6
            else:
                # Remaining kinds are expected to be integers; they are
                # shifted past the reserved codes 0..6.
                kind = 7 + kind
            assert addr % env.arch.word_size == 0, addr
            offset = addr // env.arch.word_size
            # prev_base/prev_offset/prev_n are only read once prev_kind is
            # set, which the short-circuit on the first comparison guarantees.
            if kind == prev_kind and base == prev_base and offset == prev_offset + 1:
                prev_n += 1
                prev_offset += 1
            else:
                if prev_kind is not None:
                    out.write_reloc(prev_base, prev_offset - prev_n + 1, prev_kind, prev_n)
                prev_kind = kind
                prev_base = base
                prev_offset = offset
                prev_n = 1
        if prev_kind is not None:
            # Flush the final pending run.
            out.write_reloc(prev_base, prev_offset - prev_n + 1, prev_kind, prev_n)

        # MPY: sentinel for end of relocations
        out.write_bytes(b"\xff")
    finally:
        # Release the file handle even if an assertion or write above failed.
        out.close()
994
995
996################################################################################
997# main
998
999
def do_preprocess(args):
    """Generate the qstr configuration header for the given source files.

    args.files is handed to extract_qstrs(); the header is written to
    args.output, which defaults to the first input with ".c" replaced by
    ".config.h". Static qstrs are defined as their 1-based position in the
    returned list; the other qstrs are defined as indexed accesses into
    mp_native_qstr_val_table / mp_native_qstr_obj_table in sorted order.
    """
    if args.output is None:
        first_source = args.files[0]
        assert first_source.endswith(".c")
        args.output = first_source[:-1] + "config.h"

    static_qstrs, qstr_vals, qstr_objs = extract_qstrs(args.files)

    with open(args.output, "w") as header:

        def emit(text):
            # One header line (or pre-joined group of lines) per call.
            print(text, file=header)

        emit(
            "#include <stdint.h>\n"
            "typedef uintptr_t mp_uint_t;\n"
            "typedef intptr_t mp_int_t;\n"
            "typedef uintptr_t mp_off_t;"
        )

        for number, name in enumerate(static_qstrs, 1):
            emit("#define %s (%u)" % (name, number))

        for slot, name in enumerate(sorted(qstr_vals)):
            emit("#define %s (mp_native_qstr_val_table[%d])" % (name, slot))

        for slot, name in enumerate(sorted(qstr_objs)):
            emit(
                "#define MP_OBJ_NEW_QSTR_%s ((mp_obj_t)mp_native_qstr_obj_table[%d])"
                % (name, slot)
            )

        # esp32 can only read 32-bit values from IRAM
        qstr_type = "uint32_t" if args.arch == "xtensawin" else "uint16_t"
        emit("extern const {} mp_native_qstr_val_table[];".format(qstr_type))
        emit("extern const mp_uint_t mp_native_qstr_obj_table[];")
1028
1029
def do_link(args):
    """Link the object files in args.files into a single .mpy file.

    The output path is args.output, defaulting to the first input with ".o"
    replaced by ".mpy". If args.qstrs names a file, its "#define" lines are
    scanned to recover the qstr value names and qstr object names, in file
    order. A LinkError is reported on stdout and ends the process with exit
    status 1.
    """
    if args.output is None:
        first_object = args.files[0]
        assert first_object.endswith(".o")
        args.output = first_object[:-1] + "mpy"

    native_qstr_vals = []
    native_qstr_objs = []
    if args.qstrs is not None:
        val_pattern = re.compile(r"#define MP_QSTR_([A-Za-z0-9_]*) \(mp_native_")
        obj_pattern = re.compile(r"#define MP_OBJ_NEW_QSTR_MP_QSTR_([A-Za-z0-9_]*)")
        with open(args.qstrs) as qstr_file:
            for line in qstr_file:
                # A line is tried as a value define first, then as an object define.
                val_match = val_pattern.match(line)
                if val_match:
                    native_qstr_vals.append(val_match.group(1))
                    continue
                obj_match = obj_pattern.match(line)
                if obj_match:
                    native_qstr_objs.append(obj_match.group(1))

    log(LOG_LEVEL_2, "qstr vals: " + ", ".join(native_qstr_vals))
    log(LOG_LEVEL_2, "qstr objs: " + ", ".join(native_qstr_objs))

    env = LinkEnv(args.arch)
    try:
        for object_path in args.files:
            load_object_file(env, object_path)
        link_objects(env, len(native_qstr_vals), len(native_qstr_objs))
        entry_offset = env.find_addr("mpy_init")
        build_mpy(env, entry_offset, args.output, native_qstr_vals, native_qstr_objs)
    except LinkError as er:
        print("LinkError:", er.args[0])
        sys.exit(1)
1057
1058
def main():
    """Command-line entry point.

    Parses sys.argv, stores the requested verbosity in the module-level
    log_level, then runs do_preprocess() when --preprocess is given and
    do_link() otherwise.
    """
    import argparse

    # The description previously read "Run scripts on the pyboard.", which
    # does not describe this tool.
    cmd_parser = argparse.ArgumentParser(
        description="Link native .o object files into a MicroPython .mpy file."
    )
    cmd_parser.add_argument(
        "--verbose", "-v", action="count", default=1, help="increase verbosity"
    )
    cmd_parser.add_argument("--arch", default="x64", help="architecture")
    cmd_parser.add_argument("--preprocess", action="store_true", help="preprocess source files")
    cmd_parser.add_argument("--qstrs", default=None, help="file defining additional qstrs")
    cmd_parser.add_argument(
        "--output",
        "-o",
        default=None,
        help="output file (default to input with .o->.mpy, "
        "or .c->.config.h with --preprocess)",
    )
    cmd_parser.add_argument("files", nargs="+", help="input files")
    args = cmd_parser.parse_args()

    global log_level
    log_level = args.verbose

    if args.preprocess:
        do_preprocess(args)
    else:
        do_link(args)
1082
1083
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
1086