#!/usr/bin/env python3
#
# This file is part of the MicroPython project, http://micropython.org/
#
# The MIT License (MIT)
#
# Copyright (c) 2019 Damien P. George
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
"""
Link .o files to .mpy
"""

import os
import re
import struct
import sys

from elftools.elf import elffile

# makeqstrdata lives in the sibling "py" directory, so extend the search path
# before importing it.
sys.path.append(os.path.dirname(__file__) + "/../py")
import makeqstrdata as qstrutil

# MicroPython constants
MPY_VERSION = 5
MP_NATIVE_ARCH_X86 = 1
MP_NATIVE_ARCH_X64 = 2
MP_NATIVE_ARCH_ARMV7M = 5
MP_NATIVE_ARCH_ARMV7EMSP = 7
MP_NATIVE_ARCH_ARMV7EMDP = 8
MP_NATIVE_ARCH_XTENSA = 9
MP_NATIVE_ARCH_XTENSAWIN = 10
MP_CODE_BYTECODE = 2
MP_CODE_NATIVE_VIPER = 4
MP_SCOPE_FLAG_VIPERRELOC = 0x10
MP_SCOPE_FLAG_VIPERRODATA = 0x20
MP_SCOPE_FLAG_VIPERBSS = 0x40
MICROPY_OPT_CACHE_MAP_LOOKUP_IN_BYTECODE = 1
MICROPY_PY_BUILTINS_STR_UNICODE = 2
MP_SMALL_INT_BITS = 31
QSTR_WINDOW_SIZE = 32

# ELF constants
R_386_32 = 1
R_X86_64_64 = 1
R_XTENSA_32 = 1
R_386_PC32 = 2
R_X86_64_PC32 = 2
R_ARM_ABS32 = 2
R_386_GOT32 = 3
R_ARM_REL32 = 3
R_386_PLT32 = 4
R_X86_64_PLT32 = 4
R_XTENSA_PLT = 6
R_386_GOTOFF = 9
R_386_GOTPC = 10
R_ARM_THM_CALL = 10
R_XTENSA_DIFF32 = 19
R_XTENSA_SLOT0_OP = 20
R_ARM_BASE_PREL = 25  # aka R_ARM_GOTPC
R_ARM_GOT_BREL = 26  # aka R_ARM_GOT32
R_ARM_THM_JUMP24 = 30
R_X86_64_GOTPCREL = 9
R_X86_64_REX_GOTPCRELX = 42
R_386_GOT32X = 43

################################################################################
# Architecture configuration


def asm_jump_x86(entry):
    """Return 5 bytes: opcode 0xE9 followed by ``entry - 5`` as a little-endian u32."""
    return struct.pack("<BI", 0xE9, entry - 5)


def asm_jump_arm(entry):
    """Return 4 bytes (two little-endian halfwords) encoding a jump to ``entry``.

    A short form is used (second halfword zero) when ``entry - 4`` fits the
    narrow range tested below, otherwise a two-halfword long form is emitted.
    """
    b_off = entry - 4
    if b_off >> 11 == 0 or b_off >> 11 == -1:
        # Signed value fits in 12 bits
        b0 = 0xE000 | (b_off >> 1 & 0x07FF)
        b1 = 0
    else:
        # Use large jump
        b0 = 0xF000 | (b_off >> 12 & 0x07FF)
        b1 = 0xB800 | (b_off >> 1 & 0x7FF)
    return struct.pack("<HH", b0, b1)


def asm_jump_xtensa(entry):
    """Return 3 bytes encoding a jump whose offset field is ``entry - 4``."""
    jump_offset = entry - 4
    jump_op = jump_offset << 6 | 6
    return struct.pack("<BH", jump_op & 0xFF, jump_op >> 8)


class ArchData:
    """Per-architecture parameters used by the linker.

    name            -- ELF machine name compared against the object's ``e_machine``
    mpy_feature     -- feature byte written into the .mpy header
    qstr_entry_size -- size in bytes of one entry of mp_native_qstr_val_table
    word_size       -- size in bytes of a machine word / GOT entry
    arch_got        -- tuple of relocation types that are routed through the GOT
    asm_jump        -- function(entry) returning the bytes of a jump instruction
    """

    def __init__(self, name, mpy_feature, qstr_entry_size, word_size, arch_got, asm_jump):
        self.name = name
        self.mpy_feature = mpy_feature
        self.qstr_entry_size = qstr_entry_size
        self.word_size = word_size
        self.arch_got = arch_got
        self.asm_jump = asm_jump
        # Only an EM_XTENSA configuration with 4-byte qstr entries (the
        # "xtensawin" entry of ARCH_DATA) keeps rodata apart from text.
        self.separate_rodata = name == "EM_XTENSA" and qstr_entry_size == 4


ARCH_DATA = {
    "x86": ArchData(
        "EM_386",
        MP_NATIVE_ARCH_X86 << 2
        | MICROPY_PY_BUILTINS_STR_UNICODE
        | MICROPY_OPT_CACHE_MAP_LOOKUP_IN_BYTECODE,
        2,
        4,
        (R_386_PC32, R_386_GOT32, R_386_GOT32X),
        asm_jump_x86,
    ),
    "x64": ArchData(
        "EM_X86_64",
        MP_NATIVE_ARCH_X64 << 2
        | MICROPY_PY_BUILTINS_STR_UNICODE
        | MICROPY_OPT_CACHE_MAP_LOOKUP_IN_BYTECODE,
        2,
        8,
        (R_X86_64_GOTPCREL, R_X86_64_REX_GOTPCRELX),
        asm_jump_x86,
    ),
    "armv7m": ArchData(
        "EM_ARM",
        MP_NATIVE_ARCH_ARMV7M << 2 | MICROPY_PY_BUILTINS_STR_UNICODE,
        2,
        4,
        (R_ARM_GOT_BREL,),
        asm_jump_arm,
    ),
    "armv7emsp": ArchData(
        "EM_ARM",
        MP_NATIVE_ARCH_ARMV7EMSP << 2 | MICROPY_PY_BUILTINS_STR_UNICODE,
        2,
        4,
        (R_ARM_GOT_BREL,),
        asm_jump_arm,
    ),
    "armv7emdp": ArchData(
        "EM_ARM",
        MP_NATIVE_ARCH_ARMV7EMDP << 2 | MICROPY_PY_BUILTINS_STR_UNICODE,
        2,
        4,
        (R_ARM_GOT_BREL,),
        asm_jump_arm,
    ),
    "xtensa": ArchData(
        "EM_XTENSA",
        MP_NATIVE_ARCH_XTENSA << 2 | MICROPY_PY_BUILTINS_STR_UNICODE,
        2,
        4,
        (R_XTENSA_32, R_XTENSA_PLT),
        asm_jump_xtensa,
    ),
    "xtensawin": ArchData(
        "EM_XTENSA",
        MP_NATIVE_ARCH_XTENSAWIN << 2 | MICROPY_PY_BUILTINS_STR_UNICODE,
        4,
        4,
        (R_XTENSA_32, R_XTENSA_PLT),
        asm_jump_xtensa,
    ),
}

################################################################################
# Helper functions


def align_to(value, align):
    """Round ``value`` up to a multiple of ``align`` (bit-mask form, so ``align``
    is expected to be a power of two)."""
    return (value + align - 1) & ~(align - 1)


def unpack_u24le(data, offset):
    """Read a 24-bit little-endian unsigned integer from ``data`` at ``offset``."""
    return data[offset] | data[offset + 1] << 8 | data[offset + 2] << 16


def pack_u24le(data, offset, value):
    """Store the low 24 bits of ``value`` little-endian into ``data`` at ``offset``."""
    data[offset] = value & 0xFF
    data[offset + 1] = value >> 8 & 0xFF
    data[offset + 2] = value >> 16 & 0xFF


def xxd(text):
    """Print ``text`` as rows of up to four little-endian 32-bit hex words."""
    for i in range(0, len(text), 16):
        print("{:08x}:".format(i), end="")
        for j in range(4):
            off = i + j * 4
            if off < len(text):
                d = int.from_bytes(text[off : off + 4], "little")
                print(" {:08x}".format(d), end="")
        print()


# Smaller numbers are enabled first
LOG_LEVEL_1 = 1
LOG_LEVEL_2 = 2
LOG_LEVEL_3 = 3
log_level = LOG_LEVEL_1


def log(level, msg):
    """Print ``msg`` when ``level`` does not exceed the module-wide ``log_level``."""
    if level <= log_level:
        print(msg)


################################################################################
# Qstr extraction


def extract_qstrs(source_files):
    """Scan source files for qstr references.

    Returns a tuple ``(static_qstrs, qstr_vals, qstr_objs)``: the list of
    static qstr macro names, the set of ``MP_QSTR_xxx`` names used as plain
    values (static ones removed), and the set of names used inside
    ``MP_OBJ_NEW_QSTR(...)``.
    """

    def read_qstrs(fname):
        vals = set()
        objs = set()
        with open(fname) as f:
            for line in f:
                # Repeatedly match and cut the matched text out of the line
                # until nothing matches any more.
                while line:
                    m = re.search(r"MP_OBJ_NEW_QSTR\((MP_QSTR_[A-Za-z0-9_]*)\)", line)
                    if m:
                        objs.add(m.group(1))
                    else:
                        m = re.search(r"MP_QSTR_[A-Za-z0-9_]*", line)
                        if m:
                            vals.add(m.group())
                    if m:
                        s = m.span()
                        line = line[: s[0]] + line[s[1] :]
                    else:
                        line = ""
        return vals, objs

    static_qstrs = ["MP_QSTR_" + qstrutil.qstr_escape(q) for q in qstrutil.static_qstr_list]

    qstr_vals = set()
    qstr_objs = set()
    for f in source_files:
        vals, objs = read_qstrs(f)
        qstr_vals.update(vals)
        qstr_objs.update(objs)
    qstr_vals.difference_update(static_qstrs)

    return static_qstrs, qstr_vals, qstr_objs


################################################################################
# Linker


class LinkError(Exception):
    """Raised for link failures that are reported to the user by do_link()."""

    pass


class Section:
    """A chunk of data to be placed in the output, with its relocations."""

    def __init__(self, name, data, alignment, filename=None):
        self.filename = filename
        self.name = name
        self.data = data
        self.alignment = alignment
        self.addr = 0  # assigned by link_objects()
        self.reloc = []  # filled in by load_object_file()

    @staticmethod
    def from_elfsec(elfsec, filename):
        """Build a Section from an ELF section object; its sh_addr must be 0."""
        assert elfsec.header.sh_addr == 0
        return Section(elfsec.name, elfsec.data(), elfsec.data_alignment, filename)


class GOTEntry:
    """One entry of the global offset table.

    ``sec_name`` is not set here; populate_got() assigns it before any of the
    is*() predicates are used.
    """

    def __init__(self, name, sym, link_addr=0):
        self.name = name
        self.sym = sym
        self.offset = None
        self.link_addr = link_addr

    def isexternal(self):
        return self.sec_name.startswith(".external")

    def istext(self):
        return self.sec_name.startswith(".text")

    def isrodata(self):
        return self.sec_name.startswith((".rodata", ".data.rel.ro"))

    def isbss(self):
        return self.sec_name.startswith(".bss")


class LiteralEntry:
    """A literal value and its byte offset within the literal section."""

    def __init__(self, value, offset):
        self.value = value
        self.offset = offset


class LinkEnv:
    """Mutable state shared by all stages of a single link."""

    def __init__(self, arch):
        self.arch = ARCH_DATA[arch]
        self.sections = []  # list of sections in order of output
        self.literal_sections = []  # list of literal sections (xtensa only)
        self.known_syms = {}  # dict of symbols that are defined
        self.unresolved_syms = []  # list of unresolved symbols
        self.mpy_relocs = []  # list of relocations needed in the output .mpy file

    def check_arch(self, arch_name):
        """Raise LinkError unless ``arch_name`` matches the configured arch."""
        if arch_name != self.arch.name:
            raise LinkError("incompatible arch")

    def print_sections(self):
        """Log the address, name and size of every output section."""
        log(LOG_LEVEL_2, "sections:")
        for sec in self.sections:
            log(LOG_LEVEL_2, "  {:08x} {} size={}".format(sec.addr, sec.name, len(sec.data)))

    def find_addr(self, name):
        """Return the linked address of defined symbol ``name`` or raise LinkError."""
        if name in self.known_syms:
            s = self.known_syms[name]
            return s.section.addr + s["st_value"]
        raise LinkError("unknown symbol: {}".format(name))


def build_got_generic(env):
    """Create ``env.got_entries`` from global symbols referenced by GOT relocations."""
    env.got_entries = {}
    for sec in env.sections:
        for r in sec.reloc:
            s = r.sym
            if not (
                s.entry["st_info"]["bind"] == "STB_GLOBAL"
                and r["r_info_type"] in env.arch.arch_got
            ):
                continue
            s_type = s.entry["st_info"]["type"]
            assert s_type in ("STT_NOTYPE", "STT_FUNC", "STT_OBJECT"), s_type
            assert s.name
            if s.name in env.got_entries:
                continue
            env.got_entries[s.name] = GOTEntry(s.name, s)


def build_got_xtensa(env):
    """Create GOT and literal tables from the xtensa literal sections.

    Populates ``env.got_entries``, ``env.lit_entries`` and ``env.xt_literals``;
    the latter maps "<filename>+0x<offset>" to either a GOT entry name (str)
    or a literal value (int).
    """
    env.got_entries = {}
    env.lit_entries = {}
    env.xt_literals = {}

    # Extract the values from the literal table
    for sec in env.literal_sections:
        assert len(sec.data) % env.arch.word_size == 0

        # Look through literal relocations to find any global pointers that should be GOT entries
        for r in sec.reloc:
            s = r.sym
            s_type = s.entry["st_info"]["type"]
            assert s_type in ("STT_NOTYPE", "STT_FUNC", "STT_OBJECT", "STT_SECTION"), s_type
            assert r["r_info_type"] in env.arch.arch_got
            assert r["r_offset"] % env.arch.word_size == 0
            # This entry is a global pointer
            existing = struct.unpack_from("<I", sec.data, r["r_offset"])[0]
            if s_type == "STT_SECTION":
                assert r["r_addend"] == 0
                name = "{}+0x{:x}".format(s.section.name, existing)
            else:
                assert existing == 0
                name = s.name
                if r["r_addend"] != 0:
                    name = "{}+0x{:x}".format(name, r["r_addend"])
            idx = "{}+0x{:x}".format(sec.filename, r["r_offset"])
            env.xt_literals[idx] = name
            if name in env.got_entries:
                # Deduplicate GOT entries
                continue
            env.got_entries[name] = GOTEntry(name, s, existing)

        # Go through all literal entries finding those that aren't global pointers so must be actual literals
        for i in range(0, len(sec.data), env.arch.word_size):
            idx = "{}+0x{:x}".format(sec.filename, i)
            if idx not in env.xt_literals:
                # This entry is an actual literal
                value = struct.unpack_from("<I", sec.data, i)[0]
                env.xt_literals[idx] = value
                if value in env.lit_entries:
                    # Deduplicate literals
                    continue
                env.lit_entries[value] = LiteralEntry(
                    value, len(env.lit_entries) * env.arch.word_size
                )


def populate_got(env):
    """Assign GOT offsets, write the GOT into full_text and record its relocations."""
    # Compute GOT destination addresses
    for got_entry in env.got_entries.values():
        sym = got_entry.sym
        if hasattr(sym, "resolved"):
            sym = sym.resolved
        sec = sym.section
        addr = sym["st_value"]
        got_entry.sec_name = sec.name
        got_entry.link_addr += sec.addr + addr

    # Get sorted GOT, sorted by external, text, rodata, bss so relocations can be combined
    got_list = sorted(
        env.got_entries.values(),
        key=lambda g: g.isexternal() + 2 * g.istext() + 3 * g.isrodata() + 4 * g.isbss(),
    )

    # Layout and populate the GOT
    offset = 0
    for got_entry in got_list:
        got_entry.offset = offset
        offset += env.arch.word_size
        o = env.got_section.addr + got_entry.offset
        env.full_text[o : o + env.arch.word_size] = got_entry.link_addr.to_bytes(
            env.arch.word_size, "little"
        )

    # Create a relocation for each GOT entry
    for got_entry in got_list:
        if got_entry.name == "mp_fun_table":
            dest = "mp_fun_table"
        elif got_entry.name.startswith("mp_fun_table+0x"):
            dest = int(got_entry.name.split("+")[1], 16) // env.arch.word_size
        elif got_entry.sec_name.startswith(".text"):
            dest = ".text"
        elif got_entry.sec_name.startswith(".rodata"):
            dest = ".rodata"
        elif got_entry.sec_name.startswith(".data.rel.ro"):
            dest = ".data.rel.ro"
        elif got_entry.sec_name.startswith(".bss"):
            dest = ".bss"
        else:
            assert 0, (got_entry.name, got_entry.sec_name)
        env.mpy_relocs.append((".text", env.got_section.addr + got_entry.offset, dest))

    # Print out the final GOT
    log(LOG_LEVEL_2, "GOT: {:08x}".format(env.got_section.addr))
    for g in got_list:
        log(
            LOG_LEVEL_2,
            "  {:08x} {} -> {}+{:08x}".format(g.offset, g.name, g.sec_name, g.link_addr),
        )


def populate_lit(env):
    """Write every literal value into full_text at its slot in the literal section."""
    log(LOG_LEVEL_2, "LIT: {:08x}".format(env.lit_section.addr))
    for lit_entry in env.lit_entries.values():
        value = lit_entry.value
        log(LOG_LEVEL_2, "  {:08x} = {:08x}".format(lit_entry.offset, value))
        o = env.lit_section.addr + lit_entry.offset
        env.full_text[o : o + env.arch.word_size] = value.to_bytes(env.arch.word_size, "little")


def do_relocation_text(env, text_addr, r):
    """Apply relocation ``r`` of a text/rodata section located at ``text_addr``.

    The relocated value is patched into ``env.full_text``.  Raises LinkError
    for fixed relocations that cannot be supported; unknown relocation types
    trip an assertion.
    """
    # Extract relevant info about symbol that's being relocated
    s = r.sym
    s_bind = s.entry["st_info"]["bind"]
    s_type = s.entry["st_info"]["type"]
    r_offset = r["r_offset"] + text_addr
    r_info_type = r["r_info_type"]
    try:
        # only for RELA sections
        r_addend = r["r_addend"]
    except KeyError:
        r_addend = 0

    # Default relocation type and name for logging
    reloc_type = "le32"
    log_name = None

    if (
        env.arch.name == "EM_386"
        and r_info_type in (R_386_PC32, R_386_PLT32)
        or env.arch.name == "EM_X86_64"
        and r_info_type in (R_X86_64_PC32, R_X86_64_PLT32)
        or env.arch.name == "EM_ARM"
        and r_info_type in (R_ARM_REL32, R_ARM_THM_CALL, R_ARM_THM_JUMP24)
        or s_bind == "STB_LOCAL"
        and env.arch.name == "EM_XTENSA"
        and r_info_type == R_XTENSA_32  # not GOT
    ):
        # Standard relocation to fixed location within text/rodata
        if hasattr(s, "resolved"):
            s = s.resolved

        sec = s.section

        if env.arch.separate_rodata and sec.name.startswith(".rodata"):
            raise LinkError("fixed relocation to rodata with rodata referenced via GOT")

        if sec.name.startswith(".bss"):
            raise LinkError(
                "{}: fixed relocation to bss (bss variables can't be static)".format(s.filename)
            )

        if sec.name.startswith(".external"):
            raise LinkError(
                "{}: fixed relocation to external symbol: {}".format(s.filename, s.name)
            )

        addr = sec.addr + s["st_value"]
        reloc = addr - r_offset + r_addend

        if r_info_type in (R_ARM_THM_CALL, R_ARM_THM_JUMP24):
            # Both relocations have the same bit pattern to rewrite:
            #   R_ARM_THM_CALL: bl
            #   R_ARM_THM_JUMP24: b.w
            reloc_type = "thumb_b"

    elif (
        env.arch.name == "EM_386"
        and r_info_type == R_386_GOTPC
        or env.arch.name == "EM_ARM"
        and r_info_type == R_ARM_BASE_PREL
    ):
        # Relocation to GOT address itself
        assert s.name == "_GLOBAL_OFFSET_TABLE_"
        addr = env.got_section.addr
        reloc = addr - r_offset + r_addend

    elif (
        env.arch.name == "EM_386"
        and r_info_type in (R_386_GOT32, R_386_GOT32X)
        or env.arch.name == "EM_ARM"
        and r_info_type == R_ARM_GOT_BREL
    ):
        # Relocation pointing to GOT
        reloc = addr = env.got_entries[s.name].offset

    elif env.arch.name == "EM_X86_64" and r_info_type in (
        R_X86_64_GOTPCREL,
        R_X86_64_REX_GOTPCRELX,
    ):
        # Relocation pointing to GOT
        got_entry = env.got_entries[s.name]
        addr = env.got_section.addr + got_entry.offset
        reloc = addr - r_offset + r_addend

    elif env.arch.name == "EM_386" and r_info_type == R_386_GOTOFF:
        # Relocation relative to GOT
        addr = s.section.addr + s["st_value"]
        reloc = addr - env.got_section.addr + r_addend

    elif env.arch.name == "EM_XTENSA" and r_info_type == R_XTENSA_SLOT0_OP:
        # Relocation pointing to GOT, xtensa specific
        sec = s.section
        if sec.name.startswith(".text"):
            # it looks like R_XTENSA_SLOT0_OP into .text is already correctly relocated
            return
        assert sec.name.startswith(".literal"), sec.name
        lit_idx = "{}+0x{:x}".format(sec.filename, r_addend)
        lit_ptr = env.xt_literals[lit_idx]
        if isinstance(lit_ptr, str):
            addr = env.got_section.addr + env.got_entries[lit_ptr].offset
            log_name = "GOT {}".format(lit_ptr)
        else:
            addr = env.lit_section.addr + env.lit_entries[lit_ptr].offset
            log_name = "LIT"
        reloc = addr - r_offset
        reloc_type = "xtensa_l32r"

    elif env.arch.name == "EM_XTENSA" and r_info_type == R_XTENSA_DIFF32:
        if s.section.name.startswith(".text"):
            # it looks like R_XTENSA_DIFF32 into .text is already correctly relocated
            return
        assert 0

    else:
        # Unknown/unsupported relocation
        assert 0, r_info_type

    # Write relocation
    if reloc_type == "le32":
        (existing,) = struct.unpack_from("<I", env.full_text, r_offset)
        struct.pack_into("<I", env.full_text, r_offset, (existing + reloc) & 0xFFFFFFFF)
    elif reloc_type == "thumb_b":
        b_h, b_l = struct.unpack_from("<HH", env.full_text, r_offset)
        existing = (b_h & 0x7FF) << 12 | (b_l & 0x7FF) << 1
        if existing >= 0x400000:  # 2's complement
            existing -= 0x800000
        new = existing + reloc
        b_h = (b_h & 0xF800) | (new >> 12) & 0x7FF
        b_l = (b_l & 0xF800) | (new >> 1) & 0x7FF
        struct.pack_into("<HH", env.full_text, r_offset, b_h, b_l)
    elif reloc_type == "xtensa_l32r":
        l32r = unpack_u24le(env.full_text, r_offset)
        assert l32r & 0xF == 1  # RI16 encoded l32r
        l32r_imm16 = l32r >> 8
        # NOTE: "+" binds tighter than ">>", so the sum is shifted as a whole.
        l32r_imm16 = (l32r_imm16 + reloc >> 2) & 0xFFFF
        l32r = l32r & 0xFF | l32r_imm16 << 8
        pack_u24le(env.full_text, r_offset, l32r)
    else:
        assert 0, reloc_type

    # Log information about relocation
    if log_name is None:
        if s_type == "STT_SECTION":
            log_name = s.section.name
        else:
            log_name = s.name
    log(LOG_LEVEL_3, "  {:08x} {} -> {:08x}".format(r_offset, log_name, addr))


def do_relocation_data(env, text_addr, r):
    """Apply relocation ``r`` of a .data.rel.ro section located at ``text_addr``.

    Patches the word in full_rodata (or full_text when rodata is not separate)
    and appends a matching entry to ``env.mpy_relocs``.
    """
    s = r.sym
    s_type = s.entry["st_info"]["type"]
    r_offset = r["r_offset"] + text_addr
    r_info_type = r["r_info_type"]
    try:
        # only for RELA sections
        r_addend = r["r_addend"]
    except KeyError:
        r_addend = 0

    if (
        env.arch.name == "EM_386"
        and r_info_type == R_386_32
        or env.arch.name == "EM_X86_64"
        and r_info_type == R_X86_64_64
        or env.arch.name == "EM_ARM"
        and r_info_type == R_ARM_ABS32
        or env.arch.name == "EM_XTENSA"
        and r_info_type == R_XTENSA_32
    ):
        # Relocation in data.rel.ro to internal/external symbol
        if env.arch.word_size == 4:
            struct_type = "<I"
        elif env.arch.word_size == 8:
            struct_type = "<Q"
        sec = s.section
        assert r_offset % env.arch.word_size == 0
        addr = sec.addr + s["st_value"] + r_addend
        if s_type == "STT_SECTION":
            log_name = sec.name
        else:
            log_name = s.name
        log(LOG_LEVEL_3, "  {:08x} -> {} {:08x}".format(r_offset, log_name, addr))
        if env.arch.separate_rodata:
            data = env.full_rodata
        else:
            data = env.full_text
        (existing,) = struct.unpack_from(struct_type, data, r_offset)
        if sec.name.startswith((".text", ".rodata", ".data.rel.ro", ".bss")):
            struct.pack_into(struct_type, data, r_offset, existing + addr)
            kind = sec.name
        elif sec.name == ".external.mp_fun_table":
            assert addr == 0
            kind = s.mp_fun_table_offset
        else:
            assert 0, sec.name
        if env.arch.separate_rodata:
            base = ".rodata"
        else:
            base = ".text"
        env.mpy_relocs.append((base, r_offset, kind))

    else:
        # Unknown/unsupported relocation
        assert 0, r_info_type


def load_object_file(env, felf):
    """Load the sections, relocations and symbols of ELF object ``felf`` into ``env``.

    Raises LinkError on an architecture mismatch, a non-empty .data section
    or a duplicate global symbol.
    """
    with open(felf, "rb") as f:
        elf = elffile.ELFFile(f)
        env.check_arch(elf["e_machine"])

        # Get symbol table
        symtab = list(elf.get_section_by_name(".symtab").iter_symbols())

        # Load needed sections from ELF file
        sections_shndx = {}  # maps elf shndx to Section object
        for idx, s in enumerate(elf.iter_sections()):
            if s.header.sh_type in ("SHT_PROGBITS", "SHT_NOBITS"):
                if s.data_size == 0:
                    # Ignore empty sections
                    pass
                elif s.name.startswith((".literal", ".text", ".rodata", ".data.rel.ro", ".bss")):
                    sec = Section.from_elfsec(s, felf)
                    sections_shndx[idx] = sec
                    if s.name.startswith(".literal"):
                        env.literal_sections.append(sec)
                    else:
                        env.sections.append(sec)
                elif s.name.startswith(".data"):
                    raise LinkError("{}: {} non-empty".format(felf, s.name))
                else:
                    # Ignore section
                    pass
            elif s.header.sh_type in ("SHT_REL", "SHT_RELA"):
                shndx = s.header.sh_info
                if shndx in sections_shndx:
                    sec = sections_shndx[shndx]
                    sec.reloc_name = s.name
                    sec.reloc = list(s.iter_relocations())
                    for r in sec.reloc:
                        r.sym = symtab[r["r_info_sym"]]

        # Link symbols to their sections, and update known and unresolved symbols
        for sym in symtab:
            sym.filename = felf
            shndx = sym.entry["st_shndx"]
            if shndx in sections_shndx:
                # Symbol with associated section
                sym.section = sections_shndx[shndx]
                if sym["st_info"]["bind"] == "STB_GLOBAL":
                    # Defined global symbol
                    if sym.name in env.known_syms and not sym.name.startswith(
                        "__x86.get_pc_thunk."
                    ):
                        raise LinkError("duplicate symbol: {}".format(sym.name))
                    env.known_syms[sym.name] = sym
            elif sym.entry["st_shndx"] == "SHN_UNDEF" and sym["st_info"]["bind"] == "STB_GLOBAL":
                # Undefined global symbol, needs resolving
                env.unresolved_syms.append(sym)


def link_objects(env, native_qstr_vals_len, native_qstr_objs_len):
    """Lay out all loaded sections, resolve symbols and apply relocations.

    On return ``env.full_text``, ``env.full_rodata``, ``env.full_bss`` and
    ``env.mpy_relocs`` are populated.  Raises LinkError for undefined symbols.
    """
    # Build GOT information
    if env.arch.name == "EM_XTENSA":
        build_got_xtensa(env)
    else:
        build_got_generic(env)

    # Create GOT section
    got_size = len(env.got_entries) * env.arch.word_size
    env.got_section = Section("GOT", bytearray(got_size), env.arch.word_size)
    if env.arch.name == "EM_XTENSA":
        env.sections.insert(0, env.got_section)
    else:
        env.sections.append(env.got_section)

    # Create optional literal section
    if env.arch.name == "EM_XTENSA":
        lit_size = len(env.lit_entries) * env.arch.word_size
        env.lit_section = Section("LIT", bytearray(lit_size), env.arch.word_size)
        env.sections.insert(1, env.lit_section)

    # Create section to contain mp_native_qstr_val_table
    env.qstr_val_section = Section(
        ".text.QSTR_VAL",
        bytearray(native_qstr_vals_len * env.arch.qstr_entry_size),
        env.arch.qstr_entry_size,
    )
    env.sections.append(env.qstr_val_section)

    # Create section to contain mp_native_qstr_obj_table
    env.qstr_obj_section = Section(
        ".text.QSTR_OBJ", bytearray(native_qstr_objs_len * env.arch.word_size), env.arch.word_size
    )
    env.sections.append(env.qstr_obj_section)

    # Resolve unknown symbols
    mp_fun_table_sec = Section(".external.mp_fun_table", b"", 0)
    fun_table = {
        key: 67 + idx
        for idx, key in enumerate(
            [
                "mp_type_type",
                "mp_type_str",
                "mp_type_list",
                "mp_type_dict",
                "mp_type_fun_builtin_0",
                "mp_type_fun_builtin_1",
                "mp_type_fun_builtin_2",
                "mp_type_fun_builtin_3",
                "mp_type_fun_builtin_var",
                "mp_stream_read_obj",
                "mp_stream_readinto_obj",
                "mp_stream_unbuffered_readline_obj",
                "mp_stream_write_obj",
            ]
        )
    }
    for sym in env.unresolved_syms:
        assert sym["st_value"] == 0
        if sym.name == "_GLOBAL_OFFSET_TABLE_":
            pass
        elif sym.name == "mp_fun_table":
            sym.section = Section(".external", b"", 0)
        elif sym.name == "mp_native_qstr_val_table":
            sym.section = env.qstr_val_section
        elif sym.name == "mp_native_qstr_obj_table":
            sym.section = env.qstr_obj_section
        elif sym.name in env.known_syms:
            sym.resolved = env.known_syms[sym.name]
        else:
            if sym.name in fun_table:
                sym.section = mp_fun_table_sec
                sym.mp_fun_table_offset = fun_table[sym.name]
            else:
                raise LinkError("{}: undefined symbol: {}".format(sym.filename, sym.name))

    # Align sections, assign their addresses, and create full_text
    env.full_text = bytearray(env.arch.asm_jump(8))  # dummy, to be filled in later
    env.full_rodata = bytearray(0)
    env.full_bss = bytearray(0)
    for sec in env.sections:
        if env.arch.separate_rodata and sec.name.startswith((".rodata", ".data.rel.ro")):
            data = env.full_rodata
        elif sec.name.startswith(".bss"):
            data = env.full_bss
        else:
            data = env.full_text
        sec.addr = align_to(len(data), sec.alignment)
        data.extend(b"\x00" * (sec.addr - len(data)))
        data.extend(sec.data)

    env.print_sections()

    populate_got(env)
    if env.arch.name == "EM_XTENSA":
        populate_lit(env)

    # Fill in relocations
    for sec in env.sections:
        if not sec.reloc:
            continue
        log(
            LOG_LEVEL_3,
            "{}: {} relocations via {}:".format(sec.filename, sec.name, sec.reloc_name),
        )
        for r in sec.reloc:
            if sec.name.startswith((".text", ".rodata")):
                do_relocation_text(env, sec.addr, r)
            elif sec.name.startswith(".data.rel.ro"):
                do_relocation_data(env, sec.addr, r)
            else:
                assert 0, sec.name


################################################################################
# .mpy output


class MPYOutput:
    """Writer for the binary .mpy output file."""

    def open(self, fname):
        """Open ``fname`` for binary writing and reset relocation tracking."""
        self.f = open(fname, "wb")
        self.prev_base = -1
        self.prev_offset = -1

    def close(self):
        self.f.close()

    def write_bytes(self, buf):
        self.f.write(buf)

    def write_uint(self, val):
        """Write ``val`` in 7-bit groups, most significant first; every byte
        except the last has its top bit set."""
        b = bytearray()
        b.insert(0, val & 0x7F)
        val >>= 7
        while val:
            b.insert(0, 0x80 | (val & 0x7F))
            val >>= 7
        self.write_bytes(b)

    def write_qstr(self, s):
        """Write qstr ``s``: a static qstr as ``[0, index + 1]``, any other as
        ``len << 1`` followed by its ASCII bytes."""
        if s in qstrutil.static_qstr_list:
            self.write_bytes(bytes([0, qstrutil.static_qstr_list.index(s) + 1]))
        else:
            s = bytes(s, "ascii")
            self.write_uint(len(s) << 1)
            self.write_bytes(s)

    def write_reloc(self, base, offset, dest, n):
        """Write one relocation record covering ``n`` consecutive words.

        ``base`` is ".text" or ".rodata", ``offset`` is in words, and ``dest``
        is the numeric relocation destination.
        """
        need_offset = not (base == self.prev_base and offset == self.prev_offset + 1)
        self.prev_offset = offset + n - 1
        if dest <= 2:
            dest = (dest << 1) | (n > 1)
        else:
            assert 6 <= dest <= 127
            assert n == 1
        dest = dest << 1 | need_offset
        assert 0 <= dest <= 0xFE, dest
        self.write_bytes(bytes([dest]))
        if need_offset:
            if base == ".text":
                base = 0
            elif base == ".rodata":
                base = 1
            self.write_uint(offset << 1 | base)
        if n > 1:
            self.write_uint(n)


def build_mpy(env, entry_offset, fmpy, native_qstr_vals, native_qstr_objs):
    """Write the linked image held in ``env`` to the .mpy file ``fmpy``.

    ``entry_offset`` is the address the initial jump instruction targets.
    The output file is closed even if writing fails part-way through.
    """
    # Write jump instruction to start of text
    jump = env.arch.asm_jump(entry_offset)
    env.full_text[: len(jump)] = jump

    log(LOG_LEVEL_1, "arch:         {}".format(env.arch.name))
    log(LOG_LEVEL_1, "text size:    {}".format(len(env.full_text)))
    if len(env.full_rodata):
        log(LOG_LEVEL_1, "rodata size:  {}".format(len(env.full_rodata)))
    log(LOG_LEVEL_1, "bss size:     {}".format(len(env.full_bss)))
    log(LOG_LEVEL_1, "GOT entries:  {}".format(len(env.got_entries)))

    # xxd(env.full_text)

    out = MPYOutput()
    out.open(fmpy)
    try:
        # MPY: header
        out.write_bytes(
            bytearray(
                [ord("M"), MPY_VERSION, env.arch.mpy_feature, MP_SMALL_INT_BITS, QSTR_WINDOW_SIZE]
            )
        )

        # MPY: kind/len
        out.write_uint(len(env.full_text) << 2 | (MP_CODE_NATIVE_VIPER - MP_CODE_BYTECODE))

        # MPY: machine code
        out.write_bytes(env.full_text)

        # MPY: n_qstr_link (assumes little endian)
        out.write_uint(len(native_qstr_vals) + len(native_qstr_objs))
        for q in range(len(native_qstr_vals)):
            off = env.qstr_val_section.addr + q * env.arch.qstr_entry_size
            out.write_uint(off << 2)
            out.write_qstr(native_qstr_vals[q])
        for q in range(len(native_qstr_objs)):
            off = env.qstr_obj_section.addr + q * env.arch.word_size
            out.write_uint(off << 2 | 3)
            out.write_qstr(native_qstr_objs[q])

        # MPY: scope_flags
        scope_flags = MP_SCOPE_FLAG_VIPERRELOC
        if len(env.full_rodata):
            scope_flags |= MP_SCOPE_FLAG_VIPERRODATA
        if len(env.full_bss):
            scope_flags |= MP_SCOPE_FLAG_VIPERBSS
        out.write_uint(scope_flags)

        # MPY: n_obj
        out.write_uint(0)

        # MPY: n_raw_code
        out.write_uint(0)

        # MPY: rodata and/or bss
        if len(env.full_rodata):
            rodata_const_table_idx = 1
            out.write_uint(len(env.full_rodata))
            out.write_bytes(env.full_rodata)
        if len(env.full_bss):
            bss_const_table_idx = bool(env.full_rodata) + 1
            out.write_uint(len(env.full_bss))

        # MPY: relocation information
        # prev_base/prev_offset/prev_n are only read once prev_kind is not
        # None, at which point they have been assigned in the else branch.
        prev_kind = None
        for base, addr, kind in env.mpy_relocs:
            if isinstance(kind, str) and kind.startswith(".text"):
                kind = 0
            elif kind in (".rodata", ".data.rel.ro"):
                if env.arch.separate_rodata:
                    kind = rodata_const_table_idx
                else:
                    kind = 0
            elif isinstance(kind, str) and kind.startswith(".bss"):
                kind = bss_const_table_idx
            elif kind == "mp_fun_table":
                kind = 6
            else:
                kind = 7 + kind
            assert addr % env.arch.word_size == 0, addr
            offset = addr // env.arch.word_size
            if kind == prev_kind and base == prev_base and offset == prev_offset + 1:
                # Extend the current run of consecutive identical relocations
                prev_n += 1
                prev_offset += 1
            else:
                if prev_kind is not None:
                    out.write_reloc(prev_base, prev_offset - prev_n + 1, prev_kind, prev_n)
                prev_kind = kind
                prev_base = base
                prev_offset = offset
                prev_n = 1
        if prev_kind is not None:
            out.write_reloc(prev_base, prev_offset - prev_n + 1, prev_kind, prev_n)

        # MPY: sentinel for end of relocations
        out.write_bytes(b"\xff")
    finally:
        out.close()


################################################################################
# main


def do_preprocess(args):
    """Generate the qstr config header for the C sources in ``args.files``.

    When ``args.output`` is None it defaults to the first input with its
    trailing "c" replaced by "config.h".
    """
    if args.output is None:
        assert args.files[0].endswith(".c")
        args.output = args.files[0][:-1] + "config.h"
    static_qstrs, qstr_vals, qstr_objs = extract_qstrs(args.files)
    with open(args.output, "w") as f:
        print(
            "#include <stdint.h>\n"
            "typedef uintptr_t mp_uint_t;\n"
            "typedef intptr_t mp_int_t;\n"
            "typedef uintptr_t mp_off_t;",
            file=f,
        )
        for i, q in enumerate(static_qstrs):
            print("#define %s (%u)" % (q, i + 1), file=f)
        for i, q in enumerate(sorted(qstr_vals)):
            print("#define %s (mp_native_qstr_val_table[%d])" % (q, i), file=f)
        for i, q in enumerate(sorted(qstr_objs)):
            print(
                "#define MP_OBJ_NEW_QSTR_%s ((mp_obj_t)mp_native_qstr_obj_table[%d])" % (q, i),
                file=f,
            )
        if args.arch == "xtensawin":
            qstr_type = "uint32_t"  # esp32 can only read 32-bit values from IRAM
        else:
            qstr_type = "uint16_t"
        print("extern const {} mp_native_qstr_val_table[];".format(qstr_type), file=f)
        print("extern const mp_uint_t mp_native_qstr_obj_table[];", file=f)


def do_link(args):
    """Link the object files in ``args.files`` into a .mpy file.

    When ``args.output`` is None it defaults to the first input with its
    trailing "o" replaced by "mpy".  A LinkError is printed and the process
    exits with status 1.
    """
    if args.output is None:
        assert args.files[0].endswith(".o")
        args.output = args.files[0][:-1] + "mpy"
    native_qstr_vals = []
    native_qstr_objs = []
    if args.qstrs is not None:
        with open(args.qstrs) as f:
            for line in f:
                m = re.match(r"#define MP_QSTR_([A-Za-z0-9_]*) \(mp_native_", line)
                if m:
                    native_qstr_vals.append(m.group(1))
                else:
                    m = re.match(r"#define MP_OBJ_NEW_QSTR_MP_QSTR_([A-Za-z0-9_]*)", line)
                    if m:
                        native_qstr_objs.append(m.group(1))
    log(LOG_LEVEL_2, "qstr vals: " + ", ".join(native_qstr_vals))
    log(LOG_LEVEL_2, "qstr objs: " + ", ".join(native_qstr_objs))
    env = LinkEnv(args.arch)
    try:
        for file in args.files:
            load_object_file(env, file)
        link_objects(env, len(native_qstr_vals), len(native_qstr_objs))
        build_mpy(env, env.find_addr("mpy_init"), args.output, native_qstr_vals, native_qstr_objs)
    except LinkError as er:
        print("LinkError:", er.args[0])
        sys.exit(1)


def main():
    """Parse the command line and run either the preprocess or the link step."""
    import argparse

    cmd_parser = argparse.ArgumentParser(description="Link native .o files into a .mpy file.")
    cmd_parser.add_argument(
        "--verbose", "-v", action="count", default=1, help="increase verbosity"
    )
    cmd_parser.add_argument("--arch", default="x64", help="architecture")
    cmd_parser.add_argument("--preprocess", action="store_true", help="preprocess source files")
    cmd_parser.add_argument("--qstrs", default=None, help="file defining additional qstrs")
    cmd_parser.add_argument(
        "--output", "-o", default=None, help="output .mpy file (default to input with .o->.mpy)"
    )
    cmd_parser.add_argument("files", nargs="+", help="input files")
    args = cmd_parser.parse_args()

    global log_level
    log_level = args.verbose

    if args.preprocess:
        do_preprocess(args)
    else:
        do_link(args)


if __name__ == "__main__":
    main()