// Copyright (c) 2017, Google Inc.
//
// Permission to use, copy, modify, and/or distribute this software for any
// purpose with or without fee is hereby granted, provided that the above
// copyright notice and this permission notice appear in all copies.
//
// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
// SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
// OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
// CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */

// delocate performs several transformations of textual assembly code. See
// crypto/fipsmodule/FIPS.md for an overview.
package main

import (
	"errors"
	"flag"
	"fmt"
	"io/ioutil"
	"os"
	"sort"
	"strconv"
	"strings"

	"boringssl.googlesource.com/boringssl/util/ar"
	"boringssl.googlesource.com/boringssl/util/fipstools/fipscommon"
)

// inputFile represents a textual assembly file.
type inputFile struct {
	// path is the location of the file.
	path string
	// index is a unique identifier given to this file. It's used for
	// mapping local symbols.
	index int
	// isArchive indicates that the input should be processed as an ar
	// file.
	isArchive bool
	// contents contains the contents of the file.
	contents string
	// ast points to the head of the syntax tree.
	ast *node32
}

// stringWriter is the minimal output sink used by the delocation code: any
// type that can accept a string, returning the number of bytes written and an
// error.
type stringWriter interface {
	WriteString(string) (int, error)
}

// processorType identifies the architecture whose assembly is being
// processed. It selects the instruction handler used by processInput.
type processorType int

const (
	// ppc64le selects processPPCInstruction. Values start at one, so the
	// zero value of processorType is not a valid processor.
	ppc64le processorType = iota + 1
	// x86_64 selects processIntelInstruction.
	x86_64
)

// delocation holds the state needed during a delocation operation.
type delocation struct {
	// processor is the architecture of the input assembly.
	processor processorType
	// output receives the transformed assembly.
	output stringWriter

	// symbols is the set of symbols defined in the module.
	symbols map[string]struct{}
	// localEntrySymbols is the set of symbols with .localentry directives.
	localEntrySymbols map[string]struct{}
	// redirectors maps from out-call symbol name to the name of a
	// redirector function for that symbol. E.g. “memcpy” ->
	// “bcm_redirector_memcpy”.
	redirectors map[string]string
	// bssAccessorsNeeded maps from a BSS symbol name to the symbol that
	// should be used to reference it. E.g. “P384_data_storage” ->
	// “P384_data_storage”.
	bssAccessorsNeeded map[string]string
	// tocLoaders is a set of symbol names for which TOC helper functions
	// are required. (ppc64le only.)
	tocLoaders map[string]struct{}
	// gotExternalsNeeded is a set of symbol names for which we need
	// “delta” symbols: symbols that contain the offset from their location
	// to the memory in question.
	gotExternalsNeeded map[string]struct{}
	// gotDeltaNeeded is true if the code needs to load the value of
	// _GLOBAL_OFFSET_TABLE_.
	gotDeltaNeeded bool
	// gotOffsetsNeeded contains the symbols whose @GOT offsets are needed.
	gotOffsetsNeeded map[string]struct{}
	// gotOffOffsetsNeeded contains the symbols whose @GOTOFF offsets are needed.
	gotOffOffsetsNeeded map[string]struct{}

	// currentInput is the file being processed; node offsets are
	// interpreted relative to its contents.
	currentInput inputFile
}

// contents returns the text of the current input that is spanned by node,
// i.e. the bytes from node.begin up to (but not including) node.end.
func (d *delocation) contents(node *node32) string {
	return d.currentInput.contents[node.begin:node.end]
}

// writeNode writes out an AST node.
99func (d *delocation) writeNode(node *node32) { 100 if _, err := d.output.WriteString(d.contents(node)); err != nil { 101 panic(err) 102 } 103} 104 105func (d *delocation) writeCommentedNode(node *node32) { 106 line := d.contents(node) 107 if _, err := d.output.WriteString("# WAS " + strings.TrimSpace(line) + "\n"); err != nil { 108 panic(err) 109 } 110} 111 112func locateError(err error, with *node32, in inputFile) error { 113 posMap := translatePositions([]rune(in.contents), []int{int(with.begin)}) 114 var line int 115 for _, pos := range posMap { 116 line = pos.line 117 } 118 119 return fmt.Errorf("error while processing %q on line %d: %q", in.contents[with.begin:with.end], line, err) 120} 121 122func (d *delocation) processInput(input inputFile) (err error) { 123 d.currentInput = input 124 125 var origStatement *node32 126 defer func() { 127 if err := recover(); err != nil { 128 panic(locateError(fmt.Errorf("%s", err), origStatement, input)) 129 } 130 }() 131 132 for statement := input.ast.up; statement != nil; statement = statement.next { 133 assertNodeType(statement, ruleStatement) 134 origStatement = statement 135 136 node := skipWS(statement.up) 137 if node == nil { 138 d.writeNode(statement) 139 continue 140 } 141 142 switch node.pegRule { 143 case ruleGlobalDirective, ruleComment, ruleLocationDirective: 144 d.writeNode(statement) 145 case ruleDirective: 146 statement, err = d.processDirective(statement, node.up) 147 case ruleLabelContainingDirective: 148 statement, err = d.processLabelContainingDirective(statement, node.up) 149 case ruleLabel: 150 statement, err = d.processLabel(statement, node.up) 151 case ruleInstruction: 152 switch d.processor { 153 case x86_64: 154 statement, err = d.processIntelInstruction(statement, node.up) 155 case ppc64le: 156 statement, err = d.processPPCInstruction(statement, node.up) 157 default: 158 panic("unknown processor") 159 } 160 default: 161 panic(fmt.Sprintf("unknown top-level statement type %q", 
rul3s[node.pegRule])) 162 } 163 164 if err != nil { 165 return locateError(err, origStatement, input) 166 } 167 } 168 169 return nil 170} 171 172func (d *delocation) processDirective(statement, directive *node32) (*node32, error) { 173 assertNodeType(directive, ruleDirectiveName) 174 directiveName := d.contents(directive) 175 176 var args []string 177 forEachPath(directive, func(arg *node32) { 178 // If the argument is a quoted string, use the raw contents. 179 // (Note that this doesn't unescape the string, but that's not 180 // needed so far. 181 if arg.up != nil { 182 arg = arg.up 183 assertNodeType(arg, ruleQuotedArg) 184 if arg.up == nil { 185 args = append(args, "") 186 return 187 } 188 arg = arg.up 189 assertNodeType(arg, ruleQuotedText) 190 } 191 args = append(args, d.contents(arg)) 192 }, ruleArgs, ruleArg) 193 194 switch directiveName { 195 case "comm", "lcomm": 196 if len(args) < 1 { 197 return nil, errors.New("comm directive has no arguments") 198 } 199 d.bssAccessorsNeeded[args[0]] = args[0] 200 d.writeNode(statement) 201 202 case "data": 203 // ASAN and some versions of MSAN are adding a .data section, 204 // and adding references to symbols within it to the code. We 205 // will have to work around this in the future. 206 return nil, errors.New(".data section found in module") 207 208 case "section": 209 section := args[0] 210 211 if section == ".data.rel.ro" { 212 // In a normal build, this is an indication of a 213 // problem but any references from the module to this 214 // section will result in a relocation and thus will 215 // break the integrity check. ASAN can generate these 216 // sections and so we will likely have to work around 217 // that in the future. 218 return nil, errors.New(".data.rel.ro section found in module") 219 } 220 221 sectionType, ok := sectionType(section) 222 if !ok { 223 // Unknown sections are permitted in order to be robust 224 // to different compiler modes. 
225 d.writeNode(statement) 226 break 227 } 228 229 switch sectionType { 230 case ".rodata", ".text": 231 // Move .rodata to .text so it may be accessed without 232 // a relocation. GCC with -fmerge-constants will place 233 // strings into separate sections, so we move all 234 // sections named like .rodata. Also move .text.startup 235 // so the self-test function is also in the module. 236 d.writeCommentedNode(statement) 237 d.output.WriteString(".text\n") 238 239 case ".data": 240 // See above about .data 241 return nil, errors.New(".data section found in module") 242 243 case ".init_array", ".fini_array", ".ctors", ".dtors": 244 // init_array/ctors/dtors contains function 245 // pointers to constructor/destructor 246 // functions. These contain relocations, but 247 // they're in a different section anyway. 248 d.writeNode(statement) 249 break 250 251 case ".debug", ".note", ".toc": 252 d.writeNode(statement) 253 break 254 255 case ".bss": 256 d.writeNode(statement) 257 return d.handleBSS(statement) 258 } 259 260 default: 261 d.writeNode(statement) 262 } 263 264 return statement, nil 265} 266 267func (d *delocation) processLabelContainingDirective(statement, directive *node32) (*node32, error) { 268 // The symbols within directives need to be mapped so that local 269 // symbols in two different .s inputs don't collide. 
270 changed := false 271 assertNodeType(directive, ruleLabelContainingDirectiveName) 272 name := d.contents(directive) 273 274 node := directive.next 275 assertNodeType(node, ruleWS) 276 277 node = node.next 278 assertNodeType(node, ruleSymbolArgs) 279 280 var args []string 281 for node = skipWS(node.up); node != nil; node = skipWS(node.next) { 282 assertNodeType(node, ruleSymbolArg) 283 arg := node.up 284 var mapped string 285 286 for term := arg; term != nil; term = term.next { 287 if term.pegRule != ruleLocalSymbol { 288 mapped += d.contents(term) 289 continue 290 } 291 292 oldSymbol := d.contents(term) 293 newSymbol := d.mapLocalSymbol(oldSymbol) 294 if newSymbol != oldSymbol { 295 changed = true 296 } 297 298 mapped += newSymbol 299 } 300 301 args = append(args, mapped) 302 } 303 304 if !changed { 305 d.writeNode(statement) 306 } else { 307 d.writeCommentedNode(statement) 308 d.output.WriteString("\t" + name + "\t" + strings.Join(args, ", ") + "\n") 309 } 310 311 if name == ".localentry" { 312 d.output.WriteString(localEntryName(args[0]) + ":\n") 313 } 314 315 return statement, nil 316} 317 318func (d *delocation) processLabel(statement, label *node32) (*node32, error) { 319 symbol := d.contents(label) 320 321 switch label.pegRule { 322 case ruleLocalLabel: 323 d.output.WriteString(symbol + ":\n") 324 case ruleLocalSymbol: 325 // symbols need to be mapped so that local symbols from two 326 // different .s inputs don't collide. 327 d.output.WriteString(d.mapLocalSymbol(symbol) + ":\n") 328 case ruleSymbolName: 329 d.output.WriteString(localTargetName(symbol) + ":\n") 330 d.writeNode(statement) 331 default: 332 return nil, fmt.Errorf("unknown label type %q", rul3s[label.pegRule]) 333 } 334 335 return statement, nil 336} 337 338// instructionArgs collects all the arguments to an instruction. 
339func instructionArgs(node *node32) (argNodes []*node32) { 340 for node = skipWS(node); node != nil; node = skipWS(node.next) { 341 assertNodeType(node, ruleInstructionArg) 342 argNodes = append(argNodes, node.up) 343 } 344 345 return argNodes 346} 347 348/* ppc64le 349 350[PABI]: “64-Bit ELF V2 ABI Specification. Power Architecture.” March 21st, 351 2017 352 353(Also useful is “Power ISA Version 2.07 B”. Note that version three of that 354document is /not/ good as that's POWER9 specific.) 355 356ppc64le doesn't have IP-relative addressing and does a lot to work around this. 357Rather than reference a PLT and GOT direction, it has a single structure called 358the TOC (Table Of Contents). Within the TOC is the contents of .rodata, .data, 359.got, .plt, .bss, etc sections [PABI;3.3]. 360 361A pointer to the TOC is maintained in r2 and the following pattern is used to 362load the address of an element into a register: 363 364 addis <address register>, 2, foo@toc@ha 365 addi <address register>, <address register>, foo@toc@l 366 367The “addis” instruction shifts a signed constant left 16 bits and adds the 368result to its second argument, saving the result in the first argument. The 369“addi” instruction does the same, but without shifting. Thus the “@toc@ha" 370suffix on a symbol means “the top 16 bits of the TOC offset” and “@toc@l” means 371“the bottom 16 bits of the offset”. However, note that both values are signed, 372thus offsets in the top half of a 64KB chunk will have an @ha value that's one 373greater than expected and a negative @l value. 374 375The TOC is specific to a “module” (basically an executable or shared object). 376This means that there's not a single TOC in a process and that r2 needs to 377change as control moves between modules. Thus functions have two entry points: 378the “global” entry point and the “local” entry point. Jumps from within the 379same module can use the local entry while jumps from other modules must use the 380global entry. 
The global entry establishes the correct value of r2 before 381running the function and the local entry skips that code. 382 383The global entry point for a function is defined by its label. The local entry 384is a power-of-two number of bytes from the global entry, set by the 385“.localentry” directive. (ppc64le instructions are always 32 bits, so an offset 386of 1 or 2 bytes is treated as an offset of zero.) 387 388In order to help the global entry code set r2 to point to the local TOC, r12 is 389set to the address of the global entry point when called [PABI;2.2.1.1]. Thus 390the global entry will typically use an addis+addi pair to add a known offset to 391r12 and store it in r2. For example: 392 393foo: 394 addis 2, 12, .TOC. - foo@ha 395 addi 2, 2, .TOC. - foo@l 396 397(It's worth noting that the '@' operator binds very loosely, so the 3rd 398arguments parse as (.TOC. - foo)@ha and (.TOC. - foo)@l.) 399 400When calling a function, the compiler doesn't know whether that function is in 401the same module or not. Thus it doesn't know whether r12 needs to be set nor 402whether r2 will be clobbered on return. Rather than always assume the worst, 403the linker fixes stuff up once it knows that a call is going out of module: 404 405Firstly, calling, say, memcpy (which we assume to be in a different module) 406won't actually jump directly to memcpy, or even a PLT resolution function. 407It'll call a synthesised function that: 408 a) saves r2 in the caller's stack frame 409 b) loads the address of memcpy@PLT into r12 410 c) jumps to r12. 411 412As this synthesised function loads memcpy@PLT, a call to memcpy from the 413compiled code just references “memcpy” directly, not “memcpy@PLT”. 414 415Since it jumps directly to memcpy@PLT, it can't restore r2 on return. Thus 416calls must be followed by a nop. If the call ends up going out-of-module, the 417linker will rewrite that nop to load r2 from the stack. 
418 419Speaking of the stack, the stack pointer is kept in r1 and there's a 288-byte 420red-zone. The format of the stack frame is defined [PABI;2.2.2] and must be 421followed as called functions will write into their parent's stack frame. For 422example, the synthesised out-of-module trampolines will save r2 24 bytes into 423the caller's frame and all non-leaf functions save the return address 16 bytes 424into the caller's frame. 425 426A final point worth noting: some RISC ISAs have r0 wired to zero: all reads 427result in zero and all writes are discarded. POWER does something a little like 428that, but r0 is only special in certain argument positions for certain 429instructions. You just have to read the manual to know which they are. 430 431 432Delocation is easier than Intel because there's just TOC references, but it's 433also harder because there's no IP-relative addressing. 434 435Jumps are IP-relative however, and have a 24-bit immediate value. So we can 436jump to functions that set a register to the needed value. (r3 is the 437return-value register and so that's what is generally used here.) */ 438 439// isPPC64LEAPair recognises an addis+addi pair that's adding the offset of 440// source to relative and writing the result to target. 
441func (d *delocation) isPPC64LEAPair(statement *node32) (target, source, relative string, ok bool) { 442 instruction := skipWS(statement.up).up 443 assertNodeType(instruction, ruleInstructionName) 444 name1 := d.contents(instruction) 445 args1 := instructionArgs(instruction.next) 446 447 statement = statement.next 448 instruction = skipWS(statement.up).up 449 assertNodeType(instruction, ruleInstructionName) 450 name2 := d.contents(instruction) 451 args2 := instructionArgs(instruction.next) 452 453 if name1 != "addis" || 454 len(args1) != 3 || 455 name2 != "addi" || 456 len(args2) != 3 { 457 return "", "", "", false 458 } 459 460 target = d.contents(args1[0]) 461 relative = d.contents(args1[1]) 462 source1 := d.contents(args1[2]) 463 source2 := d.contents(args2[2]) 464 465 if !strings.HasSuffix(source1, "@ha") || 466 !strings.HasSuffix(source2, "@l") || 467 source1[:len(source1)-3] != source2[:len(source2)-2] || 468 d.contents(args2[0]) != target || 469 d.contents(args2[1]) != target { 470 return "", "", "", false 471 } 472 473 source = source1[:len(source1)-3] 474 ok = true 475 return 476} 477 478// establishTOC writes the global entry prelude for a function. The standard 479// prelude involves relocations so this version moves the relocation outside 480// the integrity-checked area. 481func establishTOC(w stringWriter) { 482 w.WriteString("999:\n") 483 w.WriteString("\taddis 2, 12, .LBORINGSSL_external_toc-999b@ha\n") 484 w.WriteString("\taddi 2, 2, .LBORINGSSL_external_toc-999b@l\n") 485 w.WriteString("\tld 12, 0(2)\n") 486 w.WriteString("\tadd 2, 2, 12\n") 487} 488 489// loadTOCFuncName returns the name of a synthesized function that sets r3 to 490// the value of “symbol+offset”. 
func loadTOCFuncName(symbol, offset string) string {
	// Dots in the symbol and signs in the offset are spelled out
	// ("_dot_", "_plus_", "_minus_") when forming the function name.
	symbol = strings.Replace(symbol, ".", "_dot_", -1)
	ret := ".Lbcm_loadtoc_" + symbol
	if len(offset) != 0 {
		offset = strings.Replace(offset, "+", "_plus_", -1)
		offset = strings.Replace(offset, "-", "_minus_", -1)
		ret += "_" + offset
	}
	return ret
}

// loadFromTOC records that a TOC loader function is needed for symbol+offset
// and returns a wrapper which, before running the wrapped code, emits
// instructions that call that loader and move the result into the register
// dest. The link register and (unless dest is r3) r3 are saved and restored
// around the call.
func (d *delocation) loadFromTOC(w stringWriter, symbol, offset, dest string) wrapperFunc {
	// The key is symbol and offset separated by a NUL byte.
	d.tocLoaders[symbol+"\x00"+offset] = struct{}{}

	return func(k func()) {
		w.WriteString("\taddi 1, 1, -288\n")   // Clear the red zone.
		w.WriteString("\tmflr " + dest + "\n") // Stash the link register.
		w.WriteString("\tstd " + dest + ", -8(1)\n")
		// The TOC loader will use r3, so stash it if necessary.
		if dest != "3" {
			w.WriteString("\tstd 3, -16(1)\n")
		}

		// Because loadTOCFuncName returns a “.L” name, we don't need a
		// nop after this call.
		w.WriteString("\tbl " + loadTOCFuncName(symbol, offset) + "\n")

		// Cycle registers around. We need r3 -> destReg, -8(1) ->
		// lr and, optionally, -16(1) -> r3.
		w.WriteString("\tstd 3, -24(1)\n")
		w.WriteString("\tld 3, -8(1)\n")
		w.WriteString("\tmtlr 3\n")
		w.WriteString("\tld " + dest + ", -24(1)\n")
		if dest != "3" {
			w.WriteString("\tld 3, -16(1)\n")
		}
		// Undo the red-zone adjustment made at the top.
		w.WriteString("\taddi 1, 1, 288\n")

		k()
	}
}

// gatherOffsets consumes consecutive Offset nodes starting at symRef and
// appends their text to offsets, prefixing “+” to any offset that doesn't
// already begin with a sign. It returns the first node that is not an Offset
// (possibly nil) and the accumulated offset string.
func (d *delocation) gatherOffsets(symRef *node32, offsets string) (*node32, string) {
	for symRef != nil && symRef.pegRule == ruleOffset {
		offset := d.contents(symRef)
		if offset[0] != '+' && offset[0] != '-' {
			offset = "+" + offset
		}
		offsets = offsets + offset
		symRef = symRef.next
	}
	return symRef, offsets
}

// parseMemRef decomposes the SymbolRef at the head of a memory reference.
//
// If memRef is not a SymbolRef, all results are zero values and nextRef is
// memRef itself. Otherwise symbol is the referenced symbol (mapped via
// mapLocalSymbol if it is a local symbol, with didChange reporting whether
// the mapping altered it), offset is the concatenation of all offsets found
// around the symbol, section is the text of any Section node, symbolIsLocal
// reports whether the symbol was a local symbol, and nextRef is the node
// following the SymbolRef. It panics if an unexpected node remains inside the
// SymbolRef.
func (d *delocation) parseMemRef(memRef *node32) (symbol, offset, section string, didChange, symbolIsLocal bool, nextRef *node32) {
	if memRef.pegRule != ruleSymbolRef {
		return "", "", "", false, false, memRef
	}

	symRef := memRef.up
	nextRef = memRef.next

	// (Offset* '+')?
	symRef, offset = d.gatherOffsets(symRef, offset)

	// (LocalSymbol / SymbolName)
	symbol = d.contents(symRef)
	if symRef.pegRule == ruleLocalSymbol {
		symbolIsLocal = true
		mapped := d.mapLocalSymbol(symbol)
		if mapped != symbol {
			symbol = mapped
			didChange = true
		}
	}
	symRef = symRef.next

	// Offset*
	symRef, offset = d.gatherOffsets(symRef, offset)

	// ('@' Section / Offset*)?
	if symRef != nil {
		assertNodeType(symRef, ruleSection)
		section = d.contents(symRef)
		symRef = symRef.next

		symRef, offset = d.gatherOffsets(symRef, offset)
	}

	if symRef != nil {
		panic(fmt.Sprintf("unexpected token in SymbolRef: %q", rul3s[symRef.pegRule]))
	}

	return
}

// processPPCInstruction rewrites a single ppc64le instruction so that it no
// longer needs relocations: references to module symbols are redirected to
// their local names, calls to unknown symbols go via redirector functions,
// TOC-relative address computations are replaced by calls to synthesized
// loader functions, and the global-entry TOC preamble is replaced by the
// output of establishTOC.
//
// It returns the last statement consumed (the preamble case consumes two) or
// an error if the instruction uses a pattern that can't be handled.
func (d *delocation) processPPCInstruction(statement, instruction *node32) (*node32, error) {
	assertNodeType(instruction, ruleInstructionName)
	instructionName := d.contents(instruction)
	// Any mnemonic starting with 'b' is treated as a branch.
	isBranch := instructionName[0] == 'b'

	argNodes := instructionArgs(instruction.next)

	// wrappers accumulates code to emit around the (possibly rewritten)
	// instruction; args accumulates the rewritten argument strings.
	var wrappers wrapperStack
	var args []string
	changed := false

Args:
	for i, arg := range argNodes {
		fullArg := arg
		isIndirect := false

		if arg.pegRule == ruleIndirectionIndicator {
			arg = arg.next
			isIndirect = true
		}

		switch arg.pegRule {
		case ruleRegisterOrConstant, ruleLocalLabelRef:
			args = append(args, d.contents(fullArg))

		case ruleTOCRefLow:
			return nil, errors.New("Found low TOC reference outside preamble pattern")

		case ruleTOCRefHigh:
			target, _, relative, ok := d.isPPC64LEAPair(statement)
			if !ok {
				return nil, errors.New("Found high TOC reference outside preamble pattern")
			}

			if relative != "12" {
				return nil, fmt.Errorf("preamble is relative to %q, not r12", relative)
			}

			if target != "2" {
				return nil, fmt.Errorf("preamble is setting %q, not r2", target)
			}

			// The pair spans two statements; consume the second and
			// replace both with the relocation-free preamble.
			statement = statement.next
			establishTOC(d.output)
			instructionName = ""
			changed = true
			break Args

		case ruleMemoryRef:
			symbol, offset, section, didChange, symbolIsLocal, memRef := d.parseMemRef(arg.up)
			// NOTE(review): this assignment overwrites any value of
			// changed set while processing an earlier argument —
			// confirm that is intended.
			changed = didChange

			if len(symbol) > 0 {
				if _, localEntrySymbol := d.localEntrySymbols[symbol]; localEntrySymbol && isBranch {
					symbol = localEntryName(symbol)
					changed = true
				} else if _, knownSymbol := d.symbols[symbol]; knownSymbol {
					symbol = localTargetName(symbol)
					changed = true
				} else if !symbolIsLocal && !isSynthesized(symbol) && len(section) == 0 {
					changed = true
					d.redirectors[symbol] = redirectorName(symbol)
					symbol = redirectorName(symbol)
					// TODO(davidben): This should sanity-check the next
					// instruction is a nop and ideally remove it.
					wrappers = append(wrappers, func(k func()) {
						k()
						// Like the linker's PLT stubs, redirector functions
						// expect callers to restore r2.
						d.output.WriteString("\tld 2, 24(1)\n")
					})
				}
			}

			switch section {
			case "":

			case "tls":
				// This section identifier just tells the
				// assembler to use r13, the pointer to the
				// thread-local data [PABI;3.7.3.3].

			case "toc@ha":
				// Delete toc@ha instructions. Per
				// [PABI;3.6.3], the linker is allowed to erase
				// toc@ha instructions. We take advantage of
				// this by unconditionally erasing the toc@ha
				// instructions and doing the full lookup when
				// processing toc@l.
				//
				// Note that any offset here applies before @ha
				// and @l. That is, 42+foo@toc@ha is
				// #ha(42+foo-.TOC.), not 42+#ha(foo-.TOC.). Any
				// corresponding toc@l references are required
				// by the ABI to have the same offset. The
				// offset will be incorporated in full when
				// those are processed.
				if instructionName != "addis" || len(argNodes) != 3 || i != 2 || args[1] != "2" {
					return nil, errors.New("can't process toc@ha reference")
				}
				changed = true
				instructionName = ""
				break Args

			case "toc@l":
				// Per [PABI;3.6.3], this instruction must take
				// as input a register which was the output of
				// a toc@ha computation and compute the actual
				// address of some symbol. The toc@ha
				// computation was elided, so we ignore that
				// input register and compute the address
				// directly.
				changed = true

				// For all supported toc@l instructions, the
				// destination register is the first argument.
				destReg := args[0]

				wrappers = append(wrappers, d.loadFromTOC(d.output, symbol, offset, destReg))
				switch instructionName {
				case "addi":
					// The original instruction was:
					//   addi destReg, tocHaReg, offset+symbol@toc@l
					instructionName = ""

				case "ld", "lhz", "lwz":
					// The original instruction was:
					//   l?? destReg, offset+symbol@toc@l(tocHaReg)
					//
					// We transform that into the
					// equivalent dereference of destReg:
					//   l?? destReg, 0(destReg)
					origInstructionName := instructionName
					instructionName = ""

					assertNodeType(memRef, ruleBaseIndexScale)
					assertNodeType(memRef.up, ruleRegisterOrConstant)
					if memRef.next != nil || memRef.up.next != nil {
						return nil, errors.New("expected single register in BaseIndexScale for ld argument")
					}

					baseReg := destReg
					if baseReg == "0" {
						// Register zero is special as the base register for a load.
						// Avoid it by spilling and using r3 instead.
						baseReg = "3"
						wrappers = append(wrappers, func(k func()) {
							d.output.WriteString("\taddi 1, 1, -288\n") // Clear the red zone.
							d.output.WriteString("\tstd " + baseReg + ", -8(1)\n")
							d.output.WriteString("\tmr " + baseReg + ", " + destReg + "\n")
							k()
							d.output.WriteString("\tld " + baseReg + ", -8(1)\n")
							d.output.WriteString("\taddi 1, 1, 288\n") // Undo the red-zone adjustment.
						})
					}

					// This wrapper emits the load itself and
					// deliberately never calls k.
					wrappers = append(wrappers, func(k func()) {
						d.output.WriteString("\t" + origInstructionName + " " + destReg + ", 0(" + baseReg + ")\n")
					})
				default:
					return nil, fmt.Errorf("can't process TOC argument to %q", instructionName)
				}

			default:
				return nil, fmt.Errorf("Unknown section type %q", section)
			}

			// Reassemble the textual form of the (possibly renamed)
			// reference: [*]symbol[offset][@section][rest of memref].
			argStr := ""
			if isIndirect {
				argStr += "*"
			}
			argStr += symbol
			if len(offset) > 0 {
				argStr += offset
			}
			if len(section) > 0 {
				argStr += "@"
				argStr += section
			}

			for ; memRef != nil; memRef = memRef.next {
				argStr += d.contents(memRef)
			}

			args = append(args, argStr)

		default:
			panic(fmt.Sprintf("unknown instruction argument type %q", rul3s[arg.pegRule]))
		}
	}

	if changed {
		d.writeCommentedNode(statement)

		// An empty instructionName means the instruction itself was
		// elided and only the wrappers' output is emitted.
		var replacement string
		if len(instructionName) > 0 {
			replacement = "\t" + instructionName + "\t" + strings.Join(args, ", ") + "\n"
		}

		wrappers.do(func() {
			d.output.WriteString(replacement)
		})
	} else {
		d.writeNode(statement)
	}

	return statement, nil
}

/* Intel */

// instructionType is the classification that classifyInstruction assigns to
// an Intel instruction.
type instructionType int

const (
	instrPush instructionType = iota
	instrMove
	// instrTransformingMove is essentially a move, but it performs some
	// transformation of the data during the process.
	instrTransformingMove
	instrJump
	instrConditionalMove
	// instrCombine merges the source and destination in some fashion, for example
	// a 2-operand bitwise operation.
	instrCombine
	// instrThreeArg merges two sources into a destination in some fashion.
	instrThreeArg
	// instrCompare takes two arguments and writes outputs to the flags register.
	instrCompare
	instrOther
)

// classifyInstruction returns the instructionType for the mnemonic instr. A
// recognised mnemonic is only classified as its specific type when it has the
// expected number of arguments; everything else is instrOther.
func classifyInstruction(instr string, args []*node32) instructionType {
	switch instr {
	case "push", "pushq":
		if len(args) == 1 {
			return instrPush
		}

	case "mov", "movq", "vmovq", "movsd", "vmovsd":
		if len(args) == 2 {
			return instrMove
		}

	case "cmovneq", "cmoveq":
		if len(args) == 2 {
			return instrConditionalMove
		}

	case "call", "callq", "jmp", "jo", "jno", "js", "jns", "je", "jz", "jne", "jnz", "jb", "jnae", "jc", "jnb", "jae", "jnc", "jbe", "jna", "ja", "jnbe", "jl", "jnge", "jge", "jnl", "jle", "jng", "jg", "jnle", "jp", "jpe", "jnp", "jpo":
		if len(args) == 1 {
			return instrJump
		}

	case "orq", "andq", "xorq":
		if len(args) == 2 {
			return instrCombine
		}

	case "cmpq":
		if len(args) == 2 {
			return instrCompare
		}

	case "sarxq", "shlxq", "shrxq":
		if len(args) == 3 {
			return instrThreeArg
		}

	case "vpbroadcastq":
		if len(args) == 2 {
			return instrTransformingMove
		}
	}

	return instrOther
}

// push returns a wrapper that emits “pushq %rax” before the wrapped code and
// “xchg %rax, (%rsp)” after it.
func push(w stringWriter) wrapperFunc {
	return func(k func()) {
		w.WriteString("\tpushq %rax\n")
		k()
		w.WriteString("\txchg %rax, (%rsp)\n")
	}
}

// compare returns a wrapper that emits “instr a, b” after the wrapped code.
func compare(w stringWriter, instr, a, b string) wrapperFunc {
	return func(k func()) {
		k()
		w.WriteString(fmt.Sprintf("\t%s %s, %s\n", instr, a, b))
	}
}

// loadFromGOT records that a delta symbol is needed for symbol@section and
// returns a wrapper that emits code loading a value into destination via the
// symbol named “<symbol>_<section>_external”. The flags are saved and restored
// around the sequence and, unless redzoneCleared is true, the stack pointer is
// moved past the red zone first.
//
// Note that the returned wrapper does not invoke the function it wraps.
func (d *delocation) loadFromGOT(w stringWriter, destination, symbol, section string, redzoneCleared bool) wrapperFunc {
	d.gotExternalsNeeded[symbol+"@"+section] = struct{}{}

	return func(k func()) {
		if !redzoneCleared {
			w.WriteString("\tleaq -128(%rsp), %rsp\n") // Clear the red zone.
		}
		w.WriteString("\tpushf\n")
		w.WriteString(fmt.Sprintf("\tleaq %s_%s_external(%%rip), %s\n", symbol, section, destination))
		w.WriteString(fmt.Sprintf("\taddq (%s), %s\n", destination, destination))
		w.WriteString(fmt.Sprintf("\tmovq (%s), %s\n", destination, destination))
		w.WriteString("\tpopf\n")
		if !redzoneCleared {
			w.WriteString("\tleaq\t128(%rsp), %rsp\n")
		}
	}
}

// saveFlags returns a wrapper that emits pushfq/popfq around the wrapped
// code. Unless redzoneCleared is true, it also moves the stack pointer past
// the red zone beforehand and restores it afterwards.
func saveFlags(w stringWriter, redzoneCleared bool) wrapperFunc {
	return func(k func()) {
		if !redzoneCleared {
			w.WriteString("\tleaq -128(%rsp), %rsp\n") // Clear the red zone.
			// Deferred so that it is emitted after popfq below.
			defer w.WriteString("\tleaq 128(%rsp), %rsp\n")
		}
		w.WriteString("\tpushfq\n")
		k()
		w.WriteString("\tpopfq\n")
	}
}

// saveRegister picks the first of %rax, %rbx, %rcx and %rdx that doesn't
// appear in avoidRegs and returns it together with a wrapper that moves the
// stack pointer past the red zone and pushes/pops that register around the
// wrapped code. It panics if all four candidates are excluded.
func saveRegister(w stringWriter, avoidRegs []string) (wrapperFunc, string) {
	candidates := []string{"%rax", "%rbx", "%rcx", "%rdx"}

	var reg string
NextCandidate:
	for _, candidate := range candidates {
		for _, avoid := range avoidRegs {
			if candidate == avoid {
				continue NextCandidate
			}
		}

		reg = candidate
		break
	}

	if len(reg) == 0 {
		panic("too many excluded registers")
	}

	return func(k func()) {
		w.WriteString("\tleaq -128(%rsp), %rsp\n") // Clear the red zone.
		w.WriteString("\tpushq " + reg + "\n")
		k()
		w.WriteString("\tpopq " + reg + "\n")
		w.WriteString("\tleaq 128(%rsp), %rsp\n")
	}, reg
}

// moveTo returns a wrapper that, after the wrapped code, emits a movq (vmovq
// when isAVX is true) from source to target.
func moveTo(w stringWriter, target string, isAVX bool, source string) wrapperFunc {
	return func(k func()) {
		k()
		prefix := ""
		if isAVX {
			prefix = "v"
		}
		w.WriteString("\t" + prefix + "movq " + source + ", " + target + "\n")
	}
}

// finalTransform returns a wrapper that, after the wrapped code, emits
// transformInstruction with reg as both of its operands.
func finalTransform(w stringWriter, transformInstruction, reg string) wrapperFunc {
	return func(k func()) {
		k()
		w.WriteString("\t" + transformInstruction + " " + reg + ", " + reg + "\n")
	}
}

// combineOp returns a wrapper that, after the wrapped code, emits
// “instructionName source, dest”.
func combineOp(w stringWriter, instructionName, source, dest string) wrapperFunc {
	return func(k func()) {
		k()
		w.WriteString("\t" + instructionName + " " + source + ", " + dest + "\n")
	}
}

// threeArgCombineOp returns a wrapper that, after the wrapped code, emits
// “instructionName source1, source2, dest”.
func threeArgCombineOp(w stringWriter, instructionName, source1, source2, dest string) wrapperFunc {
	return func(k func()) {
		k()
		w.WriteString("\t" + instructionName + " " + source1 + ", " + source2 + ", " + dest + "\n")
	}
}

// isValidLEATarget reports whether reg can be the destination of an lea
// instruction, i.e. it is not an %xmm, %ymm or %zmm register.
func isValidLEATarget(reg string) bool {
	return !strings.HasPrefix(reg, "%xmm") && !strings.HasPrefix(reg, "%ymm") && !strings.HasPrefix(reg, "%zmm")
}

// undoConditionalMove returns a wrapper that guards the wrapped code with a
// jump on the inverse of the condition of instr, so that the wrapped code is
// skipped when the conditional move wouldn't have happened. Only cmoveq and
// cmovneq are supported; anything else panics.
func undoConditionalMove(w stringWriter, instr string) wrapperFunc {
	var invertedCondition string

	switch instr {
	case "cmoveq":
		invertedCondition = "ne"
	case "cmovneq":
		invertedCondition = "e"
	default:
		panic(fmt.Sprintf("don't know how to handle conditional move instruction %q", instr))
	}

	return func(k func()) {
		w.WriteString("\tj" + invertedCondition + " 999f\n")
		k()
		w.WriteString("999:\n")
	}
}

// isRIPRelative reports whether node is a BaseIndexScale node whose text is
// exactly “(%rip)”.
func (d *delocation) isRIPRelative(node *node32) bool {
	return node != nil && node.pegRule == ruleBaseIndexScale && d.contents(node) == "(%rip)"
}

func (d *delocation) processIntelInstruction(statement, instruction *node32) (*node32,
error) { 998 assertNodeType(instruction, ruleInstructionName) 999 instructionName := d.contents(instruction) 1000 1001 argNodes := instructionArgs(instruction.next) 1002 1003 var wrappers wrapperStack 1004 var args []string 1005 changed := false 1006 1007Args: 1008 for i, arg := range argNodes { 1009 fullArg := arg 1010 isIndirect := false 1011 1012 if arg.pegRule == ruleIndirectionIndicator { 1013 arg = arg.next 1014 isIndirect = true 1015 } 1016 1017 switch arg.pegRule { 1018 case ruleRegisterOrConstant, ruleLocalLabelRef: 1019 args = append(args, d.contents(fullArg)) 1020 1021 case ruleMemoryRef: 1022 symbol, offset, section, didChange, symbolIsLocal, memRef := d.parseMemRef(arg.up) 1023 changed = didChange 1024 1025 if symbol == "OPENSSL_ia32cap_P" && section == "" { 1026 if instructionName != "leaq" { 1027 return nil, fmt.Errorf("non-leaq instruction %q referenced OPENSSL_ia32cap_P directly", instructionName) 1028 } 1029 1030 if i != 0 || len(argNodes) != 2 || !d.isRIPRelative(memRef) || len(offset) > 0 { 1031 return nil, fmt.Errorf("invalid OPENSSL_ia32cap_P reference in instruction %q", instructionName) 1032 } 1033 1034 target := argNodes[1] 1035 assertNodeType(target, ruleRegisterOrConstant) 1036 reg := d.contents(target) 1037 1038 if !strings.HasPrefix(reg, "%r") { 1039 return nil, fmt.Errorf("tried to load OPENSSL_ia32cap_P into %q, which is not a standard register.", reg) 1040 } 1041 1042 changed = true 1043 1044 // Flag-altering instructions (i.e. addq) are going to be used so the 1045 // flags need to be preserved. 
1046 wrappers = append(wrappers, saveFlags(d.output, false /* Red Zone not yet cleared */)) 1047 1048 wrappers = append(wrappers, func(k func()) { 1049 d.output.WriteString("\tleaq\tOPENSSL_ia32cap_addr_delta(%rip), " + reg + "\n") 1050 d.output.WriteString("\taddq\t(" + reg + "), " + reg + "\n") 1051 }) 1052 1053 break Args 1054 } 1055 1056 switch section { 1057 case "": 1058 if _, knownSymbol := d.symbols[symbol]; knownSymbol { 1059 symbol = localTargetName(symbol) 1060 changed = true 1061 } 1062 1063 case "PLT": 1064 if classifyInstruction(instructionName, argNodes) != instrJump { 1065 return nil, fmt.Errorf("Cannot rewrite PLT reference for non-jump instruction %q", instructionName) 1066 } 1067 1068 if _, knownSymbol := d.symbols[symbol]; knownSymbol { 1069 symbol = localTargetName(symbol) 1070 changed = true 1071 } else if !symbolIsLocal && !isSynthesized(symbol) { 1072 // Unknown symbol via PLT is an 1073 // out-call from the module, e.g. 1074 // memcpy. 1075 d.redirectors[symbol+"@"+section] = redirectorName(symbol) 1076 symbol = redirectorName(symbol) 1077 } 1078 1079 changed = true 1080 1081 case "GOTPCREL": 1082 if len(offset) > 0 { 1083 return nil, errors.New("loading from GOT with offset is unsupported") 1084 } 1085 if !d.isRIPRelative(memRef) { 1086 return nil, errors.New("GOT access must be IP-relative") 1087 } 1088 1089 useGOT := false 1090 if _, knownSymbol := d.symbols[symbol]; knownSymbol { 1091 symbol = localTargetName(symbol) 1092 changed = true 1093 } else if !isSynthesized(symbol) { 1094 useGOT = true 1095 } 1096 1097 classification := classifyInstruction(instructionName, argNodes) 1098 if classification != instrThreeArg && classification != instrCompare && i != 0 { 1099 return nil, errors.New("GOT access must be source operand") 1100 } 1101 1102 // Reduce the instruction to movq symbol@GOTPCREL, targetReg. 
1103 var targetReg string 1104 var redzoneCleared bool 1105 switch classification { 1106 case instrPush: 1107 wrappers = append(wrappers, push(d.output)) 1108 targetReg = "%rax" 1109 case instrConditionalMove: 1110 wrappers = append(wrappers, undoConditionalMove(d.output, instructionName)) 1111 fallthrough 1112 case instrMove: 1113 assertNodeType(argNodes[1], ruleRegisterOrConstant) 1114 targetReg = d.contents(argNodes[1]) 1115 case instrCompare: 1116 otherSource := d.contents(argNodes[i^1]) 1117 saveRegWrapper, tempReg := saveRegister(d.output, []string{otherSource}) 1118 redzoneCleared = true 1119 wrappers = append(wrappers, saveRegWrapper) 1120 if i == 0 { 1121 wrappers = append(wrappers, compare(d.output, instructionName, tempReg, otherSource)) 1122 } else { 1123 wrappers = append(wrappers, compare(d.output, instructionName, otherSource, tempReg)) 1124 } 1125 targetReg = tempReg 1126 case instrTransformingMove: 1127 assertNodeType(argNodes[1], ruleRegisterOrConstant) 1128 targetReg = d.contents(argNodes[1]) 1129 wrappers = append(wrappers, finalTransform(d.output, instructionName, targetReg)) 1130 if isValidLEATarget(targetReg) { 1131 return nil, errors.New("Currently transforming moves are assumed to target XMM registers. 
Otherwise we'll pop %rax before reading it to do the transform.") 1132 } 1133 case instrCombine: 1134 targetReg = d.contents(argNodes[1]) 1135 if !isValidLEATarget(targetReg) { 1136 return nil, fmt.Errorf("cannot handle combining instructions targeting non-general registers") 1137 } 1138 saveRegWrapper, tempReg := saveRegister(d.output, []string{targetReg}) 1139 redzoneCleared = true 1140 wrappers = append(wrappers, saveRegWrapper) 1141 1142 wrappers = append(wrappers, combineOp(d.output, instructionName, tempReg, targetReg)) 1143 targetReg = tempReg 1144 case instrThreeArg: 1145 if n := len(argNodes); n != 3 { 1146 return nil, fmt.Errorf("three-argument instruction has %d arguments", n) 1147 } 1148 if i != 0 && i != 1 { 1149 return nil, errors.New("GOT access must be from source operand") 1150 } 1151 targetReg = d.contents(argNodes[2]) 1152 1153 otherSource := d.contents(argNodes[1]) 1154 if i == 1 { 1155 otherSource = d.contents(argNodes[0]) 1156 } 1157 1158 saveRegWrapper, tempReg := saveRegister(d.output, []string{targetReg, otherSource}) 1159 redzoneCleared = true 1160 wrappers = append(wrappers, saveRegWrapper) 1161 1162 if i == 0 { 1163 wrappers = append(wrappers, threeArgCombineOp(d.output, instructionName, tempReg, otherSource, targetReg)) 1164 } else { 1165 wrappers = append(wrappers, threeArgCombineOp(d.output, instructionName, otherSource, tempReg, targetReg)) 1166 } 1167 targetReg = tempReg 1168 default: 1169 return nil, fmt.Errorf("Cannot rewrite GOTPCREL reference for instruction %q", instructionName) 1170 } 1171 1172 if !isValidLEATarget(targetReg) { 1173 // Sometimes the compiler will load from the GOT to an 1174 // XMM register, which is not a valid target of an LEA 1175 // instruction. 
1176 saveRegWrapper, tempReg := saveRegister(d.output, nil) 1177 wrappers = append(wrappers, saveRegWrapper) 1178 isAVX := strings.HasPrefix(instructionName, "v") 1179 wrappers = append(wrappers, moveTo(d.output, targetReg, isAVX, tempReg)) 1180 targetReg = tempReg 1181 if redzoneCleared { 1182 return nil, fmt.Errorf("internal error: Red Zone was already cleared") 1183 } 1184 redzoneCleared = true 1185 } 1186 1187 if symbol == "OPENSSL_ia32cap_P" { 1188 // Flag-altering instructions (i.e. addq) are going to be used so the 1189 // flags need to be preserved. 1190 wrappers = append(wrappers, saveFlags(d.output, redzoneCleared)) 1191 wrappers = append(wrappers, func(k func()) { 1192 d.output.WriteString("\tleaq\tOPENSSL_ia32cap_addr_delta(%rip), " + targetReg + "\n") 1193 d.output.WriteString("\taddq\t(" + targetReg + "), " + targetReg + "\n") 1194 }) 1195 } else if useGOT { 1196 wrappers = append(wrappers, d.loadFromGOT(d.output, targetReg, symbol, section, redzoneCleared)) 1197 } else { 1198 wrappers = append(wrappers, func(k func()) { 1199 d.output.WriteString(fmt.Sprintf("\tleaq\t%s(%%rip), %s\n", symbol, targetReg)) 1200 }) 1201 } 1202 changed = true 1203 break Args 1204 1205 default: 1206 return nil, fmt.Errorf("Unknown section type %q", section) 1207 } 1208 1209 if !changed && len(section) > 0 { 1210 panic("section was not handled") 1211 } 1212 section = "" 1213 1214 argStr := "" 1215 if isIndirect { 1216 argStr += "*" 1217 } 1218 argStr += symbol 1219 argStr += offset 1220 1221 for ; memRef != nil; memRef = memRef.next { 1222 argStr += d.contents(memRef) 1223 } 1224 1225 args = append(args, argStr) 1226 1227 case ruleGOTLocation: 1228 if instructionName != "movabsq" { 1229 return nil, fmt.Errorf("_GLOBAL_OFFSET_TABLE_ lookup didn't use movabsq") 1230 } 1231 if i != 0 || len(argNodes) != 2 { 1232 return nil, fmt.Errorf("movabs of _GLOBAL_OFFSET_TABLE_ didn't expected form") 1233 } 1234 1235 d.gotDeltaNeeded = true 1236 changed = true 1237 instructionName = 
"movq" 1238 assertNodeType(arg.up, ruleLocalSymbol) 1239 baseSymbol := d.mapLocalSymbol(d.contents(arg.up)) 1240 targetReg := d.contents(argNodes[1]) 1241 args = append(args, ".Lboringssl_got_delta(%rip)") 1242 wrappers = append(wrappers, func(k func()) { 1243 k() 1244 d.output.WriteString(fmt.Sprintf("\taddq $.Lboringssl_got_delta-%s, %s\n", baseSymbol, targetReg)) 1245 }) 1246 1247 case ruleGOTSymbolOffset: 1248 if instructionName != "movabsq" { 1249 return nil, fmt.Errorf("_GLOBAL_OFFSET_TABLE_ offset didn't use movabsq") 1250 } 1251 if i != 0 || len(argNodes) != 2 { 1252 return nil, fmt.Errorf("movabs of _GLOBAL_OFFSET_TABLE_ offset didn't have expected form") 1253 } 1254 1255 assertNodeType(arg.up, ruleSymbolName) 1256 symbol := d.contents(arg.up) 1257 if strings.HasPrefix(symbol, ".L") { 1258 symbol = d.mapLocalSymbol(symbol) 1259 } 1260 targetReg := d.contents(argNodes[1]) 1261 1262 var prefix string 1263 isGOTOFF := strings.HasSuffix(d.contents(arg), "@GOTOFF") 1264 if isGOTOFF { 1265 prefix = "gotoff" 1266 d.gotOffOffsetsNeeded[symbol] = struct{}{} 1267 } else { 1268 prefix = "got" 1269 d.gotOffsetsNeeded[symbol] = struct{}{} 1270 } 1271 changed = true 1272 1273 wrappers = append(wrappers, func(k func()) { 1274 // Even if one tries to use 32-bit GOT offsets, Clang's linker (at the time 1275 // of writing) emits 64-bit relocations anyway, so the following four bytes 1276 // get stomped. Thus we use 64-bit offsets. 
1277 d.output.WriteString(fmt.Sprintf("\tmovq .Lboringssl_%s_%s(%%rip), %s\n", prefix, symbol, targetReg)) 1278 }) 1279 1280 default: 1281 panic(fmt.Sprintf("unknown instruction argument type %q", rul3s[arg.pegRule])) 1282 } 1283 } 1284 1285 if changed { 1286 d.writeCommentedNode(statement) 1287 replacement := "\t" + instructionName + "\t" + strings.Join(args, ", ") + "\n" 1288 wrappers.do(func() { 1289 d.output.WriteString(replacement) 1290 }) 1291 } else { 1292 d.writeNode(statement) 1293 } 1294 1295 return statement, nil 1296} 1297 1298func (d *delocation) handleBSS(statement *node32) (*node32, error) { 1299 lastStatement := statement 1300 for statement = statement.next; statement != nil; lastStatement, statement = statement, statement.next { 1301 node := skipWS(statement.up) 1302 if node == nil { 1303 d.writeNode(statement) 1304 continue 1305 } 1306 1307 switch node.pegRule { 1308 case ruleGlobalDirective, ruleComment, ruleInstruction, ruleLocationDirective: 1309 d.writeNode(statement) 1310 1311 case ruleDirective: 1312 directive := node.up 1313 assertNodeType(directive, ruleDirectiveName) 1314 directiveName := d.contents(directive) 1315 if directiveName == "text" || directiveName == "section" || directiveName == "data" { 1316 return lastStatement, nil 1317 } 1318 d.writeNode(statement) 1319 1320 case ruleLabel: 1321 label := node.up 1322 d.writeNode(statement) 1323 1324 if label.pegRule != ruleLocalSymbol { 1325 symbol := d.contents(label) 1326 localSymbol := localTargetName(symbol) 1327 d.output.WriteString(fmt.Sprintf("\n%s:\n", localSymbol)) 1328 1329 d.bssAccessorsNeeded[symbol] = localSymbol 1330 } 1331 1332 case ruleLabelContainingDirective: 1333 var err error 1334 statement, err = d.processLabelContainingDirective(statement, node.up) 1335 if err != nil { 1336 return nil, err 1337 } 1338 1339 default: 1340 return nil, fmt.Errorf("unknown BSS statement type %q in %q", rul3s[node.pegRule], d.contents(statement)) 1341 } 1342 } 1343 1344 return 
lastStatement, nil 1345} 1346 1347func transform(w stringWriter, inputs []inputFile) error { 1348 // symbols contains all defined symbols. 1349 symbols := make(map[string]struct{}) 1350 // localEntrySymbols contains all symbols with a .localentry directive. 1351 localEntrySymbols := make(map[string]struct{}) 1352 // fileNumbers is the set of IDs seen in .file directives. 1353 fileNumbers := make(map[int]struct{}) 1354 // maxObservedFileNumber contains the largest seen file number in a 1355 // .file directive. Zero is not a valid number. 1356 maxObservedFileNumber := 0 1357 // fileDirectivesContainMD5 is true if the compiler is outputting MD5 1358 // checksums in .file directives. If it does so, then this script needs 1359 // to match that behaviour otherwise warnings result. 1360 fileDirectivesContainMD5 := false 1361 1362 // OPENSSL_ia32cap_get will be synthesized by this script. 1363 symbols["OPENSSL_ia32cap_get"] = struct{}{} 1364 1365 for _, input := range inputs { 1366 forEachPath(input.ast.up, func(node *node32) { 1367 symbol := input.contents[node.begin:node.end] 1368 if _, ok := symbols[symbol]; ok { 1369 panic(fmt.Sprintf("Duplicate symbol found: %q in %q", symbol, input.path)) 1370 } 1371 symbols[symbol] = struct{}{} 1372 }, ruleStatement, ruleLabel, ruleSymbolName) 1373 1374 forEachPath(input.ast.up, func(node *node32) { 1375 node = node.up 1376 assertNodeType(node, ruleLabelContainingDirectiveName) 1377 directive := input.contents[node.begin:node.end] 1378 if directive != ".localentry" { 1379 return 1380 } 1381 // Extract the first argument. 
1382 node = skipWS(node.next) 1383 assertNodeType(node, ruleSymbolArgs) 1384 node = node.up 1385 assertNodeType(node, ruleSymbolArg) 1386 symbol := input.contents[node.begin:node.end] 1387 if _, ok := localEntrySymbols[symbol]; ok { 1388 panic(fmt.Sprintf("Duplicate .localentry directive found: %q in %q", symbol, input.path)) 1389 } 1390 localEntrySymbols[symbol] = struct{}{} 1391 }, ruleStatement, ruleLabelContainingDirective) 1392 1393 forEachPath(input.ast.up, func(node *node32) { 1394 assertNodeType(node, ruleLocationDirective) 1395 directive := input.contents[node.begin:node.end] 1396 if !strings.HasPrefix(directive, ".file") { 1397 return 1398 } 1399 parts := strings.Fields(directive) 1400 if len(parts) == 2 { 1401 // This is a .file directive with just a 1402 // filename. Clang appears to generate just one 1403 // of these at the beginning of the output for 1404 // the compilation unit. Ignore it. 1405 return 1406 } 1407 fileNo, err := strconv.Atoi(parts[1]) 1408 if err != nil { 1409 panic(fmt.Sprintf("Failed to parse file number from .file: %q", directive)) 1410 } 1411 1412 if _, ok := fileNumbers[fileNo]; ok { 1413 panic(fmt.Sprintf("Duplicate file number %d observed", fileNo)) 1414 } 1415 fileNumbers[fileNo] = struct{}{} 1416 1417 if fileNo > maxObservedFileNumber { 1418 maxObservedFileNumber = fileNo 1419 } 1420 1421 for _, token := range parts[2:] { 1422 if token == "md5" { 1423 fileDirectivesContainMD5 = true 1424 } 1425 } 1426 }, ruleStatement, ruleLocationDirective) 1427 } 1428 1429 processor := x86_64 1430 if len(inputs) > 0 { 1431 processor = detectProcessor(inputs[0]) 1432 } 1433 1434 d := &delocation{ 1435 symbols: symbols, 1436 localEntrySymbols: localEntrySymbols, 1437 processor: processor, 1438 output: w, 1439 redirectors: make(map[string]string), 1440 bssAccessorsNeeded: make(map[string]string), 1441 tocLoaders: make(map[string]struct{}), 1442 gotExternalsNeeded: make(map[string]struct{}), 1443 gotOffsetsNeeded: make(map[string]struct{}), 
1444 gotOffOffsetsNeeded: make(map[string]struct{}), 1445 } 1446 1447 w.WriteString(".text\n") 1448 var fileTrailing string 1449 if fileDirectivesContainMD5 { 1450 fileTrailing = " md5 0x00000000000000000000000000000000" 1451 } 1452 w.WriteString(fmt.Sprintf(".file %d \"inserted_by_delocate.c\"%s\n", maxObservedFileNumber+1, fileTrailing)) 1453 w.WriteString(fmt.Sprintf(".loc %d 1 0\n", maxObservedFileNumber+1)) 1454 w.WriteString("BORINGSSL_bcm_text_start:\n") 1455 1456 for _, input := range inputs { 1457 if err := d.processInput(input); err != nil { 1458 return err 1459 } 1460 } 1461 1462 w.WriteString(".text\n") 1463 w.WriteString(fmt.Sprintf(".loc %d 2 0\n", maxObservedFileNumber+1)) 1464 w.WriteString("BORINGSSL_bcm_text_end:\n") 1465 1466 // Emit redirector functions. Each is a single jump instruction. 1467 var redirectorNames []string 1468 for name := range d.redirectors { 1469 redirectorNames = append(redirectorNames, name) 1470 } 1471 sort.Strings(redirectorNames) 1472 1473 for _, name := range redirectorNames { 1474 redirector := d.redirectors[name] 1475 if d.processor == ppc64le { 1476 w.WriteString(".section \".toc\", \"aw\"\n") 1477 w.WriteString(".Lredirector_toc_" + name + ":\n") 1478 w.WriteString(".quad " + name + "\n") 1479 w.WriteString(".text\n") 1480 w.WriteString(".type " + redirector + ", @function\n") 1481 w.WriteString(redirector + ":\n") 1482 // |name| will clobber r2, so save it. This is matched by a restore in 1483 // redirector calls. 1484 w.WriteString("\tstd 2, 24(1)\n") 1485 // Load and call |name|'s global entry point. 
1486 w.WriteString("\taddis 12, 2, .Lredirector_toc_" + name + "@toc@ha\n") 1487 w.WriteString("\tld 12, .Lredirector_toc_" + name + "@toc@l(12)\n") 1488 w.WriteString("\tmtctr 12\n") 1489 w.WriteString("\tbctr\n") 1490 } else { 1491 w.WriteString(".type " + redirector + ", @function\n") 1492 w.WriteString(redirector + ":\n") 1493 w.WriteString("\tjmp\t" + name + "\n") 1494 } 1495 } 1496 1497 var accessorNames []string 1498 for accessor := range d.bssAccessorsNeeded { 1499 accessorNames = append(accessorNames, accessor) 1500 } 1501 sort.Strings(accessorNames) 1502 1503 // Emit BSS accessor functions. Each is a single LEA followed by RET. 1504 for _, name := range accessorNames { 1505 funcName := accessorName(name) 1506 w.WriteString(".type " + funcName + ", @function\n") 1507 w.WriteString(funcName + ":\n") 1508 target := d.bssAccessorsNeeded[name] 1509 1510 if d.processor == ppc64le { 1511 w.WriteString("\taddis 3, 2, " + target + "@toc@ha\n") 1512 w.WriteString("\taddi 3, 3, " + target + "@toc@l\n") 1513 w.WriteString("\tblr\n") 1514 } else { 1515 w.WriteString("\tleaq\t" + target + "(%rip), %rax\n\tret\n") 1516 } 1517 } 1518 1519 if d.processor == ppc64le { 1520 loadTOCNames := sortedSet(d.tocLoaders) 1521 for _, symbolAndOffset := range loadTOCNames { 1522 parts := strings.SplitN(symbolAndOffset, "\x00", 2) 1523 symbol, offset := parts[0], parts[1] 1524 1525 funcName := loadTOCFuncName(symbol, offset) 1526 ref := symbol + offset 1527 1528 w.WriteString(".type " + funcName[2:] + ", @function\n") 1529 w.WriteString(funcName[2:] + ":\n") 1530 w.WriteString(funcName + ":\n") 1531 w.WriteString("\taddis 3, 2, " + ref + "@toc@ha\n") 1532 w.WriteString("\taddi 3, 3, " + ref + "@toc@l\n") 1533 w.WriteString("\tblr\n") 1534 } 1535 1536 w.WriteString(".LBORINGSSL_external_toc:\n") 1537 w.WriteString(".quad .TOC.-.LBORINGSSL_external_toc\n") 1538 } else { 1539 externalNames := sortedSet(d.gotExternalsNeeded) 1540 for _, name := range externalNames { 1541 parts := 
strings.SplitN(name, "@", 2) 1542 symbol, section := parts[0], parts[1] 1543 w.WriteString(".type " + symbol + "_" + section + "_external, @object\n") 1544 w.WriteString(".size " + symbol + "_" + section + "_external, 8\n") 1545 w.WriteString(symbol + "_" + section + "_external:\n") 1546 // Ideally this would be .quad foo@GOTPCREL, but clang's 1547 // assembler cannot emit a 64-bit GOTPCREL relocation. Instead, 1548 // we manually sign-extend the value, knowing that the GOT is 1549 // always at the end, thus foo@GOTPCREL has a positive value. 1550 w.WriteString("\t.long " + symbol + "@" + section + "\n") 1551 w.WriteString("\t.long 0\n") 1552 } 1553 1554 w.WriteString(".type OPENSSL_ia32cap_get, @function\n") 1555 w.WriteString(".globl OPENSSL_ia32cap_get\n") 1556 w.WriteString(localTargetName("OPENSSL_ia32cap_get") + ":\n") 1557 w.WriteString("OPENSSL_ia32cap_get:\n") 1558 w.WriteString("\tleaq OPENSSL_ia32cap_P(%rip), %rax\n") 1559 w.WriteString("\tret\n") 1560 1561 w.WriteString(".extern OPENSSL_ia32cap_P\n") 1562 w.WriteString(".type OPENSSL_ia32cap_addr_delta, @object\n") 1563 w.WriteString(".size OPENSSL_ia32cap_addr_delta, 8\n") 1564 w.WriteString("OPENSSL_ia32cap_addr_delta:\n") 1565 w.WriteString(".quad OPENSSL_ia32cap_P-OPENSSL_ia32cap_addr_delta\n") 1566 1567 if d.gotDeltaNeeded { 1568 w.WriteString(".Lboringssl_got_delta:\n") 1569 w.WriteString("\t.quad _GLOBAL_OFFSET_TABLE_-.Lboringssl_got_delta\n") 1570 } 1571 1572 for _, name := range sortedSet(d.gotOffsetsNeeded) { 1573 w.WriteString(".Lboringssl_got_" + name + ":\n") 1574 w.WriteString("\t.quad " + name + "@GOT\n") 1575 } 1576 for _, name := range sortedSet(d.gotOffOffsetsNeeded) { 1577 w.WriteString(".Lboringssl_gotoff_" + name + ":\n") 1578 w.WriteString("\t.quad " + name + "@GOTOFF\n") 1579 } 1580 } 1581 1582 w.WriteString(".type BORINGSSL_bcm_text_hash, @object\n") 1583 w.WriteString(".size BORINGSSL_bcm_text_hash, 64\n") 1584 w.WriteString("BORINGSSL_bcm_text_hash:\n") 1585 for _, b := range 
fipscommon.UninitHashValue { 1586 w.WriteString(".byte 0x" + strconv.FormatUint(uint64(b), 16) + "\n") 1587 } 1588 1589 return nil 1590} 1591 1592func parseInputs(inputs []inputFile) error { 1593 for i, input := range inputs { 1594 var contents string 1595 1596 if input.isArchive { 1597 arFile, err := os.Open(input.path) 1598 if err != nil { 1599 return err 1600 } 1601 defer arFile.Close() 1602 1603 ar, err := ar.ParseAR(arFile) 1604 if err != nil { 1605 return err 1606 } 1607 1608 if len(ar) != 1 { 1609 return fmt.Errorf("expected one file in archive, but found %d", len(ar)) 1610 } 1611 1612 for _, c := range ar { 1613 contents = string(c) 1614 } 1615 } else { 1616 inBytes, err := ioutil.ReadFile(input.path) 1617 if err != nil { 1618 return err 1619 } 1620 1621 contents = string(inBytes) 1622 } 1623 1624 asm := Asm{Buffer: contents, Pretty: true} 1625 asm.Init() 1626 if err := asm.Parse(); err != nil { 1627 return fmt.Errorf("error while parsing %q: %s", input.path, err) 1628 } 1629 ast := asm.AST() 1630 1631 inputs[i].contents = contents 1632 inputs[i].ast = ast 1633 } 1634 1635 return nil 1636} 1637 1638func main() { 1639 // The .a file, if given, is expected to be an archive of textual 1640 // assembly sources. That's odd, but CMake really wants to create 1641 // archive files so it's the only way that we can make it work. 
1642 arInput := flag.String("a", "", "Path to a .a file containing assembly sources") 1643 outFile := flag.String("o", "", "Path to output assembly") 1644 1645 flag.Parse() 1646 1647 if len(*outFile) == 0 { 1648 fmt.Fprintf(os.Stderr, "Must give argument to -o.\n") 1649 os.Exit(1) 1650 } 1651 1652 var inputs []inputFile 1653 if len(*arInput) > 0 { 1654 inputs = append(inputs, inputFile{ 1655 path: *arInput, 1656 index: 0, 1657 isArchive: true, 1658 }) 1659 } 1660 1661 for i, path := range flag.Args() { 1662 if len(path) == 0 { 1663 continue 1664 } 1665 1666 inputs = append(inputs, inputFile{ 1667 path: path, 1668 index: i + 1, 1669 }) 1670 } 1671 1672 if err := parseInputs(inputs); err != nil { 1673 fmt.Fprintf(os.Stderr, "%s\n", err) 1674 os.Exit(1) 1675 } 1676 1677 out, err := os.OpenFile(*outFile, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0644) 1678 if err != nil { 1679 panic(err) 1680 } 1681 defer out.Close() 1682 1683 if err := transform(out, inputs); err != nil { 1684 fmt.Fprintf(os.Stderr, "%s\n", err) 1685 os.Exit(1) 1686 } 1687} 1688 1689func forEachPath(node *node32, cb func(*node32), rules ...pegRule) { 1690 if node == nil { 1691 return 1692 } 1693 1694 if len(rules) == 0 { 1695 cb(node) 1696 return 1697 } 1698 1699 rule := rules[0] 1700 childRules := rules[1:] 1701 1702 for ; node != nil; node = node.next { 1703 if node.pegRule != rule { 1704 continue 1705 } 1706 1707 if len(childRules) == 0 { 1708 cb(node) 1709 } else { 1710 forEachPath(node.up, cb, childRules...) 
1711 } 1712 } 1713} 1714 1715func skipNodes(node *node32, ruleToSkip pegRule) *node32 { 1716 for ; node != nil && node.pegRule == ruleToSkip; node = node.next { 1717 } 1718 return node 1719} 1720 1721func skipWS(node *node32) *node32 { 1722 return skipNodes(node, ruleWS) 1723} 1724 1725func assertNodeType(node *node32, expected pegRule) { 1726 if rule := node.pegRule; rule != expected { 1727 panic(fmt.Sprintf("node was %q, but wanted %q", rul3s[rule], rul3s[expected])) 1728 } 1729} 1730 1731type wrapperFunc func(func()) 1732 1733type wrapperStack []wrapperFunc 1734 1735func (w *wrapperStack) do(baseCase func()) { 1736 if len(*w) == 0 { 1737 baseCase() 1738 return 1739 } 1740 1741 wrapper := (*w)[0] 1742 *w = (*w)[1:] 1743 wrapper(func() { w.do(baseCase) }) 1744} 1745 1746// localTargetName returns the name of the local target label for a global 1747// symbol named name. 1748func localTargetName(name string) string { 1749 return ".L" + name + "_local_target" 1750} 1751 1752func localEntryName(name string) string { 1753 return ".L" + name + "_local_entry" 1754} 1755 1756func isSynthesized(symbol string) bool { 1757 return strings.HasSuffix(symbol, "_bss_get") || 1758 symbol == "OPENSSL_ia32cap_get" || 1759 strings.HasPrefix(symbol, "BORINGSSL_bcm_text_") 1760} 1761 1762func redirectorName(symbol string) string { 1763 return "bcm_redirector_" + symbol 1764} 1765 1766// sectionType returns the type of a section. I.e. a section called “.text.foo” 1767// is a “.text” section. 1768func sectionType(section string) (string, bool) { 1769 if len(section) == 0 || section[0] != '.' { 1770 return "", false 1771 } 1772 1773 i := strings.Index(section[1:], ".") 1774 if i != -1 { 1775 section = section[:i+1] 1776 } 1777 1778 if strings.HasPrefix(section, ".debug_") { 1779 return ".debug", true 1780 } 1781 1782 return section, true 1783} 1784 1785// accessorName returns the name of the accessor function for a BSS symbol 1786// named name. 
1787func accessorName(name string) string { 1788 return name + "_bss_get" 1789} 1790 1791func (d *delocation) mapLocalSymbol(symbol string) string { 1792 if d.currentInput.index == 0 { 1793 return symbol 1794 } 1795 return symbol + "_BCM_" + strconv.Itoa(d.currentInput.index) 1796} 1797 1798func detectProcessor(input inputFile) processorType { 1799 for statement := input.ast.up; statement != nil; statement = statement.next { 1800 node := skipNodes(statement.up, ruleWS) 1801 if node == nil || node.pegRule != ruleInstruction { 1802 continue 1803 } 1804 1805 instruction := node.up 1806 instructionName := input.contents[instruction.begin:instruction.end] 1807 1808 switch instructionName { 1809 case "movq", "call", "leaq": 1810 return x86_64 1811 case "addis", "addi", "mflr": 1812 return ppc64le 1813 } 1814 } 1815 1816 panic("processed entire input and didn't recognise any instructions.") 1817} 1818 1819func sortedSet(m map[string]struct{}) []string { 1820 ret := make([]string, 0, len(m)) 1821 for key := range m { 1822 ret = append(ret, key) 1823 } 1824 sort.Strings(ret) 1825 return ret 1826} 1827