1// Copyright (c) 2017, Google Inc. 2// 3// Permission to use, copy, modify, and/or distribute this software for any 4// purpose with or without fee is hereby granted, provided that the above 5// copyright notice and this permission notice appear in all copies. 6// 7// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 8// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 9// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY 10// SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 11// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION 12// OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN 13// CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ 14 15// delocate performs several transformations of textual assembly code. See 16// crypto/fipsmodule/FIPS.md for an overview. 17package main 18 19import ( 20 "errors" 21 "flag" 22 "fmt" 23 "io/ioutil" 24 "os" 25 "sort" 26 "strconv" 27 "strings" 28 29 "boringssl.googlesource.com/boringssl/util/ar" 30 "boringssl.googlesource.com/boringssl/util/fipstools/fipscommon" 31) 32 33// inputFile represents a textual assembly file. 34type inputFile struct { 35 path string 36 // index is a unique identifer given to this file. It's used for 37 // mapping local symbols. 38 index int 39 // isArchive indicates that the input should be processed as an ar 40 // file. 41 isArchive bool 42 // contents contains the contents of the file. 43 contents string 44 // ast points to the head of the syntax tree. 45 ast *node32 46} 47 48type stringWriter interface { 49 WriteString(string) (int, error) 50} 51 52type processorType int 53 54const ( 55 ppc64le processorType = iota + 1 56 x86_64 57 aarch64 58) 59 60// delocation holds the state needed during a delocation operation. 61type delocation struct { 62 processor processorType 63 output stringWriter 64 // commentIndicator starts a comment, e.g. 
"//" or "#" 65 commentIndicator string 66 67 // symbols is the set of symbols defined in the module. 68 symbols map[string]struct{} 69 // localEntrySymbols is the set of symbols with .localentry directives. 70 localEntrySymbols map[string]struct{} 71 // redirectors maps from out-call symbol name to the name of a 72 // redirector function for that symbol. E.g. “memcpy” -> 73 // “bcm_redirector_memcpy”. 74 redirectors map[string]string 75 // bssAccessorsNeeded maps from a BSS symbol name to the symbol that 76 // should be used to reference it. E.g. “P384_data_storage” -> 77 // “P384_data_storage”. 78 bssAccessorsNeeded map[string]string 79 // tocLoaders is a set of symbol names for which TOC helper functions 80 // are required. (ppc64le only.) 81 tocLoaders map[string]struct{} 82 // gotExternalsNeeded is a set of symbol names for which we need 83 // “delta” symbols: symbols that contain the offset from their location 84 // to the memory in question. 85 gotExternalsNeeded map[string]struct{} 86 // gotDeltaNeeded is true if the code needs to load the value of 87 // _GLOBAL_OFFSET_TABLE_. 88 gotDeltaNeeded bool 89 // gotOffsetsNeeded contains the symbols whose @GOT offsets are needed. 90 gotOffsetsNeeded map[string]struct{} 91 // gotOffOffsetsNeeded contains the symbols whose @GOTOFF offsets are needed. 92 gotOffOffsetsNeeded map[string]struct{} 93 94 currentInput inputFile 95} 96 97func (d *delocation) contents(node *node32) string { 98 return d.currentInput.contents[node.begin:node.end] 99} 100 101// writeNode writes out an AST node. 
102func (d *delocation) writeNode(node *node32) { 103 if _, err := d.output.WriteString(d.contents(node)); err != nil { 104 panic(err) 105 } 106} 107 108func (d *delocation) writeCommentedNode(node *node32) { 109 line := d.contents(node) 110 if _, err := d.output.WriteString(d.commentIndicator + " WAS " + strings.TrimSpace(line) + "\n"); err != nil { 111 panic(err) 112 } 113} 114 115func locateError(err error, with *node32, in inputFile) error { 116 posMap := translatePositions([]rune(in.contents), []int{int(with.begin)}) 117 var line int 118 for _, pos := range posMap { 119 line = pos.line 120 } 121 122 return fmt.Errorf("error while processing %q on line %d: %q", in.contents[with.begin:with.end], line, err) 123} 124 125func (d *delocation) processInput(input inputFile) (err error) { 126 d.currentInput = input 127 128 var origStatement *node32 129 defer func() { 130 if err := recover(); err != nil { 131 panic(locateError(fmt.Errorf("%s", err), origStatement, input)) 132 } 133 }() 134 135 for statement := input.ast.up; statement != nil; statement = statement.next { 136 assertNodeType(statement, ruleStatement) 137 origStatement = statement 138 139 node := skipWS(statement.up) 140 if node == nil { 141 d.writeNode(statement) 142 continue 143 } 144 145 switch node.pegRule { 146 case ruleGlobalDirective, ruleComment, ruleLocationDirective: 147 d.writeNode(statement) 148 case ruleDirective: 149 statement, err = d.processDirective(statement, node.up) 150 case ruleLabelContainingDirective: 151 statement, err = d.processLabelContainingDirective(statement, node.up) 152 case ruleLabel: 153 statement, err = d.processLabel(statement, node.up) 154 case ruleInstruction: 155 switch d.processor { 156 case x86_64: 157 statement, err = d.processIntelInstruction(statement, node.up) 158 case ppc64le: 159 statement, err = d.processPPCInstruction(statement, node.up) 160 case aarch64: 161 statement, err = d.processAarch64Instruction(statement, node.up) 162 default: 163 panic("unknown 
processor") 164 } 165 default: 166 panic(fmt.Sprintf("unknown top-level statement type %q", rul3s[node.pegRule])) 167 } 168 169 if err != nil { 170 return locateError(err, origStatement, input) 171 } 172 } 173 174 return nil 175} 176 177func (d *delocation) processDirective(statement, directive *node32) (*node32, error) { 178 assertNodeType(directive, ruleDirectiveName) 179 directiveName := d.contents(directive) 180 181 var args []string 182 forEachPath(directive, func(arg *node32) { 183 // If the argument is a quoted string, use the raw contents. 184 // (Note that this doesn't unescape the string, but that's not 185 // needed so far. 186 if arg.up != nil { 187 arg = arg.up 188 assertNodeType(arg, ruleQuotedArg) 189 if arg.up == nil { 190 args = append(args, "") 191 return 192 } 193 arg = arg.up 194 assertNodeType(arg, ruleQuotedText) 195 } 196 args = append(args, d.contents(arg)) 197 }, ruleArgs, ruleArg) 198 199 switch directiveName { 200 case "comm", "lcomm": 201 if len(args) < 1 { 202 return nil, errors.New("comm directive has no arguments") 203 } 204 d.bssAccessorsNeeded[args[0]] = args[0] 205 d.writeNode(statement) 206 207 case "data": 208 // ASAN and some versions of MSAN are adding a .data section, 209 // and adding references to symbols within it to the code. We 210 // will have to work around this in the future. 211 return nil, errors.New(".data section found in module") 212 213 case "section": 214 section := args[0] 215 216 if section == ".data.rel.ro" { 217 // In a normal build, this is an indication of a 218 // problem but any references from the module to this 219 // section will result in a relocation and thus will 220 // break the integrity check. ASAN can generate these 221 // sections and so we will likely have to work around 222 // that in the future. 
223 return nil, errors.New(".data.rel.ro section found in module") 224 } 225 226 sectionType, ok := sectionType(section) 227 if !ok { 228 // Unknown sections are permitted in order to be robust 229 // to different compiler modes. 230 d.writeNode(statement) 231 break 232 } 233 234 switch sectionType { 235 case ".rodata", ".text": 236 // Move .rodata to .text so it may be accessed without 237 // a relocation. GCC with -fmerge-constants will place 238 // strings into separate sections, so we move all 239 // sections named like .rodata. Also move .text.startup 240 // so the self-test function is also in the module. 241 d.writeCommentedNode(statement) 242 d.output.WriteString(".text\n") 243 244 case ".data": 245 // See above about .data 246 return nil, errors.New(".data section found in module") 247 248 case ".init_array", ".fini_array", ".ctors", ".dtors": 249 // init_array/ctors/dtors contains function 250 // pointers to constructor/destructor 251 // functions. These contain relocations, but 252 // they're in a different section anyway. 253 d.writeNode(statement) 254 break 255 256 case ".debug", ".note", ".toc": 257 d.writeNode(statement) 258 break 259 260 case ".bss": 261 d.writeNode(statement) 262 return d.handleBSS(statement) 263 } 264 265 default: 266 d.writeNode(statement) 267 } 268 269 return statement, nil 270} 271 272func (d *delocation) processLabelContainingDirective(statement, directive *node32) (*node32, error) { 273 // The symbols within directives need to be mapped so that local 274 // symbols in two different .s inputs don't collide. 
275 changed := false 276 assertNodeType(directive, ruleLabelContainingDirectiveName) 277 name := d.contents(directive) 278 279 node := directive.next 280 assertNodeType(node, ruleWS) 281 282 node = node.next 283 assertNodeType(node, ruleSymbolArgs) 284 285 var args []string 286 for node = skipWS(node.up); node != nil; node = skipWS(node.next) { 287 assertNodeType(node, ruleSymbolArg) 288 arg := node.up 289 var mapped string 290 291 for term := arg; term != nil; term = term.next { 292 if term.pegRule != ruleLocalSymbol { 293 mapped += d.contents(term) 294 continue 295 } 296 297 oldSymbol := d.contents(term) 298 newSymbol := d.mapLocalSymbol(oldSymbol) 299 if newSymbol != oldSymbol { 300 changed = true 301 } 302 303 mapped += newSymbol 304 } 305 306 args = append(args, mapped) 307 } 308 309 if !changed { 310 d.writeNode(statement) 311 } else { 312 d.writeCommentedNode(statement) 313 d.output.WriteString("\t" + name + "\t" + strings.Join(args, ", ") + "\n") 314 } 315 316 if name == ".localentry" { 317 d.output.WriteString(localEntryName(args[0]) + ":\n") 318 } 319 320 return statement, nil 321} 322 323func (d *delocation) processLabel(statement, label *node32) (*node32, error) { 324 symbol := d.contents(label) 325 326 switch label.pegRule { 327 case ruleLocalLabel: 328 d.output.WriteString(symbol + ":\n") 329 case ruleLocalSymbol: 330 // symbols need to be mapped so that local symbols from two 331 // different .s inputs don't collide. 332 d.output.WriteString(d.mapLocalSymbol(symbol) + ":\n") 333 case ruleSymbolName: 334 d.output.WriteString(localTargetName(symbol) + ":\n") 335 d.writeNode(statement) 336 default: 337 return nil, fmt.Errorf("unknown label type %q", rul3s[label.pegRule]) 338 } 339 340 return statement, nil 341} 342 343// instructionArgs collects all the arguments to an instruction. 
344func instructionArgs(node *node32) (argNodes []*node32) { 345 for node = skipWS(node); node != nil; node = skipWS(node.next) { 346 assertNodeType(node, ruleInstructionArg) 347 argNodes = append(argNodes, node.up) 348 } 349 350 return argNodes 351} 352 353// Aarch64 support 354 355// gotHelperName returns the name of a synthesised function that returns an 356// address from the GOT. 357func gotHelperName(symbol string) string { 358 return ".Lboringssl_loadgot_" + symbol 359} 360 361// loadAarch64Address emits instructions to put the address of |symbol| 362// (optionally adjusted by |offsetStr|) into |targetReg|. 363func (d *delocation) loadAarch64Address(statement *node32, targetReg string, symbol string, offsetStr string) (*node32, error) { 364 // There are two paths here: either the symbol is known to be local in which 365 // case adr is used to get the address (within 1MiB), or a GOT reference is 366 // really needed in which case the code needs to jump to a helper function. 367 // 368 // A helper function is needed because using code appears to be the only way 369 // to load a GOT value. On other platforms we have ".quad foo@GOT" outside of 370 // the module, but on Aarch64 that results in a "COPY" relocation and linker 371 // comments suggest it's a weird hack. So, for each GOT symbol needed, we emit 372 // a function outside of the module that returns the address from the GOT in 373 // x0. 
374 375 d.writeCommentedNode(statement) 376 377 _, isKnown := d.symbols[symbol] 378 isLocal := strings.HasPrefix(symbol, ".L") 379 if isKnown || isLocal || isSynthesized(symbol) { 380 if isLocal { 381 symbol = d.mapLocalSymbol(symbol) 382 } else if isKnown { 383 symbol = localTargetName(symbol) 384 } 385 386 d.output.WriteString("\tadr " + targetReg + ", " + symbol + offsetStr + "\n") 387 388 return statement, nil 389 } 390 391 if len(offsetStr) != 0 { 392 panic("non-zero offset for helper-based reference") 393 } 394 395 var helperFunc string 396 if symbol == "OPENSSL_armcap_P" { 397 helperFunc = ".LOPENSSL_armcap_P_addr" 398 } else { 399 // GOT helpers also dereference the GOT entry, thus the subsequent ldr 400 // instruction, which would normally do the dereferencing, needs to be 401 // dropped. GOT helpers have to include the dereference because the 402 // assembler doesn't support ":got_lo12:foo" offsets except in an ldr 403 // instruction. 404 d.gotExternalsNeeded[symbol] = struct{}{} 405 helperFunc = gotHelperName(symbol) 406 } 407 408 // Clear the red-zone. I can't find a definitive answer about whether Linux 409 // Aarch64 includes a red-zone, but Microsoft has a 16-byte one and Apple a 410 // 128-byte one. Thus conservatively clear a 128-byte red-zone. 411 d.output.WriteString("\tsub sp, sp, 128\n") 412 413 // Save x0 (which will be stomped by the return value) and the link register 414 // to the stack. Then save the program counter into the link register and 415 // jump to the helper function. 416 d.output.WriteString("\tstp x0, lr, [sp, #-16]!\n") 417 d.output.WriteString("\tbl " + helperFunc + "\n") 418 419 if targetReg == "x0" { 420 // If the target happens to be x0 then restore the link register from the 421 // stack and send the saved value of x0 to the zero register. 422 d.output.WriteString("\tldp xzr, lr, [sp], #16\n") 423 } else { 424 // Otherwise move the result into place and restore registers. 
425 d.output.WriteString("\tmov " + targetReg + ", x0\n") 426 d.output.WriteString("\tldp x0, lr, [sp], #16\n") 427 } 428 429 // Revert the red-zone adjustment. 430 d.output.WriteString("\tadd sp, sp, 128\n") 431 432 return statement, nil 433} 434 435func (d *delocation) processAarch64Instruction(statement, instruction *node32) (*node32, error) { 436 assertNodeType(instruction, ruleInstructionName) 437 instructionName := d.contents(instruction) 438 439 argNodes := instructionArgs(instruction.next) 440 441 switch instructionName { 442 case "cset", "csel", "csetm", "cneg", "csinv", "cinc", "csinc", "csneg": 443 // These functions are special because they take a condition-code name as 444 // an argument and that looks like a symbol reference. 445 d.writeNode(statement) 446 return statement, nil 447 448 case "mrs": 449 // Functions that take special register names also look like a symbol 450 // reference to the parser. 451 d.writeNode(statement) 452 return statement, nil 453 454 case "adrp": 455 // adrp always generates a relocation, even when the target symbol is in the 456 // same segment, because the page-offset of the code isn't known until link 457 // time. Thus adrp instructions are turned into either adr instructions 458 // (limiting the module to 1MiB offsets) or calls to helper functions, both of 459 // which load the full address. Later instructions, which add the low 12 bits 460 // of offset, are tweaked to remove the offset since it's already included. 461 // Loads of GOT symbols are slightly more complex because it's not possible to 462 // avoid dereferencing a GOT entry with Clang's assembler. Thus the later ldr 463 // instruction, which would normally do the dereferencing, is dropped 464 // completely. (Or turned into a mov if it targets a different register.) 
465 assertNodeType(argNodes[0], ruleRegisterOrConstant) 466 targetReg := d.contents(argNodes[0]) 467 if !strings.HasPrefix(targetReg, "x") { 468 panic("adrp targetting register " + targetReg + ", which has the wrong size") 469 } 470 471 var symbol, offset string 472 switch argNodes[1].pegRule { 473 case ruleGOTSymbolOffset: 474 symbol = d.contents(argNodes[1].up) 475 case ruleMemoryRef: 476 assertNodeType(argNodes[1].up, ruleSymbolRef) 477 node, empty := d.gatherOffsets(argNodes[1].up.up, "") 478 if len(empty) != 0 { 479 panic("prefix offsets found for adrp") 480 } 481 symbol = d.contents(node) 482 _, offset = d.gatherOffsets(node.next, "") 483 default: 484 panic("Unhandled adrp argument type " + rul3s[argNodes[1].pegRule]) 485 } 486 487 return d.loadAarch64Address(statement, targetReg, symbol, offset) 488 } 489 490 var args []string 491 changed := false 492 493 for _, arg := range argNodes { 494 fullArg := arg 495 496 switch arg.pegRule { 497 case ruleRegisterOrConstant, ruleLocalLabelRef, ruleARMConstantTweak: 498 args = append(args, d.contents(fullArg)) 499 500 case ruleGOTSymbolOffset: 501 // These should only be arguments to adrp and thus unreachable. 502 panic("unreachable") 503 504 case ruleMemoryRef: 505 ref := arg.up 506 507 switch ref.pegRule { 508 case ruleSymbolRef: 509 // This is a branch. Either the target needs to be written to a local 510 // version of the symbol to ensure that no relocations are emitted, or 511 // it needs to jump to a redirector function. 
512 symbol, _, _, didChange, symbolIsLocal, _ := d.parseMemRef(arg.up) 513 changed = didChange 514 515 if _, knownSymbol := d.symbols[symbol]; knownSymbol { 516 symbol = localTargetName(symbol) 517 changed = true 518 } else if !symbolIsLocal && !isSynthesized(symbol) { 519 redirector := redirectorName(symbol) 520 d.redirectors[symbol] = redirector 521 symbol = redirector 522 changed = true 523 } 524 525 args = append(args, symbol) 526 527 case ruleARMBaseIndexScale: 528 parts := ref.up 529 assertNodeType(parts, ruleARMRegister) 530 baseAddrReg := d.contents(parts) 531 parts = skipWS(parts.next) 532 533 // Only two forms need special handling. First there's memory references 534 // like "[x*, :got_lo12:foo]". The base register here will have been the 535 // target of an adrp instruction to load the page address, but the adrp 536 // will have turned into loading the full address *and dereferencing it*, 537 // above. Thus this instruction needs to be dropped otherwise we'll be 538 // dereferencing twice. 539 // 540 // Second there are forms like "[x*, :lo12:foo]" where the code has used 541 // adrp to load the page address into x*. That adrp will have been turned 542 // into loading the full address so just the offset needs to be dropped. 543 544 if parts != nil { 545 if parts.pegRule == ruleARMGOTLow12 { 546 if instructionName != "ldr" { 547 panic("Symbol reference outside of ldr instruction") 548 } 549 550 if skipWS(parts.next) != nil || parts.up.next != nil { 551 panic("can't handle tweak or post-increment with symbol references") 552 } 553 554 // The GOT helper already dereferenced the entry so, at most, just a mov 555 // is needed to put things in the right register. 
556 d.writeCommentedNode(statement) 557 if baseAddrReg != args[0] { 558 d.output.WriteString("\tmov " + args[0] + ", " + baseAddrReg + "\n") 559 } 560 return statement, nil 561 } else if parts.pegRule == ruleLow12BitsSymbolRef { 562 if instructionName != "ldr" { 563 panic("Symbol reference outside of ldr instruction") 564 } 565 566 if skipWS(parts.next) != nil || parts.up.next != nil { 567 panic("can't handle tweak or post-increment with symbol references") 568 } 569 570 // Suppress the offset; adrp loaded the full address. 571 args = append(args, "["+baseAddrReg+"]") 572 changed = true 573 continue 574 } 575 } 576 577 args = append(args, d.contents(fullArg)) 578 579 case ruleLow12BitsSymbolRef: 580 // These are the second instruction in a pair: 581 // adrp x0, symbol // Load the page address into x0 582 // add x1, x0, :lo12:symbol // Adds the page offset. 583 // 584 // The adrp instruction will have been turned into a sequence that loads 585 // the full address, above, thus the offset is turned into zero. If that 586 // results in the instruction being a nop, then it is deleted. 
587 if instructionName != "add" { 588 panic(fmt.Sprintf("unsure how to handle %q instruction using lo12", instructionName)) 589 } 590 591 if !strings.HasPrefix(args[0], "x") || !strings.HasPrefix(args[1], "x") { 592 panic("address arithmetic with incorrectly sized register") 593 } 594 595 if args[0] == args[1] { 596 d.writeCommentedNode(statement) 597 return statement, nil 598 } 599 600 args = append(args, "#0") 601 changed = true 602 603 default: 604 panic(fmt.Sprintf("unhandled MemoryRef type %s", rul3s[ref.pegRule])) 605 } 606 607 default: 608 panic(fmt.Sprintf("unknown instruction argument type %q", rul3s[arg.pegRule])) 609 } 610 } 611 612 if changed { 613 d.writeCommentedNode(statement) 614 replacement := "\t" + instructionName + "\t" + strings.Join(args, ", ") + "\n" 615 d.output.WriteString(replacement) 616 } else { 617 d.writeNode(statement) 618 } 619 620 return statement, nil 621} 622 623/* ppc64le 624 625[PABI]: “64-Bit ELF V2 ABI Specification. Power Architecture.” March 21st, 626 2017 627 628(Also useful is “Power ISA Version 2.07 B”. Note that version three of that 629document is /not/ good as that's POWER9 specific.) 630 631ppc64le doesn't have IP-relative addressing and does a lot to work around this. 632Rather than reference a PLT and GOT direction, it has a single structure called 633the TOC (Table Of Contents). Within the TOC is the contents of .rodata, .data, 634.got, .plt, .bss, etc sections [PABI;3.3]. 635 636A pointer to the TOC is maintained in r2 and the following pattern is used to 637load the address of an element into a register: 638 639 addis <address register>, 2, foo@toc@ha 640 addi <address register>, <address register>, foo@toc@l 641 642The “addis” instruction shifts a signed constant left 16 bits and adds the 643result to its second argument, saving the result in the first argument. The 644“addi” instruction does the same, but without shifting. 
Thus the “@toc@ha" 645suffix on a symbol means “the top 16 bits of the TOC offset” and “@toc@l” means 646“the bottom 16 bits of the offset”. However, note that both values are signed, 647thus offsets in the top half of a 64KB chunk will have an @ha value that's one 648greater than expected and a negative @l value. 649 650The TOC is specific to a “module” (basically an executable or shared object). 651This means that there's not a single TOC in a process and that r2 needs to 652change as control moves between modules. Thus functions have two entry points: 653the “global” entry point and the “local” entry point. Jumps from within the 654same module can use the local entry while jumps from other modules must use the 655global entry. The global entry establishes the correct value of r2 before 656running the function and the local entry skips that code. 657 658The global entry point for a function is defined by its label. The local entry 659is a power-of-two number of bytes from the global entry, set by the 660“.localentry” directive. (ppc64le instructions are always 32 bits, so an offset 661of 1 or 2 bytes is treated as an offset of zero.) 662 663In order to help the global entry code set r2 to point to the local TOC, r12 is 664set to the address of the global entry point when called [PABI;2.2.1.1]. Thus 665the global entry will typically use an addis+addi pair to add a known offset to 666r12 and store it in r2. For example: 667 668foo: 669 addis 2, 12, .TOC. - foo@ha 670 addi 2, 2, .TOC. - foo@l 671 672(It's worth noting that the '@' operator binds very loosely, so the 3rd 673arguments parse as (.TOC. - foo)@ha and (.TOC. - foo)@l.) 674 675When calling a function, the compiler doesn't know whether that function is in 676the same module or not. Thus it doesn't know whether r12 needs to be set nor 677whether r2 will be clobbered on return. 
Rather than always assume the worst, 678the linker fixes stuff up once it knows that a call is going out of module: 679 680Firstly, calling, say, memcpy (which we assume to be in a different module) 681won't actually jump directly to memcpy, or even a PLT resolution function. 682It'll call a synthesised function that: 683 a) saves r2 in the caller's stack frame 684 b) loads the address of memcpy@PLT into r12 685 c) jumps to r12. 686 687As this synthesised function loads memcpy@PLT, a call to memcpy from the 688compiled code just references “memcpy” directly, not “memcpy@PLT”. 689 690Since it jumps directly to memcpy@PLT, it can't restore r2 on return. Thus 691calls must be followed by a nop. If the call ends up going out-of-module, the 692linker will rewrite that nop to load r2 from the stack. 693 694Speaking of the stack, the stack pointer is kept in r1 and there's a 288-byte 695red-zone. The format of the stack frame is defined [PABI;2.2.2] and must be 696followed as called functions will write into their parent's stack frame. For 697example, the synthesised out-of-module trampolines will save r2 24 bytes into 698the caller's frame and all non-leaf functions save the return address 16 bytes 699into the caller's frame. 700 701A final point worth noting: some RISC ISAs have r0 wired to zero: all reads 702result in zero and all writes are discarded. POWER does something a little like 703that, but r0 is only special in certain argument positions for certain 704instructions. You just have to read the manual to know which they are. 705 706 707Delocation is easier than Intel because there's just TOC references, but it's 708also harder because there's no IP-relative addressing. 709 710Jumps are IP-relative however, and have a 24-bit immediate value. So we can 711jump to functions that set a register to the needed value. (r3 is the 712return-value register and so that's what is generally used here.) 
*/ 713 714// isPPC64LEAPair recognises an addis+addi pair that's adding the offset of 715// source to relative and writing the result to target. 716func (d *delocation) isPPC64LEAPair(statement *node32) (target, source, relative string, ok bool) { 717 instruction := skipWS(statement.up).up 718 assertNodeType(instruction, ruleInstructionName) 719 name1 := d.contents(instruction) 720 args1 := instructionArgs(instruction.next) 721 722 statement = statement.next 723 instruction = skipWS(statement.up).up 724 assertNodeType(instruction, ruleInstructionName) 725 name2 := d.contents(instruction) 726 args2 := instructionArgs(instruction.next) 727 728 if name1 != "addis" || 729 len(args1) != 3 || 730 name2 != "addi" || 731 len(args2) != 3 { 732 return "", "", "", false 733 } 734 735 target = d.contents(args1[0]) 736 relative = d.contents(args1[1]) 737 source1 := d.contents(args1[2]) 738 source2 := d.contents(args2[2]) 739 740 if !strings.HasSuffix(source1, "@ha") || 741 !strings.HasSuffix(source2, "@l") || 742 source1[:len(source1)-3] != source2[:len(source2)-2] || 743 d.contents(args2[0]) != target || 744 d.contents(args2[1]) != target { 745 return "", "", "", false 746 } 747 748 source = source1[:len(source1)-3] 749 ok = true 750 return 751} 752 753// establishTOC writes the global entry prelude for a function. The standard 754// prelude involves relocations so this version moves the relocation outside 755// the integrity-checked area. 756func establishTOC(w stringWriter) { 757 w.WriteString("999:\n") 758 w.WriteString("\taddis 2, 12, .LBORINGSSL_external_toc-999b@ha\n") 759 w.WriteString("\taddi 2, 2, .LBORINGSSL_external_toc-999b@l\n") 760 w.WriteString("\tld 12, 0(2)\n") 761 w.WriteString("\tadd 2, 2, 12\n") 762} 763 764// loadTOCFuncName returns the name of a synthesized function that sets r3 to 765// the value of “symbol+offset”. 
func loadTOCFuncName(symbol, offset string) string {
	// "." is replaced with "_dot_" in the symbol, and "+"/"-" with
	// "_plus_"/"_minus_" in the offset, before they are joined into the name.
	symbol = strings.Replace(symbol, ".", "_dot_", -1)
	ret := ".Lbcm_loadtoc_" + symbol
	if len(offset) != 0 {
		offset = strings.Replace(offset, "+", "_plus_", -1)
		offset = strings.Replace(offset, "-", "_minus_", -1)
		ret += "_" + offset
	}
	return ret
}

// loadFromTOC records that a TOC loader function is needed for
// |symbol|+|offset| and returns a wrapper. When run, the wrapper writes to |w|
// a call to that loader (see loadTOCFuncName), moves the result into register
// |dest|, restores the link register and r3, and then runs the wrapped
// continuation.
func (d *delocation) loadFromTOC(w stringWriter, symbol, offset, dest string) wrapperFunc {
	// Symbol and offset are joined with a NUL byte to form the set key.
	d.tocLoaders[symbol+"\x00"+offset] = struct{}{}

	return func(k func()) {
		w.WriteString("\taddi 1, 1, -288\n")   // Clear the red zone.
		w.WriteString("\tmflr " + dest + "\n") // Stash the link register.
		w.WriteString("\tstd " + dest + ", -8(1)\n")
		// The TOC loader will use r3, so stash it if necessary.
		if dest != "3" {
			w.WriteString("\tstd 3, -16(1)\n")
		}

		// Because loadTOCFuncName returns a “.L” name, we don't need a
		// nop after this call.
		w.WriteString("\tbl " + loadTOCFuncName(symbol, offset) + "\n")

		// Cycle registers around. We need r3 -> destReg, -8(1) ->
		// lr and, optionally, -16(1) -> r3.
		w.WriteString("\tstd 3, -24(1)\n")
		w.WriteString("\tld 3, -8(1)\n")
		w.WriteString("\tmtlr 3\n")
		w.WriteString("\tld " + dest + ", -24(1)\n")
		if dest != "3" {
			w.WriteString("\tld 3, -16(1)\n")
		}
		// Undo the red-zone adjustment made at the top of the sequence.
		w.WriteString("\taddi 1, 1, 288\n")

		k()
	}
}

// gatherOffsets consumes the run of Offset nodes starting at |symRef| and
// appends their text to |offsets|, prefixing "+" to any offset that does not
// already begin with a sign. It returns the first node that is not an Offset
// (nil if the list is exhausted) together with the accumulated string.
func (d *delocation) gatherOffsets(symRef *node32, offsets string) (*node32, string) {
	for symRef != nil && symRef.pegRule == ruleOffset {
		offset := d.contents(symRef)
		if offset[0] != '+' && offset[0] != '-' {
			offset = "+" + offset
		}
		offsets = offsets + offset
		symRef = symRef.next
	}
	return symRef, offsets
}

// parseMemRef decomposes a memory reference. If |memRef| is not a SymbolRef,
// every result is zero except nextRef, which is |memRef| itself. Otherwise it
// returns the symbol name (already renamed with mapLocalSymbol if it is a
// local symbol), the concatenation of every offset found around the symbol,
// and the section suffix if one is present. didChange reports whether the
// local-symbol renaming altered the name, symbolIsLocal whether the symbol
// node was a LocalSymbol, and nextRef is the node following |memRef|. It
// panics if the SymbolRef has unexpected trailing nodes.
func (d *delocation) parseMemRef(memRef *node32) (symbol, offset, section string, didChange, symbolIsLocal bool, nextRef *node32) {
	if memRef.pegRule != ruleSymbolRef {
		return "", "", "", false, false, memRef
	}

	symRef := memRef.up
	nextRef = memRef.next

	// (Offset* '+')?
	symRef, offset = d.gatherOffsets(symRef, offset)

	// (LocalSymbol / SymbolName)
	symbol = d.contents(symRef)
	if symRef.pegRule == ruleLocalSymbol {
		symbolIsLocal = true
		mapped := d.mapLocalSymbol(symbol)
		if mapped != symbol {
			symbol = mapped
			didChange = true
		}
	}
	symRef = symRef.next

	// Offset*
	symRef, offset = d.gatherOffsets(symRef, offset)

	// ('@' Section / Offset*)?
	if symRef != nil {
		assertNodeType(symRef, ruleSection)
		section = d.contents(symRef)
		symRef = symRef.next

		symRef, offset = d.gatherOffsets(symRef, offset)
	}

	if symRef != nil {
		panic(fmt.Sprintf("unexpected token in SymbolRef: %q", rul3s[symRef.pegRule]))
	}

	return
}

func (d *delocation) processPPCInstruction(statement, instruction *node32) (*node32, error) {
	assertNodeType(instruction, ruleInstructionName)
	instructionName := d.contents(instruction)
	isBranch := instructionName[0] == 'b'

	argNodes := instructionArgs(instruction.next)

	var wrappers wrapperStack
	var args []string
	changed := false

Args:
	for i, arg := range argNodes {
		fullArg := arg
		isIndirect := false

		if arg.pegRule == ruleIndirectionIndicator {
			arg = arg.next
			isIndirect = true
		}

		switch arg.pegRule {
		case ruleRegisterOrConstant, ruleLocalLabelRef:
			args = append(args, d.contents(fullArg))

		case ruleTOCRefLow:
			return nil, errors.New("Found low TOC reference outside preamble pattern")

		case ruleTOCRefHigh:
			target, _, relative, ok := d.isPPC64LEAPair(statement)
			if !ok {
				return nil, errors.New("Found high TOC reference outside preamble pattern")
			}

			if relative != "12" {
				return nil, fmt.Errorf("preamble is relative to %q, not r12", relative)
			}

			if target != "2" {
				return nil, fmt.Errorf("preamble is setting %q, not r2", target)
			}

			statement = statement.next
			establishTOC(d.output)
			instructionName = ""
			changed = true
			break Args

		case ruleMemoryRef:
			symbol, offset, section, didChange, symbolIsLocal, memRef := d.parseMemRef(arg.up)
			changed = didChange

			if len(symbol) > 0 {
				if _, localEntrySymbol := d.localEntrySymbols[symbol]; localEntrySymbol && isBranch {
					symbol = localEntryName(symbol)
					changed = true
				} else if _, knownSymbol :=
d.symbols[symbol]; knownSymbol { 919 symbol = localTargetName(symbol) 920 changed = true 921 } else if !symbolIsLocal && !isSynthesized(symbol) && len(section) == 0 { 922 changed = true 923 d.redirectors[symbol] = redirectorName(symbol) 924 symbol = redirectorName(symbol) 925 // TODO(davidben): This should sanity-check the next 926 // instruction is a nop and ideally remove it. 927 wrappers = append(wrappers, func(k func()) { 928 k() 929 // Like the linker's PLT stubs, redirector functions 930 // expect callers to restore r2. 931 d.output.WriteString("\tld 2, 24(1)\n") 932 }) 933 } 934 } 935 936 switch section { 937 case "": 938 939 case "tls": 940 // This section identifier just tells the 941 // assembler to use r13, the pointer to the 942 // thread-local data [PABI;3.7.3.3]. 943 944 case "toc@ha": 945 // Delete toc@ha instructions. Per 946 // [PABI;3.6.3], the linker is allowed to erase 947 // toc@ha instructions. We take advantage of 948 // this by unconditionally erasing the toc@ha 949 // instructions and doing the full lookup when 950 // processing toc@l. 951 // 952 // Note that any offset here applies before @ha 953 // and @l. That is, 42+foo@toc@ha is 954 // #ha(42+foo-.TOC.), not 42+#ha(foo-.TOC.). Any 955 // corresponding toc@l references are required 956 // by the ABI to have the same offset. The 957 // offset will be incorporated in full when 958 // those are processed. 959 if instructionName != "addis" || len(argNodes) != 3 || i != 2 || args[1] != "2" { 960 return nil, errors.New("can't process toc@ha reference") 961 } 962 changed = true 963 instructionName = "" 964 break Args 965 966 case "toc@l": 967 // Per [PAB;3.6.3], this instruction must take 968 // as input a register which was the output of 969 // a toc@ha computation and compute the actual 970 // address of some symbol. The toc@ha 971 // computation was elided, so we ignore that 972 // input register and compute the address 973 // directly. 
974 changed = true 975 976 // For all supported toc@l instructions, the 977 // destination register is the first argument. 978 destReg := args[0] 979 980 wrappers = append(wrappers, d.loadFromTOC(d.output, symbol, offset, destReg)) 981 switch instructionName { 982 case "addi": 983 // The original instruction was: 984 // addi destReg, tocHaReg, offset+symbol@toc@l 985 instructionName = "" 986 987 case "ld", "lhz", "lwz": 988 // The original instruction was: 989 // l?? destReg, offset+symbol@toc@l(tocHaReg) 990 // 991 // We transform that into the 992 // equivalent dereference of destReg: 993 // l?? destReg, 0(destReg) 994 origInstructionName := instructionName 995 instructionName = "" 996 997 assertNodeType(memRef, ruleBaseIndexScale) 998 assertNodeType(memRef.up, ruleRegisterOrConstant) 999 if memRef.next != nil || memRef.up.next != nil { 1000 return nil, errors.New("expected single register in BaseIndexScale for ld argument") 1001 } 1002 1003 baseReg := destReg 1004 if baseReg == "0" { 1005 // Register zero is special as the base register for a load. 1006 // Avoid it by spilling and using r3 instead. 1007 baseReg = "3" 1008 wrappers = append(wrappers, func(k func()) { 1009 d.output.WriteString("\taddi 1, 1, -288\n") // Clear the red zone. 1010 d.output.WriteString("\tstd " + baseReg + ", -8(1)\n") 1011 d.output.WriteString("\tmr " + baseReg + ", " + destReg + "\n") 1012 k() 1013 d.output.WriteString("\tld " + baseReg + ", -8(1)\n") 1014 d.output.WriteString("\taddi 1, 1, 288\n") // Clear the red zone. 
1015 }) 1016 } 1017 1018 wrappers = append(wrappers, func(k func()) { 1019 d.output.WriteString("\t" + origInstructionName + " " + destReg + ", 0(" + baseReg + ")\n") 1020 }) 1021 default: 1022 return nil, fmt.Errorf("can't process TOC argument to %q", instructionName) 1023 } 1024 1025 default: 1026 return nil, fmt.Errorf("Unknown section type %q", section) 1027 } 1028 1029 argStr := "" 1030 if isIndirect { 1031 argStr += "*" 1032 } 1033 argStr += symbol 1034 if len(offset) > 0 { 1035 argStr += offset 1036 } 1037 if len(section) > 0 { 1038 argStr += "@" 1039 argStr += section 1040 } 1041 1042 for ; memRef != nil; memRef = memRef.next { 1043 argStr += d.contents(memRef) 1044 } 1045 1046 args = append(args, argStr) 1047 1048 default: 1049 panic(fmt.Sprintf("unknown instruction argument type %q", rul3s[arg.pegRule])) 1050 } 1051 } 1052 1053 if changed { 1054 d.writeCommentedNode(statement) 1055 1056 var replacement string 1057 if len(instructionName) > 0 { 1058 replacement = "\t" + instructionName + "\t" + strings.Join(args, ", ") + "\n" 1059 } 1060 1061 wrappers.do(func() { 1062 d.output.WriteString(replacement) 1063 }) 1064 } else { 1065 d.writeNode(statement) 1066 } 1067 1068 return statement, nil 1069} 1070 1071/* Intel */ 1072 1073type instructionType int 1074 1075const ( 1076 instrPush instructionType = iota 1077 instrMove 1078 // instrTransformingMove is essentially a move, but it performs some 1079 // transformation of the data during the process. 1080 instrTransformingMove 1081 instrJump 1082 instrConditionalMove 1083 // instrCombine merges the source and destination in some fashion, for example 1084 // a 2-operand bitwise operation. 1085 instrCombine 1086 // instrMemoryVectorCombine is similer to instrCombine, but the source 1087 // register must be a memory reference and the destination register 1088 // must be a vector register. 1089 instrMemoryVectorCombine 1090 // instrThreeArg merges two sources into a destination in some fashion. 
1091 instrThreeArg 1092 // instrCompare takes two arguments and writes outputs to the flags register. 1093 instrCompare 1094 instrOther 1095) 1096 1097func classifyInstruction(instr string, args []*node32) instructionType { 1098 switch instr { 1099 case "push", "pushq": 1100 if len(args) == 1 { 1101 return instrPush 1102 } 1103 1104 case "mov", "movq", "vmovq", "movsd", "vmovsd": 1105 if len(args) == 2 { 1106 return instrMove 1107 } 1108 1109 case "cmovneq", "cmoveq": 1110 if len(args) == 2 { 1111 return instrConditionalMove 1112 } 1113 1114 case "call", "callq", "jmp", "jo", "jno", "js", "jns", "je", "jz", "jne", "jnz", "jb", "jnae", "jc", "jnb", "jae", "jnc", "jbe", "jna", "ja", "jnbe", "jl", "jnge", "jge", "jnl", "jle", "jng", "jg", "jnle", "jp", "jpe", "jnp", "jpo": 1115 if len(args) == 1 { 1116 return instrJump 1117 } 1118 1119 case "orq", "andq", "xorq": 1120 if len(args) == 2 { 1121 return instrCombine 1122 } 1123 1124 case "cmpq": 1125 if len(args) == 2 { 1126 return instrCompare 1127 } 1128 1129 case "sarxq", "shlxq", "shrxq": 1130 if len(args) == 3 { 1131 return instrThreeArg 1132 } 1133 1134 case "vpbroadcastq": 1135 if len(args) == 2 { 1136 return instrTransformingMove 1137 } 1138 1139 case "movlps", "movhps": 1140 if len(args) == 2 { 1141 return instrMemoryVectorCombine 1142 } 1143 } 1144 1145 return instrOther 1146} 1147 1148func push(w stringWriter) wrapperFunc { 1149 return func(k func()) { 1150 w.WriteString("\tpushq %rax\n") 1151 k() 1152 w.WriteString("\txchg %rax, (%rsp)\n") 1153 } 1154} 1155 1156func compare(w stringWriter, instr, a, b string) wrapperFunc { 1157 return func(k func()) { 1158 k() 1159 w.WriteString(fmt.Sprintf("\t%s %s, %s\n", instr, a, b)) 1160 } 1161} 1162 1163func (d *delocation) loadFromGOT(w stringWriter, destination, symbol, section string, redzoneCleared bool) wrapperFunc { 1164 d.gotExternalsNeeded[symbol+"@"+section] = struct{}{} 1165 1166 return func(k func()) { 1167 if !redzoneCleared { 1168 w.WriteString("\tleaq 
-128(%rsp), %rsp\n") // Clear the red zone. 1169 } 1170 w.WriteString("\tpushf\n") 1171 w.WriteString(fmt.Sprintf("\tleaq %s_%s_external(%%rip), %s\n", symbol, section, destination)) 1172 w.WriteString(fmt.Sprintf("\taddq (%s), %s\n", destination, destination)) 1173 w.WriteString(fmt.Sprintf("\tmovq (%s), %s\n", destination, destination)) 1174 w.WriteString("\tpopf\n") 1175 if !redzoneCleared { 1176 w.WriteString("\tleaq\t128(%rsp), %rsp\n") 1177 } 1178 } 1179} 1180 1181func saveFlags(w stringWriter, redzoneCleared bool) wrapperFunc { 1182 return func(k func()) { 1183 if !redzoneCleared { 1184 w.WriteString("\tleaq -128(%rsp), %rsp\n") // Clear the red zone. 1185 defer w.WriteString("\tleaq 128(%rsp), %rsp\n") 1186 } 1187 w.WriteString("\tpushfq\n") 1188 k() 1189 w.WriteString("\tpopfq\n") 1190 } 1191} 1192 1193func saveRegister(w stringWriter, avoidRegs []string) (wrapperFunc, string) { 1194 candidates := []string{"%rax", "%rbx", "%rcx", "%rdx"} 1195 1196 var reg string 1197NextCandidate: 1198 for _, candidate := range candidates { 1199 for _, avoid := range avoidRegs { 1200 if candidate == avoid { 1201 continue NextCandidate 1202 } 1203 } 1204 1205 reg = candidate 1206 break 1207 } 1208 1209 if len(reg) == 0 { 1210 panic("too many excluded registers") 1211 } 1212 1213 return func(k func()) { 1214 w.WriteString("\tleaq -128(%rsp), %rsp\n") // Clear the red zone. 
1215 w.WriteString("\tpushq " + reg + "\n") 1216 k() 1217 w.WriteString("\tpopq " + reg + "\n") 1218 w.WriteString("\tleaq 128(%rsp), %rsp\n") 1219 }, reg 1220} 1221 1222func moveTo(w stringWriter, target string, isAVX bool, source string) wrapperFunc { 1223 return func(k func()) { 1224 k() 1225 prefix := "" 1226 if isAVX { 1227 prefix = "v" 1228 } 1229 w.WriteString("\t" + prefix + "movq " + source + ", " + target + "\n") 1230 } 1231} 1232 1233func finalTransform(w stringWriter, transformInstruction, reg string) wrapperFunc { 1234 return func(k func()) { 1235 k() 1236 w.WriteString("\t" + transformInstruction + " " + reg + ", " + reg + "\n") 1237 } 1238} 1239 1240func combineOp(w stringWriter, instructionName, source, dest string) wrapperFunc { 1241 return func(k func()) { 1242 k() 1243 w.WriteString("\t" + instructionName + " " + source + ", " + dest + "\n") 1244 } 1245} 1246 1247func threeArgCombineOp(w stringWriter, instructionName, source1, source2, dest string) wrapperFunc { 1248 return func(k func()) { 1249 k() 1250 w.WriteString("\t" + instructionName + " " + source1 + ", " + source2 + ", " + dest + "\n") 1251 } 1252} 1253 1254func memoryVectorCombineOp(w stringWriter, instructionName, source, dest string) wrapperFunc { 1255 return func(k func()) { 1256 k() 1257 // These instructions can only read from memory, so push 1258 // tempReg and read from the stack. Note we assume the red zone 1259 // was previously cleared by saveRegister(). 
1260 w.WriteString("\tpushq " + source + "\n") 1261 w.WriteString("\t" + instructionName + " (%rsp), " + dest + "\n") 1262 w.WriteString("\tleaq 8(%rsp), %rsp\n") 1263 } 1264} 1265 1266func isValidLEATarget(reg string) bool { 1267 return !strings.HasPrefix(reg, "%xmm") && !strings.HasPrefix(reg, "%ymm") && !strings.HasPrefix(reg, "%zmm") 1268} 1269 1270func undoConditionalMove(w stringWriter, instr string) wrapperFunc { 1271 var invertedCondition string 1272 1273 switch instr { 1274 case "cmoveq": 1275 invertedCondition = "ne" 1276 case "cmovneq": 1277 invertedCondition = "e" 1278 default: 1279 panic(fmt.Sprintf("don't know how to handle conditional move instruction %q", instr)) 1280 } 1281 1282 return func(k func()) { 1283 w.WriteString("\tj" + invertedCondition + " 999f\n") 1284 k() 1285 w.WriteString("999:\n") 1286 } 1287} 1288 1289func (d *delocation) isRIPRelative(node *node32) bool { 1290 return node != nil && node.pegRule == ruleBaseIndexScale && d.contents(node) == "(%rip)" 1291} 1292 1293func (d *delocation) processIntelInstruction(statement, instruction *node32) (*node32, error) { 1294 assertNodeType(instruction, ruleInstructionName) 1295 instructionName := d.contents(instruction) 1296 1297 argNodes := instructionArgs(instruction.next) 1298 1299 var wrappers wrapperStack 1300 var args []string 1301 changed := false 1302 1303Args: 1304 for i, arg := range argNodes { 1305 fullArg := arg 1306 isIndirect := false 1307 1308 if arg.pegRule == ruleIndirectionIndicator { 1309 arg = arg.next 1310 isIndirect = true 1311 } 1312 1313 switch arg.pegRule { 1314 case ruleRegisterOrConstant, ruleLocalLabelRef: 1315 args = append(args, d.contents(fullArg)) 1316 1317 case ruleMemoryRef: 1318 symbol, offset, section, didChange, symbolIsLocal, memRef := d.parseMemRef(arg.up) 1319 changed = didChange 1320 1321 if symbol == "OPENSSL_ia32cap_P" && section == "" { 1322 if instructionName != "leaq" { 1323 return nil, fmt.Errorf("non-leaq instruction %q referenced 
OPENSSL_ia32cap_P directly", instructionName) 1324 } 1325 1326 if i != 0 || len(argNodes) != 2 || !d.isRIPRelative(memRef) || len(offset) > 0 { 1327 return nil, fmt.Errorf("invalid OPENSSL_ia32cap_P reference in instruction %q", instructionName) 1328 } 1329 1330 target := argNodes[1] 1331 assertNodeType(target, ruleRegisterOrConstant) 1332 reg := d.contents(target) 1333 1334 if !strings.HasPrefix(reg, "%r") { 1335 return nil, fmt.Errorf("tried to load OPENSSL_ia32cap_P into %q, which is not a standard register.", reg) 1336 } 1337 1338 changed = true 1339 1340 // Flag-altering instructions (i.e. addq) are going to be used so the 1341 // flags need to be preserved. 1342 wrappers = append(wrappers, saveFlags(d.output, false /* Red Zone not yet cleared */)) 1343 1344 wrappers = append(wrappers, func(k func()) { 1345 d.output.WriteString("\tleaq\tOPENSSL_ia32cap_addr_delta(%rip), " + reg + "\n") 1346 d.output.WriteString("\taddq\t(" + reg + "), " + reg + "\n") 1347 }) 1348 1349 break Args 1350 } 1351 1352 switch section { 1353 case "": 1354 if _, knownSymbol := d.symbols[symbol]; knownSymbol { 1355 symbol = localTargetName(symbol) 1356 changed = true 1357 } 1358 1359 case "PLT": 1360 if classifyInstruction(instructionName, argNodes) != instrJump { 1361 return nil, fmt.Errorf("Cannot rewrite PLT reference for non-jump instruction %q", instructionName) 1362 } 1363 1364 if _, knownSymbol := d.symbols[symbol]; knownSymbol { 1365 symbol = localTargetName(symbol) 1366 changed = true 1367 } else if !symbolIsLocal && !isSynthesized(symbol) { 1368 // Unknown symbol via PLT is an 1369 // out-call from the module, e.g. 1370 // memcpy. 
1371 d.redirectors[symbol+"@"+section] = redirectorName(symbol) 1372 symbol = redirectorName(symbol) 1373 } 1374 1375 changed = true 1376 1377 case "GOTPCREL": 1378 if len(offset) > 0 { 1379 return nil, errors.New("loading from GOT with offset is unsupported") 1380 } 1381 if !d.isRIPRelative(memRef) { 1382 return nil, errors.New("GOT access must be IP-relative") 1383 } 1384 1385 useGOT := false 1386 if _, knownSymbol := d.symbols[symbol]; knownSymbol { 1387 symbol = localTargetName(symbol) 1388 changed = true 1389 } else if !isSynthesized(symbol) { 1390 useGOT = true 1391 } 1392 1393 classification := classifyInstruction(instructionName, argNodes) 1394 if classification != instrThreeArg && classification != instrCompare && i != 0 { 1395 return nil, errors.New("GOT access must be source operand") 1396 } 1397 1398 // Reduce the instruction to movq symbol@GOTPCREL, targetReg. 1399 var targetReg string 1400 var redzoneCleared bool 1401 switch classification { 1402 case instrPush: 1403 wrappers = append(wrappers, push(d.output)) 1404 targetReg = "%rax" 1405 case instrConditionalMove: 1406 wrappers = append(wrappers, undoConditionalMove(d.output, instructionName)) 1407 fallthrough 1408 case instrMove: 1409 assertNodeType(argNodes[1], ruleRegisterOrConstant) 1410 targetReg = d.contents(argNodes[1]) 1411 case instrCompare: 1412 otherSource := d.contents(argNodes[i^1]) 1413 saveRegWrapper, tempReg := saveRegister(d.output, []string{otherSource}) 1414 redzoneCleared = true 1415 wrappers = append(wrappers, saveRegWrapper) 1416 if i == 0 { 1417 wrappers = append(wrappers, compare(d.output, instructionName, tempReg, otherSource)) 1418 } else { 1419 wrappers = append(wrappers, compare(d.output, instructionName, otherSource, tempReg)) 1420 } 1421 targetReg = tempReg 1422 case instrTransformingMove: 1423 assertNodeType(argNodes[1], ruleRegisterOrConstant) 1424 targetReg = d.contents(argNodes[1]) 1425 wrappers = append(wrappers, finalTransform(d.output, instructionName, targetReg)) 
1426 if isValidLEATarget(targetReg) { 1427 return nil, errors.New("Currently transforming moves are assumed to target XMM registers. Otherwise we'll pop %rax before reading it to do the transform.") 1428 } 1429 case instrCombine: 1430 targetReg = d.contents(argNodes[1]) 1431 if !isValidLEATarget(targetReg) { 1432 return nil, fmt.Errorf("cannot handle combining instructions targeting non-general registers") 1433 } 1434 saveRegWrapper, tempReg := saveRegister(d.output, []string{targetReg}) 1435 redzoneCleared = true 1436 wrappers = append(wrappers, saveRegWrapper) 1437 1438 wrappers = append(wrappers, combineOp(d.output, instructionName, tempReg, targetReg)) 1439 targetReg = tempReg 1440 case instrMemoryVectorCombine: 1441 assertNodeType(argNodes[1], ruleRegisterOrConstant) 1442 targetReg = d.contents(argNodes[1]) 1443 if isValidLEATarget(targetReg) { 1444 return nil, errors.New("target register must be an XMM register") 1445 } 1446 saveRegWrapper, tempReg := saveRegister(d.output, nil) 1447 wrappers = append(wrappers, saveRegWrapper) 1448 redzoneCleared = true 1449 wrappers = append(wrappers, memoryVectorCombineOp(d.output, instructionName, tempReg, targetReg)) 1450 targetReg = tempReg 1451 case instrThreeArg: 1452 if n := len(argNodes); n != 3 { 1453 return nil, fmt.Errorf("three-argument instruction has %d arguments", n) 1454 } 1455 if i != 0 && i != 1 { 1456 return nil, errors.New("GOT access must be from source operand") 1457 } 1458 targetReg = d.contents(argNodes[2]) 1459 1460 otherSource := d.contents(argNodes[1]) 1461 if i == 1 { 1462 otherSource = d.contents(argNodes[0]) 1463 } 1464 1465 saveRegWrapper, tempReg := saveRegister(d.output, []string{targetReg, otherSource}) 1466 redzoneCleared = true 1467 wrappers = append(wrappers, saveRegWrapper) 1468 1469 if i == 0 { 1470 wrappers = append(wrappers, threeArgCombineOp(d.output, instructionName, tempReg, otherSource, targetReg)) 1471 } else { 1472 wrappers = append(wrappers, threeArgCombineOp(d.output, 
instructionName, otherSource, tempReg, targetReg)) 1473 } 1474 targetReg = tempReg 1475 default: 1476 return nil, fmt.Errorf("Cannot rewrite GOTPCREL reference for instruction %q", instructionName) 1477 } 1478 1479 if !isValidLEATarget(targetReg) { 1480 // Sometimes the compiler will load from the GOT to an 1481 // XMM register, which is not a valid target of an LEA 1482 // instruction. 1483 saveRegWrapper, tempReg := saveRegister(d.output, nil) 1484 wrappers = append(wrappers, saveRegWrapper) 1485 isAVX := strings.HasPrefix(instructionName, "v") 1486 wrappers = append(wrappers, moveTo(d.output, targetReg, isAVX, tempReg)) 1487 targetReg = tempReg 1488 if redzoneCleared { 1489 return nil, fmt.Errorf("internal error: Red Zone was already cleared") 1490 } 1491 redzoneCleared = true 1492 } 1493 1494 if symbol == "OPENSSL_ia32cap_P" { 1495 // Flag-altering instructions (i.e. addq) are going to be used so the 1496 // flags need to be preserved. 1497 wrappers = append(wrappers, saveFlags(d.output, redzoneCleared)) 1498 wrappers = append(wrappers, func(k func()) { 1499 d.output.WriteString("\tleaq\tOPENSSL_ia32cap_addr_delta(%rip), " + targetReg + "\n") 1500 d.output.WriteString("\taddq\t(" + targetReg + "), " + targetReg + "\n") 1501 }) 1502 } else if useGOT { 1503 wrappers = append(wrappers, d.loadFromGOT(d.output, targetReg, symbol, section, redzoneCleared)) 1504 } else { 1505 wrappers = append(wrappers, func(k func()) { 1506 d.output.WriteString(fmt.Sprintf("\tleaq\t%s(%%rip), %s\n", symbol, targetReg)) 1507 }) 1508 } 1509 changed = true 1510 break Args 1511 1512 default: 1513 return nil, fmt.Errorf("Unknown section type %q", section) 1514 } 1515 1516 if !changed && len(section) > 0 { 1517 panic("section was not handled") 1518 } 1519 section = "" 1520 1521 argStr := "" 1522 if isIndirect { 1523 argStr += "*" 1524 } 1525 argStr += symbol 1526 argStr += offset 1527 1528 for ; memRef != nil; memRef = memRef.next { 1529 argStr += d.contents(memRef) 1530 } 1531 1532 args = 
append(args, argStr) 1533 1534 case ruleGOTLocation: 1535 if instructionName != "movabsq" { 1536 return nil, fmt.Errorf("_GLOBAL_OFFSET_TABLE_ lookup didn't use movabsq") 1537 } 1538 if i != 0 || len(argNodes) != 2 { 1539 return nil, fmt.Errorf("movabs of _GLOBAL_OFFSET_TABLE_ didn't expected form") 1540 } 1541 1542 d.gotDeltaNeeded = true 1543 changed = true 1544 instructionName = "movq" 1545 assertNodeType(arg.up, ruleLocalSymbol) 1546 baseSymbol := d.mapLocalSymbol(d.contents(arg.up)) 1547 targetReg := d.contents(argNodes[1]) 1548 args = append(args, ".Lboringssl_got_delta(%rip)") 1549 wrappers = append(wrappers, func(k func()) { 1550 k() 1551 d.output.WriteString(fmt.Sprintf("\taddq $.Lboringssl_got_delta-%s, %s\n", baseSymbol, targetReg)) 1552 }) 1553 1554 case ruleGOTSymbolOffset: 1555 if instructionName != "movabsq" { 1556 return nil, fmt.Errorf("_GLOBAL_OFFSET_TABLE_ offset didn't use movabsq") 1557 } 1558 if i != 0 || len(argNodes) != 2 { 1559 return nil, fmt.Errorf("movabs of _GLOBAL_OFFSET_TABLE_ offset didn't have expected form") 1560 } 1561 1562 assertNodeType(arg.up, ruleSymbolName) 1563 symbol := d.contents(arg.up) 1564 if strings.HasPrefix(symbol, ".L") { 1565 symbol = d.mapLocalSymbol(symbol) 1566 } 1567 targetReg := d.contents(argNodes[1]) 1568 1569 var prefix string 1570 isGOTOFF := strings.HasSuffix(d.contents(arg), "@GOTOFF") 1571 if isGOTOFF { 1572 prefix = "gotoff" 1573 d.gotOffOffsetsNeeded[symbol] = struct{}{} 1574 } else { 1575 prefix = "got" 1576 d.gotOffsetsNeeded[symbol] = struct{}{} 1577 } 1578 changed = true 1579 1580 wrappers = append(wrappers, func(k func()) { 1581 // Even if one tries to use 32-bit GOT offsets, Clang's linker (at the time 1582 // of writing) emits 64-bit relocations anyway, so the following four bytes 1583 // get stomped. Thus we use 64-bit offsets. 
1584 d.output.WriteString(fmt.Sprintf("\tmovq .Lboringssl_%s_%s(%%rip), %s\n", prefix, symbol, targetReg)) 1585 }) 1586 1587 default: 1588 panic(fmt.Sprintf("unknown instruction argument type %q", rul3s[arg.pegRule])) 1589 } 1590 } 1591 1592 if changed { 1593 d.writeCommentedNode(statement) 1594 replacement := "\t" + instructionName + "\t" + strings.Join(args, ", ") + "\n" 1595 wrappers.do(func() { 1596 d.output.WriteString(replacement) 1597 }) 1598 } else { 1599 d.writeNode(statement) 1600 } 1601 1602 return statement, nil 1603} 1604 1605func (d *delocation) handleBSS(statement *node32) (*node32, error) { 1606 lastStatement := statement 1607 for statement = statement.next; statement != nil; lastStatement, statement = statement, statement.next { 1608 node := skipWS(statement.up) 1609 if node == nil { 1610 d.writeNode(statement) 1611 continue 1612 } 1613 1614 switch node.pegRule { 1615 case ruleGlobalDirective, ruleComment, ruleInstruction, ruleLocationDirective: 1616 d.writeNode(statement) 1617 1618 case ruleDirective: 1619 directive := node.up 1620 assertNodeType(directive, ruleDirectiveName) 1621 directiveName := d.contents(directive) 1622 if directiveName == "text" || directiveName == "section" || directiveName == "data" { 1623 return lastStatement, nil 1624 } 1625 d.writeNode(statement) 1626 1627 case ruleLabel: 1628 label := node.up 1629 d.writeNode(statement) 1630 1631 if label.pegRule != ruleLocalSymbol { 1632 symbol := d.contents(label) 1633 localSymbol := localTargetName(symbol) 1634 d.output.WriteString(fmt.Sprintf("\n%s:\n", localSymbol)) 1635 1636 d.bssAccessorsNeeded[symbol] = localSymbol 1637 } 1638 1639 case ruleLabelContainingDirective: 1640 var err error 1641 statement, err = d.processLabelContainingDirective(statement, node.up) 1642 if err != nil { 1643 return nil, err 1644 } 1645 1646 default: 1647 return nil, fmt.Errorf("unknown BSS statement type %q in %q", rul3s[node.pegRule], d.contents(statement)) 1648 } 1649 } 1650 1651 return 
lastStatement, nil 1652} 1653 1654func writeAarch64Function(w stringWriter, funcName string, writeContents func(stringWriter)) { 1655 w.WriteString(".p2align 2\n") 1656 w.WriteString(".hidden " + funcName + "\n") 1657 w.WriteString(".type " + funcName + ", @function\n") 1658 w.WriteString(funcName + ":\n") 1659 w.WriteString(".cfi_startproc\n") 1660 writeContents(w) 1661 w.WriteString(".cfi_endproc\n") 1662 w.WriteString(".size " + funcName + ", .-" + funcName + "\n") 1663} 1664 1665func transform(w stringWriter, inputs []inputFile) error { 1666 // symbols contains all defined symbols. 1667 symbols := make(map[string]struct{}) 1668 // localEntrySymbols contains all symbols with a .localentry directive. 1669 localEntrySymbols := make(map[string]struct{}) 1670 // fileNumbers is the set of IDs seen in .file directives. 1671 fileNumbers := make(map[int]struct{}) 1672 // maxObservedFileNumber contains the largest seen file number in a 1673 // .file directive. Zero is not a valid number. 1674 maxObservedFileNumber := 0 1675 // fileDirectivesContainMD5 is true if the compiler is outputting MD5 1676 // checksums in .file directives. If it does so, then this script needs 1677 // to match that behaviour otherwise warnings result. 1678 fileDirectivesContainMD5 := false 1679 1680 // OPENSSL_ia32cap_get will be synthesized by this script. 
1681 symbols["OPENSSL_ia32cap_get"] = struct{}{} 1682 1683 for _, input := range inputs { 1684 forEachPath(input.ast.up, func(node *node32) { 1685 symbol := input.contents[node.begin:node.end] 1686 if _, ok := symbols[symbol]; ok { 1687 panic(fmt.Sprintf("Duplicate symbol found: %q in %q", symbol, input.path)) 1688 } 1689 symbols[symbol] = struct{}{} 1690 }, ruleStatement, ruleLabel, ruleSymbolName) 1691 1692 forEachPath(input.ast.up, func(node *node32) { 1693 node = node.up 1694 assertNodeType(node, ruleLabelContainingDirectiveName) 1695 directive := input.contents[node.begin:node.end] 1696 if directive != ".localentry" { 1697 return 1698 } 1699 // Extract the first argument. 1700 node = skipWS(node.next) 1701 assertNodeType(node, ruleSymbolArgs) 1702 node = node.up 1703 assertNodeType(node, ruleSymbolArg) 1704 symbol := input.contents[node.begin:node.end] 1705 if _, ok := localEntrySymbols[symbol]; ok { 1706 panic(fmt.Sprintf("Duplicate .localentry directive found: %q in %q", symbol, input.path)) 1707 } 1708 localEntrySymbols[symbol] = struct{}{} 1709 }, ruleStatement, ruleLabelContainingDirective) 1710 1711 forEachPath(input.ast.up, func(node *node32) { 1712 assertNodeType(node, ruleLocationDirective) 1713 directive := input.contents[node.begin:node.end] 1714 if !strings.HasPrefix(directive, ".file") { 1715 return 1716 } 1717 parts := strings.Fields(directive) 1718 if len(parts) == 2 { 1719 // This is a .file directive with just a 1720 // filename. Clang appears to generate just one 1721 // of these at the beginning of the output for 1722 // the compilation unit. Ignore it. 
1723 return 1724 } 1725 fileNo, err := strconv.Atoi(parts[1]) 1726 if err != nil { 1727 panic(fmt.Sprintf("Failed to parse file number from .file: %q", directive)) 1728 } 1729 1730 if _, ok := fileNumbers[fileNo]; ok { 1731 panic(fmt.Sprintf("Duplicate file number %d observed", fileNo)) 1732 } 1733 fileNumbers[fileNo] = struct{}{} 1734 1735 if fileNo > maxObservedFileNumber { 1736 maxObservedFileNumber = fileNo 1737 } 1738 1739 for _, token := range parts[2:] { 1740 if token == "md5" { 1741 fileDirectivesContainMD5 = true 1742 } 1743 } 1744 }, ruleStatement, ruleLocationDirective) 1745 } 1746 1747 processor := x86_64 1748 if len(inputs) > 0 { 1749 processor = detectProcessor(inputs[0]) 1750 } 1751 1752 commentIndicator := "#" 1753 if processor == aarch64 { 1754 commentIndicator = "//" 1755 } 1756 1757 d := &delocation{ 1758 symbols: symbols, 1759 localEntrySymbols: localEntrySymbols, 1760 processor: processor, 1761 commentIndicator: commentIndicator, 1762 output: w, 1763 redirectors: make(map[string]string), 1764 bssAccessorsNeeded: make(map[string]string), 1765 tocLoaders: make(map[string]struct{}), 1766 gotExternalsNeeded: make(map[string]struct{}), 1767 gotOffsetsNeeded: make(map[string]struct{}), 1768 gotOffOffsetsNeeded: make(map[string]struct{}), 1769 } 1770 1771 w.WriteString(".text\n") 1772 var fileTrailing string 1773 if fileDirectivesContainMD5 { 1774 fileTrailing = " md5 0x00000000000000000000000000000000" 1775 } 1776 w.WriteString(fmt.Sprintf(".file %d \"inserted_by_delocate.c\"%s\n", maxObservedFileNumber+1, fileTrailing)) 1777 w.WriteString(fmt.Sprintf(".loc %d 1 0\n", maxObservedFileNumber+1)) 1778 w.WriteString("BORINGSSL_bcm_text_start:\n") 1779 1780 for _, input := range inputs { 1781 if err := d.processInput(input); err != nil { 1782 return err 1783 } 1784 } 1785 1786 w.WriteString(".text\n") 1787 w.WriteString(fmt.Sprintf(".loc %d 2 0\n", maxObservedFileNumber+1)) 1788 w.WriteString("BORINGSSL_bcm_text_end:\n") 1789 1790 // Emit redirector 
functions. Each is a single jump instruction. 1791 var redirectorNames []string 1792 for name := range d.redirectors { 1793 redirectorNames = append(redirectorNames, name) 1794 } 1795 sort.Strings(redirectorNames) 1796 1797 for _, name := range redirectorNames { 1798 redirector := d.redirectors[name] 1799 switch d.processor { 1800 case ppc64le: 1801 w.WriteString(".section \".toc\", \"aw\"\n") 1802 w.WriteString(".Lredirector_toc_" + name + ":\n") 1803 w.WriteString(".quad " + name + "\n") 1804 w.WriteString(".text\n") 1805 w.WriteString(".type " + redirector + ", @function\n") 1806 w.WriteString(redirector + ":\n") 1807 // |name| will clobber r2, so save it. This is matched by a restore in 1808 // redirector calls. 1809 w.WriteString("\tstd 2, 24(1)\n") 1810 // Load and call |name|'s global entry point. 1811 w.WriteString("\taddis 12, 2, .Lredirector_toc_" + name + "@toc@ha\n") 1812 w.WriteString("\tld 12, .Lredirector_toc_" + name + "@toc@l(12)\n") 1813 w.WriteString("\tmtctr 12\n") 1814 w.WriteString("\tbctr\n") 1815 1816 case aarch64: 1817 writeAarch64Function(w, redirector, func(w stringWriter) { 1818 w.WriteString("\tb " + name + "\n") 1819 }) 1820 1821 case x86_64: 1822 w.WriteString(".type " + redirector + ", @function\n") 1823 w.WriteString(redirector + ":\n") 1824 w.WriteString("\tjmp\t" + name + "\n") 1825 } 1826 } 1827 1828 var accessorNames []string 1829 for accessor := range d.bssAccessorsNeeded { 1830 accessorNames = append(accessorNames, accessor) 1831 } 1832 sort.Strings(accessorNames) 1833 1834 // Emit BSS accessor functions. Each is a single LEA followed by RET. 
1835 for _, name := range accessorNames { 1836 funcName := accessorName(name) 1837 target := d.bssAccessorsNeeded[name] 1838 1839 switch d.processor { 1840 case ppc64le: 1841 w.WriteString(".type " + funcName + ", @function\n") 1842 w.WriteString(funcName + ":\n") 1843 w.WriteString("\taddis 3, 2, " + target + "@toc@ha\n") 1844 w.WriteString("\taddi 3, 3, " + target + "@toc@l\n") 1845 w.WriteString("\tblr\n") 1846 1847 case x86_64: 1848 w.WriteString(".type " + funcName + ", @function\n") 1849 w.WriteString(funcName + ":\n") 1850 w.WriteString("\tleaq\t" + target + "(%rip), %rax\n\tret\n") 1851 1852 case aarch64: 1853 writeAarch64Function(w, funcName, func(w stringWriter) { 1854 w.WriteString("\tadrp x0, " + target + "\n") 1855 w.WriteString("\tadd x0, x0, :lo12:" + target + "\n") 1856 w.WriteString("\tret\n") 1857 }) 1858 } 1859 } 1860 1861 switch d.processor { 1862 case ppc64le: 1863 loadTOCNames := sortedSet(d.tocLoaders) 1864 for _, symbolAndOffset := range loadTOCNames { 1865 parts := strings.SplitN(symbolAndOffset, "\x00", 2) 1866 symbol, offset := parts[0], parts[1] 1867 1868 funcName := loadTOCFuncName(symbol, offset) 1869 ref := symbol + offset 1870 1871 w.WriteString(".type " + funcName[2:] + ", @function\n") 1872 w.WriteString(funcName[2:] + ":\n") 1873 w.WriteString(funcName + ":\n") 1874 w.WriteString("\taddis 3, 2, " + ref + "@toc@ha\n") 1875 w.WriteString("\taddi 3, 3, " + ref + "@toc@l\n") 1876 w.WriteString("\tblr\n") 1877 } 1878 1879 w.WriteString(".LBORINGSSL_external_toc:\n") 1880 w.WriteString(".quad .TOC.-.LBORINGSSL_external_toc\n") 1881 1882 case aarch64: 1883 externalNames := sortedSet(d.gotExternalsNeeded) 1884 for _, symbol := range externalNames { 1885 writeAarch64Function(w, gotHelperName(symbol), func(w stringWriter) { 1886 w.WriteString("\tadrp x0, :got:" + symbol + "\n") 1887 w.WriteString("\tldr x0, [x0, :got_lo12:" + symbol + "]\n") 1888 w.WriteString("\tret\n") 1889 }) 1890 } 1891 1892 writeAarch64Function(w, 
".LOPENSSL_armcap_P_addr", func(w stringWriter) { 1893 w.WriteString("\tadrp x0, OPENSSL_armcap_P\n") 1894 w.WriteString("\tadd x0, x0, :lo12:OPENSSL_armcap_P\n") 1895 w.WriteString("\tret\n") 1896 }) 1897 1898 case x86_64: 1899 externalNames := sortedSet(d.gotExternalsNeeded) 1900 for _, name := range externalNames { 1901 parts := strings.SplitN(name, "@", 2) 1902 symbol, section := parts[0], parts[1] 1903 w.WriteString(".type " + symbol + "_" + section + "_external, @object\n") 1904 w.WriteString(".size " + symbol + "_" + section + "_external, 8\n") 1905 w.WriteString(symbol + "_" + section + "_external:\n") 1906 // Ideally this would be .quad foo@GOTPCREL, but clang's 1907 // assembler cannot emit a 64-bit GOTPCREL relocation. Instead, 1908 // we manually sign-extend the value, knowing that the GOT is 1909 // always at the end, thus foo@GOTPCREL has a positive value. 1910 w.WriteString("\t.long " + symbol + "@" + section + "\n") 1911 w.WriteString("\t.long 0\n") 1912 } 1913 1914 w.WriteString(".type OPENSSL_ia32cap_get, @function\n") 1915 w.WriteString(".globl OPENSSL_ia32cap_get\n") 1916 w.WriteString(localTargetName("OPENSSL_ia32cap_get") + ":\n") 1917 w.WriteString("OPENSSL_ia32cap_get:\n") 1918 w.WriteString("\tleaq OPENSSL_ia32cap_P(%rip), %rax\n") 1919 w.WriteString("\tret\n") 1920 1921 w.WriteString(".extern OPENSSL_ia32cap_P\n") 1922 w.WriteString(".type OPENSSL_ia32cap_addr_delta, @object\n") 1923 w.WriteString(".size OPENSSL_ia32cap_addr_delta, 8\n") 1924 w.WriteString("OPENSSL_ia32cap_addr_delta:\n") 1925 w.WriteString(".quad OPENSSL_ia32cap_P-OPENSSL_ia32cap_addr_delta\n") 1926 1927 if d.gotDeltaNeeded { 1928 w.WriteString(".Lboringssl_got_delta:\n") 1929 w.WriteString("\t.quad _GLOBAL_OFFSET_TABLE_-.Lboringssl_got_delta\n") 1930 } 1931 1932 for _, name := range sortedSet(d.gotOffsetsNeeded) { 1933 w.WriteString(".Lboringssl_got_" + name + ":\n") 1934 w.WriteString("\t.quad " + name + "@GOT\n") 1935 } 1936 for _, name := range 
sortedSet(d.gotOffOffsetsNeeded) { 1937 w.WriteString(".Lboringssl_gotoff_" + name + ":\n") 1938 w.WriteString("\t.quad " + name + "@GOTOFF\n") 1939 } 1940 } 1941 1942 w.WriteString(".type BORINGSSL_bcm_text_hash, @object\n") 1943 w.WriteString(".size BORINGSSL_bcm_text_hash, 64\n") 1944 w.WriteString("BORINGSSL_bcm_text_hash:\n") 1945 for _, b := range fipscommon.UninitHashValue { 1946 w.WriteString(".byte 0x" + strconv.FormatUint(uint64(b), 16) + "\n") 1947 } 1948 1949 return nil 1950} 1951 1952func parseInputs(inputs []inputFile) error { 1953 for i, input := range inputs { 1954 var contents string 1955 1956 if input.isArchive { 1957 arFile, err := os.Open(input.path) 1958 if err != nil { 1959 return err 1960 } 1961 defer arFile.Close() 1962 1963 ar, err := ar.ParseAR(arFile) 1964 if err != nil { 1965 return err 1966 } 1967 1968 if len(ar) != 1 { 1969 return fmt.Errorf("expected one file in archive, but found %d", len(ar)) 1970 } 1971 1972 for _, c := range ar { 1973 contents = string(c) 1974 } 1975 } else { 1976 inBytes, err := ioutil.ReadFile(input.path) 1977 if err != nil { 1978 return err 1979 } 1980 1981 contents = string(inBytes) 1982 } 1983 1984 asm := Asm{Buffer: contents, Pretty: true} 1985 asm.Init() 1986 if err := asm.Parse(); err != nil { 1987 return fmt.Errorf("error while parsing %q: %s", input.path, err) 1988 } 1989 ast := asm.AST() 1990 1991 inputs[i].contents = contents 1992 inputs[i].ast = ast 1993 } 1994 1995 return nil 1996} 1997 1998func main() { 1999 // The .a file, if given, is expected to be an archive of textual 2000 // assembly sources. That's odd, but CMake really wants to create 2001 // archive files so it's the only way that we can make it work. 
2002 arInput := flag.String("a", "", "Path to a .a file containing assembly sources") 2003 outFile := flag.String("o", "", "Path to output assembly") 2004 2005 flag.Parse() 2006 2007 if len(*outFile) == 0 { 2008 fmt.Fprintf(os.Stderr, "Must give argument to -o.\n") 2009 os.Exit(1) 2010 } 2011 2012 var inputs []inputFile 2013 if len(*arInput) > 0 { 2014 inputs = append(inputs, inputFile{ 2015 path: *arInput, 2016 index: 0, 2017 isArchive: true, 2018 }) 2019 } 2020 2021 for i, path := range flag.Args() { 2022 if len(path) == 0 { 2023 continue 2024 } 2025 2026 inputs = append(inputs, inputFile{ 2027 path: path, 2028 index: i + 1, 2029 }) 2030 } 2031 2032 if err := parseInputs(inputs); err != nil { 2033 fmt.Fprintf(os.Stderr, "%s\n", err) 2034 os.Exit(1) 2035 } 2036 2037 out, err := os.OpenFile(*outFile, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0644) 2038 if err != nil { 2039 panic(err) 2040 } 2041 defer out.Close() 2042 2043 if err := transform(out, inputs); err != nil { 2044 fmt.Fprintf(os.Stderr, "%s\n", err) 2045 os.Exit(1) 2046 } 2047} 2048 2049func forEachPath(node *node32, cb func(*node32), rules ...pegRule) { 2050 if node == nil { 2051 return 2052 } 2053 2054 if len(rules) == 0 { 2055 cb(node) 2056 return 2057 } 2058 2059 rule := rules[0] 2060 childRules := rules[1:] 2061 2062 for ; node != nil; node = node.next { 2063 if node.pegRule != rule { 2064 continue 2065 } 2066 2067 if len(childRules) == 0 { 2068 cb(node) 2069 } else { 2070 forEachPath(node.up, cb, childRules...) 
2071 } 2072 } 2073} 2074 2075func skipNodes(node *node32, ruleToSkip pegRule) *node32 { 2076 for ; node != nil && node.pegRule == ruleToSkip; node = node.next { 2077 } 2078 return node 2079} 2080 2081func skipWS(node *node32) *node32 { 2082 return skipNodes(node, ruleWS) 2083} 2084 2085func assertNodeType(node *node32, expected pegRule) { 2086 if rule := node.pegRule; rule != expected { 2087 panic(fmt.Sprintf("node was %q, but wanted %q", rul3s[rule], rul3s[expected])) 2088 } 2089} 2090 2091type wrapperFunc func(func()) 2092 2093type wrapperStack []wrapperFunc 2094 2095func (w *wrapperStack) do(baseCase func()) { 2096 if len(*w) == 0 { 2097 baseCase() 2098 return 2099 } 2100 2101 wrapper := (*w)[0] 2102 *w = (*w)[1:] 2103 wrapper(func() { w.do(baseCase) }) 2104} 2105 2106// localTargetName returns the name of the local target label for a global 2107// symbol named name. 2108func localTargetName(name string) string { 2109 return ".L" + name + "_local_target" 2110} 2111 2112func localEntryName(name string) string { 2113 return ".L" + name + "_local_entry" 2114} 2115 2116func isSynthesized(symbol string) bool { 2117 return strings.HasSuffix(symbol, "_bss_get") || 2118 symbol == "OPENSSL_ia32cap_get" || 2119 strings.HasPrefix(symbol, "BORINGSSL_bcm_text_") 2120} 2121 2122func redirectorName(symbol string) string { 2123 return "bcm_redirector_" + symbol 2124} 2125 2126// sectionType returns the type of a section. I.e. a section called “.text.foo” 2127// is a “.text” section. 2128func sectionType(section string) (string, bool) { 2129 if len(section) == 0 || section[0] != '.' { 2130 return "", false 2131 } 2132 2133 i := strings.Index(section[1:], ".") 2134 if i != -1 { 2135 section = section[:i+1] 2136 } 2137 2138 if strings.HasPrefix(section, ".debug_") { 2139 return ".debug", true 2140 } 2141 2142 return section, true 2143} 2144 2145// accessorName returns the name of the accessor function for a BSS symbol 2146// named name. 
2147func accessorName(name string) string { 2148 return name + "_bss_get" 2149} 2150 2151func (d *delocation) mapLocalSymbol(symbol string) string { 2152 if d.currentInput.index == 0 { 2153 return symbol 2154 } 2155 return symbol + "_BCM_" + strconv.Itoa(d.currentInput.index) 2156} 2157 2158func detectProcessor(input inputFile) processorType { 2159 for statement := input.ast.up; statement != nil; statement = statement.next { 2160 node := skipNodes(statement.up, ruleWS) 2161 if node == nil || node.pegRule != ruleInstruction { 2162 continue 2163 } 2164 2165 instruction := node.up 2166 instructionName := input.contents[instruction.begin:instruction.end] 2167 2168 switch instructionName { 2169 case "movq", "call", "leaq": 2170 return x86_64 2171 case "addis", "addi", "mflr": 2172 return ppc64le 2173 case "str", "bl", "ldr", "st1": 2174 return aarch64 2175 } 2176 } 2177 2178 panic("processed entire input and didn't recognise any instructions.") 2179} 2180 2181func sortedSet(m map[string]struct{}) []string { 2182 ret := make([]string, 0, len(m)) 2183 for key := range m { 2184 ret = append(ret, key) 2185 } 2186 sort.Strings(ret) 2187 return ret 2188} 2189