1// Copyright (c) 2017, Google Inc.
2//
3// Permission to use, copy, modify, and/or distribute this software for any
4// purpose with or without fee is hereby granted, provided that the above
5// copyright notice and this permission notice appear in all copies.
6//
7// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
8// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
9// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
10// SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
11// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
12// OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
13// CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
14
15// delocate performs several transformations of textual assembly code. See
16// crypto/fipsmodule/FIPS.md for an overview.
17package main
18
19import (
20	"errors"
21	"flag"
22	"fmt"
23	"io/ioutil"
24	"os"
25	"sort"
26	"strconv"
27	"strings"
28
29	"boringssl.googlesource.com/boringssl/util/ar"
30	"boringssl.googlesource.com/boringssl/util/fipstools/fipscommon"
31)
32
// inputFile represents a textual assembly file.
type inputFile struct {
	// path names the input. NOTE(review): presumably a filesystem path —
	// confirm against the code that populates it.
	path string
	// index is a unique identifier given to this file. It's used for
	// mapping local symbols.
	index int
	// isArchive indicates that the input should be processed as an ar
	// file.
	isArchive bool
	// contents contains the contents of the file. AST nodes refer to
	// their text by begin/end offsets into this string.
	contents string
	// ast points to the head of the syntax tree.
	ast *node32
}
47
// stringWriter is the minimal interface required of the destination that
// rewritten assembly is emitted to: anything that can accept a string and
// report the number of bytes written along with any error.
type stringWriter interface {
	WriteString(string) (int, error)
}
51
// processorType identifies the instruction set of the assembly being
// processed. It selects which per-architecture instruction handler is used.
type processorType int

// The supported processors. Values start at one, so the zero value of
// processorType does not correspond to any supported processor.
const (
	ppc64le processorType = iota + 1
	x86_64
	aarch64
)
59
// delocation holds the state needed during a delocation operation.
type delocation struct {
	// processor selects the per-architecture instruction handling and
	// output is where all rewritten assembly is written.
	processor processorType
	output    stringWriter
	// commentIndicator starts a comment, e.g. "//" or "#"
	commentIndicator string

	// symbols is the set of symbols defined in the module.
	symbols map[string]struct{}
	// localEntrySymbols is the set of symbols with .localentry directives.
	localEntrySymbols map[string]struct{}
	// redirectors maps from out-call symbol name to the name of a
	// redirector function for that symbol. E.g. “memcpy” ->
	// “bcm_redirector_memcpy”.
	redirectors map[string]string
	// bssAccessorsNeeded maps from a BSS symbol name to the symbol that
	// should be used to reference it. E.g. “P384_data_storage” ->
	// “P384_data_storage”.
	bssAccessorsNeeded map[string]string
	// tocLoaders is a set of symbol names for which TOC helper functions
	// are required. (ppc64le only.)
	tocLoaders map[string]struct{}
	// gotExternalsNeeded is a set of symbol names for which we need
	// “delta” symbols: symbols that contain the offset from their location
	// to the memory in question.
	gotExternalsNeeded map[string]struct{}
	// gotDeltaNeeded is true if the code needs to load the value of
	// _GLOBAL_OFFSET_TABLE_.
	gotDeltaNeeded bool
	// gotOffsetsNeeded contains the symbols whose @GOT offsets are needed.
	gotOffsetsNeeded map[string]struct{}
	// gotOffOffsetsNeeded contains the symbols whose @GOTOFF offsets are needed.
	gotOffOffsetsNeeded map[string]struct{}

	// currentInput is the file currently being processed. It is set at the
	// start of processInput and is the text that node offsets index into.
	currentInput inputFile
}
96
97func (d *delocation) contents(node *node32) string {
98	return d.currentInput.contents[node.begin:node.end]
99}
100
101// writeNode writes out an AST node.
102func (d *delocation) writeNode(node *node32) {
103	if _, err := d.output.WriteString(d.contents(node)); err != nil {
104		panic(err)
105	}
106}
107
108func (d *delocation) writeCommentedNode(node *node32) {
109	line := d.contents(node)
110	if _, err := d.output.WriteString(d.commentIndicator + " WAS " + strings.TrimSpace(line) + "\n"); err != nil {
111		panic(err)
112	}
113}
114
115func locateError(err error, with *node32, in inputFile) error {
116	posMap := translatePositions([]rune(in.contents), []int{int(with.begin)})
117	var line int
118	for _, pos := range posMap {
119		line = pos.line
120	}
121
122	return fmt.Errorf("error while processing %q on line %d: %q", in.contents[with.begin:with.end], line, err)
123}
124
125func (d *delocation) processInput(input inputFile) (err error) {
126	d.currentInput = input
127
128	var origStatement *node32
129	defer func() {
130		if err := recover(); err != nil {
131			panic(locateError(fmt.Errorf("%s", err), origStatement, input))
132		}
133	}()
134
135	for statement := input.ast.up; statement != nil; statement = statement.next {
136		assertNodeType(statement, ruleStatement)
137		origStatement = statement
138
139		node := skipWS(statement.up)
140		if node == nil {
141			d.writeNode(statement)
142			continue
143		}
144
145		switch node.pegRule {
146		case ruleGlobalDirective, ruleComment, ruleLocationDirective:
147			d.writeNode(statement)
148		case ruleDirective:
149			statement, err = d.processDirective(statement, node.up)
150		case ruleLabelContainingDirective:
151			statement, err = d.processLabelContainingDirective(statement, node.up)
152		case ruleLabel:
153			statement, err = d.processLabel(statement, node.up)
154		case ruleInstruction:
155			switch d.processor {
156			case x86_64:
157				statement, err = d.processIntelInstruction(statement, node.up)
158			case ppc64le:
159				statement, err = d.processPPCInstruction(statement, node.up)
160			case aarch64:
161				statement, err = d.processAarch64Instruction(statement, node.up)
162			default:
163				panic("unknown processor")
164			}
165		default:
166			panic(fmt.Sprintf("unknown top-level statement type %q", rul3s[node.pegRule]))
167		}
168
169		if err != nil {
170			return locateError(err, origStatement, input)
171		}
172	}
173
174	return nil
175}
176
177func (d *delocation) processDirective(statement, directive *node32) (*node32, error) {
178	assertNodeType(directive, ruleDirectiveName)
179	directiveName := d.contents(directive)
180
181	var args []string
182	forEachPath(directive, func(arg *node32) {
183		// If the argument is a quoted string, use the raw contents.
184		// (Note that this doesn't unescape the string, but that's not
185		// needed so far.
186		if arg.up != nil {
187			arg = arg.up
188			assertNodeType(arg, ruleQuotedArg)
189			if arg.up == nil {
190				args = append(args, "")
191				return
192			}
193			arg = arg.up
194			assertNodeType(arg, ruleQuotedText)
195		}
196		args = append(args, d.contents(arg))
197	}, ruleArgs, ruleArg)
198
199	switch directiveName {
200	case "comm", "lcomm":
201		if len(args) < 1 {
202			return nil, errors.New("comm directive has no arguments")
203		}
204		d.bssAccessorsNeeded[args[0]] = args[0]
205		d.writeNode(statement)
206
207	case "data":
208		// ASAN and some versions of MSAN are adding a .data section,
209		// and adding references to symbols within it to the code. We
210		// will have to work around this in the future.
211		return nil, errors.New(".data section found in module")
212
213	case "section":
214		section := args[0]
215
216		if section == ".data.rel.ro" {
217			// In a normal build, this is an indication of a
218			// problem but any references from the module to this
219			// section will result in a relocation and thus will
220			// break the integrity check. ASAN can generate these
221			// sections and so we will likely have to work around
222			// that in the future.
223			return nil, errors.New(".data.rel.ro section found in module")
224		}
225
226		sectionType, ok := sectionType(section)
227		if !ok {
228			// Unknown sections are permitted in order to be robust
229			// to different compiler modes.
230			d.writeNode(statement)
231			break
232		}
233
234		switch sectionType {
235		case ".rodata", ".text":
236			// Move .rodata to .text so it may be accessed without
237			// a relocation. GCC with -fmerge-constants will place
238			// strings into separate sections, so we move all
239			// sections named like .rodata. Also move .text.startup
240			// so the self-test function is also in the module.
241			d.writeCommentedNode(statement)
242			d.output.WriteString(".text\n")
243
244		case ".data":
245			// See above about .data
246			return nil, errors.New(".data section found in module")
247
248		case ".init_array", ".fini_array", ".ctors", ".dtors":
249			// init_array/ctors/dtors contains function
250			// pointers to constructor/destructor
251			// functions. These contain relocations, but
252			// they're in a different section anyway.
253			d.writeNode(statement)
254			break
255
256		case ".debug", ".note", ".toc":
257			d.writeNode(statement)
258			break
259
260		case ".bss":
261			d.writeNode(statement)
262			return d.handleBSS(statement)
263		}
264
265	default:
266		d.writeNode(statement)
267	}
268
269	return statement, nil
270}
271
272func (d *delocation) processLabelContainingDirective(statement, directive *node32) (*node32, error) {
273	// The symbols within directives need to be mapped so that local
274	// symbols in two different .s inputs don't collide.
275	changed := false
276	assertNodeType(directive, ruleLabelContainingDirectiveName)
277	name := d.contents(directive)
278
279	node := directive.next
280	assertNodeType(node, ruleWS)
281
282	node = node.next
283	assertNodeType(node, ruleSymbolArgs)
284
285	var args []string
286	for node = skipWS(node.up); node != nil; node = skipWS(node.next) {
287		assertNodeType(node, ruleSymbolArg)
288		arg := node.up
289		var mapped string
290
291		for term := arg; term != nil; term = term.next {
292			if term.pegRule != ruleLocalSymbol {
293				mapped += d.contents(term)
294				continue
295			}
296
297			oldSymbol := d.contents(term)
298			newSymbol := d.mapLocalSymbol(oldSymbol)
299			if newSymbol != oldSymbol {
300				changed = true
301			}
302
303			mapped += newSymbol
304		}
305
306		args = append(args, mapped)
307	}
308
309	if !changed {
310		d.writeNode(statement)
311	} else {
312		d.writeCommentedNode(statement)
313		d.output.WriteString("\t" + name + "\t" + strings.Join(args, ", ") + "\n")
314	}
315
316	if name == ".localentry" {
317		d.output.WriteString(localEntryName(args[0]) + ":\n")
318	}
319
320	return statement, nil
321}
322
323func (d *delocation) processLabel(statement, label *node32) (*node32, error) {
324	symbol := d.contents(label)
325
326	switch label.pegRule {
327	case ruleLocalLabel:
328		d.output.WriteString(symbol + ":\n")
329	case ruleLocalSymbol:
330		// symbols need to be mapped so that local symbols from two
331		// different .s inputs don't collide.
332		d.output.WriteString(d.mapLocalSymbol(symbol) + ":\n")
333	case ruleSymbolName:
334		d.output.WriteString(localTargetName(symbol) + ":\n")
335		d.writeNode(statement)
336	default:
337		return nil, fmt.Errorf("unknown label type %q", rul3s[label.pegRule])
338	}
339
340	return statement, nil
341}
342
343// instructionArgs collects all the arguments to an instruction.
344func instructionArgs(node *node32) (argNodes []*node32) {
345	for node = skipWS(node); node != nil; node = skipWS(node.next) {
346		assertNodeType(node, ruleInstructionArg)
347		argNodes = append(argNodes, node.up)
348	}
349
350	return argNodes
351}
352
353// Aarch64 support
354
// gotHelperName gives the name of the synthesised function that returns the
// address of symbol from the GOT. The name is the fixed local prefix
// ".Lboringssl_loadgot_" followed by the symbol name.
func gotHelperName(symbol string) string {
	const helperPrefix = ".Lboringssl_loadgot_"
	return helperPrefix + symbol
}
360
361// loadAarch64Address emits instructions to put the address of |symbol|
362// (optionally adjusted by |offsetStr|) into |targetReg|.
363func (d *delocation) loadAarch64Address(statement *node32, targetReg string, symbol string, offsetStr string) (*node32, error) {
364	// There are two paths here: either the symbol is known to be local in which
365	// case adr is used to get the address (within 1MiB), or a GOT reference is
366	// really needed in which case the code needs to jump to a helper function.
367	//
368	// A helper function is needed because using code appears to be the only way
369	// to load a GOT value. On other platforms we have ".quad foo@GOT" outside of
370	// the module, but on Aarch64 that results in a "COPY" relocation and linker
371	// comments suggest it's a weird hack. So, for each GOT symbol needed, we emit
372	// a function outside of the module that returns the address from the GOT in
373	// x0.
374
375	d.writeCommentedNode(statement)
376
377	_, isKnown := d.symbols[symbol]
378	isLocal := strings.HasPrefix(symbol, ".L")
379	if isKnown || isLocal || isSynthesized(symbol) {
380		if isLocal {
381			symbol = d.mapLocalSymbol(symbol)
382		} else if isKnown {
383			symbol = localTargetName(symbol)
384		}
385
386		d.output.WriteString("\tadr " + targetReg + ", " + symbol + offsetStr + "\n")
387
388		return statement, nil
389	}
390
391	if len(offsetStr) != 0 {
392		panic("non-zero offset for helper-based reference")
393	}
394
395	var helperFunc string
396	if symbol == "OPENSSL_armcap_P" {
397		helperFunc = ".LOPENSSL_armcap_P_addr"
398	} else {
399		// GOT helpers also dereference the GOT entry, thus the subsequent ldr
400		// instruction, which would normally do the dereferencing, needs to be
401		// dropped. GOT helpers have to include the dereference because the
402		// assembler doesn't support ":got_lo12:foo" offsets except in an ldr
403		// instruction.
404		d.gotExternalsNeeded[symbol] = struct{}{}
405		helperFunc = gotHelperName(symbol)
406	}
407
408	// Clear the red-zone. I can't find a definitive answer about whether Linux
409	// Aarch64 includes a red-zone, but Microsoft has a 16-byte one and Apple a
410	// 128-byte one. Thus conservatively clear a 128-byte red-zone.
411	d.output.WriteString("\tsub sp, sp, 128\n")
412
413	// Save x0 (which will be stomped by the return value) and the link register
414	// to the stack. Then save the program counter into the link register and
415	// jump to the helper function.
416	d.output.WriteString("\tstp x0, lr, [sp, #-16]!\n")
417	d.output.WriteString("\tbl " + helperFunc + "\n")
418
419	if targetReg == "x0" {
420		// If the target happens to be x0 then restore the link register from the
421		// stack and send the saved value of x0 to the zero register.
422		d.output.WriteString("\tldp xzr, lr, [sp], #16\n")
423	} else {
424		// Otherwise move the result into place and restore registers.
425		d.output.WriteString("\tmov " + targetReg + ", x0\n")
426		d.output.WriteString("\tldp x0, lr, [sp], #16\n")
427	}
428
429	// Revert the red-zone adjustment.
430	d.output.WriteString("\tadd sp, sp, 128\n")
431
432	return statement, nil
433}
434
435func (d *delocation) processAarch64Instruction(statement, instruction *node32) (*node32, error) {
436	assertNodeType(instruction, ruleInstructionName)
437	instructionName := d.contents(instruction)
438
439	argNodes := instructionArgs(instruction.next)
440
441	switch instructionName {
442	case "cset", "csel", "csetm", "cneg", "csinv", "cinc", "csinc", "csneg":
443		// These functions are special because they take a condition-code name as
444		// an argument and that looks like a symbol reference.
445		d.writeNode(statement)
446		return statement, nil
447
448	case "mrs":
449		// Functions that take special register names also look like a symbol
450		// reference to the parser.
451		d.writeNode(statement)
452		return statement, nil
453
454	case "adrp":
455		// adrp always generates a relocation, even when the target symbol is in the
456		// same segment, because the page-offset of the code isn't known until link
457		// time. Thus adrp instructions are turned into either adr instructions
458		// (limiting the module to 1MiB offsets) or calls to helper functions, both of
459		// which load the full address. Later instructions, which add the low 12 bits
460		// of offset, are tweaked to remove the offset since it's already included.
461		// Loads of GOT symbols are slightly more complex because it's not possible to
462		// avoid dereferencing a GOT entry with Clang's assembler. Thus the later ldr
463		// instruction, which would normally do the dereferencing, is dropped
464		// completely. (Or turned into a mov if it targets a different register.)
465		assertNodeType(argNodes[0], ruleRegisterOrConstant)
466		targetReg := d.contents(argNodes[0])
467		if !strings.HasPrefix(targetReg, "x") {
468			panic("adrp targetting register " + targetReg + ", which has the wrong size")
469		}
470
471		var symbol, offset string
472		switch argNodes[1].pegRule {
473		case ruleGOTSymbolOffset:
474			symbol = d.contents(argNodes[1].up)
475		case ruleMemoryRef:
476			assertNodeType(argNodes[1].up, ruleSymbolRef)
477			node, empty := d.gatherOffsets(argNodes[1].up.up, "")
478			if len(empty) != 0 {
479				panic("prefix offsets found for adrp")
480			}
481			symbol = d.contents(node)
482			_, offset = d.gatherOffsets(node.next, "")
483		default:
484			panic("Unhandled adrp argument type " + rul3s[argNodes[1].pegRule])
485		}
486
487		return d.loadAarch64Address(statement, targetReg, symbol, offset)
488	}
489
490	var args []string
491	changed := false
492
493	for _, arg := range argNodes {
494		fullArg := arg
495
496		switch arg.pegRule {
497		case ruleRegisterOrConstant, ruleLocalLabelRef, ruleARMConstantTweak:
498			args = append(args, d.contents(fullArg))
499
500		case ruleGOTSymbolOffset:
501			// These should only be arguments to adrp and thus unreachable.
502			panic("unreachable")
503
504		case ruleMemoryRef:
505			ref := arg.up
506
507			switch ref.pegRule {
508			case ruleSymbolRef:
509				// This is a branch. Either the target needs to be written to a local
510				// version of the symbol to ensure that no relocations are emitted, or
511				// it needs to jump to a redirector function.
512				symbol, _, _, didChange, symbolIsLocal, _ := d.parseMemRef(arg.up)
513				changed = didChange
514
515				if _, knownSymbol := d.symbols[symbol]; knownSymbol {
516					symbol = localTargetName(symbol)
517					changed = true
518				} else if !symbolIsLocal && !isSynthesized(symbol) {
519					redirector := redirectorName(symbol)
520					d.redirectors[symbol] = redirector
521					symbol = redirector
522					changed = true
523				}
524
525				args = append(args, symbol)
526
527			case ruleARMBaseIndexScale:
528				parts := ref.up
529				assertNodeType(parts, ruleARMRegister)
530				baseAddrReg := d.contents(parts)
531				parts = skipWS(parts.next)
532
533				// Only two forms need special handling. First there's memory references
534				// like "[x*, :got_lo12:foo]". The base register here will have been the
535				// target of an adrp instruction to load the page address, but the adrp
536				// will have turned into loading the full address *and dereferencing it*,
537				// above. Thus this instruction needs to be dropped otherwise we'll be
538				// dereferencing twice.
539				//
540				// Second there are forms like "[x*, :lo12:foo]" where the code has used
541				// adrp to load the page address into x*. That adrp will have been turned
542				// into loading the full address so just the offset needs to be dropped.
543
544				if parts != nil {
545					if parts.pegRule == ruleARMGOTLow12 {
546						if instructionName != "ldr" {
547							panic("Symbol reference outside of ldr instruction")
548						}
549
550						if skipWS(parts.next) != nil || parts.up.next != nil {
551							panic("can't handle tweak or post-increment with symbol references")
552						}
553
554						// The GOT helper already dereferenced the entry so, at most, just a mov
555						// is needed to put things in the right register.
556						d.writeCommentedNode(statement)
557						if baseAddrReg != args[0] {
558							d.output.WriteString("\tmov " + args[0] + ", " + baseAddrReg + "\n")
559						}
560						return statement, nil
561					} else if parts.pegRule == ruleLow12BitsSymbolRef {
562						if instructionName != "ldr" {
563							panic("Symbol reference outside of ldr instruction")
564						}
565
566						if skipWS(parts.next) != nil || parts.up.next != nil {
567							panic("can't handle tweak or post-increment with symbol references")
568						}
569
570						// Suppress the offset; adrp loaded the full address.
571						args = append(args, "["+baseAddrReg+"]")
572						changed = true
573						continue
574					}
575				}
576
577				args = append(args, d.contents(fullArg))
578
579			case ruleLow12BitsSymbolRef:
580				// These are the second instruction in a pair:
581				//   adrp x0, symbol           // Load the page address into x0
582				//   add x1, x0, :lo12:symbol  // Adds the page offset.
583				//
584				// The adrp instruction will have been turned into a sequence that loads
585				// the full address, above, thus the offset is turned into zero. If that
586				// results in the instruction being a nop, then it is deleted.
587				if instructionName != "add" {
588					panic(fmt.Sprintf("unsure how to handle %q instruction using lo12", instructionName))
589				}
590
591				if !strings.HasPrefix(args[0], "x") || !strings.HasPrefix(args[1], "x") {
592					panic("address arithmetic with incorrectly sized register")
593				}
594
595				if args[0] == args[1] {
596					d.writeCommentedNode(statement)
597					return statement, nil
598				}
599
600				args = append(args, "#0")
601				changed = true
602
603			default:
604				panic(fmt.Sprintf("unhandled MemoryRef type %s", rul3s[ref.pegRule]))
605			}
606
607		default:
608			panic(fmt.Sprintf("unknown instruction argument type %q", rul3s[arg.pegRule]))
609		}
610	}
611
612	if changed {
613		d.writeCommentedNode(statement)
614		replacement := "\t" + instructionName + "\t" + strings.Join(args, ", ") + "\n"
615		d.output.WriteString(replacement)
616	} else {
617		d.writeNode(statement)
618	}
619
620	return statement, nil
621}
622
623/* ppc64le
624
625[PABI]: “64-Bit ELF V2 ABI Specification. Power Architecture.” March 21st,
626        2017
627
628(Also useful is “Power ISA Version 2.07 B”. Note that version three of that
629document is /not/ good as that's POWER9 specific.)
630
631ppc64le doesn't have IP-relative addressing and does a lot to work around this.
Rather than reference a PLT and GOT directly, it has a single structure called
633the TOC (Table Of Contents). Within the TOC is the contents of .rodata, .data,
634.got, .plt, .bss, etc sections [PABI;3.3].
635
636A pointer to the TOC is maintained in r2 and the following pattern is used to
637load the address of an element into a register:
638
639  addis <address register>, 2, foo@toc@ha
640  addi <address register>, <address register>, foo@toc@l
641
642The “addis” instruction shifts a signed constant left 16 bits and adds the
643result to its second argument, saving the result in the first argument. The
“addi” instruction does the same, but without shifting. Thus the “@toc@ha”
645suffix on a symbol means “the top 16 bits of the TOC offset” and “@toc@l” means
646“the bottom 16 bits of the offset”. However, note that both values are signed,
647thus offsets in the top half of a 64KB chunk will have an @ha value that's one
648greater than expected and a negative @l value.
649
650The TOC is specific to a “module” (basically an executable or shared object).
651This means that there's not a single TOC in a process and that r2 needs to
652change as control moves between modules. Thus functions have two entry points:
653the “global” entry point and the “local” entry point. Jumps from within the
654same module can use the local entry while jumps from other modules must use the
655global entry. The global entry establishes the correct value of r2 before
656running the function and the local entry skips that code.
657
658The global entry point for a function is defined by its label. The local entry
659is a power-of-two number of bytes from the global entry, set by the
660“.localentry” directive. (ppc64le instructions are always 32 bits, so an offset
661of 1 or 2 bytes is treated as an offset of zero.)
662
663In order to help the global entry code set r2 to point to the local TOC, r12 is
664set to the address of the global entry point when called [PABI;2.2.1.1]. Thus
665the global entry will typically use an addis+addi pair to add a known offset to
666r12 and store it in r2. For example:
667
668foo:
669  addis 2, 12, .TOC. - foo@ha
670  addi  2, 2,  .TOC. - foo@l
671
672(It's worth noting that the '@' operator binds very loosely, so the 3rd
673arguments parse as (.TOC. - foo)@ha and (.TOC. - foo)@l.)
674
675When calling a function, the compiler doesn't know whether that function is in
676the same module or not. Thus it doesn't know whether r12 needs to be set nor
677whether r2 will be clobbered on return. Rather than always assume the worst,
678the linker fixes stuff up once it knows that a call is going out of module:
679
680Firstly, calling, say, memcpy (which we assume to be in a different module)
681won't actually jump directly to memcpy, or even a PLT resolution function.
682It'll call a synthesised function that:
683  a) saves r2 in the caller's stack frame
684  b) loads the address of memcpy@PLT into r12
685  c) jumps to r12.
686
687As this synthesised function loads memcpy@PLT, a call to memcpy from the
688compiled code just references “memcpy” directly, not “memcpy@PLT”.
689
690Since it jumps directly to memcpy@PLT, it can't restore r2 on return. Thus
691calls must be followed by a nop. If the call ends up going out-of-module, the
692linker will rewrite that nop to load r2 from the stack.
693
694Speaking of the stack, the stack pointer is kept in r1 and there's a 288-byte
695red-zone. The format of the stack frame is defined [PABI;2.2.2] and must be
696followed as called functions will write into their parent's stack frame. For
697example, the synthesised out-of-module trampolines will save r2 24 bytes into
698the caller's frame and all non-leaf functions save the return address 16 bytes
699into the caller's frame.
700
701A final point worth noting: some RISC ISAs have r0 wired to zero: all reads
702result in zero and all writes are discarded. POWER does something a little like
703that, but r0 is only special in certain argument positions for certain
704instructions. You just have to read the manual to know which they are.
705
706
707Delocation is easier than Intel because there's just TOC references, but it's
708also harder because there's no IP-relative addressing.
709
710Jumps are IP-relative however, and have a 24-bit immediate value. So we can
711jump to functions that set a register to the needed value. (r3 is the
712return-value register and so that's what is generally used here.) */
713
714// isPPC64LEAPair recognises an addis+addi pair that's adding the offset of
715// source to relative and writing the result to target.
716func (d *delocation) isPPC64LEAPair(statement *node32) (target, source, relative string, ok bool) {
717	instruction := skipWS(statement.up).up
718	assertNodeType(instruction, ruleInstructionName)
719	name1 := d.contents(instruction)
720	args1 := instructionArgs(instruction.next)
721
722	statement = statement.next
723	instruction = skipWS(statement.up).up
724	assertNodeType(instruction, ruleInstructionName)
725	name2 := d.contents(instruction)
726	args2 := instructionArgs(instruction.next)
727
728	if name1 != "addis" ||
729		len(args1) != 3 ||
730		name2 != "addi" ||
731		len(args2) != 3 {
732		return "", "", "", false
733	}
734
735	target = d.contents(args1[0])
736	relative = d.contents(args1[1])
737	source1 := d.contents(args1[2])
738	source2 := d.contents(args2[2])
739
740	if !strings.HasSuffix(source1, "@ha") ||
741		!strings.HasSuffix(source2, "@l") ||
742		source1[:len(source1)-3] != source2[:len(source2)-2] ||
743		d.contents(args2[0]) != target ||
744		d.contents(args2[1]) != target {
745		return "", "", "", false
746	}
747
748	source = source1[:len(source1)-3]
749	ok = true
750	return
751}
752
753// establishTOC writes the global entry prelude for a function. The standard
754// prelude involves relocations so this version moves the relocation outside
755// the integrity-checked area.
756func establishTOC(w stringWriter) {
757	w.WriteString("999:\n")
758	w.WriteString("\taddis 2, 12, .LBORINGSSL_external_toc-999b@ha\n")
759	w.WriteString("\taddi 2, 2, .LBORINGSSL_external_toc-999b@l\n")
760	w.WriteString("\tld 12, 0(2)\n")
761	w.WriteString("\tadd 2, 2, 12\n")
762}
763
// loadTOCFuncName returns the name of a synthesized function that sets r3 to
// the value of “symbol+offset”.
//
// The name is ".Lbcm_loadtoc_" followed by the symbol with every "." spelled
// out as "_dot_". A non-empty offset is appended after a further "_", with
// "+" and "-" spelled out as "_plus_" and "_minus_" respectively.
func loadTOCFuncName(symbol, offset string) string {
	name := ".Lbcm_loadtoc_" + strings.NewReplacer(".", "_dot_").Replace(symbol)
	if offset == "" {
		return name
	}

	signs := strings.NewReplacer("+", "_plus_", "-", "_minus_")
	return name + "_" + signs.Replace(offset)
}
776
777func (d *delocation) loadFromTOC(w stringWriter, symbol, offset, dest string) wrapperFunc {
778	d.tocLoaders[symbol+"\x00"+offset] = struct{}{}
779
780	return func(k func()) {
781		w.WriteString("\taddi 1, 1, -288\n")   // Clear the red zone.
782		w.WriteString("\tmflr " + dest + "\n") // Stash the link register.
783		w.WriteString("\tstd " + dest + ", -8(1)\n")
784		// The TOC loader will use r3, so stash it if necessary.
785		if dest != "3" {
786			w.WriteString("\tstd 3, -16(1)\n")
787		}
788
789		// Because loadTOCFuncName returns a “.L” name, we don't need a
790		// nop after this call.
791		w.WriteString("\tbl " + loadTOCFuncName(symbol, offset) + "\n")
792
793		// Cycle registers around. We need r3 -> destReg, -8(1) ->
794		// lr and, optionally, -16(1) -> r3.
795		w.WriteString("\tstd 3, -24(1)\n")
796		w.WriteString("\tld 3, -8(1)\n")
797		w.WriteString("\tmtlr 3\n")
798		w.WriteString("\tld " + dest + ", -24(1)\n")
799		if dest != "3" {
800			w.WriteString("\tld 3, -16(1)\n")
801		}
802		w.WriteString("\taddi 1, 1, 288\n")
803
804		k()
805	}
806}
807
808func (d *delocation) gatherOffsets(symRef *node32, offsets string) (*node32, string) {
809	for symRef != nil && symRef.pegRule == ruleOffset {
810		offset := d.contents(symRef)
811		if offset[0] != '+' && offset[0] != '-' {
812			offset = "+" + offset
813		}
814		offsets = offsets + offset
815		symRef = symRef.next
816	}
817	return symRef, offsets
818}
819
820func (d *delocation) parseMemRef(memRef *node32) (symbol, offset, section string, didChange, symbolIsLocal bool, nextRef *node32) {
821	if memRef.pegRule != ruleSymbolRef {
822		return "", "", "", false, false, memRef
823	}
824
825	symRef := memRef.up
826	nextRef = memRef.next
827
828	// (Offset* '+')?
829	symRef, offset = d.gatherOffsets(symRef, offset)
830
831	// (LocalSymbol / SymbolName)
832	symbol = d.contents(symRef)
833	if symRef.pegRule == ruleLocalSymbol {
834		symbolIsLocal = true
835		mapped := d.mapLocalSymbol(symbol)
836		if mapped != symbol {
837			symbol = mapped
838			didChange = true
839		}
840	}
841	symRef = symRef.next
842
843	// Offset*
844	symRef, offset = d.gatherOffsets(symRef, offset)
845
846	// ('@' Section / Offset*)?
847	if symRef != nil {
848		assertNodeType(symRef, ruleSection)
849		section = d.contents(symRef)
850		symRef = symRef.next
851
852		symRef, offset = d.gatherOffsets(symRef, offset)
853	}
854
855	if symRef != nil {
856		panic(fmt.Sprintf("unexpected token in SymbolRef: %q", rul3s[symRef.pegRule]))
857	}
858
859	return
860}
861
862func (d *delocation) processPPCInstruction(statement, instruction *node32) (*node32, error) {
863	assertNodeType(instruction, ruleInstructionName)
864	instructionName := d.contents(instruction)
865	isBranch := instructionName[0] == 'b'
866
867	argNodes := instructionArgs(instruction.next)
868
869	var wrappers wrapperStack
870	var args []string
871	changed := false
872
873Args:
874	for i, arg := range argNodes {
875		fullArg := arg
876		isIndirect := false
877
878		if arg.pegRule == ruleIndirectionIndicator {
879			arg = arg.next
880			isIndirect = true
881		}
882
883		switch arg.pegRule {
884		case ruleRegisterOrConstant, ruleLocalLabelRef:
885			args = append(args, d.contents(fullArg))
886
887		case ruleTOCRefLow:
888			return nil, errors.New("Found low TOC reference outside preamble pattern")
889
890		case ruleTOCRefHigh:
891			target, _, relative, ok := d.isPPC64LEAPair(statement)
892			if !ok {
893				return nil, errors.New("Found high TOC reference outside preamble pattern")
894			}
895
896			if relative != "12" {
897				return nil, fmt.Errorf("preamble is relative to %q, not r12", relative)
898			}
899
900			if target != "2" {
901				return nil, fmt.Errorf("preamble is setting %q, not r2", target)
902			}
903
904			statement = statement.next
905			establishTOC(d.output)
906			instructionName = ""
907			changed = true
908			break Args
909
910		case ruleMemoryRef:
911			symbol, offset, section, didChange, symbolIsLocal, memRef := d.parseMemRef(arg.up)
912			changed = didChange
913
914			if len(symbol) > 0 {
915				if _, localEntrySymbol := d.localEntrySymbols[symbol]; localEntrySymbol && isBranch {
916					symbol = localEntryName(symbol)
917					changed = true
918				} else if _, knownSymbol := d.symbols[symbol]; knownSymbol {
919					symbol = localTargetName(symbol)
920					changed = true
921				} else if !symbolIsLocal && !isSynthesized(symbol) && len(section) == 0 {
922					changed = true
923					d.redirectors[symbol] = redirectorName(symbol)
924					symbol = redirectorName(symbol)
925					// TODO(davidben): This should sanity-check the next
926					// instruction is a nop and ideally remove it.
927					wrappers = append(wrappers, func(k func()) {
928						k()
929						// Like the linker's PLT stubs, redirector functions
930						// expect callers to restore r2.
931						d.output.WriteString("\tld 2, 24(1)\n")
932					})
933				}
934			}
935
936			switch section {
937			case "":
938
939			case "tls":
940				// This section identifier just tells the
941				// assembler to use r13, the pointer to the
942				// thread-local data [PABI;3.7.3.3].
943
944			case "toc@ha":
945				// Delete toc@ha instructions. Per
946				// [PABI;3.6.3], the linker is allowed to erase
947				// toc@ha instructions. We take advantage of
948				// this by unconditionally erasing the toc@ha
949				// instructions and doing the full lookup when
950				// processing toc@l.
951				//
952				// Note that any offset here applies before @ha
953				// and @l. That is, 42+foo@toc@ha is
954				// #ha(42+foo-.TOC.), not 42+#ha(foo-.TOC.). Any
955				// corresponding toc@l references are required
956				// by the ABI to have the same offset. The
957				// offset will be incorporated in full when
958				// those are processed.
959				if instructionName != "addis" || len(argNodes) != 3 || i != 2 || args[1] != "2" {
960					return nil, errors.New("can't process toc@ha reference")
961				}
962				changed = true
963				instructionName = ""
964				break Args
965
966			case "toc@l":
967				// Per [PAB;3.6.3], this instruction must take
968				// as input a register which was the output of
969				// a toc@ha computation and compute the actual
970				// address of some symbol. The toc@ha
971				// computation was elided, so we ignore that
972				// input register and compute the address
973				// directly.
974				changed = true
975
976				// For all supported toc@l instructions, the
977				// destination register is the first argument.
978				destReg := args[0]
979
980				wrappers = append(wrappers, d.loadFromTOC(d.output, symbol, offset, destReg))
981				switch instructionName {
982				case "addi":
983					// The original instruction was:
984					//   addi destReg, tocHaReg, offset+symbol@toc@l
985					instructionName = ""
986
987				case "ld", "lhz", "lwz":
988					// The original instruction was:
989					//   l?? destReg, offset+symbol@toc@l(tocHaReg)
990					//
991					// We transform that into the
992					// equivalent dereference of destReg:
993					//   l?? destReg, 0(destReg)
994					origInstructionName := instructionName
995					instructionName = ""
996
997					assertNodeType(memRef, ruleBaseIndexScale)
998					assertNodeType(memRef.up, ruleRegisterOrConstant)
999					if memRef.next != nil || memRef.up.next != nil {
1000						return nil, errors.New("expected single register in BaseIndexScale for ld argument")
1001					}
1002
1003					baseReg := destReg
1004					if baseReg == "0" {
1005						// Register zero is special as the base register for a load.
1006						// Avoid it by spilling and using r3 instead.
1007						baseReg = "3"
1008						wrappers = append(wrappers, func(k func()) {
1009							d.output.WriteString("\taddi 1, 1, -288\n") // Clear the red zone.
1010							d.output.WriteString("\tstd " + baseReg + ", -8(1)\n")
1011							d.output.WriteString("\tmr " + baseReg + ", " + destReg + "\n")
1012							k()
1013							d.output.WriteString("\tld " + baseReg + ", -8(1)\n")
1014							d.output.WriteString("\taddi 1, 1, 288\n") // Clear the red zone.
1015						})
1016					}
1017
1018					wrappers = append(wrappers, func(k func()) {
1019						d.output.WriteString("\t" + origInstructionName + " " + destReg + ", 0(" + baseReg + ")\n")
1020					})
1021				default:
1022					return nil, fmt.Errorf("can't process TOC argument to %q", instructionName)
1023				}
1024
1025			default:
1026				return nil, fmt.Errorf("Unknown section type %q", section)
1027			}
1028
1029			argStr := ""
1030			if isIndirect {
1031				argStr += "*"
1032			}
1033			argStr += symbol
1034			if len(offset) > 0 {
1035				argStr += offset
1036			}
1037			if len(section) > 0 {
1038				argStr += "@"
1039				argStr += section
1040			}
1041
1042			for ; memRef != nil; memRef = memRef.next {
1043				argStr += d.contents(memRef)
1044			}
1045
1046			args = append(args, argStr)
1047
1048		default:
1049			panic(fmt.Sprintf("unknown instruction argument type %q", rul3s[arg.pegRule]))
1050		}
1051	}
1052
1053	if changed {
1054		d.writeCommentedNode(statement)
1055
1056		var replacement string
1057		if len(instructionName) > 0 {
1058			replacement = "\t" + instructionName + "\t" + strings.Join(args, ", ") + "\n"
1059		}
1060
1061		wrappers.do(func() {
1062			d.output.WriteString(replacement)
1063		})
1064	} else {
1065		d.writeNode(statement)
1066	}
1067
1068	return statement, nil
1069}
1070
1071/* Intel */
1072
// instructionType is the coarse category that classifyInstruction assigns to
// an x86-64 instruction.
type instructionType int

const (
	// instrPush is a one-argument push.
	instrPush instructionType = iota
	// instrMove is a two-argument move.
	instrMove
	// instrTransformingMove is essentially a move, but it performs some
	// transformation of the data during the process.
	instrTransformingMove
	// instrJump is a one-argument call or (conditional) jump.
	instrJump
	// instrConditionalMove is a two-argument conditional move.
	instrConditionalMove
	// instrCombine merges the source and destination in some fashion, for example
	// a 2-operand bitwise operation.
	instrCombine
	// instrMemoryVectorCombine is similar to instrCombine, but the source
	// operand must be a memory reference and the destination register
	// must be a vector register.
	instrMemoryVectorCombine
	// instrThreeArg merges two sources into a destination in some fashion.
	instrThreeArg
	// instrCompare takes two arguments and writes outputs to the flags register.
	instrCompare
	// instrOther is every instruction that classifyInstruction does not
	// recognise, including known mnemonics with an unexpected argument count.
	instrOther
)
1096
1097func classifyInstruction(instr string, args []*node32) instructionType {
1098	switch instr {
1099	case "push", "pushq":
1100		if len(args) == 1 {
1101			return instrPush
1102		}
1103
1104	case "mov", "movq", "vmovq", "movsd", "vmovsd":
1105		if len(args) == 2 {
1106			return instrMove
1107		}
1108
1109	case "cmovneq", "cmoveq":
1110		if len(args) == 2 {
1111			return instrConditionalMove
1112		}
1113
1114	case "call", "callq", "jmp", "jo", "jno", "js", "jns", "je", "jz", "jne", "jnz", "jb", "jnae", "jc", "jnb", "jae", "jnc", "jbe", "jna", "ja", "jnbe", "jl", "jnge", "jge", "jnl", "jle", "jng", "jg", "jnle", "jp", "jpe", "jnp", "jpo":
1115		if len(args) == 1 {
1116			return instrJump
1117		}
1118
1119	case "orq", "andq", "xorq":
1120		if len(args) == 2 {
1121			return instrCombine
1122		}
1123
1124	case "cmpq":
1125		if len(args) == 2 {
1126			return instrCompare
1127		}
1128
1129	case "sarxq", "shlxq", "shrxq":
1130		if len(args) == 3 {
1131			return instrThreeArg
1132		}
1133
1134	case "vpbroadcastq":
1135		if len(args) == 2 {
1136			return instrTransformingMove
1137		}
1138
1139	case "movlps", "movhps":
1140		if len(args) == 2 {
1141			return instrMemoryVectorCombine
1142		}
1143	}
1144
1145	return instrOther
1146}
1147
1148func push(w stringWriter) wrapperFunc {
1149	return func(k func()) {
1150		w.WriteString("\tpushq %rax\n")
1151		k()
1152		w.WriteString("\txchg %rax, (%rsp)\n")
1153	}
1154}
1155
1156func compare(w stringWriter, instr, a, b string) wrapperFunc {
1157	return func(k func()) {
1158		k()
1159		w.WriteString(fmt.Sprintf("\t%s %s, %s\n", instr, a, b))
1160	}
1161}
1162
1163func (d *delocation) loadFromGOT(w stringWriter, destination, symbol, section string, redzoneCleared bool) wrapperFunc {
1164	d.gotExternalsNeeded[symbol+"@"+section] = struct{}{}
1165
1166	return func(k func()) {
1167		if !redzoneCleared {
1168			w.WriteString("\tleaq -128(%rsp), %rsp\n") // Clear the red zone.
1169		}
1170		w.WriteString("\tpushf\n")
1171		w.WriteString(fmt.Sprintf("\tleaq %s_%s_external(%%rip), %s\n", symbol, section, destination))
1172		w.WriteString(fmt.Sprintf("\taddq (%s), %s\n", destination, destination))
1173		w.WriteString(fmt.Sprintf("\tmovq (%s), %s\n", destination, destination))
1174		w.WriteString("\tpopf\n")
1175		if !redzoneCleared {
1176			w.WriteString("\tleaq\t128(%rsp), %rsp\n")
1177		}
1178	}
1179}
1180
1181func saveFlags(w stringWriter, redzoneCleared bool) wrapperFunc {
1182	return func(k func()) {
1183		if !redzoneCleared {
1184			w.WriteString("\tleaq -128(%rsp), %rsp\n") // Clear the red zone.
1185			defer w.WriteString("\tleaq 128(%rsp), %rsp\n")
1186		}
1187		w.WriteString("\tpushfq\n")
1188		k()
1189		w.WriteString("\tpopfq\n")
1190	}
1191}
1192
1193func saveRegister(w stringWriter, avoidRegs []string) (wrapperFunc, string) {
1194	candidates := []string{"%rax", "%rbx", "%rcx", "%rdx"}
1195
1196	var reg string
1197NextCandidate:
1198	for _, candidate := range candidates {
1199		for _, avoid := range avoidRegs {
1200			if candidate == avoid {
1201				continue NextCandidate
1202			}
1203		}
1204
1205		reg = candidate
1206		break
1207	}
1208
1209	if len(reg) == 0 {
1210		panic("too many excluded registers")
1211	}
1212
1213	return func(k func()) {
1214		w.WriteString("\tleaq -128(%rsp), %rsp\n") // Clear the red zone.
1215		w.WriteString("\tpushq " + reg + "\n")
1216		k()
1217		w.WriteString("\tpopq " + reg + "\n")
1218		w.WriteString("\tleaq 128(%rsp), %rsp\n")
1219	}, reg
1220}
1221
1222func moveTo(w stringWriter, target string, isAVX bool, source string) wrapperFunc {
1223	return func(k func()) {
1224		k()
1225		prefix := ""
1226		if isAVX {
1227			prefix = "v"
1228		}
1229		w.WriteString("\t" + prefix + "movq " + source + ", " + target + "\n")
1230	}
1231}
1232
1233func finalTransform(w stringWriter, transformInstruction, reg string) wrapperFunc {
1234	return func(k func()) {
1235		k()
1236		w.WriteString("\t" + transformInstruction + " " + reg + ", " + reg + "\n")
1237	}
1238}
1239
1240func combineOp(w stringWriter, instructionName, source, dest string) wrapperFunc {
1241	return func(k func()) {
1242		k()
1243		w.WriteString("\t" + instructionName + " " + source + ", " + dest + "\n")
1244	}
1245}
1246
1247func threeArgCombineOp(w stringWriter, instructionName, source1, source2, dest string) wrapperFunc {
1248	return func(k func()) {
1249		k()
1250		w.WriteString("\t" + instructionName + " " + source1 + ", " + source2 + ", " + dest + "\n")
1251	}
1252}
1253
1254func memoryVectorCombineOp(w stringWriter, instructionName, source, dest string) wrapperFunc {
1255	return func(k func()) {
1256		k()
1257		// These instructions can only read from memory, so push
1258		// tempReg and read from the stack. Note we assume the red zone
1259		// was previously cleared by saveRegister().
1260		w.WriteString("\tpushq " + source + "\n")
1261		w.WriteString("\t" + instructionName + " (%rsp), " + dest + "\n")
1262		w.WriteString("\tleaq 8(%rsp), %rsp\n")
1263	}
1264}
1265
// isValidLEATarget reports whether reg may be used as the destination of an
// LEA instruction, which here means that it is not named like an XMM, YMM or
// ZMM vector register.
func isValidLEATarget(reg string) bool {
	for _, vectorPrefix := range []string{"%xmm", "%ymm", "%zmm"} {
		if strings.HasPrefix(reg, vectorPrefix) {
			return false
		}
	}
	return true
}
1269
1270func undoConditionalMove(w stringWriter, instr string) wrapperFunc {
1271	var invertedCondition string
1272
1273	switch instr {
1274	case "cmoveq":
1275		invertedCondition = "ne"
1276	case "cmovneq":
1277		invertedCondition = "e"
1278	default:
1279		panic(fmt.Sprintf("don't know how to handle conditional move instruction %q", instr))
1280	}
1281
1282	return func(k func()) {
1283		w.WriteString("\tj" + invertedCondition + " 999f\n")
1284		k()
1285		w.WriteString("999:\n")
1286	}
1287}
1288
1289func (d *delocation) isRIPRelative(node *node32) bool {
1290	return node != nil && node.pegRule == ruleBaseIndexScale && d.contents(node) == "(%rip)"
1291}
1292
1293func (d *delocation) processIntelInstruction(statement, instruction *node32) (*node32, error) {
1294	assertNodeType(instruction, ruleInstructionName)
1295	instructionName := d.contents(instruction)
1296
1297	argNodes := instructionArgs(instruction.next)
1298
1299	var wrappers wrapperStack
1300	var args []string
1301	changed := false
1302
1303Args:
1304	for i, arg := range argNodes {
1305		fullArg := arg
1306		isIndirect := false
1307
1308		if arg.pegRule == ruleIndirectionIndicator {
1309			arg = arg.next
1310			isIndirect = true
1311		}
1312
1313		switch arg.pegRule {
1314		case ruleRegisterOrConstant, ruleLocalLabelRef:
1315			args = append(args, d.contents(fullArg))
1316
1317		case ruleMemoryRef:
1318			symbol, offset, section, didChange, symbolIsLocal, memRef := d.parseMemRef(arg.up)
1319			changed = didChange
1320
1321			if symbol == "OPENSSL_ia32cap_P" && section == "" {
1322				if instructionName != "leaq" {
1323					return nil, fmt.Errorf("non-leaq instruction %q referenced OPENSSL_ia32cap_P directly", instructionName)
1324				}
1325
1326				if i != 0 || len(argNodes) != 2 || !d.isRIPRelative(memRef) || len(offset) > 0 {
1327					return nil, fmt.Errorf("invalid OPENSSL_ia32cap_P reference in instruction %q", instructionName)
1328				}
1329
1330				target := argNodes[1]
1331				assertNodeType(target, ruleRegisterOrConstant)
1332				reg := d.contents(target)
1333
1334				if !strings.HasPrefix(reg, "%r") {
1335					return nil, fmt.Errorf("tried to load OPENSSL_ia32cap_P into %q, which is not a standard register.", reg)
1336				}
1337
1338				changed = true
1339
1340				// Flag-altering instructions (i.e. addq) are going to be used so the
1341				// flags need to be preserved.
1342				wrappers = append(wrappers, saveFlags(d.output, false /* Red Zone not yet cleared */))
1343
1344				wrappers = append(wrappers, func(k func()) {
1345					d.output.WriteString("\tleaq\tOPENSSL_ia32cap_addr_delta(%rip), " + reg + "\n")
1346					d.output.WriteString("\taddq\t(" + reg + "), " + reg + "\n")
1347				})
1348
1349				break Args
1350			}
1351
1352			switch section {
1353			case "":
1354				if _, knownSymbol := d.symbols[symbol]; knownSymbol {
1355					symbol = localTargetName(symbol)
1356					changed = true
1357				}
1358
1359			case "PLT":
1360				if classifyInstruction(instructionName, argNodes) != instrJump {
1361					return nil, fmt.Errorf("Cannot rewrite PLT reference for non-jump instruction %q", instructionName)
1362				}
1363
1364				if _, knownSymbol := d.symbols[symbol]; knownSymbol {
1365					symbol = localTargetName(symbol)
1366					changed = true
1367				} else if !symbolIsLocal && !isSynthesized(symbol) {
1368					// Unknown symbol via PLT is an
1369					// out-call from the module, e.g.
1370					// memcpy.
1371					d.redirectors[symbol+"@"+section] = redirectorName(symbol)
1372					symbol = redirectorName(symbol)
1373				}
1374
1375				changed = true
1376
1377			case "GOTPCREL":
1378				if len(offset) > 0 {
1379					return nil, errors.New("loading from GOT with offset is unsupported")
1380				}
1381				if !d.isRIPRelative(memRef) {
1382					return nil, errors.New("GOT access must be IP-relative")
1383				}
1384
1385				useGOT := false
1386				if _, knownSymbol := d.symbols[symbol]; knownSymbol {
1387					symbol = localTargetName(symbol)
1388					changed = true
1389				} else if !isSynthesized(symbol) {
1390					useGOT = true
1391				}
1392
1393				classification := classifyInstruction(instructionName, argNodes)
1394				if classification != instrThreeArg && classification != instrCompare && i != 0 {
1395					return nil, errors.New("GOT access must be source operand")
1396				}
1397
1398				// Reduce the instruction to movq symbol@GOTPCREL, targetReg.
1399				var targetReg string
1400				var redzoneCleared bool
1401				switch classification {
1402				case instrPush:
1403					wrappers = append(wrappers, push(d.output))
1404					targetReg = "%rax"
1405				case instrConditionalMove:
1406					wrappers = append(wrappers, undoConditionalMove(d.output, instructionName))
1407					fallthrough
1408				case instrMove:
1409					assertNodeType(argNodes[1], ruleRegisterOrConstant)
1410					targetReg = d.contents(argNodes[1])
1411				case instrCompare:
1412					otherSource := d.contents(argNodes[i^1])
1413					saveRegWrapper, tempReg := saveRegister(d.output, []string{otherSource})
1414					redzoneCleared = true
1415					wrappers = append(wrappers, saveRegWrapper)
1416					if i == 0 {
1417						wrappers = append(wrappers, compare(d.output, instructionName, tempReg, otherSource))
1418					} else {
1419						wrappers = append(wrappers, compare(d.output, instructionName, otherSource, tempReg))
1420					}
1421					targetReg = tempReg
1422				case instrTransformingMove:
1423					assertNodeType(argNodes[1], ruleRegisterOrConstant)
1424					targetReg = d.contents(argNodes[1])
1425					wrappers = append(wrappers, finalTransform(d.output, instructionName, targetReg))
1426					if isValidLEATarget(targetReg) {
1427						return nil, errors.New("Currently transforming moves are assumed to target XMM registers. Otherwise we'll pop %rax before reading it to do the transform.")
1428					}
1429				case instrCombine:
1430					targetReg = d.contents(argNodes[1])
1431					if !isValidLEATarget(targetReg) {
1432						return nil, fmt.Errorf("cannot handle combining instructions targeting non-general registers")
1433					}
1434					saveRegWrapper, tempReg := saveRegister(d.output, []string{targetReg})
1435					redzoneCleared = true
1436					wrappers = append(wrappers, saveRegWrapper)
1437
1438					wrappers = append(wrappers, combineOp(d.output, instructionName, tempReg, targetReg))
1439					targetReg = tempReg
1440				case instrMemoryVectorCombine:
1441					assertNodeType(argNodes[1], ruleRegisterOrConstant)
1442					targetReg = d.contents(argNodes[1])
1443					if isValidLEATarget(targetReg) {
1444						return nil, errors.New("target register must be an XMM register")
1445					}
1446					saveRegWrapper, tempReg := saveRegister(d.output, nil)
1447					wrappers = append(wrappers, saveRegWrapper)
1448					redzoneCleared = true
1449					wrappers = append(wrappers, memoryVectorCombineOp(d.output, instructionName, tempReg, targetReg))
1450					targetReg = tempReg
1451				case instrThreeArg:
1452					if n := len(argNodes); n != 3 {
1453						return nil, fmt.Errorf("three-argument instruction has %d arguments", n)
1454					}
1455					if i != 0 && i != 1 {
1456						return nil, errors.New("GOT access must be from source operand")
1457					}
1458					targetReg = d.contents(argNodes[2])
1459
1460					otherSource := d.contents(argNodes[1])
1461					if i == 1 {
1462						otherSource = d.contents(argNodes[0])
1463					}
1464
1465					saveRegWrapper, tempReg := saveRegister(d.output, []string{targetReg, otherSource})
1466					redzoneCleared = true
1467					wrappers = append(wrappers, saveRegWrapper)
1468
1469					if i == 0 {
1470						wrappers = append(wrappers, threeArgCombineOp(d.output, instructionName, tempReg, otherSource, targetReg))
1471					} else {
1472						wrappers = append(wrappers, threeArgCombineOp(d.output, instructionName, otherSource, tempReg, targetReg))
1473					}
1474					targetReg = tempReg
1475				default:
1476					return nil, fmt.Errorf("Cannot rewrite GOTPCREL reference for instruction %q", instructionName)
1477				}
1478
1479				if !isValidLEATarget(targetReg) {
1480					// Sometimes the compiler will load from the GOT to an
1481					// XMM register, which is not a valid target of an LEA
1482					// instruction.
1483					saveRegWrapper, tempReg := saveRegister(d.output, nil)
1484					wrappers = append(wrappers, saveRegWrapper)
1485					isAVX := strings.HasPrefix(instructionName, "v")
1486					wrappers = append(wrappers, moveTo(d.output, targetReg, isAVX, tempReg))
1487					targetReg = tempReg
1488					if redzoneCleared {
1489						return nil, fmt.Errorf("internal error: Red Zone was already cleared")
1490					}
1491					redzoneCleared = true
1492				}
1493
1494				if symbol == "OPENSSL_ia32cap_P" {
1495					// Flag-altering instructions (i.e. addq) are going to be used so the
1496					// flags need to be preserved.
1497					wrappers = append(wrappers, saveFlags(d.output, redzoneCleared))
1498					wrappers = append(wrappers, func(k func()) {
1499						d.output.WriteString("\tleaq\tOPENSSL_ia32cap_addr_delta(%rip), " + targetReg + "\n")
1500						d.output.WriteString("\taddq\t(" + targetReg + "), " + targetReg + "\n")
1501					})
1502				} else if useGOT {
1503					wrappers = append(wrappers, d.loadFromGOT(d.output, targetReg, symbol, section, redzoneCleared))
1504				} else {
1505					wrappers = append(wrappers, func(k func()) {
1506						d.output.WriteString(fmt.Sprintf("\tleaq\t%s(%%rip), %s\n", symbol, targetReg))
1507					})
1508				}
1509				changed = true
1510				break Args
1511
1512			default:
1513				return nil, fmt.Errorf("Unknown section type %q", section)
1514			}
1515
1516			if !changed && len(section) > 0 {
1517				panic("section was not handled")
1518			}
1519			section = ""
1520
1521			argStr := ""
1522			if isIndirect {
1523				argStr += "*"
1524			}
1525			argStr += symbol
1526			argStr += offset
1527
1528			for ; memRef != nil; memRef = memRef.next {
1529				argStr += d.contents(memRef)
1530			}
1531
1532			args = append(args, argStr)
1533
1534		case ruleGOTLocation:
1535			if instructionName != "movabsq" {
1536				return nil, fmt.Errorf("_GLOBAL_OFFSET_TABLE_ lookup didn't use movabsq")
1537			}
1538			if i != 0 || len(argNodes) != 2 {
1539				return nil, fmt.Errorf("movabs of _GLOBAL_OFFSET_TABLE_ didn't expected form")
1540			}
1541
1542			d.gotDeltaNeeded = true
1543			changed = true
1544			instructionName = "movq"
1545			assertNodeType(arg.up, ruleLocalSymbol)
1546			baseSymbol := d.mapLocalSymbol(d.contents(arg.up))
1547			targetReg := d.contents(argNodes[1])
1548			args = append(args, ".Lboringssl_got_delta(%rip)")
1549			wrappers = append(wrappers, func(k func()) {
1550				k()
1551				d.output.WriteString(fmt.Sprintf("\taddq $.Lboringssl_got_delta-%s, %s\n", baseSymbol, targetReg))
1552			})
1553
1554		case ruleGOTSymbolOffset:
1555			if instructionName != "movabsq" {
1556				return nil, fmt.Errorf("_GLOBAL_OFFSET_TABLE_ offset didn't use movabsq")
1557			}
1558			if i != 0 || len(argNodes) != 2 {
1559				return nil, fmt.Errorf("movabs of _GLOBAL_OFFSET_TABLE_ offset didn't have expected form")
1560			}
1561
1562			assertNodeType(arg.up, ruleSymbolName)
1563			symbol := d.contents(arg.up)
1564			if strings.HasPrefix(symbol, ".L") {
1565				symbol = d.mapLocalSymbol(symbol)
1566			}
1567			targetReg := d.contents(argNodes[1])
1568
1569			var prefix string
1570			isGOTOFF := strings.HasSuffix(d.contents(arg), "@GOTOFF")
1571			if isGOTOFF {
1572				prefix = "gotoff"
1573				d.gotOffOffsetsNeeded[symbol] = struct{}{}
1574			} else {
1575				prefix = "got"
1576				d.gotOffsetsNeeded[symbol] = struct{}{}
1577			}
1578			changed = true
1579
1580			wrappers = append(wrappers, func(k func()) {
1581				// Even if one tries to use 32-bit GOT offsets, Clang's linker (at the time
1582				// of writing) emits 64-bit relocations anyway, so the following four bytes
1583				// get stomped. Thus we use 64-bit offsets.
1584				d.output.WriteString(fmt.Sprintf("\tmovq .Lboringssl_%s_%s(%%rip), %s\n", prefix, symbol, targetReg))
1585			})
1586
1587		default:
1588			panic(fmt.Sprintf("unknown instruction argument type %q", rul3s[arg.pegRule]))
1589		}
1590	}
1591
1592	if changed {
1593		d.writeCommentedNode(statement)
1594		replacement := "\t" + instructionName + "\t" + strings.Join(args, ", ") + "\n"
1595		wrappers.do(func() {
1596			d.output.WriteString(replacement)
1597		})
1598	} else {
1599		d.writeNode(statement)
1600	}
1601
1602	return statement, nil
1603}
1604
1605func (d *delocation) handleBSS(statement *node32) (*node32, error) {
1606	lastStatement := statement
1607	for statement = statement.next; statement != nil; lastStatement, statement = statement, statement.next {
1608		node := skipWS(statement.up)
1609		if node == nil {
1610			d.writeNode(statement)
1611			continue
1612		}
1613
1614		switch node.pegRule {
1615		case ruleGlobalDirective, ruleComment, ruleInstruction, ruleLocationDirective:
1616			d.writeNode(statement)
1617
1618		case ruleDirective:
1619			directive := node.up
1620			assertNodeType(directive, ruleDirectiveName)
1621			directiveName := d.contents(directive)
1622			if directiveName == "text" || directiveName == "section" || directiveName == "data" {
1623				return lastStatement, nil
1624			}
1625			d.writeNode(statement)
1626
1627		case ruleLabel:
1628			label := node.up
1629			d.writeNode(statement)
1630
1631			if label.pegRule != ruleLocalSymbol {
1632				symbol := d.contents(label)
1633				localSymbol := localTargetName(symbol)
1634				d.output.WriteString(fmt.Sprintf("\n%s:\n", localSymbol))
1635
1636				d.bssAccessorsNeeded[symbol] = localSymbol
1637			}
1638
1639		case ruleLabelContainingDirective:
1640			var err error
1641			statement, err = d.processLabelContainingDirective(statement, node.up)
1642			if err != nil {
1643				return nil, err
1644			}
1645
1646		default:
1647			return nil, fmt.Errorf("unknown BSS statement type %q in %q", rul3s[node.pegRule], d.contents(statement))
1648		}
1649	}
1650
1651	return lastStatement, nil
1652}
1653
1654func writeAarch64Function(w stringWriter, funcName string, writeContents func(stringWriter)) {
1655	w.WriteString(".p2align 2\n")
1656	w.WriteString(".hidden " + funcName + "\n")
1657	w.WriteString(".type " + funcName + ", @function\n")
1658	w.WriteString(funcName + ":\n")
1659	w.WriteString(".cfi_startproc\n")
1660	writeContents(w)
1661	w.WriteString(".cfi_endproc\n")
1662	w.WriteString(".size " + funcName + ", .-" + funcName + "\n")
1663}
1664
1665func transform(w stringWriter, inputs []inputFile) error {
1666	// symbols contains all defined symbols.
1667	symbols := make(map[string]struct{})
1668	// localEntrySymbols contains all symbols with a .localentry directive.
1669	localEntrySymbols := make(map[string]struct{})
1670	// fileNumbers is the set of IDs seen in .file directives.
1671	fileNumbers := make(map[int]struct{})
1672	// maxObservedFileNumber contains the largest seen file number in a
1673	// .file directive. Zero is not a valid number.
1674	maxObservedFileNumber := 0
1675	// fileDirectivesContainMD5 is true if the compiler is outputting MD5
1676	// checksums in .file directives. If it does so, then this script needs
1677	// to match that behaviour otherwise warnings result.
1678	fileDirectivesContainMD5 := false
1679
1680	// OPENSSL_ia32cap_get will be synthesized by this script.
1681	symbols["OPENSSL_ia32cap_get"] = struct{}{}
1682
1683	for _, input := range inputs {
1684		forEachPath(input.ast.up, func(node *node32) {
1685			symbol := input.contents[node.begin:node.end]
1686			if _, ok := symbols[symbol]; ok {
1687				panic(fmt.Sprintf("Duplicate symbol found: %q in %q", symbol, input.path))
1688			}
1689			symbols[symbol] = struct{}{}
1690		}, ruleStatement, ruleLabel, ruleSymbolName)
1691
1692		forEachPath(input.ast.up, func(node *node32) {
1693			node = node.up
1694			assertNodeType(node, ruleLabelContainingDirectiveName)
1695			directive := input.contents[node.begin:node.end]
1696			if directive != ".localentry" {
1697				return
1698			}
1699			// Extract the first argument.
1700			node = skipWS(node.next)
1701			assertNodeType(node, ruleSymbolArgs)
1702			node = node.up
1703			assertNodeType(node, ruleSymbolArg)
1704			symbol := input.contents[node.begin:node.end]
1705			if _, ok := localEntrySymbols[symbol]; ok {
1706				panic(fmt.Sprintf("Duplicate .localentry directive found: %q in %q", symbol, input.path))
1707			}
1708			localEntrySymbols[symbol] = struct{}{}
1709		}, ruleStatement, ruleLabelContainingDirective)
1710
1711		forEachPath(input.ast.up, func(node *node32) {
1712			assertNodeType(node, ruleLocationDirective)
1713			directive := input.contents[node.begin:node.end]
1714			if !strings.HasPrefix(directive, ".file") {
1715				return
1716			}
1717			parts := strings.Fields(directive)
1718			if len(parts) == 2 {
1719				// This is a .file directive with just a
1720				// filename. Clang appears to generate just one
1721				// of these at the beginning of the output for
1722				// the compilation unit. Ignore it.
1723				return
1724			}
1725			fileNo, err := strconv.Atoi(parts[1])
1726			if err != nil {
1727				panic(fmt.Sprintf("Failed to parse file number from .file: %q", directive))
1728			}
1729
1730			if _, ok := fileNumbers[fileNo]; ok {
1731				panic(fmt.Sprintf("Duplicate file number %d observed", fileNo))
1732			}
1733			fileNumbers[fileNo] = struct{}{}
1734
1735			if fileNo > maxObservedFileNumber {
1736				maxObservedFileNumber = fileNo
1737			}
1738
1739			for _, token := range parts[2:] {
1740				if token == "md5" {
1741					fileDirectivesContainMD5 = true
1742				}
1743			}
1744		}, ruleStatement, ruleLocationDirective)
1745	}
1746
1747	processor := x86_64
1748	if len(inputs) > 0 {
1749		processor = detectProcessor(inputs[0])
1750	}
1751
1752	commentIndicator := "#"
1753	if processor == aarch64 {
1754		commentIndicator = "//"
1755	}
1756
1757	d := &delocation{
1758		symbols:             symbols,
1759		localEntrySymbols:   localEntrySymbols,
1760		processor:           processor,
1761		commentIndicator:    commentIndicator,
1762		output:              w,
1763		redirectors:         make(map[string]string),
1764		bssAccessorsNeeded:  make(map[string]string),
1765		tocLoaders:          make(map[string]struct{}),
1766		gotExternalsNeeded:  make(map[string]struct{}),
1767		gotOffsetsNeeded:    make(map[string]struct{}),
1768		gotOffOffsetsNeeded: make(map[string]struct{}),
1769	}
1770
1771	w.WriteString(".text\n")
1772	var fileTrailing string
1773	if fileDirectivesContainMD5 {
1774		fileTrailing = " md5 0x00000000000000000000000000000000"
1775	}
1776	w.WriteString(fmt.Sprintf(".file %d \"inserted_by_delocate.c\"%s\n", maxObservedFileNumber+1, fileTrailing))
1777	w.WriteString(fmt.Sprintf(".loc %d 1 0\n", maxObservedFileNumber+1))
1778	w.WriteString("BORINGSSL_bcm_text_start:\n")
1779
1780	for _, input := range inputs {
1781		if err := d.processInput(input); err != nil {
1782			return err
1783		}
1784	}
1785
1786	w.WriteString(".text\n")
1787	w.WriteString(fmt.Sprintf(".loc %d 2 0\n", maxObservedFileNumber+1))
1788	w.WriteString("BORINGSSL_bcm_text_end:\n")
1789
1790	// Emit redirector functions. Each is a single jump instruction.
1791	var redirectorNames []string
1792	for name := range d.redirectors {
1793		redirectorNames = append(redirectorNames, name)
1794	}
1795	sort.Strings(redirectorNames)
1796
1797	for _, name := range redirectorNames {
1798		redirector := d.redirectors[name]
1799		switch d.processor {
1800		case ppc64le:
1801			w.WriteString(".section \".toc\", \"aw\"\n")
1802			w.WriteString(".Lredirector_toc_" + name + ":\n")
1803			w.WriteString(".quad " + name + "\n")
1804			w.WriteString(".text\n")
1805			w.WriteString(".type " + redirector + ", @function\n")
1806			w.WriteString(redirector + ":\n")
1807			// |name| will clobber r2, so save it. This is matched by a restore in
1808			// redirector calls.
1809			w.WriteString("\tstd 2, 24(1)\n")
1810			// Load and call |name|'s global entry point.
1811			w.WriteString("\taddis 12, 2, .Lredirector_toc_" + name + "@toc@ha\n")
1812			w.WriteString("\tld 12, .Lredirector_toc_" + name + "@toc@l(12)\n")
1813			w.WriteString("\tmtctr 12\n")
1814			w.WriteString("\tbctr\n")
1815
1816		case aarch64:
1817			writeAarch64Function(w, redirector, func(w stringWriter) {
1818				w.WriteString("\tb " + name + "\n")
1819			})
1820
1821		case x86_64:
1822			w.WriteString(".type " + redirector + ", @function\n")
1823			w.WriteString(redirector + ":\n")
1824			w.WriteString("\tjmp\t" + name + "\n")
1825		}
1826	}
1827
1828	var accessorNames []string
1829	for accessor := range d.bssAccessorsNeeded {
1830		accessorNames = append(accessorNames, accessor)
1831	}
1832	sort.Strings(accessorNames)
1833
1834	// Emit BSS accessor functions. Each is a single LEA followed by RET.
1835	for _, name := range accessorNames {
1836		funcName := accessorName(name)
1837		target := d.bssAccessorsNeeded[name]
1838
1839		switch d.processor {
1840		case ppc64le:
1841			w.WriteString(".type " + funcName + ", @function\n")
1842			w.WriteString(funcName + ":\n")
1843			w.WriteString("\taddis 3, 2, " + target + "@toc@ha\n")
1844			w.WriteString("\taddi 3, 3, " + target + "@toc@l\n")
1845			w.WriteString("\tblr\n")
1846
1847		case x86_64:
1848			w.WriteString(".type " + funcName + ", @function\n")
1849			w.WriteString(funcName + ":\n")
1850			w.WriteString("\tleaq\t" + target + "(%rip), %rax\n\tret\n")
1851
1852		case aarch64:
1853			writeAarch64Function(w, funcName, func(w stringWriter) {
1854				w.WriteString("\tadrp x0, " + target + "\n")
1855				w.WriteString("\tadd x0, x0, :lo12:" + target + "\n")
1856				w.WriteString("\tret\n")
1857			})
1858		}
1859	}
1860
1861	switch d.processor {
1862	case ppc64le:
1863		loadTOCNames := sortedSet(d.tocLoaders)
1864		for _, symbolAndOffset := range loadTOCNames {
1865			parts := strings.SplitN(symbolAndOffset, "\x00", 2)
1866			symbol, offset := parts[0], parts[1]
1867
1868			funcName := loadTOCFuncName(symbol, offset)
1869			ref := symbol + offset
1870
1871			w.WriteString(".type " + funcName[2:] + ", @function\n")
1872			w.WriteString(funcName[2:] + ":\n")
1873			w.WriteString(funcName + ":\n")
1874			w.WriteString("\taddis 3, 2, " + ref + "@toc@ha\n")
1875			w.WriteString("\taddi 3, 3, " + ref + "@toc@l\n")
1876			w.WriteString("\tblr\n")
1877		}
1878
1879		w.WriteString(".LBORINGSSL_external_toc:\n")
1880		w.WriteString(".quad .TOC.-.LBORINGSSL_external_toc\n")
1881
1882	case aarch64:
1883		externalNames := sortedSet(d.gotExternalsNeeded)
1884		for _, symbol := range externalNames {
1885			writeAarch64Function(w, gotHelperName(symbol), func(w stringWriter) {
1886				w.WriteString("\tadrp x0, :got:" + symbol + "\n")
1887				w.WriteString("\tldr x0, [x0, :got_lo12:" + symbol + "]\n")
1888				w.WriteString("\tret\n")
1889			})
1890		}
1891
1892		writeAarch64Function(w, ".LOPENSSL_armcap_P_addr", func(w stringWriter) {
1893			w.WriteString("\tadrp x0, OPENSSL_armcap_P\n")
1894			w.WriteString("\tadd x0, x0, :lo12:OPENSSL_armcap_P\n")
1895			w.WriteString("\tret\n")
1896		})
1897
1898	case x86_64:
1899		externalNames := sortedSet(d.gotExternalsNeeded)
1900		for _, name := range externalNames {
1901			parts := strings.SplitN(name, "@", 2)
1902			symbol, section := parts[0], parts[1]
1903			w.WriteString(".type " + symbol + "_" + section + "_external, @object\n")
1904			w.WriteString(".size " + symbol + "_" + section + "_external, 8\n")
1905			w.WriteString(symbol + "_" + section + "_external:\n")
1906			// Ideally this would be .quad foo@GOTPCREL, but clang's
1907			// assembler cannot emit a 64-bit GOTPCREL relocation. Instead,
1908			// we manually sign-extend the value, knowing that the GOT is
1909			// always at the end, thus foo@GOTPCREL has a positive value.
1910			w.WriteString("\t.long " + symbol + "@" + section + "\n")
1911			w.WriteString("\t.long 0\n")
1912		}
1913
1914		w.WriteString(".type OPENSSL_ia32cap_get, @function\n")
1915		w.WriteString(".globl OPENSSL_ia32cap_get\n")
1916		w.WriteString(localTargetName("OPENSSL_ia32cap_get") + ":\n")
1917		w.WriteString("OPENSSL_ia32cap_get:\n")
1918		w.WriteString("\tleaq OPENSSL_ia32cap_P(%rip), %rax\n")
1919		w.WriteString("\tret\n")
1920
1921		w.WriteString(".extern OPENSSL_ia32cap_P\n")
1922		w.WriteString(".type OPENSSL_ia32cap_addr_delta, @object\n")
1923		w.WriteString(".size OPENSSL_ia32cap_addr_delta, 8\n")
1924		w.WriteString("OPENSSL_ia32cap_addr_delta:\n")
1925		w.WriteString(".quad OPENSSL_ia32cap_P-OPENSSL_ia32cap_addr_delta\n")
1926
1927		if d.gotDeltaNeeded {
1928			w.WriteString(".Lboringssl_got_delta:\n")
1929			w.WriteString("\t.quad _GLOBAL_OFFSET_TABLE_-.Lboringssl_got_delta\n")
1930		}
1931
1932		for _, name := range sortedSet(d.gotOffsetsNeeded) {
1933			w.WriteString(".Lboringssl_got_" + name + ":\n")
1934			w.WriteString("\t.quad " + name + "@GOT\n")
1935		}
1936		for _, name := range sortedSet(d.gotOffOffsetsNeeded) {
1937			w.WriteString(".Lboringssl_gotoff_" + name + ":\n")
1938			w.WriteString("\t.quad " + name + "@GOTOFF\n")
1939		}
1940	}
1941
1942	w.WriteString(".type BORINGSSL_bcm_text_hash, @object\n")
1943	w.WriteString(".size BORINGSSL_bcm_text_hash, 64\n")
1944	w.WriteString("BORINGSSL_bcm_text_hash:\n")
1945	for _, b := range fipscommon.UninitHashValue {
1946		w.WriteString(".byte 0x" + strconv.FormatUint(uint64(b), 16) + "\n")
1947	}
1948
1949	return nil
1950}
1951
1952func parseInputs(inputs []inputFile) error {
1953	for i, input := range inputs {
1954		var contents string
1955
1956		if input.isArchive {
1957			arFile, err := os.Open(input.path)
1958			if err != nil {
1959				return err
1960			}
1961			defer arFile.Close()
1962
1963			ar, err := ar.ParseAR(arFile)
1964			if err != nil {
1965				return err
1966			}
1967
1968			if len(ar) != 1 {
1969				return fmt.Errorf("expected one file in archive, but found %d", len(ar))
1970			}
1971
1972			for _, c := range ar {
1973				contents = string(c)
1974			}
1975		} else {
1976			inBytes, err := ioutil.ReadFile(input.path)
1977			if err != nil {
1978				return err
1979			}
1980
1981			contents = string(inBytes)
1982		}
1983
1984		asm := Asm{Buffer: contents, Pretty: true}
1985		asm.Init()
1986		if err := asm.Parse(); err != nil {
1987			return fmt.Errorf("error while parsing %q: %s", input.path, err)
1988		}
1989		ast := asm.AST()
1990
1991		inputs[i].contents = contents
1992		inputs[i].ast = ast
1993	}
1994
1995	return nil
1996}
1997
1998func main() {
1999	// The .a file, if given, is expected to be an archive of textual
2000	// assembly sources. That's odd, but CMake really wants to create
2001	// archive files so it's the only way that we can make it work.
2002	arInput := flag.String("a", "", "Path to a .a file containing assembly sources")
2003	outFile := flag.String("o", "", "Path to output assembly")
2004
2005	flag.Parse()
2006
2007	if len(*outFile) == 0 {
2008		fmt.Fprintf(os.Stderr, "Must give argument to -o.\n")
2009		os.Exit(1)
2010	}
2011
2012	var inputs []inputFile
2013	if len(*arInput) > 0 {
2014		inputs = append(inputs, inputFile{
2015			path:      *arInput,
2016			index:     0,
2017			isArchive: true,
2018		})
2019	}
2020
2021	for i, path := range flag.Args() {
2022		if len(path) == 0 {
2023			continue
2024		}
2025
2026		inputs = append(inputs, inputFile{
2027			path:  path,
2028			index: i + 1,
2029		})
2030	}
2031
2032	if err := parseInputs(inputs); err != nil {
2033		fmt.Fprintf(os.Stderr, "%s\n", err)
2034		os.Exit(1)
2035	}
2036
2037	out, err := os.OpenFile(*outFile, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0644)
2038	if err != nil {
2039		panic(err)
2040	}
2041	defer out.Close()
2042
2043	if err := transform(out, inputs); err != nil {
2044		fmt.Fprintf(os.Stderr, "%s\n", err)
2045		os.Exit(1)
2046	}
2047}
2048
2049func forEachPath(node *node32, cb func(*node32), rules ...pegRule) {
2050	if node == nil {
2051		return
2052	}
2053
2054	if len(rules) == 0 {
2055		cb(node)
2056		return
2057	}
2058
2059	rule := rules[0]
2060	childRules := rules[1:]
2061
2062	for ; node != nil; node = node.next {
2063		if node.pegRule != rule {
2064			continue
2065		}
2066
2067		if len(childRules) == 0 {
2068			cb(node)
2069		} else {
2070			forEachPath(node.up, cb, childRules...)
2071		}
2072	}
2073}
2074
2075func skipNodes(node *node32, ruleToSkip pegRule) *node32 {
2076	for ; node != nil && node.pegRule == ruleToSkip; node = node.next {
2077	}
2078	return node
2079}
2080
2081func skipWS(node *node32) *node32 {
2082	return skipNodes(node, ruleWS)
2083}
2084
2085func assertNodeType(node *node32, expected pegRule) {
2086	if rule := node.pegRule; rule != expected {
2087		panic(fmt.Sprintf("node was %q, but wanted %q", rul3s[rule], rul3s[expected]))
2088	}
2089}
2090
// wrapperFunc is handed a callback and decides when (and whether) to invoke
// it.
type wrapperFunc func(func())

// wrapperStack is an ordered list of wrapperFuncs. Element zero is applied
// first, i.e. it is the outermost wrapper.
type wrapperStack []wrapperFunc

// do runs baseCase nested inside the wrappers on the stack. The first wrapper
// is removed from the stack and invoked with a callback that, when called,
// repeats the process with the remaining wrappers. Once the stack is empty,
// baseCase is called directly. The stack is consumed as wrappers are applied.
func (w *wrapperStack) do(baseCase func()) {
	pending := *w
	if len(pending) > 0 {
		outermost := pending[0]
		*w = pending[1:]
		outermost(func() { w.do(baseCase) })
		return
	}

	baseCase()
}
2105
// localTargetName builds the local label that stands in for the global symbol
// name: name wrapped in a ".L" prefix and a "_local_target" suffix.
func localTargetName(name string) string {
	const (
		prefix = ".L"
		suffix = "_local_target"
	)
	return prefix + name + suffix
}
2111
// localEntryName builds the local-entry label for the symbol name: name
// wrapped in a ".L" prefix and a "_local_entry" suffix.
func localEntryName(name string) string {
	const (
		prefix = ".L"
		suffix = "_local_entry"
	)
	return prefix + name + suffix
}
2115
// isSynthesized reports whether symbol has one of the names this tool
// generates itself: anything ending in "_bss_get" (the suffix appended by
// accessorName), the exact name "OPENSSL_ia32cap_get", or anything starting
// with "BORINGSSL_bcm_text_".
func isSynthesized(symbol string) bool {
	switch {
	case symbol == "OPENSSL_ia32cap_get":
		return true
	case strings.HasSuffix(symbol, "_bss_get"):
		return true
	case strings.HasPrefix(symbol, "BORINGSSL_bcm_text_"):
		return true
	}
	return false
}
2121
// redirectorName returns the name of the redirector function for symbol,
// which is symbol with "bcm_redirector_" prepended.
func redirectorName(symbol string) string {
	const prefix = "bcm_redirector_"
	return prefix + symbol
}
2125
// sectionType reduces a section name to its type: everything before the
// second '.', so ".text.foo" yields ".text". Any resulting name starting with
// ".debug_" is collapsed to ".debug". The boolean is false, and the string
// empty, when section is empty or does not begin with '.'.
func sectionType(section string) (string, bool) {
	if !strings.HasPrefix(section, ".") {
		return "", false
	}

	// Search after the leading dot so that only a second dot truncates.
	kind := section
	if dot := strings.IndexByte(section[1:], '.'); dot >= 0 {
		kind = section[:dot+1]
	}

	if strings.HasPrefix(kind, ".debug_") {
		kind = ".debug"
	}
	return kind, true
}
2144
// accessorName returns the name of the accessor function for the BSS symbol
// name, which is name with "_bss_get" appended.
func accessorName(name string) string {
	const suffix = "_bss_get"
	return name + suffix
}
2150
2151func (d *delocation) mapLocalSymbol(symbol string) string {
2152	if d.currentInput.index == 0 {
2153		return symbol
2154	}
2155	return symbol + "_BCM_" + strconv.Itoa(d.currentInput.index)
2156}
2157
2158func detectProcessor(input inputFile) processorType {
2159	for statement := input.ast.up; statement != nil; statement = statement.next {
2160		node := skipNodes(statement.up, ruleWS)
2161		if node == nil || node.pegRule != ruleInstruction {
2162			continue
2163		}
2164
2165		instruction := node.up
2166		instructionName := input.contents[instruction.begin:instruction.end]
2167
2168		switch instructionName {
2169		case "movq", "call", "leaq":
2170			return x86_64
2171		case "addis", "addi", "mflr":
2172			return ppc64le
2173		case "str", "bl", "ldr", "st1":
2174			return aarch64
2175		}
2176	}
2177
2178	panic("processed entire input and didn't recognise any instructions.")
2179}
2180
// sortedSet returns the keys of m in ascending order. The result is always a
// non-nil slice, even when m is empty or nil.
func sortedSet(m map[string]struct{}) []string {
	keys := make([]string, len(m))
	next := 0
	for k := range m {
		keys[next] = k
		next++
	}
	sort.Strings(keys)
	return keys
}
2189