1// Copyright (c) 2017, Google Inc.
2//
3// Permission to use, copy, modify, and/or distribute this software for any
4// purpose with or without fee is hereby granted, provided that the above
5// copyright notice and this permission notice appear in all copies.
6//
7// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
8// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
9// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
10// SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
11// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
12// OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
13// CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
14
15// delocate performs several transformations of textual assembly code. See
16// crypto/fipsmodule/FIPS.md for an overview.
17package main
18
19import (
20	"errors"
21	"flag"
22	"fmt"
23	"io/ioutil"
24	"os"
25	"sort"
26	"strconv"
27	"strings"
28
29	"boringssl.googlesource.com/boringssl/util/ar"
30	"boringssl.googlesource.com/boringssl/util/fipstools/fipscommon"
31)
32
// inputFile represents a textual assembly file, together with the parse tree
// built from it.
type inputFile struct {
	// path presumably records where the input came from; it is not read
	// in this part of the file. NOTE(review): confirm against the caller.
	path string
	// index is a unique identifier given to this file. It's used for
	// mapping local symbols.
	index int
	// isArchive indicates that the input should be processed as an ar
	// file.
	isArchive bool
	// contents contains the contents of the file. AST nodes index into
	// this string with their begin/end offsets.
	contents string
	// ast points to the head of the syntax tree. Its children (ast.up and
	// the following siblings) are the top-level statements.
	ast *node32
}
47
// stringWriter is the only capability the delocation code needs from its
// output: the ability to append a string. The return values follow the usual
// (bytes written, error) convention.
type stringWriter interface {
	WriteString(string) (int, error)
}
51
// processorType identifies the target architecture of the assembly being
// processed.
type processorType int

// The values start at one, so the zero value of processorType does not name
// any processor.
const (
	ppc64le processorType = iota + 1
	x86_64
)
58
// delocation holds the state needed during a delocation operation.
type delocation struct {
	// processor selects the architecture-specific instruction handler and
	// output receives the rewritten assembly text.
	processor processorType
	output    stringWriter

	// symbols is the set of symbols defined in the module.
	symbols map[string]struct{}
	// localEntrySymbols is the set of symbols with .localentry directives.
	localEntrySymbols map[string]struct{}
	// redirectors maps from out-call symbol name to the name of a
	// redirector function for that symbol. E.g. “memcpy” ->
	// “bcm_redirector_memcpy”.
	redirectors map[string]string
	// bssAccessorsNeeded maps from a BSS symbol name to the symbol that
	// should be used to reference it. E.g. “P384_data_storage” ->
	// “P384_data_storage”.
	bssAccessorsNeeded map[string]string
	// tocLoaders is a set of symbol names for which TOC helper functions
	// are required. (ppc64le only.) Keys are written by loadFromTOC as
	// symbol + "\x00" + offset.
	tocLoaders map[string]struct{}
	// gotExternalsNeeded is a set of symbol names for which we need
	// “delta” symbols: symbols that contain the offset from their location
	// to the memory in question. Keys are written by loadFromGOT as
	// symbol + "@" + section.
	gotExternalsNeeded map[string]struct{}
	// gotDeltaNeeded is true if the code needs to load the value of
	// _GLOBAL_OFFSET_TABLE_.
	gotDeltaNeeded bool
	// gotOffsetsNeeded contains the symbols whose @GOT offsets are needed.
	gotOffsetsNeeded map[string]struct{}
	// gotOffOffsetsNeeded contains the symbols whose @GOTOFF offsets are needed.
	gotOffOffsetsNeeded map[string]struct{}

	// currentInput is the file being processed. It is set at the start of
	// processInput and is what contents slices into.
	currentInput inputFile
}
93
94func (d *delocation) contents(node *node32) string {
95	return d.currentInput.contents[node.begin:node.end]
96}
97
98// writeNode writes out an AST node.
99func (d *delocation) writeNode(node *node32) {
100	if _, err := d.output.WriteString(d.contents(node)); err != nil {
101		panic(err)
102	}
103}
104
105func (d *delocation) writeCommentedNode(node *node32) {
106	line := d.contents(node)
107	if _, err := d.output.WriteString("# WAS " + strings.TrimSpace(line) + "\n"); err != nil {
108		panic(err)
109	}
110}
111
112func locateError(err error, with *node32, in inputFile) error {
113	posMap := translatePositions([]rune(in.contents), []int{int(with.begin)})
114	var line int
115	for _, pos := range posMap {
116		line = pos.line
117	}
118
119	return fmt.Errorf("error while processing %q on line %d: %q", in.contents[with.begin:with.end], line, err)
120}
121
122func (d *delocation) processInput(input inputFile) (err error) {
123	d.currentInput = input
124
125	var origStatement *node32
126	defer func() {
127		if err := recover(); err != nil {
128			panic(locateError(fmt.Errorf("%s", err), origStatement, input))
129		}
130	}()
131
132	for statement := input.ast.up; statement != nil; statement = statement.next {
133		assertNodeType(statement, ruleStatement)
134		origStatement = statement
135
136		node := skipWS(statement.up)
137		if node == nil {
138			d.writeNode(statement)
139			continue
140		}
141
142		switch node.pegRule {
143		case ruleGlobalDirective, ruleComment, ruleLocationDirective:
144			d.writeNode(statement)
145		case ruleDirective:
146			statement, err = d.processDirective(statement, node.up)
147		case ruleLabelContainingDirective:
148			statement, err = d.processLabelContainingDirective(statement, node.up)
149		case ruleLabel:
150			statement, err = d.processLabel(statement, node.up)
151		case ruleInstruction:
152			switch d.processor {
153			case x86_64:
154				statement, err = d.processIntelInstruction(statement, node.up)
155			case ppc64le:
156				statement, err = d.processPPCInstruction(statement, node.up)
157			default:
158				panic("unknown processor")
159			}
160		default:
161			panic(fmt.Sprintf("unknown top-level statement type %q", rul3s[node.pegRule]))
162		}
163
164		if err != nil {
165			return locateError(err, origStatement, input)
166		}
167	}
168
169	return nil
170}
171
172func (d *delocation) processDirective(statement, directive *node32) (*node32, error) {
173	assertNodeType(directive, ruleDirectiveName)
174	directiveName := d.contents(directive)
175
176	var args []string
177	forEachPath(directive, func(arg *node32) {
178		// If the argument is a quoted string, use the raw contents.
179		// (Note that this doesn't unescape the string, but that's not
180		// needed so far.
181		if arg.up != nil {
182			arg = arg.up
183			assertNodeType(arg, ruleQuotedArg)
184			if arg.up == nil {
185				args = append(args, "")
186				return
187			}
188			arg = arg.up
189			assertNodeType(arg, ruleQuotedText)
190		}
191		args = append(args, d.contents(arg))
192	}, ruleArgs, ruleArg)
193
194	switch directiveName {
195	case "comm", "lcomm":
196		if len(args) < 1 {
197			return nil, errors.New("comm directive has no arguments")
198		}
199		d.bssAccessorsNeeded[args[0]] = args[0]
200		d.writeNode(statement)
201
202	case "data":
203		// ASAN and some versions of MSAN are adding a .data section,
204		// and adding references to symbols within it to the code. We
205		// will have to work around this in the future.
206		return nil, errors.New(".data section found in module")
207
208	case "section":
209		section := args[0]
210
211		if section == ".data.rel.ro" {
212			// In a normal build, this is an indication of a
213			// problem but any references from the module to this
214			// section will result in a relocation and thus will
215			// break the integrity check. ASAN can generate these
216			// sections and so we will likely have to work around
217			// that in the future.
218			return nil, errors.New(".data.rel.ro section found in module")
219		}
220
221		sectionType, ok := sectionType(section)
222		if !ok {
223			// Unknown sections are permitted in order to be robust
224			// to different compiler modes.
225			d.writeNode(statement)
226			break
227		}
228
229		switch sectionType {
230		case ".rodata", ".text":
231			// Move .rodata to .text so it may be accessed without
232			// a relocation. GCC with -fmerge-constants will place
233			// strings into separate sections, so we move all
234			// sections named like .rodata. Also move .text.startup
235			// so the self-test function is also in the module.
236			d.writeCommentedNode(statement)
237			d.output.WriteString(".text\n")
238
239		case ".data":
240			// See above about .data
241			return nil, errors.New(".data section found in module")
242
243		case ".init_array", ".fini_array", ".ctors", ".dtors":
244			// init_array/ctors/dtors contains function
245			// pointers to constructor/destructor
246			// functions. These contain relocations, but
247			// they're in a different section anyway.
248			d.writeNode(statement)
249			break
250
251		case ".debug", ".note", ".toc":
252			d.writeNode(statement)
253			break
254
255		case ".bss":
256			d.writeNode(statement)
257			return d.handleBSS(statement)
258		}
259
260	default:
261		d.writeNode(statement)
262	}
263
264	return statement, nil
265}
266
267func (d *delocation) processLabelContainingDirective(statement, directive *node32) (*node32, error) {
268	// The symbols within directives need to be mapped so that local
269	// symbols in two different .s inputs don't collide.
270	changed := false
271	assertNodeType(directive, ruleLabelContainingDirectiveName)
272	name := d.contents(directive)
273
274	node := directive.next
275	assertNodeType(node, ruleWS)
276
277	node = node.next
278	assertNodeType(node, ruleSymbolArgs)
279
280	var args []string
281	for node = skipWS(node.up); node != nil; node = skipWS(node.next) {
282		assertNodeType(node, ruleSymbolArg)
283		arg := node.up
284		var mapped string
285
286		for term := arg; term != nil; term = term.next {
287			if term.pegRule != ruleLocalSymbol {
288				mapped += d.contents(term)
289				continue
290			}
291
292			oldSymbol := d.contents(term)
293			newSymbol := d.mapLocalSymbol(oldSymbol)
294			if newSymbol != oldSymbol {
295				changed = true
296			}
297
298			mapped += newSymbol
299		}
300
301		args = append(args, mapped)
302	}
303
304	if !changed {
305		d.writeNode(statement)
306	} else {
307		d.writeCommentedNode(statement)
308		d.output.WriteString("\t" + name + "\t" + strings.Join(args, ", ") + "\n")
309	}
310
311	if name == ".localentry" {
312		d.output.WriteString(localEntryName(args[0]) + ":\n")
313	}
314
315	return statement, nil
316}
317
318func (d *delocation) processLabel(statement, label *node32) (*node32, error) {
319	symbol := d.contents(label)
320
321	switch label.pegRule {
322	case ruleLocalLabel:
323		d.output.WriteString(symbol + ":\n")
324	case ruleLocalSymbol:
325		// symbols need to be mapped so that local symbols from two
326		// different .s inputs don't collide.
327		d.output.WriteString(d.mapLocalSymbol(symbol) + ":\n")
328	case ruleSymbolName:
329		d.output.WriteString(localTargetName(symbol) + ":\n")
330		d.writeNode(statement)
331	default:
332		return nil, fmt.Errorf("unknown label type %q", rul3s[label.pegRule])
333	}
334
335	return statement, nil
336}
337
338// instructionArgs collects all the arguments to an instruction.
339func instructionArgs(node *node32) (argNodes []*node32) {
340	for node = skipWS(node); node != nil; node = skipWS(node.next) {
341		assertNodeType(node, ruleInstructionArg)
342		argNodes = append(argNodes, node.up)
343	}
344
345	return argNodes
346}
347
348/* ppc64le
349
350[PABI]: “64-Bit ELF V2 ABI Specification. Power Architecture.” March 21st,
351        2017
352
353(Also useful is “Power ISA Version 2.07 B”. Note that version three of that
354document is /not/ good as that's POWER9 specific.)
355
ppc64le doesn't have IP-relative addressing and does a lot to work around this.
Rather than reference a PLT and GOT directly, it has a single structure called
the TOC (Table Of Contents). Within the TOC are the contents of the .rodata,
.data, .got, .plt, .bss, etc sections [PABI;3.3].
360
361A pointer to the TOC is maintained in r2 and the following pattern is used to
362load the address of an element into a register:
363
364  addis <address register>, 2, foo@toc@ha
365  addi <address register>, <address register>, foo@toc@l
366
The “addis” instruction shifts a signed constant left 16 bits and adds the
result to its second argument, saving the result in the first argument. The
“addi” instruction does the same, but without shifting. Thus the “@toc@ha”
suffix on a symbol means “the top 16 bits of the TOC offset” and “@toc@l” means
“the bottom 16 bits of the offset”. However, note that both values are signed,
thus offsets in the top half of a 64KB chunk will have an @ha value that's one
greater than expected and a negative @l value.
374
375The TOC is specific to a “module” (basically an executable or shared object).
376This means that there's not a single TOC in a process and that r2 needs to
377change as control moves between modules. Thus functions have two entry points:
378the “global” entry point and the “local” entry point. Jumps from within the
379same module can use the local entry while jumps from other modules must use the
380global entry. The global entry establishes the correct value of r2 before
381running the function and the local entry skips that code.
382
383The global entry point for a function is defined by its label. The local entry
384is a power-of-two number of bytes from the global entry, set by the
385“.localentry” directive. (ppc64le instructions are always 32 bits, so an offset
386of 1 or 2 bytes is treated as an offset of zero.)
387
388In order to help the global entry code set r2 to point to the local TOC, r12 is
389set to the address of the global entry point when called [PABI;2.2.1.1]. Thus
390the global entry will typically use an addis+addi pair to add a known offset to
391r12 and store it in r2. For example:
392
393foo:
394  addis 2, 12, .TOC. - foo@ha
395  addi  2, 2,  .TOC. - foo@l
396
397(It's worth noting that the '@' operator binds very loosely, so the 3rd
398arguments parse as (.TOC. - foo)@ha and (.TOC. - foo)@l.)
399
400When calling a function, the compiler doesn't know whether that function is in
401the same module or not. Thus it doesn't know whether r12 needs to be set nor
402whether r2 will be clobbered on return. Rather than always assume the worst,
403the linker fixes stuff up once it knows that a call is going out of module:
404
405Firstly, calling, say, memcpy (which we assume to be in a different module)
406won't actually jump directly to memcpy, or even a PLT resolution function.
407It'll call a synthesised function that:
408  a) saves r2 in the caller's stack frame
409  b) loads the address of memcpy@PLT into r12
410  c) jumps to r12.
411
412As this synthesised function loads memcpy@PLT, a call to memcpy from the
413compiled code just references “memcpy” directly, not “memcpy@PLT”.
414
415Since it jumps directly to memcpy@PLT, it can't restore r2 on return. Thus
416calls must be followed by a nop. If the call ends up going out-of-module, the
417linker will rewrite that nop to load r2 from the stack.
418
419Speaking of the stack, the stack pointer is kept in r1 and there's a 288-byte
420red-zone. The format of the stack frame is defined [PABI;2.2.2] and must be
421followed as called functions will write into their parent's stack frame. For
422example, the synthesised out-of-module trampolines will save r2 24 bytes into
423the caller's frame and all non-leaf functions save the return address 16 bytes
424into the caller's frame.
425
426A final point worth noting: some RISC ISAs have r0 wired to zero: all reads
427result in zero and all writes are discarded. POWER does something a little like
428that, but r0 is only special in certain argument positions for certain
429instructions. You just have to read the manual to know which they are.
430
431
432Delocation is easier than Intel because there's just TOC references, but it's
433also harder because there's no IP-relative addressing.
434
435Jumps are IP-relative however, and have a 24-bit immediate value. So we can
436jump to functions that set a register to the needed value. (r3 is the
437return-value register and so that's what is generally used here.) */
438
439// isPPC64LEAPair recognises an addis+addi pair that's adding the offset of
440// source to relative and writing the result to target.
441func (d *delocation) isPPC64LEAPair(statement *node32) (target, source, relative string, ok bool) {
442	instruction := skipWS(statement.up).up
443	assertNodeType(instruction, ruleInstructionName)
444	name1 := d.contents(instruction)
445	args1 := instructionArgs(instruction.next)
446
447	statement = statement.next
448	instruction = skipWS(statement.up).up
449	assertNodeType(instruction, ruleInstructionName)
450	name2 := d.contents(instruction)
451	args2 := instructionArgs(instruction.next)
452
453	if name1 != "addis" ||
454		len(args1) != 3 ||
455		name2 != "addi" ||
456		len(args2) != 3 {
457		return "", "", "", false
458	}
459
460	target = d.contents(args1[0])
461	relative = d.contents(args1[1])
462	source1 := d.contents(args1[2])
463	source2 := d.contents(args2[2])
464
465	if !strings.HasSuffix(source1, "@ha") ||
466		!strings.HasSuffix(source2, "@l") ||
467		source1[:len(source1)-3] != source2[:len(source2)-2] ||
468		d.contents(args2[0]) != target ||
469		d.contents(args2[1]) != target {
470		return "", "", "", false
471	}
472
473	source = source1[:len(source1)-3]
474	ok = true
475	return
476}
477
478// establishTOC writes the global entry prelude for a function. The standard
479// prelude involves relocations so this version moves the relocation outside
480// the integrity-checked area.
481func establishTOC(w stringWriter) {
482	w.WriteString("999:\n")
483	w.WriteString("\taddis 2, 12, .LBORINGSSL_external_toc-999b@ha\n")
484	w.WriteString("\taddi 2, 2, .LBORINGSSL_external_toc-999b@l\n")
485	w.WriteString("\tld 12, 0(2)\n")
486	w.WriteString("\tadd 2, 2, 12\n")
487}
488
// loadTOCFuncName returns the name of a synthesized function that sets r3 to
// the value of “symbol+offset”.
//
// The name is ".Lbcm_loadtoc_" followed by symbol with every "." spelled
// "_dot_". A non-empty offset is appended after an underscore, with "+" and
// "-" spelled "_plus_" and "_minus_".
func loadTOCFuncName(symbol, offset string) string {
	name := ".Lbcm_loadtoc_" + strings.Replace(symbol, ".", "_dot_", -1)
	if offset == "" {
		return name
	}

	signs := strings.NewReplacer("+", "_plus_", "-", "_minus_")
	return name + "_" + signs.Replace(offset)
}
501
502func (d *delocation) loadFromTOC(w stringWriter, symbol, offset, dest string) wrapperFunc {
503	d.tocLoaders[symbol+"\x00"+offset] = struct{}{}
504
505	return func(k func()) {
506		w.WriteString("\taddi 1, 1, -288\n")   // Clear the red zone.
507		w.WriteString("\tmflr " + dest + "\n") // Stash the link register.
508		w.WriteString("\tstd " + dest + ", -8(1)\n")
509		// The TOC loader will use r3, so stash it if necessary.
510		if dest != "3" {
511			w.WriteString("\tstd 3, -16(1)\n")
512		}
513
514		// Because loadTOCFuncName returns a “.L” name, we don't need a
515		// nop after this call.
516		w.WriteString("\tbl " + loadTOCFuncName(symbol, offset) + "\n")
517
518		// Cycle registers around. We need r3 -> destReg, -8(1) ->
519		// lr and, optionally, -16(1) -> r3.
520		w.WriteString("\tstd 3, -24(1)\n")
521		w.WriteString("\tld 3, -8(1)\n")
522		w.WriteString("\tmtlr 3\n")
523		w.WriteString("\tld " + dest + ", -24(1)\n")
524		if dest != "3" {
525			w.WriteString("\tld 3, -16(1)\n")
526		}
527		w.WriteString("\taddi 1, 1, 288\n")
528
529		k()
530	}
531}
532
533func (d *delocation) gatherOffsets(symRef *node32, offsets string) (*node32, string) {
534	for symRef != nil && symRef.pegRule == ruleOffset {
535		offset := d.contents(symRef)
536		if offset[0] != '+' && offset[0] != '-' {
537			offset = "+" + offset
538		}
539		offsets = offsets + offset
540		symRef = symRef.next
541	}
542	return symRef, offsets
543}
544
545func (d *delocation) parseMemRef(memRef *node32) (symbol, offset, section string, didChange, symbolIsLocal bool, nextRef *node32) {
546	if memRef.pegRule != ruleSymbolRef {
547		return "", "", "", false, false, memRef
548	}
549
550	symRef := memRef.up
551	nextRef = memRef.next
552
553	// (Offset* '+')?
554	symRef, offset = d.gatherOffsets(symRef, offset)
555
556	// (LocalSymbol / SymbolName)
557	symbol = d.contents(symRef)
558	if symRef.pegRule == ruleLocalSymbol {
559		symbolIsLocal = true
560		mapped := d.mapLocalSymbol(symbol)
561		if mapped != symbol {
562			symbol = mapped
563			didChange = true
564		}
565	}
566	symRef = symRef.next
567
568	// Offset*
569	symRef, offset = d.gatherOffsets(symRef, offset)
570
571	// ('@' Section / Offset*)?
572	if symRef != nil {
573		assertNodeType(symRef, ruleSection)
574		section = d.contents(symRef)
575		symRef = symRef.next
576
577		symRef, offset = d.gatherOffsets(symRef, offset)
578	}
579
580	if symRef != nil {
581		panic(fmt.Sprintf("unexpected token in SymbolRef: %q", rul3s[symRef.pegRule]))
582	}
583
584	return
585}
586
// processPPCInstruction rewrites a single ppc64le instruction so that it no
// longer needs a relocation inside the module. statement is the enclosing
// statement and instruction is its InstructionName node.
//
// Each argument is examined in turn:
//
//   - registers, constants and local label references are kept as they are;
//   - a high TOC reference must begin the addis+addi global-entry preamble
//     (relative to r12, targeting r2), which is replaced by establishTOC and
//     consumes the following statement too;
//   - a memory reference has its symbol redirected to a local entry point, a
//     local target name or a redirector function as appropriate, and
//     toc@ha / toc@l references are replaced by a call to a TOC loader.
//
// If nothing changed, the statement is copied through. Otherwise the
// original is emitted as a comment and the replacement (possibly empty) is
// emitted inside whatever wrappers were accumulated.
//
// It returns the last statement consumed, or an error.
func (d *delocation) processPPCInstruction(statement, instruction *node32) (*node32, error) {
	assertNodeType(instruction, ruleInstructionName)
	instructionName := d.contents(instruction)
	// Any mnemonic starting with 'b' is treated as a branch.
	isBranch := instructionName[0] == 'b'

	argNodes := instructionArgs(instruction.next)

	var wrappers wrapperStack
	var args []string
	changed := false

Args:
	for i, arg := range argNodes {
		fullArg := arg
		isIndirect := false

		if arg.pegRule == ruleIndirectionIndicator {
			arg = arg.next
			isIndirect = true
		}

		switch arg.pegRule {
		case ruleRegisterOrConstant, ruleLocalLabelRef:
			args = append(args, d.contents(fullArg))

		case ruleTOCRefLow:
			// The low half is consumed together with the high half
			// below, so meeting one on its own is an error.
			return nil, errors.New("Found low TOC reference outside preamble pattern")

		case ruleTOCRefHigh:
			target, _, relative, ok := d.isPPC64LEAPair(statement)
			if !ok {
				return nil, errors.New("Found high TOC reference outside preamble pattern")
			}

			if relative != "12" {
				return nil, fmt.Errorf("preamble is relative to %q, not r12", relative)
			}

			if target != "2" {
				return nil, fmt.Errorf("preamble is setting %q, not r2", target)
			}

			// Skip the addi as well: the whole pair is replaced by
			// the prelude written here, so no replacement
			// instruction is emitted later.
			statement = statement.next
			establishTOC(d.output)
			instructionName = ""
			changed = true
			break Args

		case ruleMemoryRef:
			symbol, offset, section, didChange, symbolIsLocal, memRef := d.parseMemRef(arg.up)
			// NOTE(review): this overwrites, rather than ORs into,
			// any change recorded for an earlier argument —
			// confirm that is intended.
			changed = didChange

			if len(symbol) > 0 {
				if _, localEntrySymbol := d.localEntrySymbols[symbol]; localEntrySymbol && isBranch {
					// Branches to a symbol with a local entry
					// point go to that entry point.
					symbol = localEntryName(symbol)
					changed = true
				} else if _, knownSymbol := d.symbols[symbol]; knownSymbol {
					// Symbols defined in the module are
					// referenced via their local target name.
					symbol = localTargetName(symbol)
					changed = true
				} else if !symbolIsLocal && !isSynthesized(symbol) && len(section) == 0 {
					// Anything else without a section suffix
					// goes through a redirector function.
					changed = true
					d.redirectors[symbol] = redirectorName(symbol)
					symbol = redirectorName(symbol)
					// TODO(davidben): This should sanity-check the next
					// instruction is a nop and ideally remove it.
					wrappers = append(wrappers, func(k func()) {
						k()
						// Like the linker's PLT stubs, redirector functions
						// expect callers to restore r2.
						d.output.WriteString("\tld 2, 24(1)\n")
					})
				}
			}

			switch section {
			case "":

			case "tls":
				// This section identifier just tells the
				// assembler to use r13, the pointer to the
				// thread-local data [PABI;3.7.3.3].

			case "toc@ha":
				// Delete toc@ha instructions. Per
				// [PABI;3.6.3], the linker is allowed to erase
				// toc@ha instructions. We take advantage of
				// this by unconditionally erasing the toc@ha
				// instructions and doing the full lookup when
				// processing toc@l.
				//
				// Note that any offset here applies before @ha
				// and @l. That is, 42+foo@toc@ha is
				// #ha(42+foo-.TOC.), not 42+#ha(foo-.TOC.). Any
				// corresponding toc@l references are required
				// by the ABI to have the same offset. The
				// offset will be incorporated in full when
				// those are processed.
				if instructionName != "addis" || len(argNodes) != 3 || i != 2 || args[1] != "2" {
					return nil, errors.New("can't process toc@ha reference")
				}
				changed = true
				instructionName = ""
				break Args

			case "toc@l":
				// Per [PABI;3.6.3], this instruction must take
				// as input a register which was the output of
				// a toc@ha computation and compute the actual
				// address of some symbol. The toc@ha
				// computation was elided, so we ignore that
				// input register and compute the address
				// directly.
				changed = true

				// For all supported toc@l instructions, the
				// destination register is the first argument.
				destReg := args[0]

				wrappers = append(wrappers, d.loadFromTOC(d.output, symbol, offset, destReg))
				switch instructionName {
				case "addi":
					// The original instruction was:
					//   addi destReg, tocHaReg, offset+symbol@toc@l
					instructionName = ""

				case "ld", "lhz", "lwz":
					// The original instruction was:
					//   l?? destReg, offset+symbol@toc@l(tocHaReg)
					//
					// We transform that into the
					// equivalent dereference of destReg:
					//   l?? destReg, 0(destReg)
					origInstructionName := instructionName
					instructionName = ""

					assertNodeType(memRef, ruleBaseIndexScale)
					assertNodeType(memRef.up, ruleRegisterOrConstant)
					if memRef.next != nil || memRef.up.next != nil {
						return nil, errors.New("expected single register in BaseIndexScale for ld argument")
					}

					baseReg := destReg
					if baseReg == "0" {
						// Register zero is special as the base register for a load.
						// Avoid it by spilling and using r3 instead.
						baseReg = "3"
						wrappers = append(wrappers, func(k func()) {
							d.output.WriteString("\taddi 1, 1, -288\n") // Clear the red zone.
							d.output.WriteString("\tstd " + baseReg + ", -8(1)\n")
							d.output.WriteString("\tmr " + baseReg + ", " + destReg + "\n")
							k()
							d.output.WriteString("\tld " + baseReg + ", -8(1)\n")
							d.output.WriteString("\taddi 1, 1, 288\n") // Restore the stack pointer.
						})
					}

					// This innermost wrapper emits the load
					// itself and does not call k.
					wrappers = append(wrappers, func(k func()) {
						d.output.WriteString("\t" + origInstructionName + " " + destReg + ", 0(" + baseReg + ")\n")
					})
				default:
					return nil, fmt.Errorf("can't process TOC argument to %q", instructionName)
				}

			default:
				return nil, fmt.Errorf("Unknown section type %q", section)
			}

			// Reassemble the (possibly renamed) memory reference as
			// text: indirection marker, symbol, offsets, section
			// and whatever followed the SymbolRef.
			argStr := ""
			if isIndirect {
				argStr += "*"
			}
			argStr += symbol
			if len(offset) > 0 {
				argStr += offset
			}
			if len(section) > 0 {
				argStr += "@"
				argStr += section
			}

			for ; memRef != nil; memRef = memRef.next {
				argStr += d.contents(memRef)
			}

			args = append(args, argStr)

		default:
			panic(fmt.Sprintf("unknown instruction argument type %q", rul3s[arg.pegRule]))
		}
	}

	if changed {
		d.writeCommentedNode(statement)

		// An empty instructionName means the instruction has been
		// replaced entirely by what the wrappers emit.
		var replacement string
		if len(instructionName) > 0 {
			replacement = "\t" + instructionName + "\t" + strings.Join(args, ", ") + "\n"
		}

		wrappers.do(func() {
			d.output.WriteString(replacement)
		})
	} else {
		d.writeNode(statement)
	}

	return statement, nil
}
795
796/* Intel */
797
// instructionType is the coarse category that classifyInstruction assigns to
// an Intel instruction.
type instructionType int

const (
	// instrPush is a one-argument push.
	instrPush instructionType = iota
	// instrMove is a two-argument move.
	instrMove
	// instrTransformingMove is essentially a move, but it performs some
	// transformation of the data during the process.
	instrTransformingMove
	// instrJump is a one-argument call or jump, conditional or not.
	instrJump
	// instrConditionalMove is a two-argument conditional move.
	instrConditionalMove
	// instrCombine merges the source and destination in some fashion, for example
	// a 2-operand bitwise operation.
	instrCombine
	// instrThreeArg merges two sources into a destination in some fashion.
	instrThreeArg
	// instrCompare takes two arguments and writes outputs to the flags register.
	instrCompare
	// instrOther is everything classifyInstruction does not recognise,
	// including known mnemonics with an unexpected argument count.
	instrOther
)
817
818func classifyInstruction(instr string, args []*node32) instructionType {
819	switch instr {
820	case "push", "pushq":
821		if len(args) == 1 {
822			return instrPush
823		}
824
825	case "mov", "movq", "vmovq", "movsd", "vmovsd":
826		if len(args) == 2 {
827			return instrMove
828		}
829
830	case "cmovneq", "cmoveq":
831		if len(args) == 2 {
832			return instrConditionalMove
833		}
834
835	case "call", "callq", "jmp", "jo", "jno", "js", "jns", "je", "jz", "jne", "jnz", "jb", "jnae", "jc", "jnb", "jae", "jnc", "jbe", "jna", "ja", "jnbe", "jl", "jnge", "jge", "jnl", "jle", "jng", "jg", "jnle", "jp", "jpe", "jnp", "jpo":
836		if len(args) == 1 {
837			return instrJump
838		}
839
840	case "orq", "andq", "xorq":
841		if len(args) == 2 {
842			return instrCombine
843		}
844
845	case "cmpq":
846		if len(args) == 2 {
847			return instrCompare
848		}
849
850	case "sarxq", "shlxq", "shrxq":
851		if len(args) == 3 {
852			return instrThreeArg
853		}
854
855	case "vpbroadcastq":
856		if len(args) == 2 {
857			return instrTransformingMove
858		}
859	}
860
861	return instrOther
862}
863
864func push(w stringWriter) wrapperFunc {
865	return func(k func()) {
866		w.WriteString("\tpushq %rax\n")
867		k()
868		w.WriteString("\txchg %rax, (%rsp)\n")
869	}
870}
871
872func compare(w stringWriter, instr, a, b string) wrapperFunc {
873	return func(k func()) {
874		k()
875		w.WriteString(fmt.Sprintf("\t%s %s, %s\n", instr, a, b))
876	}
877}
878
879func (d *delocation) loadFromGOT(w stringWriter, destination, symbol, section string, redzoneCleared bool) wrapperFunc {
880	d.gotExternalsNeeded[symbol+"@"+section] = struct{}{}
881
882	return func(k func()) {
883		if !redzoneCleared {
884			w.WriteString("\tleaq -128(%rsp), %rsp\n") // Clear the red zone.
885		}
886		w.WriteString("\tpushf\n")
887		w.WriteString(fmt.Sprintf("\tleaq %s_%s_external(%%rip), %s\n", symbol, section, destination))
888		w.WriteString(fmt.Sprintf("\taddq (%s), %s\n", destination, destination))
889		w.WriteString(fmt.Sprintf("\tmovq (%s), %s\n", destination, destination))
890		w.WriteString("\tpopf\n")
891		if !redzoneCleared {
892			w.WriteString("\tleaq\t128(%rsp), %rsp\n")
893		}
894	}
895}
896
897func saveFlags(w stringWriter, redzoneCleared bool) wrapperFunc {
898	return func(k func()) {
899		if !redzoneCleared {
900			w.WriteString("\tleaq -128(%rsp), %rsp\n") // Clear the red zone.
901			defer w.WriteString("\tleaq 128(%rsp), %rsp\n")
902		}
903		w.WriteString("\tpushfq\n")
904		k()
905		w.WriteString("\tpopfq\n")
906	}
907}
908
909func saveRegister(w stringWriter, avoidRegs []string) (wrapperFunc, string) {
910	candidates := []string{"%rax", "%rbx", "%rcx", "%rdx"}
911
912	var reg string
913NextCandidate:
914	for _, candidate := range candidates {
915		for _, avoid := range avoidRegs {
916			if candidate == avoid {
917				continue NextCandidate
918			}
919		}
920
921		reg = candidate
922		break
923	}
924
925	if len(reg) == 0 {
926		panic("too many excluded registers")
927	}
928
929	return func(k func()) {
930		w.WriteString("\tleaq -128(%rsp), %rsp\n") // Clear the red zone.
931		w.WriteString("\tpushq " + reg + "\n")
932		k()
933		w.WriteString("\tpopq " + reg + "\n")
934		w.WriteString("\tleaq 128(%rsp), %rsp\n")
935	}, reg
936}
937
938func moveTo(w stringWriter, target string, isAVX bool, source string) wrapperFunc {
939	return func(k func()) {
940		k()
941		prefix := ""
942		if isAVX {
943			prefix = "v"
944		}
945		w.WriteString("\t" + prefix + "movq " + source + ", " + target + "\n")
946	}
947}
948
949func finalTransform(w stringWriter, transformInstruction, reg string) wrapperFunc {
950	return func(k func()) {
951		k()
952		w.WriteString("\t" + transformInstruction + " " + reg + ", " + reg + "\n")
953	}
954}
955
956func combineOp(w stringWriter, instructionName, source, dest string) wrapperFunc {
957	return func(k func()) {
958		k()
959		w.WriteString("\t" + instructionName + " " + source + ", " + dest + "\n")
960	}
961}
962
963func threeArgCombineOp(w stringWriter, instructionName, source1, source2, dest string) wrapperFunc {
964	return func(k func()) {
965		k()
966		w.WriteString("\t" + instructionName + " " + source1 + ", " + source2 + ", " + dest + "\n")
967	}
968}
969
// isValidLEATarget reports whether reg may be used as the destination of an
// LEA instruction, i.e. whether it is not an %xmm, %ymm or %zmm register.
func isValidLEATarget(reg string) bool {
	for _, vectorPrefix := range [...]string{"%xmm", "%ymm", "%zmm"} {
		if strings.HasPrefix(reg, vectorPrefix) {
			return false
		}
	}
	return true
}
973
974func undoConditionalMove(w stringWriter, instr string) wrapperFunc {
975	var invertedCondition string
976
977	switch instr {
978	case "cmoveq":
979		invertedCondition = "ne"
980	case "cmovneq":
981		invertedCondition = "e"
982	default:
983		panic(fmt.Sprintf("don't know how to handle conditional move instruction %q", instr))
984	}
985
986	return func(k func()) {
987		w.WriteString("\tj" + invertedCondition + " 999f\n")
988		k()
989		w.WriteString("999:\n")
990	}
991}
992
993func (d *delocation) isRIPRelative(node *node32) bool {
994	return node != nil && node.pegRule == ruleBaseIndexScale && d.contents(node) == "(%rip)"
995}
996
997func (d *delocation) processIntelInstruction(statement, instruction *node32) (*node32, error) {
998	assertNodeType(instruction, ruleInstructionName)
999	instructionName := d.contents(instruction)
1000
1001	argNodes := instructionArgs(instruction.next)
1002
1003	var wrappers wrapperStack
1004	var args []string
1005	changed := false
1006
1007Args:
1008	for i, arg := range argNodes {
1009		fullArg := arg
1010		isIndirect := false
1011
1012		if arg.pegRule == ruleIndirectionIndicator {
1013			arg = arg.next
1014			isIndirect = true
1015		}
1016
1017		switch arg.pegRule {
1018		case ruleRegisterOrConstant, ruleLocalLabelRef:
1019			args = append(args, d.contents(fullArg))
1020
1021		case ruleMemoryRef:
1022			symbol, offset, section, didChange, symbolIsLocal, memRef := d.parseMemRef(arg.up)
1023			changed = didChange
1024
1025			if symbol == "OPENSSL_ia32cap_P" && section == "" {
1026				if instructionName != "leaq" {
1027					return nil, fmt.Errorf("non-leaq instruction %q referenced OPENSSL_ia32cap_P directly", instructionName)
1028				}
1029
1030				if i != 0 || len(argNodes) != 2 || !d.isRIPRelative(memRef) || len(offset) > 0 {
1031					return nil, fmt.Errorf("invalid OPENSSL_ia32cap_P reference in instruction %q", instructionName)
1032				}
1033
1034				target := argNodes[1]
1035				assertNodeType(target, ruleRegisterOrConstant)
1036				reg := d.contents(target)
1037
1038				if !strings.HasPrefix(reg, "%r") {
1039					return nil, fmt.Errorf("tried to load OPENSSL_ia32cap_P into %q, which is not a standard register.", reg)
1040				}
1041
1042				changed = true
1043
1044				// Flag-altering instructions (i.e. addq) are going to be used so the
1045				// flags need to be preserved.
1046				wrappers = append(wrappers, saveFlags(d.output, false /* Red Zone not yet cleared */))
1047
1048				wrappers = append(wrappers, func(k func()) {
1049					d.output.WriteString("\tleaq\tOPENSSL_ia32cap_addr_delta(%rip), " + reg + "\n")
1050					d.output.WriteString("\taddq\t(" + reg + "), " + reg + "\n")
1051				})
1052
1053				break Args
1054			}
1055
1056			switch section {
1057			case "":
1058				if _, knownSymbol := d.symbols[symbol]; knownSymbol {
1059					symbol = localTargetName(symbol)
1060					changed = true
1061				}
1062
1063			case "PLT":
1064				if classifyInstruction(instructionName, argNodes) != instrJump {
1065					return nil, fmt.Errorf("Cannot rewrite PLT reference for non-jump instruction %q", instructionName)
1066				}
1067
1068				if _, knownSymbol := d.symbols[symbol]; knownSymbol {
1069					symbol = localTargetName(symbol)
1070					changed = true
1071				} else if !symbolIsLocal && !isSynthesized(symbol) {
1072					// Unknown symbol via PLT is an
1073					// out-call from the module, e.g.
1074					// memcpy.
1075					d.redirectors[symbol+"@"+section] = redirectorName(symbol)
1076					symbol = redirectorName(symbol)
1077				}
1078
1079				changed = true
1080
1081			case "GOTPCREL":
1082				if len(offset) > 0 {
1083					return nil, errors.New("loading from GOT with offset is unsupported")
1084				}
1085				if !d.isRIPRelative(memRef) {
1086					return nil, errors.New("GOT access must be IP-relative")
1087				}
1088
1089				useGOT := false
1090				if _, knownSymbol := d.symbols[symbol]; knownSymbol {
1091					symbol = localTargetName(symbol)
1092					changed = true
1093				} else if !isSynthesized(symbol) {
1094					useGOT = true
1095				}
1096
1097				classification := classifyInstruction(instructionName, argNodes)
1098				if classification != instrThreeArg && classification != instrCompare && i != 0 {
1099					return nil, errors.New("GOT access must be source operand")
1100				}
1101
1102				// Reduce the instruction to movq symbol@GOTPCREL, targetReg.
1103				var targetReg string
1104				var redzoneCleared bool
1105				switch classification {
1106				case instrPush:
1107					wrappers = append(wrappers, push(d.output))
1108					targetReg = "%rax"
1109				case instrConditionalMove:
1110					wrappers = append(wrappers, undoConditionalMove(d.output, instructionName))
1111					fallthrough
1112				case instrMove:
1113					assertNodeType(argNodes[1], ruleRegisterOrConstant)
1114					targetReg = d.contents(argNodes[1])
1115				case instrCompare:
1116					otherSource := d.contents(argNodes[i^1])
1117					saveRegWrapper, tempReg := saveRegister(d.output, []string{otherSource})
1118					redzoneCleared = true
1119					wrappers = append(wrappers, saveRegWrapper)
1120					if i == 0 {
1121						wrappers = append(wrappers, compare(d.output, instructionName, tempReg, otherSource))
1122					} else {
1123						wrappers = append(wrappers, compare(d.output, instructionName, otherSource, tempReg))
1124					}
1125					targetReg = tempReg
1126				case instrTransformingMove:
1127					assertNodeType(argNodes[1], ruleRegisterOrConstant)
1128					targetReg = d.contents(argNodes[1])
1129					wrappers = append(wrappers, finalTransform(d.output, instructionName, targetReg))
1130					if isValidLEATarget(targetReg) {
1131						return nil, errors.New("Currently transforming moves are assumed to target XMM registers. Otherwise we'll pop %rax before reading it to do the transform.")
1132					}
1133				case instrCombine:
1134					targetReg = d.contents(argNodes[1])
1135					if !isValidLEATarget(targetReg) {
1136						return nil, fmt.Errorf("cannot handle combining instructions targeting non-general registers")
1137					}
1138					saveRegWrapper, tempReg := saveRegister(d.output, []string{targetReg})
1139					redzoneCleared = true
1140					wrappers = append(wrappers, saveRegWrapper)
1141
1142					wrappers = append(wrappers, combineOp(d.output, instructionName, tempReg, targetReg))
1143					targetReg = tempReg
1144				case instrThreeArg:
1145					if n := len(argNodes); n != 3 {
1146						return nil, fmt.Errorf("three-argument instruction has %d arguments", n)
1147					}
1148					if i != 0 && i != 1 {
1149						return nil, errors.New("GOT access must be from source operand")
1150					}
1151					targetReg = d.contents(argNodes[2])
1152
1153					otherSource := d.contents(argNodes[1])
1154					if i == 1 {
1155						otherSource = d.contents(argNodes[0])
1156					}
1157
1158					saveRegWrapper, tempReg := saveRegister(d.output, []string{targetReg, otherSource})
1159					redzoneCleared = true
1160					wrappers = append(wrappers, saveRegWrapper)
1161
1162					if i == 0 {
1163						wrappers = append(wrappers, threeArgCombineOp(d.output, instructionName, tempReg, otherSource, targetReg))
1164					} else {
1165						wrappers = append(wrappers, threeArgCombineOp(d.output, instructionName, otherSource, tempReg, targetReg))
1166					}
1167					targetReg = tempReg
1168				default:
1169					return nil, fmt.Errorf("Cannot rewrite GOTPCREL reference for instruction %q", instructionName)
1170				}
1171
1172				if !isValidLEATarget(targetReg) {
1173					// Sometimes the compiler will load from the GOT to an
1174					// XMM register, which is not a valid target of an LEA
1175					// instruction.
1176					saveRegWrapper, tempReg := saveRegister(d.output, nil)
1177					wrappers = append(wrappers, saveRegWrapper)
1178					isAVX := strings.HasPrefix(instructionName, "v")
1179					wrappers = append(wrappers, moveTo(d.output, targetReg, isAVX, tempReg))
1180					targetReg = tempReg
1181					if redzoneCleared {
1182						return nil, fmt.Errorf("internal error: Red Zone was already cleared")
1183					}
1184					redzoneCleared = true
1185				}
1186
1187				if symbol == "OPENSSL_ia32cap_P" {
1188					// Flag-altering instructions (i.e. addq) are going to be used so the
1189					// flags need to be preserved.
1190					wrappers = append(wrappers, saveFlags(d.output, redzoneCleared))
1191					wrappers = append(wrappers, func(k func()) {
1192						d.output.WriteString("\tleaq\tOPENSSL_ia32cap_addr_delta(%rip), " + targetReg + "\n")
1193						d.output.WriteString("\taddq\t(" + targetReg + "), " + targetReg + "\n")
1194					})
1195				} else if useGOT {
1196					wrappers = append(wrappers, d.loadFromGOT(d.output, targetReg, symbol, section, redzoneCleared))
1197				} else {
1198					wrappers = append(wrappers, func(k func()) {
1199						d.output.WriteString(fmt.Sprintf("\tleaq\t%s(%%rip), %s\n", symbol, targetReg))
1200					})
1201				}
1202				changed = true
1203				break Args
1204
1205			default:
1206				return nil, fmt.Errorf("Unknown section type %q", section)
1207			}
1208
1209			if !changed && len(section) > 0 {
1210				panic("section was not handled")
1211			}
1212			section = ""
1213
1214			argStr := ""
1215			if isIndirect {
1216				argStr += "*"
1217			}
1218			argStr += symbol
1219			argStr += offset
1220
1221			for ; memRef != nil; memRef = memRef.next {
1222				argStr += d.contents(memRef)
1223			}
1224
1225			args = append(args, argStr)
1226
1227		case ruleGOTLocation:
1228			if instructionName != "movabsq" {
1229				return nil, fmt.Errorf("_GLOBAL_OFFSET_TABLE_ lookup didn't use movabsq")
1230			}
1231			if i != 0 || len(argNodes) != 2 {
1232				return nil, fmt.Errorf("movabs of _GLOBAL_OFFSET_TABLE_ didn't expected form")
1233			}
1234
1235			d.gotDeltaNeeded = true
1236			changed = true
1237			instructionName = "movq"
1238			assertNodeType(arg.up, ruleLocalSymbol)
1239			baseSymbol := d.mapLocalSymbol(d.contents(arg.up))
1240			targetReg := d.contents(argNodes[1])
1241			args = append(args, ".Lboringssl_got_delta(%rip)")
1242			wrappers = append(wrappers, func(k func()) {
1243				k()
1244				d.output.WriteString(fmt.Sprintf("\taddq $.Lboringssl_got_delta-%s, %s\n", baseSymbol, targetReg))
1245			})
1246
1247		case ruleGOTSymbolOffset:
1248			if instructionName != "movabsq" {
1249				return nil, fmt.Errorf("_GLOBAL_OFFSET_TABLE_ offset didn't use movabsq")
1250			}
1251			if i != 0 || len(argNodes) != 2 {
1252				return nil, fmt.Errorf("movabs of _GLOBAL_OFFSET_TABLE_ offset didn't have expected form")
1253			}
1254
1255			assertNodeType(arg.up, ruleSymbolName)
1256			symbol := d.contents(arg.up)
1257			if strings.HasPrefix(symbol, ".L") {
1258				symbol = d.mapLocalSymbol(symbol)
1259			}
1260			targetReg := d.contents(argNodes[1])
1261
1262			var prefix string
1263			isGOTOFF := strings.HasSuffix(d.contents(arg), "@GOTOFF")
1264			if isGOTOFF {
1265				prefix = "gotoff"
1266				d.gotOffOffsetsNeeded[symbol] = struct{}{}
1267			} else {
1268				prefix = "got"
1269				d.gotOffsetsNeeded[symbol] = struct{}{}
1270			}
1271			changed = true
1272
1273			wrappers = append(wrappers, func(k func()) {
1274				// Even if one tries to use 32-bit GOT offsets, Clang's linker (at the time
1275				// of writing) emits 64-bit relocations anyway, so the following four bytes
1276				// get stomped. Thus we use 64-bit offsets.
1277				d.output.WriteString(fmt.Sprintf("\tmovq .Lboringssl_%s_%s(%%rip), %s\n", prefix, symbol, targetReg))
1278			})
1279
1280		default:
1281			panic(fmt.Sprintf("unknown instruction argument type %q", rul3s[arg.pegRule]))
1282		}
1283	}
1284
1285	if changed {
1286		d.writeCommentedNode(statement)
1287		replacement := "\t" + instructionName + "\t" + strings.Join(args, ", ") + "\n"
1288		wrappers.do(func() {
1289			d.output.WriteString(replacement)
1290		})
1291	} else {
1292		d.writeNode(statement)
1293	}
1294
1295	return statement, nil
1296}
1297
1298func (d *delocation) handleBSS(statement *node32) (*node32, error) {
1299	lastStatement := statement
1300	for statement = statement.next; statement != nil; lastStatement, statement = statement, statement.next {
1301		node := skipWS(statement.up)
1302		if node == nil {
1303			d.writeNode(statement)
1304			continue
1305		}
1306
1307		switch node.pegRule {
1308		case ruleGlobalDirective, ruleComment, ruleInstruction, ruleLocationDirective:
1309			d.writeNode(statement)
1310
1311		case ruleDirective:
1312			directive := node.up
1313			assertNodeType(directive, ruleDirectiveName)
1314			directiveName := d.contents(directive)
1315			if directiveName == "text" || directiveName == "section" || directiveName == "data" {
1316				return lastStatement, nil
1317			}
1318			d.writeNode(statement)
1319
1320		case ruleLabel:
1321			label := node.up
1322			d.writeNode(statement)
1323
1324			if label.pegRule != ruleLocalSymbol {
1325				symbol := d.contents(label)
1326				localSymbol := localTargetName(symbol)
1327				d.output.WriteString(fmt.Sprintf("\n%s:\n", localSymbol))
1328
1329				d.bssAccessorsNeeded[symbol] = localSymbol
1330			}
1331
1332		case ruleLabelContainingDirective:
1333			var err error
1334			statement, err = d.processLabelContainingDirective(statement, node.up)
1335			if err != nil {
1336				return nil, err
1337			}
1338
1339		default:
1340			return nil, fmt.Errorf("unknown BSS statement type %q in %q", rul3s[node.pegRule], d.contents(statement))
1341		}
1342	}
1343
1344	return lastStatement, nil
1345}
1346
1347func transform(w stringWriter, inputs []inputFile) error {
1348	// symbols contains all defined symbols.
1349	symbols := make(map[string]struct{})
1350	// localEntrySymbols contains all symbols with a .localentry directive.
1351	localEntrySymbols := make(map[string]struct{})
1352	// fileNumbers is the set of IDs seen in .file directives.
1353	fileNumbers := make(map[int]struct{})
1354	// maxObservedFileNumber contains the largest seen file number in a
1355	// .file directive. Zero is not a valid number.
1356	maxObservedFileNumber := 0
1357	// fileDirectivesContainMD5 is true if the compiler is outputting MD5
1358	// checksums in .file directives. If it does so, then this script needs
1359	// to match that behaviour otherwise warnings result.
1360	fileDirectivesContainMD5 := false
1361
1362	// OPENSSL_ia32cap_get will be synthesized by this script.
1363	symbols["OPENSSL_ia32cap_get"] = struct{}{}
1364
1365	for _, input := range inputs {
1366		forEachPath(input.ast.up, func(node *node32) {
1367			symbol := input.contents[node.begin:node.end]
1368			if _, ok := symbols[symbol]; ok {
1369				panic(fmt.Sprintf("Duplicate symbol found: %q in %q", symbol, input.path))
1370			}
1371			symbols[symbol] = struct{}{}
1372		}, ruleStatement, ruleLabel, ruleSymbolName)
1373
1374		forEachPath(input.ast.up, func(node *node32) {
1375			node = node.up
1376			assertNodeType(node, ruleLabelContainingDirectiveName)
1377			directive := input.contents[node.begin:node.end]
1378			if directive != ".localentry" {
1379				return
1380			}
1381			// Extract the first argument.
1382			node = skipWS(node.next)
1383			assertNodeType(node, ruleSymbolArgs)
1384			node = node.up
1385			assertNodeType(node, ruleSymbolArg)
1386			symbol := input.contents[node.begin:node.end]
1387			if _, ok := localEntrySymbols[symbol]; ok {
1388				panic(fmt.Sprintf("Duplicate .localentry directive found: %q in %q", symbol, input.path))
1389			}
1390			localEntrySymbols[symbol] = struct{}{}
1391		}, ruleStatement, ruleLabelContainingDirective)
1392
1393		forEachPath(input.ast.up, func(node *node32) {
1394			assertNodeType(node, ruleLocationDirective)
1395			directive := input.contents[node.begin:node.end]
1396			if !strings.HasPrefix(directive, ".file") {
1397				return
1398			}
1399			parts := strings.Fields(directive)
1400			if len(parts) == 2 {
1401				// This is a .file directive with just a
1402				// filename. Clang appears to generate just one
1403				// of these at the beginning of the output for
1404				// the compilation unit. Ignore it.
1405				return
1406			}
1407			fileNo, err := strconv.Atoi(parts[1])
1408			if err != nil {
1409				panic(fmt.Sprintf("Failed to parse file number from .file: %q", directive))
1410			}
1411
1412			if _, ok := fileNumbers[fileNo]; ok {
1413				panic(fmt.Sprintf("Duplicate file number %d observed", fileNo))
1414			}
1415			fileNumbers[fileNo] = struct{}{}
1416
1417			if fileNo > maxObservedFileNumber {
1418				maxObservedFileNumber = fileNo
1419			}
1420
1421			for _, token := range parts[2:] {
1422				if token == "md5" {
1423					fileDirectivesContainMD5 = true
1424				}
1425			}
1426		}, ruleStatement, ruleLocationDirective)
1427	}
1428
1429	processor := x86_64
1430	if len(inputs) > 0 {
1431		processor = detectProcessor(inputs[0])
1432	}
1433
1434	d := &delocation{
1435		symbols:             symbols,
1436		localEntrySymbols:   localEntrySymbols,
1437		processor:           processor,
1438		output:              w,
1439		redirectors:         make(map[string]string),
1440		bssAccessorsNeeded:  make(map[string]string),
1441		tocLoaders:          make(map[string]struct{}),
1442		gotExternalsNeeded:  make(map[string]struct{}),
1443		gotOffsetsNeeded:    make(map[string]struct{}),
1444		gotOffOffsetsNeeded: make(map[string]struct{}),
1445	}
1446
1447	w.WriteString(".text\n")
1448	var fileTrailing string
1449	if fileDirectivesContainMD5 {
1450		fileTrailing = " md5 0x00000000000000000000000000000000"
1451	}
1452	w.WriteString(fmt.Sprintf(".file %d \"inserted_by_delocate.c\"%s\n", maxObservedFileNumber+1, fileTrailing))
1453	w.WriteString(fmt.Sprintf(".loc %d 1 0\n", maxObservedFileNumber+1))
1454	w.WriteString("BORINGSSL_bcm_text_start:\n")
1455
1456	for _, input := range inputs {
1457		if err := d.processInput(input); err != nil {
1458			return err
1459		}
1460	}
1461
1462	w.WriteString(".text\n")
1463	w.WriteString(fmt.Sprintf(".loc %d 2 0\n", maxObservedFileNumber+1))
1464	w.WriteString("BORINGSSL_bcm_text_end:\n")
1465
1466	// Emit redirector functions. Each is a single jump instruction.
1467	var redirectorNames []string
1468	for name := range d.redirectors {
1469		redirectorNames = append(redirectorNames, name)
1470	}
1471	sort.Strings(redirectorNames)
1472
1473	for _, name := range redirectorNames {
1474		redirector := d.redirectors[name]
1475		if d.processor == ppc64le {
1476			w.WriteString(".section \".toc\", \"aw\"\n")
1477			w.WriteString(".Lredirector_toc_" + name + ":\n")
1478			w.WriteString(".quad " + name + "\n")
1479			w.WriteString(".text\n")
1480			w.WriteString(".type " + redirector + ", @function\n")
1481			w.WriteString(redirector + ":\n")
1482			// |name| will clobber r2, so save it. This is matched by a restore in
1483			// redirector calls.
1484			w.WriteString("\tstd 2, 24(1)\n")
1485			// Load and call |name|'s global entry point.
1486			w.WriteString("\taddis 12, 2, .Lredirector_toc_" + name + "@toc@ha\n")
1487			w.WriteString("\tld 12, .Lredirector_toc_" + name + "@toc@l(12)\n")
1488			w.WriteString("\tmtctr 12\n")
1489			w.WriteString("\tbctr\n")
1490		} else {
1491			w.WriteString(".type " + redirector + ", @function\n")
1492			w.WriteString(redirector + ":\n")
1493			w.WriteString("\tjmp\t" + name + "\n")
1494		}
1495	}
1496
1497	var accessorNames []string
1498	for accessor := range d.bssAccessorsNeeded {
1499		accessorNames = append(accessorNames, accessor)
1500	}
1501	sort.Strings(accessorNames)
1502
1503	// Emit BSS accessor functions. Each is a single LEA followed by RET.
1504	for _, name := range accessorNames {
1505		funcName := accessorName(name)
1506		w.WriteString(".type " + funcName + ", @function\n")
1507		w.WriteString(funcName + ":\n")
1508		target := d.bssAccessorsNeeded[name]
1509
1510		if d.processor == ppc64le {
1511			w.WriteString("\taddis 3, 2, " + target + "@toc@ha\n")
1512			w.WriteString("\taddi 3, 3, " + target + "@toc@l\n")
1513			w.WriteString("\tblr\n")
1514		} else {
1515			w.WriteString("\tleaq\t" + target + "(%rip), %rax\n\tret\n")
1516		}
1517	}
1518
1519	if d.processor == ppc64le {
1520		loadTOCNames := sortedSet(d.tocLoaders)
1521		for _, symbolAndOffset := range loadTOCNames {
1522			parts := strings.SplitN(symbolAndOffset, "\x00", 2)
1523			symbol, offset := parts[0], parts[1]
1524
1525			funcName := loadTOCFuncName(symbol, offset)
1526			ref := symbol + offset
1527
1528			w.WriteString(".type " + funcName[2:] + ", @function\n")
1529			w.WriteString(funcName[2:] + ":\n")
1530			w.WriteString(funcName + ":\n")
1531			w.WriteString("\taddis 3, 2, " + ref + "@toc@ha\n")
1532			w.WriteString("\taddi 3, 3, " + ref + "@toc@l\n")
1533			w.WriteString("\tblr\n")
1534		}
1535
1536		w.WriteString(".LBORINGSSL_external_toc:\n")
1537		w.WriteString(".quad .TOC.-.LBORINGSSL_external_toc\n")
1538	} else {
1539		externalNames := sortedSet(d.gotExternalsNeeded)
1540		for _, name := range externalNames {
1541			parts := strings.SplitN(name, "@", 2)
1542			symbol, section := parts[0], parts[1]
1543			w.WriteString(".type " + symbol + "_" + section + "_external, @object\n")
1544			w.WriteString(".size " + symbol + "_" + section + "_external, 8\n")
1545			w.WriteString(symbol + "_" + section + "_external:\n")
1546			// Ideally this would be .quad foo@GOTPCREL, but clang's
1547			// assembler cannot emit a 64-bit GOTPCREL relocation. Instead,
1548			// we manually sign-extend the value, knowing that the GOT is
1549			// always at the end, thus foo@GOTPCREL has a positive value.
1550			w.WriteString("\t.long " + symbol + "@" + section + "\n")
1551			w.WriteString("\t.long 0\n")
1552		}
1553
1554		w.WriteString(".type OPENSSL_ia32cap_get, @function\n")
1555		w.WriteString(".globl OPENSSL_ia32cap_get\n")
1556		w.WriteString(localTargetName("OPENSSL_ia32cap_get") + ":\n")
1557		w.WriteString("OPENSSL_ia32cap_get:\n")
1558		w.WriteString("\tleaq OPENSSL_ia32cap_P(%rip), %rax\n")
1559		w.WriteString("\tret\n")
1560
1561		w.WriteString(".extern OPENSSL_ia32cap_P\n")
1562		w.WriteString(".type OPENSSL_ia32cap_addr_delta, @object\n")
1563		w.WriteString(".size OPENSSL_ia32cap_addr_delta, 8\n")
1564		w.WriteString("OPENSSL_ia32cap_addr_delta:\n")
1565		w.WriteString(".quad OPENSSL_ia32cap_P-OPENSSL_ia32cap_addr_delta\n")
1566
1567		if d.gotDeltaNeeded {
1568			w.WriteString(".Lboringssl_got_delta:\n")
1569			w.WriteString("\t.quad _GLOBAL_OFFSET_TABLE_-.Lboringssl_got_delta\n")
1570		}
1571
1572		for _, name := range sortedSet(d.gotOffsetsNeeded) {
1573			w.WriteString(".Lboringssl_got_" + name + ":\n")
1574			w.WriteString("\t.quad " + name + "@GOT\n")
1575		}
1576		for _, name := range sortedSet(d.gotOffOffsetsNeeded) {
1577			w.WriteString(".Lboringssl_gotoff_" + name + ":\n")
1578			w.WriteString("\t.quad " + name + "@GOTOFF\n")
1579		}
1580	}
1581
1582	w.WriteString(".type BORINGSSL_bcm_text_hash, @object\n")
1583	w.WriteString(".size BORINGSSL_bcm_text_hash, 64\n")
1584	w.WriteString("BORINGSSL_bcm_text_hash:\n")
1585	for _, b := range fipscommon.UninitHashValue {
1586		w.WriteString(".byte 0x" + strconv.FormatUint(uint64(b), 16) + "\n")
1587	}
1588
1589	return nil
1590}
1591
1592func parseInputs(inputs []inputFile) error {
1593	for i, input := range inputs {
1594		var contents string
1595
1596		if input.isArchive {
1597			arFile, err := os.Open(input.path)
1598			if err != nil {
1599				return err
1600			}
1601			defer arFile.Close()
1602
1603			ar, err := ar.ParseAR(arFile)
1604			if err != nil {
1605				return err
1606			}
1607
1608			if len(ar) != 1 {
1609				return fmt.Errorf("expected one file in archive, but found %d", len(ar))
1610			}
1611
1612			for _, c := range ar {
1613				contents = string(c)
1614			}
1615		} else {
1616			inBytes, err := ioutil.ReadFile(input.path)
1617			if err != nil {
1618				return err
1619			}
1620
1621			contents = string(inBytes)
1622		}
1623
1624		asm := Asm{Buffer: contents, Pretty: true}
1625		asm.Init()
1626		if err := asm.Parse(); err != nil {
1627			return fmt.Errorf("error while parsing %q: %s", input.path, err)
1628		}
1629		ast := asm.AST()
1630
1631		inputs[i].contents = contents
1632		inputs[i].ast = ast
1633	}
1634
1635	return nil
1636}
1637
1638func main() {
1639	// The .a file, if given, is expected to be an archive of textual
1640	// assembly sources. That's odd, but CMake really wants to create
1641	// archive files so it's the only way that we can make it work.
1642	arInput := flag.String("a", "", "Path to a .a file containing assembly sources")
1643	outFile := flag.String("o", "", "Path to output assembly")
1644
1645	flag.Parse()
1646
1647	if len(*outFile) == 0 {
1648		fmt.Fprintf(os.Stderr, "Must give argument to -o.\n")
1649		os.Exit(1)
1650	}
1651
1652	var inputs []inputFile
1653	if len(*arInput) > 0 {
1654		inputs = append(inputs, inputFile{
1655			path:      *arInput,
1656			index:     0,
1657			isArchive: true,
1658		})
1659	}
1660
1661	for i, path := range flag.Args() {
1662		if len(path) == 0 {
1663			continue
1664		}
1665
1666		inputs = append(inputs, inputFile{
1667			path:  path,
1668			index: i + 1,
1669		})
1670	}
1671
1672	if err := parseInputs(inputs); err != nil {
1673		fmt.Fprintf(os.Stderr, "%s\n", err)
1674		os.Exit(1)
1675	}
1676
1677	out, err := os.OpenFile(*outFile, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0644)
1678	if err != nil {
1679		panic(err)
1680	}
1681	defer out.Close()
1682
1683	if err := transform(out, inputs); err != nil {
1684		fmt.Fprintf(os.Stderr, "%s\n", err)
1685		os.Exit(1)
1686	}
1687}
1688
1689func forEachPath(node *node32, cb func(*node32), rules ...pegRule) {
1690	if node == nil {
1691		return
1692	}
1693
1694	if len(rules) == 0 {
1695		cb(node)
1696		return
1697	}
1698
1699	rule := rules[0]
1700	childRules := rules[1:]
1701
1702	for ; node != nil; node = node.next {
1703		if node.pegRule != rule {
1704			continue
1705		}
1706
1707		if len(childRules) == 0 {
1708			cb(node)
1709		} else {
1710			forEachPath(node.up, cb, childRules...)
1711		}
1712	}
1713}
1714
1715func skipNodes(node *node32, ruleToSkip pegRule) *node32 {
1716	for ; node != nil && node.pegRule == ruleToSkip; node = node.next {
1717	}
1718	return node
1719}
1720
1721func skipWS(node *node32) *node32 {
1722	return skipNodes(node, ruleWS)
1723}
1724
1725func assertNodeType(node *node32, expected pegRule) {
1726	if rule := node.pegRule; rule != expected {
1727		panic(fmt.Sprintf("node was %q, but wanted %q", rul3s[rule], rul3s[expected]))
1728	}
1729}
1730
// wrapperFunc writes output around a continuation: it is passed a function
// that produces the wrapped output and decides when, and whether, to call it.
type wrapperFunc func(func())

// wrapperStack is a list of wrappers. The first element is the outermost.
type wrapperStack []wrapperFunc

// do runs baseCase nested inside every wrapper on the stack. Wrappers are
// removed from the stack as they are started, so the stack is consumed by the
// call. With an empty stack, baseCase is simply called.
func (w *wrapperStack) do(baseCase func()) {
	pending := *w
	if len(pending) == 0 {
		baseCase()
		return
	}

	outermost := pending[0]
	*w = pending[1:]
	outermost(func() {
		w.do(baseCase)
	})
}
1745
// localTargetName maps the global symbol name to the name of its local target
// label, which is name wrapped in ".L" and "_local_target".
func localTargetName(name string) string {
	const (
		prefix = ".L"
		suffix = "_local_target"
	)
	return prefix + name + suffix
}
1751
// localEntryName maps the symbol name to the name of its local entry label,
// which is name wrapped in ".L" and "_local_entry".
func localEntryName(name string) string {
	const (
		prefix = ".L"
		suffix = "_local_entry"
	)
	return prefix + name + suffix
}
1755
// isSynthesized reports whether symbol has one of the names that this tool
// generates itself: a BSS accessor ("…_bss_get"), OPENSSL_ia32cap_get, or one
// of the "BORINGSSL_bcm_text_…" markers.
func isSynthesized(symbol string) bool {
	switch {
	case symbol == "OPENSSL_ia32cap_get":
		return true
	case strings.HasSuffix(symbol, "_bss_get"):
		return true
	case strings.HasPrefix(symbol, "BORINGSSL_bcm_text_"):
		return true
	}
	return false
}
1761
// redirectorName returns the name of the redirector function for symbol,
// which is symbol prefixed with "bcm_redirector_".
func redirectorName(symbol string) string {
	const prefix = "bcm_redirector_"
	return prefix + symbol
}
1765
// sectionType returns the type of a section, i.e. its name up to but not
// including the second dot: a section called “.text.foo” is a “.text”
// section. All sections whose type starts with “.debug_” are reported as
// “.debug”. The boolean result is false if section does not begin with a dot.
func sectionType(section string) (string, bool) {
	if !strings.HasPrefix(section, ".") {
		return "", false
	}

	kind := section
	// Look for a second dot, skipping the leading one.
	if dot := strings.IndexByte(section[1:], '.'); dot >= 0 {
		kind = section[:dot+1]
	}

	if strings.HasPrefix(kind, ".debug_") {
		return ".debug", true
	}
	return kind, true
}
1784
// accessorName maps the BSS symbol name to the name of its accessor function,
// which is name followed by "_bss_get".
func accessorName(name string) string {
	const suffix = "_bss_get"
	return name + suffix
}
1790
1791func (d *delocation) mapLocalSymbol(symbol string) string {
1792	if d.currentInput.index == 0 {
1793		return symbol
1794	}
1795	return symbol + "_BCM_" + strconv.Itoa(d.currentInput.index)
1796}
1797
1798func detectProcessor(input inputFile) processorType {
1799	for statement := input.ast.up; statement != nil; statement = statement.next {
1800		node := skipNodes(statement.up, ruleWS)
1801		if node == nil || node.pegRule != ruleInstruction {
1802			continue
1803		}
1804
1805		instruction := node.up
1806		instructionName := input.contents[instruction.begin:instruction.end]
1807
1808		switch instructionName {
1809		case "movq", "call", "leaq":
1810			return x86_64
1811		case "addis", "addi", "mflr":
1812			return ppc64le
1813		}
1814	}
1815
1816	panic("processed entire input and didn't recognise any instructions.")
1817}
1818
// sortedSet returns the keys of m as a sorted, non-nil slice.
func sortedSet(m map[string]struct{}) []string {
	keys := make([]string, len(m))
	next := 0
	for key := range m {
		keys[next] = key
		next++
	}
	sort.Strings(keys)
	return keys
}
1827