1// Copyright 2014 The Go Authors.  All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// Support for testing against external disassembler program.
6// Copied and simplified from rsc.io/arm/armasm/ext_test.go.
7
8package ppc64asm
9
10import (
11	"bufio"
12	"bytes"
13	"encoding/binary"
14	"encoding/hex"
15	"flag"
16	"fmt"
17	"io/ioutil"
18	"log"
19	"math/rand"
20	"os"
21	"os/exec"
22	"regexp"
23	"runtime"
24	"strings"
25	"testing"
26	"time"
27)
28
// Command-line flags and switches for the external disassembler tests.
//
//	-printtests  disasm prints each input whose decoding increased the coverage figure
//	-dump        testExtDis prints every encoding with the external disassembly
//	-mismatch    testExtDis logs allowed mismatches instead of silently skipping them
//	-long        randomCases generates its largest set of cases
//	-keep        Run logs the command line and testExtDis keeps the input file
//
// debug is a compile-time switch; when true, writeInst prints every
// instruction word it writes.
var (
	printTests = flag.Bool("printtests", false, "print test cases that exercise new code paths")
	dumpTest   = flag.Bool("dump", false, "dump all encodings")
	mismatch   = flag.Bool("mismatch", false, "log allowed mismatches")
	longTest   = flag.Bool("long", false, "long test")
	keep       = flag.Bool("keep", false, "keep object files around")
	debug      = false
)
37
// An ExtInst is one instruction as reported by an external disassembler,
// after its output has been parsed.
type ExtInst struct {
	addr uint32  // instruction address (printed in hex by String)
	enc  [4]byte // encoding bytes
	nenc int     // instruction length; testExtDis compares it with Inst.Len
	text string  // disassembly text; testExtDis compares it with our own rendering
}

// String renders the instruction as "address: encoding bytes: text",
// with the address in 0x-prefixed hex and the bytes space-separated.
func (r ExtInst) String() string {
	addr, enc, text := r.addr, r.enc, r.text
	return fmt.Sprintf("%#x: % x: %s", addr, enc, text)
}
50
51// An ExtDis is a connection between an external disassembler and a test.
52type ExtDis struct {
53	Dec      chan ExtInst
54	File     *os.File
55	Size     int
56	KeepFile bool
57	Cmd      *exec.Cmd
58}
59
60// Run runs the given command - the external disassembler - and returns
61// a buffered reader of its standard output.
62func (ext *ExtDis) Run(cmd ...string) (*bufio.Reader, error) {
63	if *keep {
64		log.Printf("%s\n", strings.Join(cmd, " "))
65	}
66	ext.Cmd = exec.Command(cmd[0], cmd[1:]...)
67	out, err := ext.Cmd.StdoutPipe()
68	if err != nil {
69		return nil, fmt.Errorf("stdoutpipe: %v", err)
70	}
71	if err := ext.Cmd.Start(); err != nil {
72		return nil, fmt.Errorf("exec: %v", err)
73	}
74
75	b := bufio.NewReaderSize(out, 1<<20)
76	return b, nil
77}
78
79// Wait waits for the command started with Run to exit.
80func (ext *ExtDis) Wait() error {
81	return ext.Cmd.Wait()
82}
83
// testExtDis tests a set of byte sequences against an external disassembler.
// The disassembler is expected to produce the given syntax.
// The extdis function must start the external disassembler
// and then parse its output, sending the parsed instructions on ext.Dec.
// The generate function calls its argument f once for each byte sequence
// to be tested. The generate function itself will be called twice, and it must
// make the same sequence of calls to f each time.
// When a disassembly does not match the internal decoding,
// allowedMismatch determines whether this mismatch should be
// allowed, or else considered an error. It is passed our text, the size
// returned by writeInst, our decoded instruction and the external one.
func testExtDis(
	t *testing.T,
	syntax string,
	extdis func(ext *ExtDis) error,
	generate func(f func([]byte)),
	allowedMismatch func(text string, size int, inst *Inst, dec ExtInst) bool,
) {
	start := time.Now()
	ext := &ExtDis{
		Dec: make(chan ExtInst),
	}
	errc := make(chan error)

	// First pass: write instructions to input file for external disassembler.
	file, f, size, err := writeInst(generate)
	if err != nil {
		t.Fatal(err)
	}
	ext.Size = size
	ext.File = f
	// Close the input file on return and, unless -keep is given, delete it.
	defer func() {
		f.Close()
		if !*keep {
			os.Remove(file)
		}
	}()

	// Second pass: compare disassembly against our decodings.
	var (
		totalTests  = 0
		totalSkips  = 0
		totalErrors = 0

		errors = make([]string, 0, 100) // sampled errors, at most cap
	)
	// Run the external disassembler concurrently; its final error is
	// collected from errc at the end of the test.
	go func() {
		errc <- extdis(ext)
	}()
	generate(func(enc []byte) {
		// Each generated case is paired with the next instruction received on ext.Dec.
		dec, ok := <-ext.Dec
		if !ok {
			// ext.Dec was closed before every generated case was matched.
			t.Errorf("decoding stream ended early")
			return
		}
		inst, text := disasm(syntax, pad(enc))
		totalTests++
		if *dumpTest {
			fmt.Printf("%x -> %s [%d]\n", enc[:len(enc)], dec.text, dec.nenc)
		}
		if text != dec.text || inst.Len != dec.nenc {
			suffix := ""
			if allowedMismatch(text, size, &inst, dec) {
				totalSkips++
				if !*mismatch {
					return
				}
				// With -mismatch the allowed mismatch is still recorded below;
				// it is subtracted from totalErrors after the pass.
				suffix += " (allowed mismatch)"
			}
			totalErrors++
			// Once the sample is full, pick j uniformly among all errors seen
			// so far: the new error is dropped if j falls outside the sample,
			// otherwise it replaces sample entry j.
			if len(errors) >= cap(errors) {
				j := rand.Intn(totalErrors)
				if j >= cap(errors) {
					return
				}
				errors = append(errors[:j], errors[j+1:]...)
			}
			errors = append(errors, fmt.Sprintf("decode(%x) = %q, %d, want %q, %d%s", enc, text, inst.Len, dec.text, dec.nenc, suffix))
		}
	})

	// Allowed mismatches were counted as errors only so they could be logged.
	if *mismatch {
		totalErrors -= totalSkips
	}

	for _, b := range errors {
		t.Log(b)
	}

	if totalErrors > 0 {
		t.Fail()
	}
	t.Logf("%d test cases, %d expected mismatches, %d failures; %.0f cases/second", totalTests, totalSkips, totalErrors, float64(totalTests)/time.Since(start).Seconds())

	// Wait for extdis to return and report its error, if any.
	if err := <-errc; err != nil {
		t.Fatalf("external disassembler: %v", err)
	}

}
183
184const start = 0x8000 // start address of text
185
186// writeInst writes the generated byte sequences to a new file
187// starting at offset start. That file is intended to be the input to
188// the external disassembler.
189func writeInst(generate func(func([]byte))) (file string, f *os.File, size int, err error) {
190	f, err = ioutil.TempFile("", "ppc64asm")
191	if err != nil {
192		return
193	}
194
195	file = f.Name()
196
197	f.Seek(start, 0)
198	w := bufio.NewWriter(f)
199	defer w.Flush()
200	size = 0
201	generate(func(x []byte) {
202		if len(x) > 4 {
203			x = x[:4]
204		}
205		if debug {
206			fmt.Printf("%#x: %x%x\n", start+size, x, zeros[len(x):])
207		}
208		w.Write(x)
209		w.Write(zeros[len(x):])
210		size += len(zeros)
211	})
212	return file, f, size, nil
213}
214
// zeros is the zero padding for one 4-byte instruction word.
var zeros = make([]byte, 4)

// pad extends a code sequence shorter than 4 bytes to exactly 4 bytes by
// appending zero bytes. The result is a fresh slice, so the caller's backing
// array is never written. Sequences of 4 or more bytes are returned as is.
func pad(enc []byte) []byte {
	n := len(enc)
	if n >= 4 {
		return enc
	}
	out := make([]byte, n, 4)
	copy(out, enc)
	return append(out, zeros[:4-n]...)
}
224
225// disasm returns the decoded instruction and text
226// for the given source bytes, using the given syntax and mode.
227func disasm(syntax string, src []byte) (inst Inst, text string) {
228	// If printTests is set, we record the coverage value
229	// before and after, and we write out the inputs for which
230	// coverage went up, in the format expected in testdata/decode.text.
231	// This produces a fairly small set of test cases that exercise nearly
232	// all the code.
233	var cover float64
234	if *printTests {
235		cover -= coverage()
236	}
237
238	inst, err := Decode(src, binary.BigEndian)
239	if err != nil {
240		text = "error: " + err.Error()
241	} else {
242		text = inst.String()
243		switch syntax {
244		//case "arm":
245		//	text = ARMSyntax(inst)
246		case "gnu":
247			text = GNUSyntax(inst)
248		//case "plan9":
249		//	text = GoSyntax(inst, 0, nil)
250		default:
251			text = "error: unknown syntax " + syntax
252		}
253	}
254
255	if *printTests {
256		cover += coverage()
257		if cover > 0 {
258			max := len(src)
259			if max > 4 && inst.Len <= 4 {
260				max = 4
261			}
262			fmt.Printf("%x|%x\t%s\t%s\n", src[:inst.Len], src[inst.Len:max], syntax, text)
263		}
264	}
265
266	return
267}
268
269// coverage returns a floating point number denoting the
270// test coverage until now. The number increases when new code paths are exercised,
271// both in the Go program and in the decoder byte code.
272func coverage() float64 {
273	var f float64
274	f += testing.Coverage()
275	f += decodeCoverage()
276	return f
277}
278
279func decodeCoverage() float64 {
280	n := 0
281	for _, t := range decoderCover {
282		if t {
283			n++
284		}
285	}
286	return float64(1+n) / float64(1+len(decoderCover))
287}
288
289// Helpers for writing disassembler output parsers.
290
// hasPrefix reports whether any of the space-separated words in the text s
// begins with any of the given prefixes.
//
// Words are found by stepping past one space at a time, so a prefix is also
// tried against the text that follows each individual space. An empty s never
// matches, not even the empty prefix.
func hasPrefix(s string, prefixes ...string) bool {
	for word := s; word != ""; {
		for _, p := range prefixes {
			if strings.HasPrefix(word, p) {
				return true
			}
		}
		sp := strings.IndexByte(word, ' ')
		if sp < 0 {
			break
		}
		word = word[sp+1:]
	}
	return false
}
308
// contains reports whether the text s contains at least one of the given
// substrings. With no substrings it reports false.
func contains(s string, substrings ...string) bool {
	found := false
	for i := 0; i < len(substrings) && !found; i++ {
		found = strings.Contains(s, substrings[i])
	}
	return found
}
318
// isHex reports whether b is a hexadecimal character (0-9A-Fa-f).
func isHex(b byte) bool {
	switch {
	case '0' <= b && b <= '9',
		'a' <= b && b <= 'f',
		'A' <= b && b <= 'F':
		return true
	}
	return false
}
321
322// parseHex parses the hexadecimal byte dump in hex,
323// appending the parsed bytes to raw and returning the updated slice.
324// The returned bool signals whether any invalid hex was found.
325// Spaces and tabs between bytes are okay but any other non-hex is not.
326func parseHex(hex []byte, raw []byte) ([]byte, bool) {
327	hex = trimSpace(hex)
328	for j := 0; j < len(hex); {
329		for hex[j] == ' ' || hex[j] == '\t' {
330			j++
331		}
332		if j >= len(hex) {
333			break
334		}
335		if j+2 > len(hex) || !isHex(hex[j]) || !isHex(hex[j+1]) {
336			return nil, false
337		}
338		raw = append(raw, unhex[hex[j]]<<4|unhex[hex[j+1]])
339		j += 2
340	}
341	return raw, true
342}
343
// unhex maps an ASCII hexadecimal digit (0-9, A-F, a-f) to its numeric value.
// Every other byte maps to 0, the same value as '0', which is why isHex
// checks for '0' separately.
var unhex = [256]byte{
	'0': 0,
	'1': 1,
	'2': 2,
	'3': 3,
	'4': 4,
	'5': 5,
	'6': 6,
	'7': 7,
	'8': 8,
	'9': 9,
	'A': 10,
	'B': 11,
	'C': 12,
	'D': 13,
	'E': 14,
	'F': 15,
	'a': 10,
	'b': 11,
	'c': 12,
	'd': 13,
	'e': 14,
	'f': 15,
}
368
// index is like bytes.Index(s, []byte(t)) but avoids the allocation.
// It returns the offset of the first occurrence of t in s, or -1 if t does
// not occur. Like bytes.Index, it returns 0 for an empty t.
func index(s []byte, t string) int {
	if len(t) == 0 {
		// The loop below reads t[0], so the empty pattern is handled here.
		return 0
	}
search:
	for i := 0; ; i++ {
		// Jump to the next candidate position: the next occurrence of t's first byte.
		j := bytes.IndexByte(s[i:], t[0])
		if j < 0 {
			return -1
		}
		i += j
		if i+len(t) > len(s) {
			// Not enough bytes left for a full match here or anywhere later.
			return -1
		}
		for k := 1; k < len(t); k++ {
			if s[i+k] != t[k] {
				continue search
			}
		}
		return i
	}
}
391
392// fixSpace rewrites runs of spaces, tabs, and newline characters into single spaces in s.
393// If s must be rewritten, it is rewritten in place.
394func fixSpace(s []byte) []byte {
395	s = trimSpace(s)
396	for i := 0; i < len(s); i++ {
397		if s[i] == '\t' || s[i] == '\n' || i > 0 && s[i] == ' ' && s[i-1] == ' ' {
398			goto Fix
399		}
400	}
401	return s
402
403Fix:
404	b := s
405	w := 0
406	for i := 0; i < len(s); i++ {
407		c := s[i]
408		if c == '\t' || c == '\n' {
409			c = ' '
410		}
411		if c == ' ' && w > 0 && b[w-1] == ' ' {
412			continue
413		}
414		b[w] = c
415		w++
416	}
417	if w > 0 && b[w-1] == ' ' {
418		w--
419	}
420	return b[:w]
421}
422
// trimSpace trims leading and trailing space from s, returning a subslice of s.
// Trailing spaces, tabs and newlines are removed. At the front only spaces
// and tabs are removed, so a leading newline is kept.
func trimSpace(s []byte) []byte {
	end := len(s)
	for ; end > 0; end-- {
		if c := s[end-1]; c != ' ' && c != '\t' && c != '\n' {
			break
		}
	}
	begin := 0
	for ; begin < end; begin++ {
		if c := s[begin]; c != ' ' && c != '\t' {
			break
		}
	}
	return s[begin:end]
}
435
// pcrel matches instructions using relative addressing mode.
//
// The whole text must match. Submatch 1 is everything before the target:
// optional leading words, a mnemonic that starts with "b" or "bc" and
// continues with characters other than 'a', 'c' and space, one space, and any
// number of operand fragments (a one- or two-digit number with a comma, a
// digit 0-7 followed by '*', '+', lt, gt, eq, so, cr0-cr7, or a comma).
// Submatch 2 is the lower-case hex digits of the target that follow "0x" at
// the end of the text.
// NOTE(review): the [^ac ] class presumably keeps absolute-branch mnemonics
// from matching - confirm against the disassembler output.
var (
	pcrel = regexp.MustCompile(`^((?:.* )?(?:b|bc)[^ac ]* (?:(?:[0-9]{1,2},)|(?:[0-7]\*)|\+|lt|gt|eq|so|cr[0-7]|,)*)0x([0-9a-f]+)$`)
)
440
441// Generators.
442//
443// The test cases are described as functions that invoke a callback repeatedly,
444// with a new input sequence each time. These helpers make writing those
445// a little easier.
446
447// randomCases generates random instructions.
448func randomCases(t *testing.T) func(func([]byte)) {
449	return func(try func([]byte)) {
450		// All the strides are relatively prime to 2 and therefore to 2²⁸,
451		// so we will not repeat any instructions until we have tried all 2²⁸.
452		// Using a stride other than 1 is meant to visit the instructions in a
453		// pseudorandom order, which gives better variety in the set of
454		// test cases chosen by -printtests.
455		stride := uint32(10007)
456		n := 1 << 28 / 7
457		if testing.Short() {
458			stride = 100003
459			n = 1 << 28 / 1001
460		} else if *longTest {
461			stride = 2000033
462			n = 1 << 29
463		}
464		x := uint32(0)
465		for i := 0; i < n; i++ {
466			enc := (x%15)<<28 | x&(1<<28-1)
467			try([]byte{byte(enc), byte(enc >> 8), byte(enc >> 16), byte(enc >> 24)})
468			x += stride
469		}
470	}
471}
472
// hexCases returns a generator for the cases written in hexadecimal in the
// encoded string. White space in 'encoded' separates entire test cases, not
// individual bytes.
//
// A case that is not valid hex is reported with t.Errorf each time the
// generator runs. Whatever hex.DecodeString returned for it is still passed
// to try, so the number of calls always equals the number of cases.
func hexCases(t *testing.T, encoded string) func(func([]byte)) {
	words := strings.Fields(encoded)
	return func(try func([]byte)) {
		for i := range words {
			raw, err := hex.DecodeString(words[i])
			if err != nil {
				t.Errorf("parsing %q: %v", words[i], err)
			}
			try(raw)
		}
	}
}
486
// testdataCases returns a generator for the test cases recorded in
// testdata/decode.txt. It only uses the inputs; it ignores the answers
// recorded in that file.
//
// The file is read and parsed once, when testdataCases is called. Blank lines
// and lines starting with '#' are skipped. The first field of every other
// line is hex with a '|' in it; the '|' is removed and the rest is decoded
// as one input. A missing '|' or invalid hex is reported with t.Errorf and
// the line is skipped. A '|' at an odd offset is reported too, but the line
// is still decoded. An unreadable file aborts the test.
func testdataCases(t *testing.T) func(func([]byte)) {
	data, err := ioutil.ReadFile("testdata/decode.txt")
	if err != nil {
		t.Fatal(err)
	}

	var codes [][]byte
	lines := strings.Split(string(data), "\n")
	for n := range lines {
		line := strings.TrimSpace(lines[n])
		if len(line) == 0 || line[0] == '#' {
			continue
		}
		field := strings.Fields(line)[0]
		bar := strings.Index(field, "|")
		if bar < 0 {
			t.Errorf("parsing %q: missing | separator", field)
			continue
		}
		if bar%2 != 0 {
			t.Errorf("parsing %q: misaligned | separator", field)
		}
		code, err := hex.DecodeString(field[:bar] + field[bar+1:])
		if err != nil {
			t.Errorf("parsing %q: %v", field, err)
			continue
		}
		codes = append(codes, code)
	}

	return func(try func([]byte)) {
		for i := range codes {
			try(codes[i])
		}
	}
}
523
// caller returns the unqualified name of the function found skip frames up
// the call stack, where 0 identifies caller itself. Everything up to and
// including the last '.' of the full function name is dropped. If no
// function can be determined, caller returns "?".
func caller(skip int) string {
	pc, _, _, _ := runtime.Caller(skip)
	fn := runtime.FuncForPC(pc)
	if fn == nil {
		return "?"
	}
	full := fn.Name()
	dot := strings.LastIndex(full, ".")
	if dot < 0 {
		return full
	}
	return full[dot+1:]
}
536