go-cmp/cmp/report_slices.go

// Copyright 2019, The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE.md file.

package cmp

import (
	"bytes"
	"fmt"
	"reflect"
	"strconv"
	"strings"
	"unicode"
	"unicode/utf8"

	"github.com/google/go-cmp/cmp/internal/diff"
)

// CanFormatDiffSlice reports whether we support custom formatting for nodes
// that are slices of primitive kinds or strings.
func (opts formatOptions) CanFormatDiffSlice(v *valueNode) bool {
	switch {
	case opts.DiffMode != diffUnknown:
		return false // Must be formatting in diff mode
	case v.NumDiff == 0:
		return false // No differences detected
	case !v.ValueX.IsValid() || !v.ValueY.IsValid():
		return false // Both values must be valid
	case v.Type.Kind() == reflect.Slice && (v.ValueX.Len() == 0 || v.ValueY.Len() == 0):
		return false // Both slice values have to be non-empty
	case v.NumIgnored > 0:
		return false // Some ignore option was used
	case v.NumTransformed > 0:
		return false // Some transform option was used
	case v.NumCompared > 1:
		return false // More than one comparison was used
	case v.NumCompared == 1 && v.Type.Name() != "":
		// The need for cmp to check applicability of options on every element
		// in a slice is a significant performance detriment for large []byte.
		// The workaround is to specify Comparer(bytes.Equal),
		// which enables cmp to compare []byte more efficiently.
		// If they differ, we still want to provide batched diffing.
		// The logic disallows named types since they tend to have their own
		// String method, with nicer formatting than what this provides.
		return false
	}

	switch t := v.Type; t.Kind() {
	case reflect.String:
	case reflect.Array, reflect.Slice:
		// Only slices of primitive types have specialized handling.
		switch t.Elem().Kind() {
		case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64,
			reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr,
			reflect.Bool, reflect.Float32, reflect.Float64, reflect.Complex64, reflect.Complex128:
		default:
			return false
		}

		// If a sufficient number of elements already differ,
		// use specialized formatting even if length requirement is not met.
		if v.NumDiff > v.NumSame {
			return true
		}
	default:
		return false
	}

	// Use specialized string diffing for longer slices or strings.
	const minLength = 64
	return v.ValueX.Len() >= minLength && v.ValueY.Len() >= minLength
}

// FormatDiffSlice prints a diff for the slices (or strings) represented by v.
// This provides custom-tailored logic to make printing of differences in
// textual strings and slices of primitive kinds more readable.
func (opts formatOptions) FormatDiffSlice(v *valueNode) textNode {
	assert(opts.DiffMode == diffUnknown)
	t, vx, vy := v.Type, v.ValueX, v.ValueY

	// Auto-detect the type of the data.
	var isLinedText, isText, isBinary bool
	var sx, sy string
	switch {
	case t.Kind() == reflect.String:
		sx, sy = vx.String(), vy.String()
		isText = true // Initial estimate, verify later
	case t.Kind() == reflect.Slice && t.Elem() == reflect.TypeOf(byte(0)):
		sx, sy = string(vx.Bytes()), string(vy.Bytes())
		isBinary = true // Initial estimate, verify later
	case t.Kind() == reflect.Array:
		// Arrays need to be addressable for slice operations to work.
		vx2, vy2 := reflect.New(t).Elem(), reflect.New(t).Elem()
		vx2.Set(vx)
		vy2.Set(vy)
		vx, vy = vx2, vy2
	}
	if isText || isBinary {
		var numLines, lastLineIdx, maxLineLen int
		isBinary = !utf8.ValidString(sx) || !utf8.ValidString(sy)
		for i, r := range sx + sy {
			if !(unicode.IsPrint(r) || unicode.IsSpace(r)) || r == utf8.RuneError {
				isBinary = true
				break
			}
			if r == '\n' {
				if maxLineLen < i-lastLineIdx {
					maxLineLen = i - lastLineIdx
				}
				lastLineIdx = i + 1
				numLines++
			}
		}
		isText = !isBinary
		isLinedText = isText && numLines >= 4 && maxLineLen <= 1024
	}

	// Format the string into printable records.
	var list textList
	var delim string
	switch {
	// If the text appears to be multi-lined text,
	// then perform differencing across individual lines.
	case isLinedText:
		ssx := strings.Split(sx, "\n")
		ssy := strings.Split(sy, "\n")
		list = opts.formatDiffSlice(
			reflect.ValueOf(ssx), reflect.ValueOf(ssy), 1, "line",
			func(v reflect.Value, d diffMode) textRecord {
				s := formatString(v.Index(0).String())
				return textRecord{Diff: d, Value: textLine(s)}
			},
		)
		delim = "\n"

		// If possible, use a custom triple-quote (""") syntax for printing
		// differences in a string literal. This format is more readable,
		// but has edge-cases where differences are visually indistinguishable.
		// This format is avoided under the following conditions:
		//	• A line starts with `"""`
		//	• A line starts with "..."
		//	• A line contains non-printable characters
		//	• Adjacent different lines differ only by whitespace
		//
		// For example:
		//		"""
		//		... // 3 identical lines
		//		foo
		//		bar
		//	-	baz
		//	+	BAZ
		//		"""
		isTripleQuoted := true
		prevRemoveLines := map[string]bool{}
		prevInsertLines := map[string]bool{}
		var list2 textList
		list2 = append(list2, textRecord{Value: textLine(`"""`), ElideComma: true})
		for _, r := range list {
			if !r.Value.Equal(textEllipsis) {
				line, _ := strconv.Unquote(string(r.Value.(textLine)))
				line = strings.TrimPrefix(strings.TrimSuffix(line, "\r"), "\r") // trim leading/trailing carriage returns for legacy Windows endline support
				normLine := strings.Map(func(r rune) rune {
					if unicode.IsSpace(r) {
						return -1 // drop whitespace to avoid visually indistinguishable output
					}
					return r
				}, line)
				isPrintable := func(r rune) bool {
					return unicode.IsPrint(r) || r == '\t' // specially treat tab as printable
				}
				isTripleQuoted = !strings.HasPrefix(line, `"""`) && !strings.HasPrefix(line, "...") && strings.TrimFunc(line, isPrintable) == ""
				switch r.Diff {
				case diffRemoved:
					isTripleQuoted = isTripleQuoted && !prevInsertLines[normLine]
					prevRemoveLines[normLine] = true
				case diffInserted:
					isTripleQuoted = isTripleQuoted && !prevRemoveLines[normLine]
					prevInsertLines[normLine] = true
				}
				if !isTripleQuoted {
					break
				}
				r.Value = textLine(line)
				r.ElideComma = true
			}
			if !(r.Diff == diffRemoved || r.Diff == diffInserted) { // start a new non-adjacent difference group
				prevRemoveLines = map[string]bool{}
				prevInsertLines = map[string]bool{}
			}
			list2 = append(list2, r)
		}
		if r := list2[len(list2)-1]; r.Diff == diffIdentical && len(r.Value.(textLine)) == 0 {
			list2 = list2[:len(list2)-1] // elide single empty line at the end
		}
		list2 = append(list2, textRecord{Value: textLine(`"""`), ElideComma: true})
		if isTripleQuoted {
			var out textNode = &textWrap{Prefix: "(", Value: list2, Suffix: ")"}
			switch t.Kind() {
			case reflect.String:
				if t != reflect.TypeOf(string("")) {
					out = opts.FormatType(t, out)
				}
			case reflect.Slice:
				// Always emit type for slices since the triple-quote syntax
				// looks like a string (not a slice).
				opts = opts.WithTypeMode(emitType)
				out = opts.FormatType(t, out)
			}
			return out
		}

	// If the text appears to be single-lined text,
	// then perform differencing in approximately fixed-sized chunks.
	// The output is printed as quoted strings.
	case isText:
		list = opts.formatDiffSlice(
			reflect.ValueOf(sx), reflect.ValueOf(sy), 64, "byte",
			func(v reflect.Value, d diffMode) textRecord {
				s := formatString(v.String())
				return textRecord{Diff: d, Value: textLine(s)}
			},
		)
		delim = ""

	// If the text appears to be binary data,
	// then perform differencing in approximately fixed-sized chunks.
	// The output is inspired by hexdump.
	case isBinary:
		list = opts.formatDiffSlice(
			reflect.ValueOf(sx), reflect.ValueOf(sy), 16, "byte",
			func(v reflect.Value, d diffMode) textRecord {
				var ss []string
				for i := 0; i < v.Len(); i++ {
					ss = append(ss, formatHex(v.Index(i).Uint()))
				}
				s := strings.Join(ss, ", ")
				comment := commentString(fmt.Sprintf("%c|%v|", d, formatASCII(v.String())))
				return textRecord{Diff: d, Value: textLine(s), Comment: comment}
			},
		)

	// For all other slices of primitive types,
	// then perform differencing in approximately fixed-sized chunks.
	// The size of each chunk depends on the width of the element kind.
	default:
		var chunkSize int
		if t.Elem().Kind() == reflect.Bool {
			chunkSize = 16
		} else {
			switch t.Elem().Bits() {
			case 8:
				chunkSize = 16
			case 16:
				chunkSize = 12
			case 32:
				chunkSize = 8
			default:
				chunkSize = 8
			}
		}
		list = opts.formatDiffSlice(
			vx, vy, chunkSize, t.Elem().Kind().String(),
			func(v reflect.Value, d diffMode) textRecord {
				var ss []string
				for i := 0; i < v.Len(); i++ {
					switch t.Elem().Kind() {
					case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
						ss = append(ss, fmt.Sprint(v.Index(i).Int()))
					case reflect.Uint, reflect.Uint16, reflect.Uint32, reflect.Uint64:
						ss = append(ss, fmt.Sprint(v.Index(i).Uint()))
					case reflect.Uint8, reflect.Uintptr:
						ss = append(ss, formatHex(v.Index(i).Uint()))
					case reflect.Bool, reflect.Float32, reflect.Float64, reflect.Complex64, reflect.Complex128:
						ss = append(ss, fmt.Sprint(v.Index(i).Interface()))
					}
				}
				s := strings.Join(ss, ", ")
				return textRecord{Diff: d, Value: textLine(s)}
			},
		)
	}

	// Wrap the output with appropriate type information.
	var out textNode = &textWrap{Prefix: "{", Value: list, Suffix: "}"}
	if !isText {
		// The "{...}" byte-sequence literal is not valid Go syntax for strings.
		// Emit the type for extra clarity (e.g. "string{...}").
		if t.Kind() == reflect.String {
			opts = opts.WithTypeMode(emitType)
		}
		return opts.FormatType(t, out)
	}
	switch t.Kind() {
	case reflect.String:
		out = &textWrap{Prefix: "strings.Join(", Value: out, Suffix: fmt.Sprintf(", %q)", delim)}
		if t != reflect.TypeOf(string("")) {
			out = opts.FormatType(t, out)
		}
	case reflect.Slice:
		out = &textWrap{Prefix: "bytes.Join(", Value: out, Suffix: fmt.Sprintf(", %q)", delim)}
		if t != reflect.TypeOf([]byte(nil)) {
			out = opts.FormatType(t, out)
		}
	}
	return out
}

// formatASCII formats s as an ASCII string.
// This is useful for printing binary strings in a semi-legible way.
func formatASCII(s string) string {
	b := bytes.Repeat([]byte{'.'}, len(s))
	for i := 0; i < len(s); i++ {
		if ' ' <= s[i] && s[i] <= '~' {
			b[i] = s[i]
		}
	}
	return string(b)
}

func (opts formatOptions) formatDiffSlice(
	vx, vy reflect.Value, chunkSize int, name string,
	makeRec func(reflect.Value, diffMode) textRecord,
) (list textList) {
	es := diff.Difference(vx.Len(), vy.Len(), func(ix int, iy int) diff.Result {
		return diff.BoolResult(vx.Index(ix).Interface() == vy.Index(iy).Interface())
	})

	appendChunks := func(v reflect.Value, d diffMode) int {
		n0 := v.Len()
		for v.Len() > 0 {
			n := chunkSize
			if n > v.Len() {
				n = v.Len()
			}
			list = append(list, makeRec(v.Slice(0, n), d))
			v = v.Slice(n, v.Len())
		}
		return n0 - v.Len()
	}

	var numDiffs int
	maxLen := -1
	if opts.LimitVerbosity {
		maxLen = (1 << opts.verbosity()) << 2 // 4, 8, 16, 32, 64, etc...
		opts.VerbosityLevel--
	}

	groups := coalesceAdjacentEdits(name, es)
	groups = coalesceInterveningIdentical(groups, chunkSize/4)
	maxGroup := diffStats{Name: name}
	for i, ds := range groups {
		if maxLen >= 0 && numDiffs >= maxLen {
			maxGroup = maxGroup.Append(ds)
			continue
		}

		// Print equal.
		if ds.NumDiff() == 0 {
			// Compute the number of leading and trailing equal bytes to print.
			var numLo, numHi int
			numEqual := ds.NumIgnored + ds.NumIdentical
			for numLo < chunkSize*numContextRecords && numLo+numHi < numEqual && i != 0 {
				numLo++
			}
			for numHi < chunkSize*numContextRecords && numLo+numHi < numEqual && i != len(groups)-1 {
				numHi++
			}
			if numEqual-(numLo+numHi) <= chunkSize && ds.NumIgnored == 0 {
				numHi = numEqual - numLo // Avoid pointless coalescing of single equal row
			}

			// Print the equal bytes.
			appendChunks(vx.Slice(0, numLo), diffIdentical)
			if numEqual > numLo+numHi {
				ds.NumIdentical -= numLo + numHi
				list.AppendEllipsis(ds)
			}
			appendChunks(vx.Slice(numEqual-numHi, numEqual), diffIdentical)
			vx = vx.Slice(numEqual, vx.Len())
			vy = vy.Slice(numEqual, vy.Len())
			continue
		}

		// Print unequal.
		len0 := len(list)
		nx := appendChunks(vx.Slice(0, ds.NumIdentical+ds.NumRemoved+ds.NumModified), diffRemoved)
		vx = vx.Slice(nx, vx.Len())
		ny := appendChunks(vy.Slice(0, ds.NumIdentical+ds.NumInserted+ds.NumModified), diffInserted)
		vy = vy.Slice(ny, vy.Len())
		numDiffs += len(list) - len0
	}
	if maxGroup.IsZero() {
		assert(vx.Len() == 0 && vy.Len() == 0)
	} else {
		list.AppendEllipsis(maxGroup)
	}
	return list
}

// coalesceAdjacentEdits coalesces the list of edits into groups of adjacent
// equal or unequal counts.
func coalesceAdjacentEdits(name string, es diff.EditScript) (groups []diffStats) {
	var prevCase int // Arbitrary index into which case last occurred
	lastStats := func(i int) *diffStats {
		if prevCase != i {
			groups = append(groups, diffStats{Name: name})
			prevCase = i
		}
		return &groups[len(groups)-1]
	}
	for _, e := range es {
		switch e {
		case diff.Identity:
			lastStats(1).NumIdentical++
		case diff.UniqueX:
			lastStats(2).NumRemoved++
		case diff.UniqueY:
			lastStats(2).NumInserted++
		case diff.Modified:
			lastStats(2).NumModified++
		}
	}
	return groups
}

// coalesceInterveningIdentical coalesces sufficiently short (<= windowSize)
// equal groups into adjacent unequal groups that currently result in a
// dual inserted/removed printout. This acts as a high-pass filter to smooth
// out high-frequency changes within the windowSize.
func coalesceInterveningIdentical(groups []diffStats, windowSize int) []diffStats {
	groups, groupsOrig := groups[:0], groups
	for i, ds := range groupsOrig {
		if len(groups) >= 2 && ds.NumDiff() > 0 {
			prev := &groups[len(groups)-2] // Unequal group
			curr := &groups[len(groups)-1] // Equal group
			next := &groupsOrig[i]         // Unequal group
			hadX, hadY := prev.NumRemoved > 0, prev.NumInserted > 0
			hasX, hasY := next.NumRemoved > 0, next.NumInserted > 0
			if ((hadX || hasX) && (hadY || hasY)) && curr.NumIdentical <= windowSize {
				*prev = prev.Append(*curr).Append(*next)
				groups = groups[:len(groups)-1] // Truncate off equal group
				continue
			}
		}
		groups = append(groups, ds)
	}
	return groups
}