1// Copyright 2019, The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE.md file.
4
5package cmp
6
7import (
8	"bytes"
9	"fmt"
10	"reflect"
11	"strconv"
12	"strings"
13	"unicode"
14	"unicode/utf8"
15
16	"github.com/google/go-cmp/cmp/internal/diff"
17)
18
19// CanFormatDiffSlice reports whether we support custom formatting for nodes
20// that are slices of primitive kinds or strings.
21func (opts formatOptions) CanFormatDiffSlice(v *valueNode) bool {
22	switch {
23	case opts.DiffMode != diffUnknown:
24		return false // Must be formatting in diff mode
25	case v.NumDiff == 0:
26		return false // No differences detected
27	case !v.ValueX.IsValid() || !v.ValueY.IsValid():
28		return false // Both values must be valid
29	case v.Type.Kind() == reflect.Slice && (v.ValueX.Len() == 0 || v.ValueY.Len() == 0):
30		return false // Both slice values have to be non-empty
31	case v.NumIgnored > 0:
32		return false // Some ignore option was used
33	case v.NumTransformed > 0:
34		return false // Some transform option was used
35	case v.NumCompared > 1:
36		return false // More than one comparison was used
37	case v.NumCompared == 1 && v.Type.Name() != "":
38		// The need for cmp to check applicability of options on every element
39		// in a slice is a significant performance detriment for large []byte.
40		// The workaround is to specify Comparer(bytes.Equal),
41		// which enables cmp to compare []byte more efficiently.
42		// If they differ, we still want to provide batched diffing.
43		// The logic disallows named types since they tend to have their own
44		// String method, with nicer formatting than what this provides.
45		return false
46	}
47
48	switch t := v.Type; t.Kind() {
49	case reflect.String:
50	case reflect.Array, reflect.Slice:
51		// Only slices of primitive types have specialized handling.
52		switch t.Elem().Kind() {
53		case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64,
54			reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr,
55			reflect.Bool, reflect.Float32, reflect.Float64, reflect.Complex64, reflect.Complex128:
56		default:
57			return false
58		}
59
60		// If a sufficient number of elements already differ,
61		// use specialized formatting even if length requirement is not met.
62		if v.NumDiff > v.NumSame {
63			return true
64		}
65	default:
66		return false
67	}
68
69	// Use specialized string diffing for longer slices or strings.
70	const minLength = 64
71	return v.ValueX.Len() >= minLength && v.ValueY.Len() >= minLength
72}
73
74// FormatDiffSlice prints a diff for the slices (or strings) represented by v.
75// This provides custom-tailored logic to make printing of differences in
76// textual strings and slices of primitive kinds more readable.
77func (opts formatOptions) FormatDiffSlice(v *valueNode) textNode {
78	assert(opts.DiffMode == diffUnknown)
79	t, vx, vy := v.Type, v.ValueX, v.ValueY
80
81	// Auto-detect the type of the data.
82	var isLinedText, isText, isBinary bool
83	var sx, sy string
84	switch {
85	case t.Kind() == reflect.String:
86		sx, sy = vx.String(), vy.String()
87		isText = true // Initial estimate, verify later
88	case t.Kind() == reflect.Slice && t.Elem() == reflect.TypeOf(byte(0)):
89		sx, sy = string(vx.Bytes()), string(vy.Bytes())
90		isBinary = true // Initial estimate, verify later
91	case t.Kind() == reflect.Array:
92		// Arrays need to be addressable for slice operations to work.
93		vx2, vy2 := reflect.New(t).Elem(), reflect.New(t).Elem()
94		vx2.Set(vx)
95		vy2.Set(vy)
96		vx, vy = vx2, vy2
97	}
98	if isText || isBinary {
99		var numLines, lastLineIdx, maxLineLen int
100		isBinary = !utf8.ValidString(sx) || !utf8.ValidString(sy)
101		for i, r := range sx + sy {
102			if !(unicode.IsPrint(r) || unicode.IsSpace(r)) || r == utf8.RuneError {
103				isBinary = true
104				break
105			}
106			if r == '\n' {
107				if maxLineLen < i-lastLineIdx {
108					maxLineLen = i - lastLineIdx
109				}
110				lastLineIdx = i + 1
111				numLines++
112			}
113		}
114		isText = !isBinary
115		isLinedText = isText && numLines >= 4 && maxLineLen <= 1024
116	}
117
118	// Format the string into printable records.
119	var list textList
120	var delim string
121	switch {
122	// If the text appears to be multi-lined text,
123	// then perform differencing across individual lines.
124	case isLinedText:
125		ssx := strings.Split(sx, "\n")
126		ssy := strings.Split(sy, "\n")
127		list = opts.formatDiffSlice(
128			reflect.ValueOf(ssx), reflect.ValueOf(ssy), 1, "line",
129			func(v reflect.Value, d diffMode) textRecord {
130				s := formatString(v.Index(0).String())
131				return textRecord{Diff: d, Value: textLine(s)}
132			},
133		)
134		delim = "\n"
135
136		// If possible, use a custom triple-quote (""") syntax for printing
137		// differences in a string literal. This format is more readable,
138		// but has edge-cases where differences are visually indistinguishable.
139		// This format is avoided under the following conditions:
140		//	• A line starts with `"""`
141		//	• A line starts with "..."
142		//	• A line contains non-printable characters
143		//	• Adjacent different lines differ only by whitespace
144		//
145		// For example:
146		//		"""
147		//		... // 3 identical lines
148		//		foo
149		//		bar
150		//	-	baz
151		//	+	BAZ
152		//		"""
153		isTripleQuoted := true
154		prevRemoveLines := map[string]bool{}
155		prevInsertLines := map[string]bool{}
156		var list2 textList
157		list2 = append(list2, textRecord{Value: textLine(`"""`), ElideComma: true})
158		for _, r := range list {
159			if !r.Value.Equal(textEllipsis) {
160				line, _ := strconv.Unquote(string(r.Value.(textLine)))
161				line = strings.TrimPrefix(strings.TrimSuffix(line, "\r"), "\r") // trim leading/trailing carriage returns for legacy Windows endline support
162				normLine := strings.Map(func(r rune) rune {
163					if unicode.IsSpace(r) {
164						return -1 // drop whitespace to avoid visually indistinguishable output
165					}
166					return r
167				}, line)
168				isPrintable := func(r rune) bool {
169					return unicode.IsPrint(r) || r == '\t' // specially treat tab as printable
170				}
171				isTripleQuoted = !strings.HasPrefix(line, `"""`) && !strings.HasPrefix(line, "...") && strings.TrimFunc(line, isPrintable) == ""
172				switch r.Diff {
173				case diffRemoved:
174					isTripleQuoted = isTripleQuoted && !prevInsertLines[normLine]
175					prevRemoveLines[normLine] = true
176				case diffInserted:
177					isTripleQuoted = isTripleQuoted && !prevRemoveLines[normLine]
178					prevInsertLines[normLine] = true
179				}
180				if !isTripleQuoted {
181					break
182				}
183				r.Value = textLine(line)
184				r.ElideComma = true
185			}
186			if !(r.Diff == diffRemoved || r.Diff == diffInserted) { // start a new non-adjacent difference group
187				prevRemoveLines = map[string]bool{}
188				prevInsertLines = map[string]bool{}
189			}
190			list2 = append(list2, r)
191		}
192		if r := list2[len(list2)-1]; r.Diff == diffIdentical && len(r.Value.(textLine)) == 0 {
193			list2 = list2[:len(list2)-1] // elide single empty line at the end
194		}
195		list2 = append(list2, textRecord{Value: textLine(`"""`), ElideComma: true})
196		if isTripleQuoted {
197			var out textNode = &textWrap{Prefix: "(", Value: list2, Suffix: ")"}
198			switch t.Kind() {
199			case reflect.String:
200				if t != reflect.TypeOf(string("")) {
201					out = opts.FormatType(t, out)
202				}
203			case reflect.Slice:
204				// Always emit type for slices since the triple-quote syntax
205				// looks like a string (not a slice).
206				opts = opts.WithTypeMode(emitType)
207				out = opts.FormatType(t, out)
208			}
209			return out
210		}
211
212	// If the text appears to be single-lined text,
213	// then perform differencing in approximately fixed-sized chunks.
214	// The output is printed as quoted strings.
215	case isText:
216		list = opts.formatDiffSlice(
217			reflect.ValueOf(sx), reflect.ValueOf(sy), 64, "byte",
218			func(v reflect.Value, d diffMode) textRecord {
219				s := formatString(v.String())
220				return textRecord{Diff: d, Value: textLine(s)}
221			},
222		)
223		delim = ""
224
225	// If the text appears to be binary data,
226	// then perform differencing in approximately fixed-sized chunks.
227	// The output is inspired by hexdump.
228	case isBinary:
229		list = opts.formatDiffSlice(
230			reflect.ValueOf(sx), reflect.ValueOf(sy), 16, "byte",
231			func(v reflect.Value, d diffMode) textRecord {
232				var ss []string
233				for i := 0; i < v.Len(); i++ {
234					ss = append(ss, formatHex(v.Index(i).Uint()))
235				}
236				s := strings.Join(ss, ", ")
237				comment := commentString(fmt.Sprintf("%c|%v|", d, formatASCII(v.String())))
238				return textRecord{Diff: d, Value: textLine(s), Comment: comment}
239			},
240		)
241
242	// For all other slices of primitive types,
243	// then perform differencing in approximately fixed-sized chunks.
244	// The size of each chunk depends on the width of the element kind.
245	default:
246		var chunkSize int
247		if t.Elem().Kind() == reflect.Bool {
248			chunkSize = 16
249		} else {
250			switch t.Elem().Bits() {
251			case 8:
252				chunkSize = 16
253			case 16:
254				chunkSize = 12
255			case 32:
256				chunkSize = 8
257			default:
258				chunkSize = 8
259			}
260		}
261		list = opts.formatDiffSlice(
262			vx, vy, chunkSize, t.Elem().Kind().String(),
263			func(v reflect.Value, d diffMode) textRecord {
264				var ss []string
265				for i := 0; i < v.Len(); i++ {
266					switch t.Elem().Kind() {
267					case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
268						ss = append(ss, fmt.Sprint(v.Index(i).Int()))
269					case reflect.Uint, reflect.Uint16, reflect.Uint32, reflect.Uint64:
270						ss = append(ss, fmt.Sprint(v.Index(i).Uint()))
271					case reflect.Uint8, reflect.Uintptr:
272						ss = append(ss, formatHex(v.Index(i).Uint()))
273					case reflect.Bool, reflect.Float32, reflect.Float64, reflect.Complex64, reflect.Complex128:
274						ss = append(ss, fmt.Sprint(v.Index(i).Interface()))
275					}
276				}
277				s := strings.Join(ss, ", ")
278				return textRecord{Diff: d, Value: textLine(s)}
279			},
280		)
281	}
282
283	// Wrap the output with appropriate type information.
284	var out textNode = &textWrap{Prefix: "{", Value: list, Suffix: "}"}
285	if !isText {
286		// The "{...}" byte-sequence literal is not valid Go syntax for strings.
287		// Emit the type for extra clarity (e.g. "string{...}").
288		if t.Kind() == reflect.String {
289			opts = opts.WithTypeMode(emitType)
290		}
291		return opts.FormatType(t, out)
292	}
293	switch t.Kind() {
294	case reflect.String:
295		out = &textWrap{Prefix: "strings.Join(", Value: out, Suffix: fmt.Sprintf(", %q)", delim)}
296		if t != reflect.TypeOf(string("")) {
297			out = opts.FormatType(t, out)
298		}
299	case reflect.Slice:
300		out = &textWrap{Prefix: "bytes.Join(", Value: out, Suffix: fmt.Sprintf(", %q)", delim)}
301		if t != reflect.TypeOf([]byte(nil)) {
302			out = opts.FormatType(t, out)
303		}
304	}
305	return out
306}
307
308// formatASCII formats s as an ASCII string.
309// This is useful for printing binary strings in a semi-legible way.
310func formatASCII(s string) string {
311	b := bytes.Repeat([]byte{'.'}, len(s))
312	for i := 0; i < len(s); i++ {
313		if ' ' <= s[i] && s[i] <= '~' {
314			b[i] = s[i]
315		}
316	}
317	return string(b)
318}
319
320func (opts formatOptions) formatDiffSlice(
321	vx, vy reflect.Value, chunkSize int, name string,
322	makeRec func(reflect.Value, diffMode) textRecord,
323) (list textList) {
324	es := diff.Difference(vx.Len(), vy.Len(), func(ix int, iy int) diff.Result {
325		return diff.BoolResult(vx.Index(ix).Interface() == vy.Index(iy).Interface())
326	})
327
328	appendChunks := func(v reflect.Value, d diffMode) int {
329		n0 := v.Len()
330		for v.Len() > 0 {
331			n := chunkSize
332			if n > v.Len() {
333				n = v.Len()
334			}
335			list = append(list, makeRec(v.Slice(0, n), d))
336			v = v.Slice(n, v.Len())
337		}
338		return n0 - v.Len()
339	}
340
341	var numDiffs int
342	maxLen := -1
343	if opts.LimitVerbosity {
344		maxLen = (1 << opts.verbosity()) << 2 // 4, 8, 16, 32, 64, etc...
345		opts.VerbosityLevel--
346	}
347
348	groups := coalesceAdjacentEdits(name, es)
349	groups = coalesceInterveningIdentical(groups, chunkSize/4)
350	maxGroup := diffStats{Name: name}
351	for i, ds := range groups {
352		if maxLen >= 0 && numDiffs >= maxLen {
353			maxGroup = maxGroup.Append(ds)
354			continue
355		}
356
357		// Print equal.
358		if ds.NumDiff() == 0 {
359			// Compute the number of leading and trailing equal bytes to print.
360			var numLo, numHi int
361			numEqual := ds.NumIgnored + ds.NumIdentical
362			for numLo < chunkSize*numContextRecords && numLo+numHi < numEqual && i != 0 {
363				numLo++
364			}
365			for numHi < chunkSize*numContextRecords && numLo+numHi < numEqual && i != len(groups)-1 {
366				numHi++
367			}
368			if numEqual-(numLo+numHi) <= chunkSize && ds.NumIgnored == 0 {
369				numHi = numEqual - numLo // Avoid pointless coalescing of single equal row
370			}
371
372			// Print the equal bytes.
373			appendChunks(vx.Slice(0, numLo), diffIdentical)
374			if numEqual > numLo+numHi {
375				ds.NumIdentical -= numLo + numHi
376				list.AppendEllipsis(ds)
377			}
378			appendChunks(vx.Slice(numEqual-numHi, numEqual), diffIdentical)
379			vx = vx.Slice(numEqual, vx.Len())
380			vy = vy.Slice(numEqual, vy.Len())
381			continue
382		}
383
384		// Print unequal.
385		len0 := len(list)
386		nx := appendChunks(vx.Slice(0, ds.NumIdentical+ds.NumRemoved+ds.NumModified), diffRemoved)
387		vx = vx.Slice(nx, vx.Len())
388		ny := appendChunks(vy.Slice(0, ds.NumIdentical+ds.NumInserted+ds.NumModified), diffInserted)
389		vy = vy.Slice(ny, vy.Len())
390		numDiffs += len(list) - len0
391	}
392	if maxGroup.IsZero() {
393		assert(vx.Len() == 0 && vy.Len() == 0)
394	} else {
395		list.AppendEllipsis(maxGroup)
396	}
397	return list
398}
399
400// coalesceAdjacentEdits coalesces the list of edits into groups of adjacent
401// equal or unequal counts.
402func coalesceAdjacentEdits(name string, es diff.EditScript) (groups []diffStats) {
403	var prevCase int // Arbitrary index into which case last occurred
404	lastStats := func(i int) *diffStats {
405		if prevCase != i {
406			groups = append(groups, diffStats{Name: name})
407			prevCase = i
408		}
409		return &groups[len(groups)-1]
410	}
411	for _, e := range es {
412		switch e {
413		case diff.Identity:
414			lastStats(1).NumIdentical++
415		case diff.UniqueX:
416			lastStats(2).NumRemoved++
417		case diff.UniqueY:
418			lastStats(2).NumInserted++
419		case diff.Modified:
420			lastStats(2).NumModified++
421		}
422	}
423	return groups
424}
425
426// coalesceInterveningIdentical coalesces sufficiently short (<= windowSize)
427// equal groups into adjacent unequal groups that currently result in a
428// dual inserted/removed printout. This acts as a high-pass filter to smooth
429// out high-frequency changes within the windowSize.
430func coalesceInterveningIdentical(groups []diffStats, windowSize int) []diffStats {
431	groups, groupsOrig := groups[:0], groups
432	for i, ds := range groupsOrig {
433		if len(groups) >= 2 && ds.NumDiff() > 0 {
434			prev := &groups[len(groups)-2] // Unequal group
435			curr := &groups[len(groups)-1] // Equal group
436			next := &groupsOrig[i]         // Unequal group
437			hadX, hadY := prev.NumRemoved > 0, prev.NumInserted > 0
438			hasX, hasY := next.NumRemoved > 0, next.NumInserted > 0
439			if ((hadX || hasX) && (hadY || hasY)) && curr.NumIdentical <= windowSize {
440				*prev = prev.Append(*curr).Append(*next)
441				groups = groups[:len(groups)-1] // Truncate off equal group
442				continue
443			}
444		}
445		groups = append(groups, ds)
446	}
447	return groups
448}
449