1// Copyright 2019, The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE.md file.
4
5package cmp
6
7import (
8	"bytes"
9	"fmt"
10	"reflect"
11	"strings"
12	"unicode"
13	"unicode/utf8"
14
15	"github.com/google/go-cmp/cmp/internal/diff"
16)
17
18// CanFormatDiffSlice reports whether we support custom formatting for nodes
19// that are slices of primitive kinds or strings.
20func (opts formatOptions) CanFormatDiffSlice(v *valueNode) bool {
21	switch {
22	case opts.DiffMode != diffUnknown:
23		return false // Must be formatting in diff mode
24	case v.NumDiff == 0:
25		return false // No differences detected
26	case v.NumIgnored+v.NumCompared+v.NumTransformed > 0:
27		// TODO: Handle the case where someone uses bytes.Equal on a large slice.
28		return false // Some custom option was used to determined equality
29	case !v.ValueX.IsValid() || !v.ValueY.IsValid():
30		return false // Both values must be valid
31	}
32
33	switch t := v.Type; t.Kind() {
34	case reflect.String:
35	case reflect.Array, reflect.Slice:
36		// Only slices of primitive types have specialized handling.
37		switch t.Elem().Kind() {
38		case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64,
39			reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr,
40			reflect.Bool, reflect.Float32, reflect.Float64, reflect.Complex64, reflect.Complex128:
41		default:
42			return false
43		}
44
45		// If a sufficient number of elements already differ,
46		// use specialized formatting even if length requirement is not met.
47		if v.NumDiff > v.NumSame {
48			return true
49		}
50	default:
51		return false
52	}
53
54	// Use specialized string diffing for longer slices or strings.
55	const minLength = 64
56	return v.ValueX.Len() >= minLength && v.ValueY.Len() >= minLength
57}
58
59// FormatDiffSlice prints a diff for the slices (or strings) represented by v.
60// This provides custom-tailored logic to make printing of differences in
61// textual strings and slices of primitive kinds more readable.
62func (opts formatOptions) FormatDiffSlice(v *valueNode) textNode {
63	assert(opts.DiffMode == diffUnknown)
64	t, vx, vy := v.Type, v.ValueX, v.ValueY
65
66	// Auto-detect the type of the data.
67	var isLinedText, isText, isBinary bool
68	var sx, sy string
69	switch {
70	case t.Kind() == reflect.String:
71		sx, sy = vx.String(), vy.String()
72		isText = true // Initial estimate, verify later
73	case t.Kind() == reflect.Slice && t.Elem() == reflect.TypeOf(byte(0)):
74		sx, sy = string(vx.Bytes()), string(vy.Bytes())
75		isBinary = true // Initial estimate, verify later
76	case t.Kind() == reflect.Array:
77		// Arrays need to be addressable for slice operations to work.
78		vx2, vy2 := reflect.New(t).Elem(), reflect.New(t).Elem()
79		vx2.Set(vx)
80		vy2.Set(vy)
81		vx, vy = vx2, vy2
82	}
83	if isText || isBinary {
84		var numLines, lastLineIdx, maxLineLen int
85		isBinary = false
86		for i, r := range sx + sy {
87			if !(unicode.IsPrint(r) || unicode.IsSpace(r)) || r == utf8.RuneError {
88				isBinary = true
89				break
90			}
91			if r == '\n' {
92				if maxLineLen < i-lastLineIdx {
93					lastLineIdx = i - lastLineIdx
94				}
95				lastLineIdx = i + 1
96				numLines++
97			}
98		}
99		isText = !isBinary
100		isLinedText = isText && numLines >= 4 && maxLineLen <= 256
101	}
102
103	// Format the string into printable records.
104	var list textList
105	var delim string
106	switch {
107	// If the text appears to be multi-lined text,
108	// then perform differencing across individual lines.
109	case isLinedText:
110		ssx := strings.Split(sx, "\n")
111		ssy := strings.Split(sy, "\n")
112		list = opts.formatDiffSlice(
113			reflect.ValueOf(ssx), reflect.ValueOf(ssy), 1, "line",
114			func(v reflect.Value, d diffMode) textRecord {
115				s := formatString(v.Index(0).String())
116				return textRecord{Diff: d, Value: textLine(s)}
117			},
118		)
119		delim = "\n"
120	// If the text appears to be single-lined text,
121	// then perform differencing in approximately fixed-sized chunks.
122	// The output is printed as quoted strings.
123	case isText:
124		list = opts.formatDiffSlice(
125			reflect.ValueOf(sx), reflect.ValueOf(sy), 64, "byte",
126			func(v reflect.Value, d diffMode) textRecord {
127				s := formatString(v.String())
128				return textRecord{Diff: d, Value: textLine(s)}
129			},
130		)
131		delim = ""
132	// If the text appears to be binary data,
133	// then perform differencing in approximately fixed-sized chunks.
134	// The output is inspired by hexdump.
135	case isBinary:
136		list = opts.formatDiffSlice(
137			reflect.ValueOf(sx), reflect.ValueOf(sy), 16, "byte",
138			func(v reflect.Value, d diffMode) textRecord {
139				var ss []string
140				for i := 0; i < v.Len(); i++ {
141					ss = append(ss, formatHex(v.Index(i).Uint()))
142				}
143				s := strings.Join(ss, ", ")
144				comment := commentString(fmt.Sprintf("%c|%v|", d, formatASCII(v.String())))
145				return textRecord{Diff: d, Value: textLine(s), Comment: comment}
146			},
147		)
148	// For all other slices of primitive types,
149	// then perform differencing in approximately fixed-sized chunks.
150	// The size of each chunk depends on the width of the element kind.
151	default:
152		var chunkSize int
153		if t.Elem().Kind() == reflect.Bool {
154			chunkSize = 16
155		} else {
156			switch t.Elem().Bits() {
157			case 8:
158				chunkSize = 16
159			case 16:
160				chunkSize = 12
161			case 32:
162				chunkSize = 8
163			default:
164				chunkSize = 8
165			}
166		}
167		list = opts.formatDiffSlice(
168			vx, vy, chunkSize, t.Elem().Kind().String(),
169			func(v reflect.Value, d diffMode) textRecord {
170				var ss []string
171				for i := 0; i < v.Len(); i++ {
172					switch t.Elem().Kind() {
173					case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
174						ss = append(ss, fmt.Sprint(v.Index(i).Int()))
175					case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr:
176						ss = append(ss, formatHex(v.Index(i).Uint()))
177					case reflect.Bool, reflect.Float32, reflect.Float64, reflect.Complex64, reflect.Complex128:
178						ss = append(ss, fmt.Sprint(v.Index(i).Interface()))
179					}
180				}
181				s := strings.Join(ss, ", ")
182				return textRecord{Diff: d, Value: textLine(s)}
183			},
184		)
185	}
186
187	// Wrap the output with appropriate type information.
188	var out textNode = textWrap{"{", list, "}"}
189	if !isText {
190		// The "{...}" byte-sequence literal is not valid Go syntax for strings.
191		// Emit the type for extra clarity (e.g. "string{...}").
192		if t.Kind() == reflect.String {
193			opts = opts.WithTypeMode(emitType)
194		}
195		return opts.FormatType(t, out)
196	}
197	switch t.Kind() {
198	case reflect.String:
199		out = textWrap{"strings.Join(", out, fmt.Sprintf(", %q)", delim)}
200		if t != reflect.TypeOf(string("")) {
201			out = opts.FormatType(t, out)
202		}
203	case reflect.Slice:
204		out = textWrap{"bytes.Join(", out, fmt.Sprintf(", %q)", delim)}
205		if t != reflect.TypeOf([]byte(nil)) {
206			out = opts.FormatType(t, out)
207		}
208	}
209	return out
210}
211
212// formatASCII formats s as an ASCII string.
213// This is useful for printing binary strings in a semi-legible way.
214func formatASCII(s string) string {
215	b := bytes.Repeat([]byte{'.'}, len(s))
216	for i := 0; i < len(s); i++ {
217		if ' ' <= s[i] && s[i] <= '~' {
218			b[i] = s[i]
219		}
220	}
221	return string(b)
222}
223
224func (opts formatOptions) formatDiffSlice(
225	vx, vy reflect.Value, chunkSize int, name string,
226	makeRec func(reflect.Value, diffMode) textRecord,
227) (list textList) {
228	es := diff.Difference(vx.Len(), vy.Len(), func(ix int, iy int) diff.Result {
229		return diff.BoolResult(vx.Index(ix).Interface() == vy.Index(iy).Interface())
230	})
231
232	appendChunks := func(v reflect.Value, d diffMode) int {
233		n0 := v.Len()
234		for v.Len() > 0 {
235			n := chunkSize
236			if n > v.Len() {
237				n = v.Len()
238			}
239			list = append(list, makeRec(v.Slice(0, n), d))
240			v = v.Slice(n, v.Len())
241		}
242		return n0 - v.Len()
243	}
244
245	groups := coalesceAdjacentEdits(name, es)
246	groups = coalesceInterveningIdentical(groups, chunkSize/4)
247	for i, ds := range groups {
248		// Print equal.
249		if ds.NumDiff() == 0 {
250			// Compute the number of leading and trailing equal bytes to print.
251			var numLo, numHi int
252			numEqual := ds.NumIgnored + ds.NumIdentical
253			for numLo < chunkSize*numContextRecords && numLo+numHi < numEqual && i != 0 {
254				numLo++
255			}
256			for numHi < chunkSize*numContextRecords && numLo+numHi < numEqual && i != len(groups)-1 {
257				numHi++
258			}
259			if numEqual-(numLo+numHi) <= chunkSize && ds.NumIgnored == 0 {
260				numHi = numEqual - numLo // Avoid pointless coalescing of single equal row
261			}
262
263			// Print the equal bytes.
264			appendChunks(vx.Slice(0, numLo), diffIdentical)
265			if numEqual > numLo+numHi {
266				ds.NumIdentical -= numLo + numHi
267				list.AppendEllipsis(ds)
268			}
269			appendChunks(vx.Slice(numEqual-numHi, numEqual), diffIdentical)
270			vx = vx.Slice(numEqual, vx.Len())
271			vy = vy.Slice(numEqual, vy.Len())
272			continue
273		}
274
275		// Print unequal.
276		nx := appendChunks(vx.Slice(0, ds.NumIdentical+ds.NumRemoved+ds.NumModified), diffRemoved)
277		vx = vx.Slice(nx, vx.Len())
278		ny := appendChunks(vy.Slice(0, ds.NumIdentical+ds.NumInserted+ds.NumModified), diffInserted)
279		vy = vy.Slice(ny, vy.Len())
280	}
281	assert(vx.Len() == 0 && vy.Len() == 0)
282	return list
283}
284
285// coalesceAdjacentEdits coalesces the list of edits into groups of adjacent
286// equal or unequal counts.
287func coalesceAdjacentEdits(name string, es diff.EditScript) (groups []diffStats) {
288	var prevCase int // Arbitrary index into which case last occurred
289	lastStats := func(i int) *diffStats {
290		if prevCase != i {
291			groups = append(groups, diffStats{Name: name})
292			prevCase = i
293		}
294		return &groups[len(groups)-1]
295	}
296	for _, e := range es {
297		switch e {
298		case diff.Identity:
299			lastStats(1).NumIdentical++
300		case diff.UniqueX:
301			lastStats(2).NumRemoved++
302		case diff.UniqueY:
303			lastStats(2).NumInserted++
304		case diff.Modified:
305			lastStats(2).NumModified++
306		}
307	}
308	return groups
309}
310
311// coalesceInterveningIdentical coalesces sufficiently short (<= windowSize)
312// equal groups into adjacent unequal groups that currently result in a
313// dual inserted/removed printout. This acts as a high-pass filter to smooth
314// out high-frequency changes within the windowSize.
315func coalesceInterveningIdentical(groups []diffStats, windowSize int) []diffStats {
316	groups, groupsOrig := groups[:0], groups
317	for i, ds := range groupsOrig {
318		if len(groups) >= 2 && ds.NumDiff() > 0 {
319			prev := &groups[len(groups)-2] // Unequal group
320			curr := &groups[len(groups)-1] // Equal group
321			next := &groupsOrig[i]         // Unequal group
322			hadX, hadY := prev.NumRemoved > 0, prev.NumInserted > 0
323			hasX, hasY := next.NumRemoved > 0, next.NumInserted > 0
324			if ((hadX || hasX) && (hadY || hasY)) && curr.NumIdentical <= windowSize {
325				*prev = (*prev).Append(*curr).Append(*next)
326				groups = groups[:len(groups)-1] // Truncate off equal group
327				continue
328			}
329		}
330		groups = append(groups, ds)
331	}
332	return groups
333}
334