1// Copyright 2019, The Go Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style 3// license that can be found in the LICENSE.md file. 4 5package cmp 6 7import ( 8 "bytes" 9 "fmt" 10 "reflect" 11 "strings" 12 "unicode" 13 "unicode/utf8" 14 15 "github.com/google/go-cmp/cmp/internal/diff" 16) 17 18// CanFormatDiffSlice reports whether we support custom formatting for nodes 19// that are slices of primitive kinds or strings. 20func (opts formatOptions) CanFormatDiffSlice(v *valueNode) bool { 21 switch { 22 case opts.DiffMode != diffUnknown: 23 return false // Must be formatting in diff mode 24 case v.NumDiff == 0: 25 return false // No differences detected 26 case v.NumIgnored+v.NumCompared+v.NumTransformed > 0: 27 // TODO: Handle the case where someone uses bytes.Equal on a large slice. 28 return false // Some custom option was used to determined equality 29 case !v.ValueX.IsValid() || !v.ValueY.IsValid(): 30 return false // Both values must be valid 31 } 32 33 switch t := v.Type; t.Kind() { 34 case reflect.String: 35 case reflect.Array, reflect.Slice: 36 // Only slices of primitive types have specialized handling. 37 switch t.Elem().Kind() { 38 case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, 39 reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr, 40 reflect.Bool, reflect.Float32, reflect.Float64, reflect.Complex64, reflect.Complex128: 41 default: 42 return false 43 } 44 45 // If a sufficient number of elements already differ, 46 // use specialized formatting even if length requirement is not met. 47 if v.NumDiff > v.NumSame { 48 return true 49 } 50 default: 51 return false 52 } 53 54 // Use specialized string diffing for longer slices or strings. 55 const minLength = 64 56 return v.ValueX.Len() >= minLength && v.ValueY.Len() >= minLength 57} 58 59// FormatDiffSlice prints a diff for the slices (or strings) represented by v. 60// This provides custom-tailored logic to make printing of differences in 61// textual strings and slices of primitive kinds more readable. 62func (opts formatOptions) FormatDiffSlice(v *valueNode) textNode { 63 assert(opts.DiffMode == diffUnknown) 64 t, vx, vy := v.Type, v.ValueX, v.ValueY 65 66 // Auto-detect the type of the data. 67 var isLinedText, isText, isBinary bool 68 var sx, sy string 69 switch { 70 case t.Kind() == reflect.String: 71 sx, sy = vx.String(), vy.String() 72 isText = true // Initial estimate, verify later 73 case t.Kind() == reflect.Slice && t.Elem() == reflect.TypeOf(byte(0)): 74 sx, sy = string(vx.Bytes()), string(vy.Bytes()) 75 isBinary = true // Initial estimate, verify later 76 case t.Kind() == reflect.Array: 77 // Arrays need to be addressable for slice operations to work. 78 vx2, vy2 := reflect.New(t).Elem(), reflect.New(t).Elem() 79 vx2.Set(vx) 80 vy2.Set(vy) 81 vx, vy = vx2, vy2 82 } 83 if isText || isBinary { 84 var numLines, lastLineIdx, maxLineLen int 85 isBinary = false 86 for i, r := range sx + sy { 87 if !(unicode.IsPrint(r) || unicode.IsSpace(r)) || r == utf8.RuneError { 88 isBinary = true 89 break 90 } 91 if r == '\n' { 92 if maxLineLen < i-lastLineIdx { 93 lastLineIdx = i - lastLineIdx 94 } 95 lastLineIdx = i + 1 96 numLines++ 97 } 98 } 99 isText = !isBinary 100 isLinedText = isText && numLines >= 4 && maxLineLen <= 256 101 } 102 103 // Format the string into printable records. 104 var list textList 105 var delim string 106 switch { 107 // If the text appears to be multi-lined text, 108 // then perform differencing across individual lines. 109 case isLinedText: 110 ssx := strings.Split(sx, "\n") 111 ssy := strings.Split(sy, "\n") 112 list = opts.formatDiffSlice( 113 reflect.ValueOf(ssx), reflect.ValueOf(ssy), 1, "line", 114 func(v reflect.Value, d diffMode) textRecord { 115 s := formatString(v.Index(0).String()) 116 return textRecord{Diff: d, Value: textLine(s)} 117 }, 118 ) 119 delim = "\n" 120 // If the text appears to be single-lined text, 121 // then perform differencing in approximately fixed-sized chunks. 122 // The output is printed as quoted strings. 123 case isText: 124 list = opts.formatDiffSlice( 125 reflect.ValueOf(sx), reflect.ValueOf(sy), 64, "byte", 126 func(v reflect.Value, d diffMode) textRecord { 127 s := formatString(v.String()) 128 return textRecord{Diff: d, Value: textLine(s)} 129 }, 130 ) 131 delim = "" 132 // If the text appears to be binary data, 133 // then perform differencing in approximately fixed-sized chunks. 134 // The output is inspired by hexdump. 135 case isBinary: 136 list = opts.formatDiffSlice( 137 reflect.ValueOf(sx), reflect.ValueOf(sy), 16, "byte", 138 func(v reflect.Value, d diffMode) textRecord { 139 var ss []string 140 for i := 0; i < v.Len(); i++ { 141 ss = append(ss, formatHex(v.Index(i).Uint())) 142 } 143 s := strings.Join(ss, ", ") 144 comment := commentString(fmt.Sprintf("%c|%v|", d, formatASCII(v.String()))) 145 return textRecord{Diff: d, Value: textLine(s), Comment: comment} 146 }, 147 ) 148 // For all other slices of primitive types, 149 // then perform differencing in approximately fixed-sized chunks. 150 // The size of each chunk depends on the width of the element kind. 151 default: 152 var chunkSize int 153 if t.Elem().Kind() == reflect.Bool { 154 chunkSize = 16 155 } else { 156 switch t.Elem().Bits() { 157 case 8: 158 chunkSize = 16 159 case 16: 160 chunkSize = 12 161 case 32: 162 chunkSize = 8 163 default: 164 chunkSize = 8 165 } 166 } 167 list = opts.formatDiffSlice( 168 vx, vy, chunkSize, t.Elem().Kind().String(), 169 func(v reflect.Value, d diffMode) textRecord { 170 var ss []string 171 for i := 0; i < v.Len(); i++ { 172 switch t.Elem().Kind() { 173 case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: 174 ss = append(ss, fmt.Sprint(v.Index(i).Int())) 175 case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: 176 ss = append(ss, formatHex(v.Index(i).Uint())) 177 case reflect.Bool, reflect.Float32, reflect.Float64, reflect.Complex64, reflect.Complex128: 178 ss = append(ss, fmt.Sprint(v.Index(i).Interface())) 179 } 180 } 181 s := strings.Join(ss, ", ") 182 return textRecord{Diff: d, Value: textLine(s)} 183 }, 184 ) 185 } 186 187 // Wrap the output with appropriate type information. 188 var out textNode = textWrap{"{", list, "}"} 189 if !isText { 190 // The "{...}" byte-sequence literal is not valid Go syntax for strings. 191 // Emit the type for extra clarity (e.g. "string{...}"). 192 if t.Kind() == reflect.String { 193 opts = opts.WithTypeMode(emitType) 194 } 195 return opts.FormatType(t, out) 196 } 197 switch t.Kind() { 198 case reflect.String: 199 out = textWrap{"strings.Join(", out, fmt.Sprintf(", %q)", delim)} 200 if t != reflect.TypeOf(string("")) { 201 out = opts.FormatType(t, out) 202 } 203 case reflect.Slice: 204 out = textWrap{"bytes.Join(", out, fmt.Sprintf(", %q)", delim)} 205 if t != reflect.TypeOf([]byte(nil)) { 206 out = opts.FormatType(t, out) 207 } 208 } 209 return out 210} 211 212// formatASCII formats s as an ASCII string. 213// This is useful for printing binary strings in a semi-legible way. 214func formatASCII(s string) string { 215 b := bytes.Repeat([]byte{'.'}, len(s)) 216 for i := 0; i < len(s); i++ { 217 if ' ' <= s[i] && s[i] <= '~' { 218 b[i] = s[i] 219 } 220 } 221 return string(b) 222} 223 224func (opts formatOptions) formatDiffSlice( 225 vx, vy reflect.Value, chunkSize int, name string, 226 makeRec func(reflect.Value, diffMode) textRecord, 227) (list textList) { 228 es := diff.Difference(vx.Len(), vy.Len(), func(ix int, iy int) diff.Result { 229 return diff.BoolResult(vx.Index(ix).Interface() == vy.Index(iy).Interface()) 230 }) 231 232 appendChunks := func(v reflect.Value, d diffMode) int { 233 n0 := v.Len() 234 for v.Len() > 0 { 235 n := chunkSize 236 if n > v.Len() { 237 n = v.Len() 238 } 239 list = append(list, makeRec(v.Slice(0, n), d)) 240 v = v.Slice(n, v.Len()) 241 } 242 return n0 - v.Len() 243 } 244 245 groups := coalesceAdjacentEdits(name, es) 246 groups = coalesceInterveningIdentical(groups, chunkSize/4) 247 for i, ds := range groups { 248 // Print equal. 249 if ds.NumDiff() == 0 { 250 // Compute the number of leading and trailing equal bytes to print. 251 var numLo, numHi int 252 numEqual := ds.NumIgnored + ds.NumIdentical 253 for numLo < chunkSize*numContextRecords && numLo+numHi < numEqual && i != 0 { 254 numLo++ 255 } 256 for numHi < chunkSize*numContextRecords && numLo+numHi < numEqual && i != len(groups)-1 { 257 numHi++ 258 } 259 if numEqual-(numLo+numHi) <= chunkSize && ds.NumIgnored == 0 { 260 numHi = numEqual - numLo // Avoid pointless coalescing of single equal row 261 } 262 263 // Print the equal bytes. 264 appendChunks(vx.Slice(0, numLo), diffIdentical) 265 if numEqual > numLo+numHi { 266 ds.NumIdentical -= numLo + numHi 267 list.AppendEllipsis(ds) 268 } 269 appendChunks(vx.Slice(numEqual-numHi, numEqual), diffIdentical) 270 vx = vx.Slice(numEqual, vx.Len()) 271 vy = vy.Slice(numEqual, vy.Len()) 272 continue 273 } 274 275 // Print unequal. 276 nx := appendChunks(vx.Slice(0, ds.NumIdentical+ds.NumRemoved+ds.NumModified), diffRemoved) 277 vx = vx.Slice(nx, vx.Len()) 278 ny := appendChunks(vy.Slice(0, ds.NumIdentical+ds.NumInserted+ds.NumModified), diffInserted) 279 vy = vy.Slice(ny, vy.Len()) 280 } 281 assert(vx.Len() == 0 && vy.Len() == 0) 282 return list 283} 284 285// coalesceAdjacentEdits coalesces the list of edits into groups of adjacent 286// equal or unequal counts. 287func coalesceAdjacentEdits(name string, es diff.EditScript) (groups []diffStats) { 288 var prevCase int // Arbitrary index into which case last occurred 289 lastStats := func(i int) *diffStats { 290 if prevCase != i { 291 groups = append(groups, diffStats{Name: name}) 292 prevCase = i 293 } 294 return &groups[len(groups)-1] 295 } 296 for _, e := range es { 297 switch e { 298 case diff.Identity: 299 lastStats(1).NumIdentical++ 300 case diff.UniqueX: 301 lastStats(2).NumRemoved++ 302 case diff.UniqueY: 303 lastStats(2).NumInserted++ 304 case diff.Modified: 305 lastStats(2).NumModified++ 306 } 307 } 308 return groups 309} 310 311// coalesceInterveningIdentical coalesces sufficiently short (<= windowSize) 312// equal groups into adjacent unequal groups that currently result in a 313// dual inserted/removed printout. This acts as a high-pass filter to smooth 314// out high-frequency changes within the windowSize. 315func coalesceInterveningIdentical(groups []diffStats, windowSize int) []diffStats { 316 groups, groupsOrig := groups[:0], groups 317 for i, ds := range groupsOrig { 318 if len(groups) >= 2 && ds.NumDiff() > 0 { 319 prev := &groups[len(groups)-2] // Unequal group 320 curr := &groups[len(groups)-1] // Equal group 321 next := &groupsOrig[i] // Unequal group 322 hadX, hadY := prev.NumRemoved > 0, prev.NumInserted > 0 323 hasX, hasY := next.NumRemoved > 0, next.NumInserted > 0 324 if ((hadX || hasX) && (hadY || hasY)) && curr.NumIdentical <= windowSize { 325 *prev = (*prev).Append(*curr).Append(*next) 326 groups = groups[:len(groups)-1] // Truncate off equal group 327 continue 328 } 329 } 330 groups = append(groups, ds) 331 } 332 return groups 333} 334