1// Copyright 2019, The Go Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style 3// license that can be found in the LICENSE.md file. 4 5package cmp 6 7import ( 8 "bytes" 9 "fmt" 10 "reflect" 11 "strconv" 12 "strings" 13 "unicode" 14 "unicode/utf8" 15 16 "github.com/google/go-cmp/cmp/internal/diff" 17) 18 19// CanFormatDiffSlice reports whether we support custom formatting for nodes 20// that are slices of primitive kinds or strings. 21func (opts formatOptions) CanFormatDiffSlice(v *valueNode) bool { 22 switch { 23 case opts.DiffMode != diffUnknown: 24 return false // Must be formatting in diff mode 25 case v.NumDiff == 0: 26 return false // No differences detected 27 case !v.ValueX.IsValid() || !v.ValueY.IsValid(): 28 return false // Both values must be valid 29 case v.Type.Kind() == reflect.Slice && (v.ValueX.Len() == 0 || v.ValueY.Len() == 0): 30 return false // Both slice values have to be non-empty 31 case v.NumIgnored > 0: 32 return false // Some ignore option was used 33 case v.NumTransformed > 0: 34 return false // Some transform option was used 35 case v.NumCompared > 1: 36 return false // More than one comparison was used 37 case v.NumCompared == 1 && v.Type.Name() != "": 38 // The need for cmp to check applicability of options on every element 39 // in a slice is a significant performance detriment for large []byte. 40 // The workaround is to specify Comparer(bytes.Equal), 41 // which enables cmp to compare []byte more efficiently. 42 // If they differ, we still want to provide batched diffing. 43 // The logic disallows named types since they tend to have their own 44 // String method, with nicer formatting than what this provides. 45 return false 46 } 47 48 switch t := v.Type; t.Kind() { 49 case reflect.String: 50 case reflect.Array, reflect.Slice: 51 // Only slices of primitive types have specialized handling. 52 switch t.Elem().Kind() { 53 case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, 54 reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr, 55 reflect.Bool, reflect.Float32, reflect.Float64, reflect.Complex64, reflect.Complex128: 56 default: 57 return false 58 } 59 60 // If a sufficient number of elements already differ, 61 // use specialized formatting even if length requirement is not met. 62 if v.NumDiff > v.NumSame { 63 return true 64 } 65 default: 66 return false 67 } 68 69 // Use specialized string diffing for longer slices or strings. 70 const minLength = 64 71 return v.ValueX.Len() >= minLength && v.ValueY.Len() >= minLength 72} 73 74// FormatDiffSlice prints a diff for the slices (or strings) represented by v. 75// This provides custom-tailored logic to make printing of differences in 76// textual strings and slices of primitive kinds more readable. 77func (opts formatOptions) FormatDiffSlice(v *valueNode) textNode { 78 assert(opts.DiffMode == diffUnknown) 79 t, vx, vy := v.Type, v.ValueX, v.ValueY 80 81 // Auto-detect the type of the data. 82 var isLinedText, isText, isBinary bool 83 var sx, sy string 84 switch { 85 case t.Kind() == reflect.String: 86 sx, sy = vx.String(), vy.String() 87 isText = true // Initial estimate, verify later 88 case t.Kind() == reflect.Slice && t.Elem() == reflect.TypeOf(byte(0)): 89 sx, sy = string(vx.Bytes()), string(vy.Bytes()) 90 isBinary = true // Initial estimate, verify later 91 case t.Kind() == reflect.Array: 92 // Arrays need to be addressable for slice operations to work. 93 vx2, vy2 := reflect.New(t).Elem(), reflect.New(t).Elem() 94 vx2.Set(vx) 95 vy2.Set(vy) 96 vx, vy = vx2, vy2 97 } 98 if isText || isBinary { 99 var numLines, lastLineIdx, maxLineLen int 100 isBinary = !utf8.ValidString(sx) || !utf8.ValidString(sy) 101 for i, r := range sx + sy { 102 if !(unicode.IsPrint(r) || unicode.IsSpace(r)) || r == utf8.RuneError { 103 isBinary = true 104 break 105 } 106 if r == '\n' { 107 if maxLineLen < i-lastLineIdx { 108 maxLineLen = i - lastLineIdx 109 } 110 lastLineIdx = i + 1 111 numLines++ 112 } 113 } 114 isText = !isBinary 115 isLinedText = isText && numLines >= 4 && maxLineLen <= 1024 116 } 117 118 // Format the string into printable records. 119 var list textList 120 var delim string 121 switch { 122 // If the text appears to be multi-lined text, 123 // then perform differencing across individual lines. 124 case isLinedText: 125 ssx := strings.Split(sx, "\n") 126 ssy := strings.Split(sy, "\n") 127 list = opts.formatDiffSlice( 128 reflect.ValueOf(ssx), reflect.ValueOf(ssy), 1, "line", 129 func(v reflect.Value, d diffMode) textRecord { 130 s := formatString(v.Index(0).String()) 131 return textRecord{Diff: d, Value: textLine(s)} 132 }, 133 ) 134 delim = "\n" 135 136 // If possible, use a custom triple-quote (""") syntax for printing 137 // differences in a string literal. This format is more readable, 138 // but has edge-cases where differences are visually indistinguishable. 139 // This format is avoided under the following conditions: 140 // • A line starts with `"""` 141 // • A line starts with "..." 142 // • A line contains non-printable characters 143 // • Adjacent different lines differ only by whitespace 144 // 145 // For example: 146 // """ 147 // ... // 3 identical lines 148 // foo 149 // bar 150 // - baz 151 // + BAZ 152 // """ 153 isTripleQuoted := true 154 prevRemoveLines := map[string]bool{} 155 prevInsertLines := map[string]bool{} 156 var list2 textList 157 list2 = append(list2, textRecord{Value: textLine(`"""`), ElideComma: true}) 158 for _, r := range list { 159 if !r.Value.Equal(textEllipsis) { 160 line, _ := strconv.Unquote(string(r.Value.(textLine))) 161 line = strings.TrimPrefix(strings.TrimSuffix(line, "\r"), "\r") // trim leading/trailing carriage returns for legacy Windows endline support 162 normLine := strings.Map(func(r rune) rune { 163 if unicode.IsSpace(r) { 164 return -1 // drop whitespace to avoid visually indistinguishable output 165 } 166 return r 167 }, line) 168 isPrintable := func(r rune) bool { 169 return unicode.IsPrint(r) || r == '\t' // specially treat tab as printable 170 } 171 isTripleQuoted = !strings.HasPrefix(line, `"""`) && !strings.HasPrefix(line, "...") && strings.TrimFunc(line, isPrintable) == "" 172 switch r.Diff { 173 case diffRemoved: 174 isTripleQuoted = isTripleQuoted && !prevInsertLines[normLine] 175 prevRemoveLines[normLine] = true 176 case diffInserted: 177 isTripleQuoted = isTripleQuoted && !prevRemoveLines[normLine] 178 prevInsertLines[normLine] = true 179 } 180 if !isTripleQuoted { 181 break 182 } 183 r.Value = textLine(line) 184 r.ElideComma = true 185 } 186 if !(r.Diff == diffRemoved || r.Diff == diffInserted) { // start a new non-adjacent difference group 187 prevRemoveLines = map[string]bool{} 188 prevInsertLines = map[string]bool{} 189 } 190 list2 = append(list2, r) 191 } 192 if r := list2[len(list2)-1]; r.Diff == diffIdentical && len(r.Value.(textLine)) == 0 { 193 list2 = list2[:len(list2)-1] // elide single empty line at the end 194 } 195 list2 = append(list2, textRecord{Value: textLine(`"""`), ElideComma: true}) 196 if isTripleQuoted { 197 var out textNode = &textWrap{Prefix: "(", Value: list2, Suffix: ")"} 198 switch t.Kind() { 199 case reflect.String: 200 if t != reflect.TypeOf(string("")) { 201 out = opts.FormatType(t, out) 202 } 203 case reflect.Slice: 204 // Always emit type for slices since the triple-quote syntax 205 // looks like a string (not a slice). 206 opts = opts.WithTypeMode(emitType) 207 out = opts.FormatType(t, out) 208 } 209 return out 210 } 211 212 // If the text appears to be single-lined text, 213 // then perform differencing in approximately fixed-sized chunks. 214 // The output is printed as quoted strings. 215 case isText: 216 list = opts.formatDiffSlice( 217 reflect.ValueOf(sx), reflect.ValueOf(sy), 64, "byte", 218 func(v reflect.Value, d diffMode) textRecord { 219 s := formatString(v.String()) 220 return textRecord{Diff: d, Value: textLine(s)} 221 }, 222 ) 223 delim = "" 224 225 // If the text appears to be binary data, 226 // then perform differencing in approximately fixed-sized chunks. 227 // The output is inspired by hexdump. 228 case isBinary: 229 list = opts.formatDiffSlice( 230 reflect.ValueOf(sx), reflect.ValueOf(sy), 16, "byte", 231 func(v reflect.Value, d diffMode) textRecord { 232 var ss []string 233 for i := 0; i < v.Len(); i++ { 234 ss = append(ss, formatHex(v.Index(i).Uint())) 235 } 236 s := strings.Join(ss, ", ") 237 comment := commentString(fmt.Sprintf("%c|%v|", d, formatASCII(v.String()))) 238 return textRecord{Diff: d, Value: textLine(s), Comment: comment} 239 }, 240 ) 241 242 // For all other slices of primitive types, 243 // then perform differencing in approximately fixed-sized chunks. 244 // The size of each chunk depends on the width of the element kind. 245 default: 246 var chunkSize int 247 if t.Elem().Kind() == reflect.Bool { 248 chunkSize = 16 249 } else { 250 switch t.Elem().Bits() { 251 case 8: 252 chunkSize = 16 253 case 16: 254 chunkSize = 12 255 case 32: 256 chunkSize = 8 257 default: 258 chunkSize = 8 259 } 260 } 261 list = opts.formatDiffSlice( 262 vx, vy, chunkSize, t.Elem().Kind().String(), 263 func(v reflect.Value, d diffMode) textRecord { 264 var ss []string 265 for i := 0; i < v.Len(); i++ { 266 switch t.Elem().Kind() { 267 case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: 268 ss = append(ss, fmt.Sprint(v.Index(i).Int())) 269 case reflect.Uint, reflect.Uint16, reflect.Uint32, reflect.Uint64: 270 ss = append(ss, fmt.Sprint(v.Index(i).Uint())) 271 case reflect.Uint8, reflect.Uintptr: 272 ss = append(ss, formatHex(v.Index(i).Uint())) 273 case reflect.Bool, reflect.Float32, reflect.Float64, reflect.Complex64, reflect.Complex128: 274 ss = append(ss, fmt.Sprint(v.Index(i).Interface())) 275 } 276 } 277 s := strings.Join(ss, ", ") 278 return textRecord{Diff: d, Value: textLine(s)} 279 }, 280 ) 281 } 282 283 // Wrap the output with appropriate type information. 284 var out textNode = &textWrap{Prefix: "{", Value: list, Suffix: "}"} 285 if !isText { 286 // The "{...}" byte-sequence literal is not valid Go syntax for strings. 287 // Emit the type for extra clarity (e.g. "string{...}"). 288 if t.Kind() == reflect.String { 289 opts = opts.WithTypeMode(emitType) 290 } 291 return opts.FormatType(t, out) 292 } 293 switch t.Kind() { 294 case reflect.String: 295 out = &textWrap{Prefix: "strings.Join(", Value: out, Suffix: fmt.Sprintf(", %q)", delim)} 296 if t != reflect.TypeOf(string("")) { 297 out = opts.FormatType(t, out) 298 } 299 case reflect.Slice: 300 out = &textWrap{Prefix: "bytes.Join(", Value: out, Suffix: fmt.Sprintf(", %q)", delim)} 301 if t != reflect.TypeOf([]byte(nil)) { 302 out = opts.FormatType(t, out) 303 } 304 } 305 return out 306} 307 308// formatASCII formats s as an ASCII string. 309// This is useful for printing binary strings in a semi-legible way. 310func formatASCII(s string) string { 311 b := bytes.Repeat([]byte{'.'}, len(s)) 312 for i := 0; i < len(s); i++ { 313 if ' ' <= s[i] && s[i] <= '~' { 314 b[i] = s[i] 315 } 316 } 317 return string(b) 318} 319 320func (opts formatOptions) formatDiffSlice( 321 vx, vy reflect.Value, chunkSize int, name string, 322 makeRec func(reflect.Value, diffMode) textRecord, 323) (list textList) { 324 es := diff.Difference(vx.Len(), vy.Len(), func(ix int, iy int) diff.Result { 325 return diff.BoolResult(vx.Index(ix).Interface() == vy.Index(iy).Interface()) 326 }) 327 328 appendChunks := func(v reflect.Value, d diffMode) int { 329 n0 := v.Len() 330 for v.Len() > 0 { 331 n := chunkSize 332 if n > v.Len() { 333 n = v.Len() 334 } 335 list = append(list, makeRec(v.Slice(0, n), d)) 336 v = v.Slice(n, v.Len()) 337 } 338 return n0 - v.Len() 339 } 340 341 var numDiffs int 342 maxLen := -1 343 if opts.LimitVerbosity { 344 maxLen = (1 << opts.verbosity()) << 2 // 4, 8, 16, 32, 64, etc... 345 opts.VerbosityLevel-- 346 } 347 348 groups := coalesceAdjacentEdits(name, es) 349 groups = coalesceInterveningIdentical(groups, chunkSize/4) 350 maxGroup := diffStats{Name: name} 351 for i, ds := range groups { 352 if maxLen >= 0 && numDiffs >= maxLen { 353 maxGroup = maxGroup.Append(ds) 354 continue 355 } 356 357 // Print equal. 358 if ds.NumDiff() == 0 { 359 // Compute the number of leading and trailing equal bytes to print. 360 var numLo, numHi int 361 numEqual := ds.NumIgnored + ds.NumIdentical 362 for numLo < chunkSize*numContextRecords && numLo+numHi < numEqual && i != 0 { 363 numLo++ 364 } 365 for numHi < chunkSize*numContextRecords && numLo+numHi < numEqual && i != len(groups)-1 { 366 numHi++ 367 } 368 if numEqual-(numLo+numHi) <= chunkSize && ds.NumIgnored == 0 { 369 numHi = numEqual - numLo // Avoid pointless coalescing of single equal row 370 } 371 372 // Print the equal bytes. 373 appendChunks(vx.Slice(0, numLo), diffIdentical) 374 if numEqual > numLo+numHi { 375 ds.NumIdentical -= numLo + numHi 376 list.AppendEllipsis(ds) 377 } 378 appendChunks(vx.Slice(numEqual-numHi, numEqual), diffIdentical) 379 vx = vx.Slice(numEqual, vx.Len()) 380 vy = vy.Slice(numEqual, vy.Len()) 381 continue 382 } 383 384 // Print unequal. 385 len0 := len(list) 386 nx := appendChunks(vx.Slice(0, ds.NumIdentical+ds.NumRemoved+ds.NumModified), diffRemoved) 387 vx = vx.Slice(nx, vx.Len()) 388 ny := appendChunks(vy.Slice(0, ds.NumIdentical+ds.NumInserted+ds.NumModified), diffInserted) 389 vy = vy.Slice(ny, vy.Len()) 390 numDiffs += len(list) - len0 391 } 392 if maxGroup.IsZero() { 393 assert(vx.Len() == 0 && vy.Len() == 0) 394 } else { 395 list.AppendEllipsis(maxGroup) 396 } 397 return list 398} 399 400// coalesceAdjacentEdits coalesces the list of edits into groups of adjacent 401// equal or unequal counts. 402func coalesceAdjacentEdits(name string, es diff.EditScript) (groups []diffStats) { 403 var prevCase int // Arbitrary index into which case last occurred 404 lastStats := func(i int) *diffStats { 405 if prevCase != i { 406 groups = append(groups, diffStats{Name: name}) 407 prevCase = i 408 } 409 return &groups[len(groups)-1] 410 } 411 for _, e := range es { 412 switch e { 413 case diff.Identity: 414 lastStats(1).NumIdentical++ 415 case diff.UniqueX: 416 lastStats(2).NumRemoved++ 417 case diff.UniqueY: 418 lastStats(2).NumInserted++ 419 case diff.Modified: 420 lastStats(2).NumModified++ 421 } 422 } 423 return groups 424} 425 426// coalesceInterveningIdentical coalesces sufficiently short (<= windowSize) 427// equal groups into adjacent unequal groups that currently result in a 428// dual inserted/removed printout. This acts as a high-pass filter to smooth 429// out high-frequency changes within the windowSize. 430func coalesceInterveningIdentical(groups []diffStats, windowSize int) []diffStats { 431 groups, groupsOrig := groups[:0], groups 432 for i, ds := range groupsOrig { 433 if len(groups) >= 2 && ds.NumDiff() > 0 { 434 prev := &groups[len(groups)-2] // Unequal group 435 curr := &groups[len(groups)-1] // Equal group 436 next := &groupsOrig[i] // Unequal group 437 hadX, hadY := prev.NumRemoved > 0, prev.NumInserted > 0 438 hasX, hasY := next.NumRemoved > 0, next.NumInserted > 0 439 if ((hadX || hasX) && (hadY || hasY)) && curr.NumIdentical <= windowSize { 440 *prev = prev.Append(*curr).Append(*next) 441 groups = groups[:len(groups)-1] // Truncate off equal group 442 continue 443 } 444 } 445 groups = append(groups, ds) 446 } 447 return groups 448} 449