1// Copyright (c) 2012-2016 The go-diff authors. All rights reserved. 2// https://github.com/sergi/go-diff 3// See the included LICENSE file for license details. 4// 5// go-diff is a Go implementation of Google's Diff, Match, and Patch library 6// Original library is Copyright (c) 2006 Google Inc. 7// http://code.google.com/p/google-diff-match-patch/ 8 9package diffmatchpatch 10 11import ( 12 "bytes" 13 "errors" 14 "fmt" 15 "html" 16 "math" 17 "net/url" 18 "regexp" 19 "strconv" 20 "strings" 21 "time" 22 "unicode/utf8" 23) 24 25// Operation defines the operation of a diff item. 26type Operation int8 27 28//go:generate stringer -type=Operation -trimprefix=Diff 29 30const ( 31 // DiffDelete item represents a delete diff. 32 DiffDelete Operation = -1 33 // DiffInsert item represents an insert diff. 34 DiffInsert Operation = 1 35 // DiffEqual item represents an equal diff. 36 DiffEqual Operation = 0 37) 38 39// Diff represents one diff operation 40type Diff struct { 41 Type Operation 42 Text string 43} 44 45// splice removes amount elements from slice at index index, replacing them with elements. 46func splice(slice []Diff, index int, amount int, elements ...Diff) []Diff { 47 if len(elements) == amount { 48 // Easy case: overwrite the relevant items. 49 copy(slice[index:], elements) 50 return slice 51 } 52 if len(elements) < amount { 53 // Fewer new items than old. 54 // Copy in the new items. 55 copy(slice[index:], elements) 56 // Shift the remaining items left. 57 copy(slice[index+len(elements):], slice[index+amount:]) 58 // Calculate the new end of the slice. 59 end := len(slice) - amount + len(elements) 60 // Zero stranded elements at end so that they can be garbage collected. 61 tail := slice[end:] 62 for i := range tail { 63 tail[i] = Diff{} 64 } 65 return slice[:end] 66 } 67 // More new items than old. 68 // Make room in slice for new elements. 69 // There's probably an even more efficient way to do this, 70 // but this is simple and clear. 71 need := len(slice) - amount + len(elements) 72 for len(slice) < need { 73 slice = append(slice, Diff{}) 74 } 75 // Shift slice elements right to make room for new elements. 76 copy(slice[index+len(elements):], slice[index+amount:]) 77 // Copy in new elements. 78 copy(slice[index:], elements) 79 return slice 80} 81 82// DiffMain finds the differences between two texts. 83// If an invalid UTF-8 sequence is encountered, it will be replaced by the Unicode replacement character. 84func (dmp *DiffMatchPatch) DiffMain(text1, text2 string, checklines bool) []Diff { 85 return dmp.DiffMainRunes([]rune(text1), []rune(text2), checklines) 86} 87 88// DiffMainRunes finds the differences between two rune sequences. 89// If an invalid UTF-8 sequence is encountered, it will be replaced by the Unicode replacement character. 90func (dmp *DiffMatchPatch) DiffMainRunes(text1, text2 []rune, checklines bool) []Diff { 91 var deadline time.Time 92 if dmp.DiffTimeout > 0 { 93 deadline = time.Now().Add(dmp.DiffTimeout) 94 } 95 return dmp.diffMainRunes(text1, text2, checklines, deadline) 96} 97 98func (dmp *DiffMatchPatch) diffMainRunes(text1, text2 []rune, checklines bool, deadline time.Time) []Diff { 99 if runesEqual(text1, text2) { 100 var diffs []Diff 101 if len(text1) > 0 { 102 diffs = append(diffs, Diff{DiffEqual, string(text1)}) 103 } 104 return diffs 105 } 106 // Trim off common prefix (speedup). 107 commonlength := commonPrefixLength(text1, text2) 108 commonprefix := text1[:commonlength] 109 text1 = text1[commonlength:] 110 text2 = text2[commonlength:] 111 112 // Trim off common suffix (speedup). 113 commonlength = commonSuffixLength(text1, text2) 114 commonsuffix := text1[len(text1)-commonlength:] 115 text1 = text1[:len(text1)-commonlength] 116 text2 = text2[:len(text2)-commonlength] 117 118 // Compute the diff on the middle block. 119 diffs := dmp.diffCompute(text1, text2, checklines, deadline) 120 121 // Restore the prefix and suffix. 122 if len(commonprefix) != 0 { 123 diffs = append([]Diff{Diff{DiffEqual, string(commonprefix)}}, diffs...) 124 } 125 if len(commonsuffix) != 0 { 126 diffs = append(diffs, Diff{DiffEqual, string(commonsuffix)}) 127 } 128 129 return dmp.DiffCleanupMerge(diffs) 130} 131 132// diffCompute finds the differences between two rune slices. Assumes that the texts do not have any common prefix or suffix. 133func (dmp *DiffMatchPatch) diffCompute(text1, text2 []rune, checklines bool, deadline time.Time) []Diff { 134 diffs := []Diff{} 135 if len(text1) == 0 { 136 // Just add some text (speedup). 137 return append(diffs, Diff{DiffInsert, string(text2)}) 138 } else if len(text2) == 0 { 139 // Just delete some text (speedup). 140 return append(diffs, Diff{DiffDelete, string(text1)}) 141 } 142 143 var longtext, shorttext []rune 144 if len(text1) > len(text2) { 145 longtext = text1 146 shorttext = text2 147 } else { 148 longtext = text2 149 shorttext = text1 150 } 151 152 if i := runesIndex(longtext, shorttext); i != -1 { 153 op := DiffInsert 154 // Swap insertions for deletions if diff is reversed. 155 if len(text1) > len(text2) { 156 op = DiffDelete 157 } 158 // Shorter text is inside the longer text (speedup). 159 return []Diff{ 160 Diff{op, string(longtext[:i])}, 161 Diff{DiffEqual, string(shorttext)}, 162 Diff{op, string(longtext[i+len(shorttext):])}, 163 } 164 } else if len(shorttext) == 1 { 165 // Single character string. 166 // After the previous speedup, the character can't be an equality. 167 return []Diff{ 168 Diff{DiffDelete, string(text1)}, 169 Diff{DiffInsert, string(text2)}, 170 } 171 // Check to see if the problem can be split in two. 172 } else if hm := dmp.diffHalfMatch(text1, text2); hm != nil { 173 // A half-match was found, sort out the return data. 174 text1A := hm[0] 175 text1B := hm[1] 176 text2A := hm[2] 177 text2B := hm[3] 178 midCommon := hm[4] 179 // Send both pairs off for separate processing. 180 diffsA := dmp.diffMainRunes(text1A, text2A, checklines, deadline) 181 diffsB := dmp.diffMainRunes(text1B, text2B, checklines, deadline) 182 // Merge the results. 183 diffs := diffsA 184 diffs = append(diffs, Diff{DiffEqual, string(midCommon)}) 185 diffs = append(diffs, diffsB...) 186 return diffs 187 } else if checklines && len(text1) > 100 && len(text2) > 100 { 188 return dmp.diffLineMode(text1, text2, deadline) 189 } 190 return dmp.diffBisect(text1, text2, deadline) 191} 192 193// diffLineMode does a quick line-level diff on both []runes, then rediff the parts for greater accuracy. This speedup can produce non-minimal diffs. 194func (dmp *DiffMatchPatch) diffLineMode(text1, text2 []rune, deadline time.Time) []Diff { 195 // Scan the text on a line-by-line basis first. 196 text1, text2, linearray := dmp.diffLinesToRunes(text1, text2) 197 198 diffs := dmp.diffMainRunes(text1, text2, false, deadline) 199 200 // Convert the diff back to original text. 201 diffs = dmp.DiffCharsToLines(diffs, linearray) 202 // Eliminate freak matches (e.g. blank lines) 203 diffs = dmp.DiffCleanupSemantic(diffs) 204 205 // Rediff any replacement blocks, this time character-by-character. 206 // Add a dummy entry at the end. 207 diffs = append(diffs, Diff{DiffEqual, ""}) 208 209 pointer := 0 210 countDelete := 0 211 countInsert := 0 212 213 // NOTE: Rune slices are slower than using strings in this case. 214 textDelete := "" 215 textInsert := "" 216 217 for pointer < len(diffs) { 218 switch diffs[pointer].Type { 219 case DiffInsert: 220 countInsert++ 221 textInsert += diffs[pointer].Text 222 case DiffDelete: 223 countDelete++ 224 textDelete += diffs[pointer].Text 225 case DiffEqual: 226 // Upon reaching an equality, check for prior redundancies. 227 if countDelete >= 1 && countInsert >= 1 { 228 // Delete the offending records and add the merged ones. 229 diffs = splice(diffs, pointer-countDelete-countInsert, 230 countDelete+countInsert) 231 232 pointer = pointer - countDelete - countInsert 233 a := dmp.diffMainRunes([]rune(textDelete), []rune(textInsert), false, deadline) 234 for j := len(a) - 1; j >= 0; j-- { 235 diffs = splice(diffs, pointer, 0, a[j]) 236 } 237 pointer = pointer + len(a) 238 } 239 240 countInsert = 0 241 countDelete = 0 242 textDelete = "" 243 textInsert = "" 244 } 245 pointer++ 246 } 247 248 return diffs[:len(diffs)-1] // Remove the dummy entry at the end. 249} 250 251// DiffBisect finds the 'middle snake' of a diff, split the problem in two and return the recursively constructed diff. 252// If an invalid UTF-8 sequence is encountered, it will be replaced by the Unicode replacement character. 253// See Myers 1986 paper: An O(ND) Difference Algorithm and Its Variations. 254func (dmp *DiffMatchPatch) DiffBisect(text1, text2 string, deadline time.Time) []Diff { 255 // Unused in this code, but retained for interface compatibility. 256 return dmp.diffBisect([]rune(text1), []rune(text2), deadline) 257} 258 259// diffBisect finds the 'middle snake' of a diff, splits the problem in two and returns the recursively constructed diff. 260// See Myers's 1986 paper: An O(ND) Difference Algorithm and Its Variations. 261func (dmp *DiffMatchPatch) diffBisect(runes1, runes2 []rune, deadline time.Time) []Diff { 262 // Cache the text lengths to prevent multiple calls. 263 runes1Len, runes2Len := len(runes1), len(runes2) 264 265 maxD := (runes1Len + runes2Len + 1) / 2 266 vOffset := maxD 267 vLength := 2 * maxD 268 269 v1 := make([]int, vLength) 270 v2 := make([]int, vLength) 271 for i := range v1 { 272 v1[i] = -1 273 v2[i] = -1 274 } 275 v1[vOffset+1] = 0 276 v2[vOffset+1] = 0 277 278 delta := runes1Len - runes2Len 279 // If the total number of characters is odd, then the front path will collide with the reverse path. 280 front := (delta%2 != 0) 281 // Offsets for start and end of k loop. Prevents mapping of space beyond the grid. 282 k1start := 0 283 k1end := 0 284 k2start := 0 285 k2end := 0 286 for d := 0; d < maxD; d++ { 287 // Bail out if deadline is reached. 288 if !deadline.IsZero() && d%16 == 0 && time.Now().After(deadline) { 289 break 290 } 291 292 // Walk the front path one step. 293 for k1 := -d + k1start; k1 <= d-k1end; k1 += 2 { 294 k1Offset := vOffset + k1 295 var x1 int 296 297 if k1 == -d || (k1 != d && v1[k1Offset-1] < v1[k1Offset+1]) { 298 x1 = v1[k1Offset+1] 299 } else { 300 x1 = v1[k1Offset-1] + 1 301 } 302 303 y1 := x1 - k1 304 for x1 < runes1Len && y1 < runes2Len { 305 if runes1[x1] != runes2[y1] { 306 break 307 } 308 x1++ 309 y1++ 310 } 311 v1[k1Offset] = x1 312 if x1 > runes1Len { 313 // Ran off the right of the graph. 314 k1end += 2 315 } else if y1 > runes2Len { 316 // Ran off the bottom of the graph. 317 k1start += 2 318 } else if front { 319 k2Offset := vOffset + delta - k1 320 if k2Offset >= 0 && k2Offset < vLength && v2[k2Offset] != -1 { 321 // Mirror x2 onto top-left coordinate system. 322 x2 := runes1Len - v2[k2Offset] 323 if x1 >= x2 { 324 // Overlap detected. 325 return dmp.diffBisectSplit(runes1, runes2, x1, y1, deadline) 326 } 327 } 328 } 329 } 330 // Walk the reverse path one step. 331 for k2 := -d + k2start; k2 <= d-k2end; k2 += 2 { 332 k2Offset := vOffset + k2 333 var x2 int 334 if k2 == -d || (k2 != d && v2[k2Offset-1] < v2[k2Offset+1]) { 335 x2 = v2[k2Offset+1] 336 } else { 337 x2 = v2[k2Offset-1] + 1 338 } 339 var y2 = x2 - k2 340 for x2 < runes1Len && y2 < runes2Len { 341 if runes1[runes1Len-x2-1] != runes2[runes2Len-y2-1] { 342 break 343 } 344 x2++ 345 y2++ 346 } 347 v2[k2Offset] = x2 348 if x2 > runes1Len { 349 // Ran off the left of the graph. 350 k2end += 2 351 } else if y2 > runes2Len { 352 // Ran off the top of the graph. 353 k2start += 2 354 } else if !front { 355 k1Offset := vOffset + delta - k2 356 if k1Offset >= 0 && k1Offset < vLength && v1[k1Offset] != -1 { 357 x1 := v1[k1Offset] 358 y1 := vOffset + x1 - k1Offset 359 // Mirror x2 onto top-left coordinate system. 360 x2 = runes1Len - x2 361 if x1 >= x2 { 362 // Overlap detected. 363 return dmp.diffBisectSplit(runes1, runes2, x1, y1, deadline) 364 } 365 } 366 } 367 } 368 } 369 // Diff took too long and hit the deadline or number of diffs equals number of characters, no commonality at all. 370 return []Diff{ 371 Diff{DiffDelete, string(runes1)}, 372 Diff{DiffInsert, string(runes2)}, 373 } 374} 375 376func (dmp *DiffMatchPatch) diffBisectSplit(runes1, runes2 []rune, x, y int, 377 deadline time.Time) []Diff { 378 runes1a := runes1[:x] 379 runes2a := runes2[:y] 380 runes1b := runes1[x:] 381 runes2b := runes2[y:] 382 383 // Compute both diffs serially. 384 diffs := dmp.diffMainRunes(runes1a, runes2a, false, deadline) 385 diffsb := dmp.diffMainRunes(runes1b, runes2b, false, deadline) 386 387 return append(diffs, diffsb...) 388} 389 390// DiffLinesToChars splits two texts into a list of strings, and educes the texts to a string of hashes where each Unicode character represents one line. 391// It's slightly faster to call DiffLinesToRunes first, followed by DiffMainRunes. 392func (dmp *DiffMatchPatch) DiffLinesToChars(text1, text2 string) (string, string, []string) { 393 chars1, chars2, lineArray := dmp.DiffLinesToRunes(text1, text2) 394 return string(chars1), string(chars2), lineArray 395} 396 397// DiffLinesToRunes splits two texts into a list of runes. Each rune represents one line. 398func (dmp *DiffMatchPatch) DiffLinesToRunes(text1, text2 string) ([]rune, []rune, []string) { 399 // '\x00' is a valid character, but various debuggers don't like it. So we'll insert a junk entry to avoid generating a null character. 400 lineArray := []string{""} // e.g. lineArray[4] == 'Hello\n' 401 lineHash := map[string]int{} // e.g. lineHash['Hello\n'] == 4 402 403 chars1 := dmp.diffLinesToRunesMunge(text1, &lineArray, lineHash) 404 chars2 := dmp.diffLinesToRunesMunge(text2, &lineArray, lineHash) 405 406 return chars1, chars2, lineArray 407} 408 409func (dmp *DiffMatchPatch) diffLinesToRunes(text1, text2 []rune) ([]rune, []rune, []string) { 410 return dmp.DiffLinesToRunes(string(text1), string(text2)) 411} 412 413// diffLinesToRunesMunge splits a text into an array of strings, and reduces the texts to a []rune where each Unicode character represents one line. 414// We use strings instead of []runes as input mainly because you can't use []rune as a map key. 415func (dmp *DiffMatchPatch) diffLinesToRunesMunge(text string, lineArray *[]string, lineHash map[string]int) []rune { 416 // Walk the text, pulling out a substring for each line. text.split('\n') would would temporarily double our memory footprint. Modifying text would create many large strings to garbage collect. 417 lineStart := 0 418 lineEnd := -1 419 runes := []rune{} 420 421 for lineEnd < len(text)-1 { 422 lineEnd = indexOf(text, "\n", lineStart) 423 424 if lineEnd == -1 { 425 lineEnd = len(text) - 1 426 } 427 428 line := text[lineStart : lineEnd+1] 429 lineStart = lineEnd + 1 430 lineValue, ok := lineHash[line] 431 432 if ok { 433 runes = append(runes, rune(lineValue)) 434 } else { 435 *lineArray = append(*lineArray, line) 436 lineHash[line] = len(*lineArray) - 1 437 runes = append(runes, rune(len(*lineArray)-1)) 438 } 439 } 440 441 return runes 442} 443 444// DiffCharsToLines rehydrates the text in a diff from a string of line hashes to real lines of text. 445func (dmp *DiffMatchPatch) DiffCharsToLines(diffs []Diff, lineArray []string) []Diff { 446 hydrated := make([]Diff, 0, len(diffs)) 447 for _, aDiff := range diffs { 448 chars := aDiff.Text 449 text := make([]string, len(chars)) 450 451 for i, r := range chars { 452 text[i] = lineArray[r] 453 } 454 455 aDiff.Text = strings.Join(text, "") 456 hydrated = append(hydrated, aDiff) 457 } 458 return hydrated 459} 460 461// DiffCommonPrefix determines the common prefix length of two strings. 462func (dmp *DiffMatchPatch) DiffCommonPrefix(text1, text2 string) int { 463 // Unused in this code, but retained for interface compatibility. 464 return commonPrefixLength([]rune(text1), []rune(text2)) 465} 466 467// DiffCommonSuffix determines the common suffix length of two strings. 468func (dmp *DiffMatchPatch) DiffCommonSuffix(text1, text2 string) int { 469 // Unused in this code, but retained for interface compatibility. 470 return commonSuffixLength([]rune(text1), []rune(text2)) 471} 472 473// commonPrefixLength returns the length of the common prefix of two rune slices. 474func commonPrefixLength(text1, text2 []rune) int { 475 // Linear search. See comment in commonSuffixLength. 476 n := 0 477 for ; n < len(text1) && n < len(text2); n++ { 478 if text1[n] != text2[n] { 479 return n 480 } 481 } 482 return n 483} 484 485// commonSuffixLength returns the length of the common suffix of two rune slices. 486func commonSuffixLength(text1, text2 []rune) int { 487 // Use linear search rather than the binary search discussed at https://neil.fraser.name/news/2007/10/09/. 488 // See discussion at https://github.com/sergi/go-diff/issues/54. 489 i1 := len(text1) 490 i2 := len(text2) 491 for n := 0; ; n++ { 492 i1-- 493 i2-- 494 if i1 < 0 || i2 < 0 || text1[i1] != text2[i2] { 495 return n 496 } 497 } 498} 499 500// DiffCommonOverlap determines if the suffix of one string is the prefix of another. 501func (dmp *DiffMatchPatch) DiffCommonOverlap(text1 string, text2 string) int { 502 // Cache the text lengths to prevent multiple calls. 503 text1Length := len(text1) 504 text2Length := len(text2) 505 // Eliminate the null case. 506 if text1Length == 0 || text2Length == 0 { 507 return 0 508 } 509 // Truncate the longer string. 510 if text1Length > text2Length { 511 text1 = text1[text1Length-text2Length:] 512 } else if text1Length < text2Length { 513 text2 = text2[0:text1Length] 514 } 515 textLength := int(math.Min(float64(text1Length), float64(text2Length))) 516 // Quick check for the worst case. 517 if text1 == text2 { 518 return textLength 519 } 520 521 // Start by looking for a single character match and increase length until no match is found. Performance analysis: http://neil.fraser.name/news/2010/11/04/ 522 best := 0 523 length := 1 524 for { 525 pattern := text1[textLength-length:] 526 found := strings.Index(text2, pattern) 527 if found == -1 { 528 break 529 } 530 length += found 531 if found == 0 || text1[textLength-length:] == text2[0:length] { 532 best = length 533 length++ 534 } 535 } 536 537 return best 538} 539 540// DiffHalfMatch checks whether the two texts share a substring which is at least half the length of the longer text. This speedup can produce non-minimal diffs. 541func (dmp *DiffMatchPatch) DiffHalfMatch(text1, text2 string) []string { 542 // Unused in this code, but retained for interface compatibility. 543 runeSlices := dmp.diffHalfMatch([]rune(text1), []rune(text2)) 544 if runeSlices == nil { 545 return nil 546 } 547 548 result := make([]string, len(runeSlices)) 549 for i, r := range runeSlices { 550 result[i] = string(r) 551 } 552 return result 553} 554 555func (dmp *DiffMatchPatch) diffHalfMatch(text1, text2 []rune) [][]rune { 556 if dmp.DiffTimeout <= 0 { 557 // Don't risk returning a non-optimal diff if we have unlimited time. 558 return nil 559 } 560 561 var longtext, shorttext []rune 562 if len(text1) > len(text2) { 563 longtext = text1 564 shorttext = text2 565 } else { 566 longtext = text2 567 shorttext = text1 568 } 569 570 if len(longtext) < 4 || len(shorttext)*2 < len(longtext) { 571 return nil // Pointless. 572 } 573 574 // First check if the second quarter is the seed for a half-match. 575 hm1 := dmp.diffHalfMatchI(longtext, shorttext, int(float64(len(longtext)+3)/4)) 576 577 // Check again based on the third quarter. 578 hm2 := dmp.diffHalfMatchI(longtext, shorttext, int(float64(len(longtext)+1)/2)) 579 580 hm := [][]rune{} 581 if hm1 == nil && hm2 == nil { 582 return nil 583 } else if hm2 == nil { 584 hm = hm1 585 } else if hm1 == nil { 586 hm = hm2 587 } else { 588 // Both matched. Select the longest. 589 if len(hm1[4]) > len(hm2[4]) { 590 hm = hm1 591 } else { 592 hm = hm2 593 } 594 } 595 596 // A half-match was found, sort out the return data. 597 if len(text1) > len(text2) { 598 return hm 599 } 600 601 return [][]rune{hm[2], hm[3], hm[0], hm[1], hm[4]} 602} 603 604// diffHalfMatchI checks if a substring of shorttext exist within longtext such that the substring is at least half the length of longtext? 605// Returns a slice containing the prefix of longtext, the suffix of longtext, the prefix of shorttext, the suffix of shorttext and the common middle, or null if there was no match. 606func (dmp *DiffMatchPatch) diffHalfMatchI(l, s []rune, i int) [][]rune { 607 var bestCommonA []rune 608 var bestCommonB []rune 609 var bestCommonLen int 610 var bestLongtextA []rune 611 var bestLongtextB []rune 612 var bestShorttextA []rune 613 var bestShorttextB []rune 614 615 // Start with a 1/4 length substring at position i as a seed. 616 seed := l[i : i+len(l)/4] 617 618 for j := runesIndexOf(s, seed, 0); j != -1; j = runesIndexOf(s, seed, j+1) { 619 prefixLength := commonPrefixLength(l[i:], s[j:]) 620 suffixLength := commonSuffixLength(l[:i], s[:j]) 621 622 if bestCommonLen < suffixLength+prefixLength { 623 bestCommonA = s[j-suffixLength : j] 624 bestCommonB = s[j : j+prefixLength] 625 bestCommonLen = len(bestCommonA) + len(bestCommonB) 626 bestLongtextA = l[:i-suffixLength] 627 bestLongtextB = l[i+prefixLength:] 628 bestShorttextA = s[:j-suffixLength] 629 bestShorttextB = s[j+prefixLength:] 630 } 631 } 632 633 if bestCommonLen*2 < len(l) { 634 return nil 635 } 636 637 return [][]rune{ 638 bestLongtextA, 639 bestLongtextB, 640 bestShorttextA, 641 bestShorttextB, 642 append(bestCommonA, bestCommonB...), 643 } 644} 645 646// DiffCleanupSemantic reduces the number of edits by eliminating semantically trivial equalities. 647func (dmp *DiffMatchPatch) DiffCleanupSemantic(diffs []Diff) []Diff { 648 changes := false 649 // Stack of indices where equalities are found. 650 equalities := make([]int, 0, len(diffs)) 651 652 var lastequality string 653 // Always equal to diffs[equalities[equalitiesLength - 1]][1] 654 var pointer int // Index of current position. 655 // Number of characters that changed prior to the equality. 656 var lengthInsertions1, lengthDeletions1 int 657 // Number of characters that changed after the equality. 658 var lengthInsertions2, lengthDeletions2 int 659 660 for pointer < len(diffs) { 661 if diffs[pointer].Type == DiffEqual { 662 // Equality found. 663 equalities = append(equalities, pointer) 664 lengthInsertions1 = lengthInsertions2 665 lengthDeletions1 = lengthDeletions2 666 lengthInsertions2 = 0 667 lengthDeletions2 = 0 668 lastequality = diffs[pointer].Text 669 } else { 670 // An insertion or deletion. 671 672 if diffs[pointer].Type == DiffInsert { 673 lengthInsertions2 += len(diffs[pointer].Text) 674 } else { 675 lengthDeletions2 += len(diffs[pointer].Text) 676 } 677 // Eliminate an equality that is smaller or equal to the edits on both sides of it. 678 difference1 := int(math.Max(float64(lengthInsertions1), float64(lengthDeletions1))) 679 difference2 := int(math.Max(float64(lengthInsertions2), float64(lengthDeletions2))) 680 if len(lastequality) > 0 && 681 (len(lastequality) <= difference1) && 682 (len(lastequality) <= difference2) { 683 // Duplicate record. 684 insPoint := equalities[len(equalities)-1] 685 diffs = splice(diffs, insPoint, 0, Diff{DiffDelete, lastequality}) 686 687 // Change second copy to insert. 688 diffs[insPoint+1].Type = DiffInsert 689 // Throw away the equality we just deleted. 690 equalities = equalities[:len(equalities)-1] 691 692 if len(equalities) > 0 { 693 equalities = equalities[:len(equalities)-1] 694 } 695 pointer = -1 696 if len(equalities) > 0 { 697 pointer = equalities[len(equalities)-1] 698 } 699 700 lengthInsertions1 = 0 // Reset the counters. 701 lengthDeletions1 = 0 702 lengthInsertions2 = 0 703 lengthDeletions2 = 0 704 lastequality = "" 705 changes = true 706 } 707 } 708 pointer++ 709 } 710 711 // Normalize the diff. 712 if changes { 713 diffs = dmp.DiffCleanupMerge(diffs) 714 } 715 diffs = dmp.DiffCleanupSemanticLossless(diffs) 716 // Find any overlaps between deletions and insertions. 717 // e.g: <del>abcxxx</del><ins>xxxdef</ins> 718 // -> <del>abc</del>xxx<ins>def</ins> 719 // e.g: <del>xxxabc</del><ins>defxxx</ins> 720 // -> <ins>def</ins>xxx<del>abc</del> 721 // Only extract an overlap if it is as big as the edit ahead or behind it. 722 pointer = 1 723 for pointer < len(diffs) { 724 if diffs[pointer-1].Type == DiffDelete && 725 diffs[pointer].Type == DiffInsert { 726 deletion := diffs[pointer-1].Text 727 insertion := diffs[pointer].Text 728 overlapLength1 := dmp.DiffCommonOverlap(deletion, insertion) 729 overlapLength2 := dmp.DiffCommonOverlap(insertion, deletion) 730 if overlapLength1 >= overlapLength2 { 731 if float64(overlapLength1) >= float64(len(deletion))/2 || 732 float64(overlapLength1) >= float64(len(insertion))/2 { 733 734 // Overlap found. Insert an equality and trim the surrounding edits. 735 diffs = splice(diffs, pointer, 0, Diff{DiffEqual, insertion[:overlapLength1]}) 736 diffs[pointer-1].Text = 737 deletion[0 : len(deletion)-overlapLength1] 738 diffs[pointer+1].Text = insertion[overlapLength1:] 739 pointer++ 740 } 741 } else { 742 if float64(overlapLength2) >= float64(len(deletion))/2 || 743 float64(overlapLength2) >= float64(len(insertion))/2 { 744 // Reverse overlap found. Insert an equality and swap and trim the surrounding edits. 745 overlap := Diff{DiffEqual, deletion[:overlapLength2]} 746 diffs = splice(diffs, pointer, 0, overlap) 747 diffs[pointer-1].Type = DiffInsert 748 diffs[pointer-1].Text = insertion[0 : len(insertion)-overlapLength2] 749 diffs[pointer+1].Type = DiffDelete 750 diffs[pointer+1].Text = deletion[overlapLength2:] 751 pointer++ 752 } 753 } 754 pointer++ 755 } 756 pointer++ 757 } 758 759 return diffs 760} 761 762// Define some regex patterns for matching boundaries. 763var ( 764 nonAlphaNumericRegex = regexp.MustCompile(`[^a-zA-Z0-9]`) 765 whitespaceRegex = regexp.MustCompile(`\s`) 766 linebreakRegex = regexp.MustCompile(`[\r\n]`) 767 blanklineEndRegex = regexp.MustCompile(`\n\r?\n$`) 768 blanklineStartRegex = regexp.MustCompile(`^\r?\n\r?\n`) 769) 770 771// diffCleanupSemanticScore computes a score representing whether the internal boundary falls on logical boundaries. 772// Scores range from 6 (best) to 0 (worst). Closure, but does not reference any external variables. 773func diffCleanupSemanticScore(one, two string) int { 774 if len(one) == 0 || len(two) == 0 { 775 // Edges are the best. 776 return 6 777 } 778 779 // Each port of this function behaves slightly differently due to subtle differences in each language's definition of things like 'whitespace'. Since this function's purpose is largely cosmetic, the choice has been made to use each language's native features rather than force total conformity. 780 rune1, _ := utf8.DecodeLastRuneInString(one) 781 rune2, _ := utf8.DecodeRuneInString(two) 782 char1 := string(rune1) 783 char2 := string(rune2) 784 785 nonAlphaNumeric1 := nonAlphaNumericRegex.MatchString(char1) 786 nonAlphaNumeric2 := nonAlphaNumericRegex.MatchString(char2) 787 whitespace1 := nonAlphaNumeric1 && whitespaceRegex.MatchString(char1) 788 whitespace2 := nonAlphaNumeric2 && whitespaceRegex.MatchString(char2) 789 lineBreak1 := whitespace1 && linebreakRegex.MatchString(char1) 790 lineBreak2 := whitespace2 && linebreakRegex.MatchString(char2) 791 blankLine1 := lineBreak1 && blanklineEndRegex.MatchString(one) 792 blankLine2 := lineBreak2 && blanklineEndRegex.MatchString(two) 793 794 if blankLine1 || blankLine2 { 795 // Five points for blank lines. 796 return 5 797 } else if lineBreak1 || lineBreak2 { 798 // Four points for line breaks. 799 return 4 800 } else if nonAlphaNumeric1 && !whitespace1 && whitespace2 { 801 // Three points for end of sentences. 802 return 3 803 } else if whitespace1 || whitespace2 { 804 // Two points for whitespace. 805 return 2 806 } else if nonAlphaNumeric1 || nonAlphaNumeric2 { 807 // One point for non-alphanumeric. 808 return 1 809 } 810 return 0 811} 812 813// DiffCleanupSemanticLossless looks for single edits surrounded on both sides by equalities which can be shifted sideways to align the edit to a word boundary. 814// E.g: The c<ins>at c</ins>ame. -> The <ins>cat </ins>came. 815func (dmp *DiffMatchPatch) DiffCleanupSemanticLossless(diffs []Diff) []Diff { 816 pointer := 1 817 818 // Intentionally ignore the first and last element (don't need checking). 819 for pointer < len(diffs)-1 { 820 if diffs[pointer-1].Type == DiffEqual && 821 diffs[pointer+1].Type == DiffEqual { 822 823 // This is a single edit surrounded by equalities. 824 equality1 := diffs[pointer-1].Text 825 edit := diffs[pointer].Text 826 equality2 := diffs[pointer+1].Text 827 828 // First, shift the edit as far left as possible. 829 commonOffset := dmp.DiffCommonSuffix(equality1, edit) 830 if commonOffset > 0 { 831 commonString := edit[len(edit)-commonOffset:] 832 equality1 = equality1[0 : len(equality1)-commonOffset] 833 edit = commonString + edit[:len(edit)-commonOffset] 834 equality2 = commonString + equality2 835 } 836 837 // Second, step character by character right, looking for the best fit. 838 bestEquality1 := equality1 839 bestEdit := edit 840 bestEquality2 := equality2 841 bestScore := diffCleanupSemanticScore(equality1, edit) + 842 diffCleanupSemanticScore(edit, equality2) 843 844 for len(edit) != 0 && len(equality2) != 0 { 845 _, sz := utf8.DecodeRuneInString(edit) 846 if len(equality2) < sz || edit[:sz] != equality2[:sz] { 847 break 848 } 849 equality1 += edit[:sz] 850 edit = edit[sz:] + equality2[:sz] 851 equality2 = equality2[sz:] 852 score := diffCleanupSemanticScore(equality1, edit) + 853 diffCleanupSemanticScore(edit, equality2) 854 // The >= encourages trailing rather than leading whitespace on edits. 855 if score >= bestScore { 856 bestScore = score 857 bestEquality1 = equality1 858 bestEdit = edit 859 bestEquality2 = equality2 860 } 861 } 862 863 if diffs[pointer-1].Text != bestEquality1 { 864 // We have an improvement, save it back to the diff. 865 if len(bestEquality1) != 0 { 866 diffs[pointer-1].Text = bestEquality1 867 } else { 868 diffs = splice(diffs, pointer-1, 1) 869 pointer-- 870 } 871 872 diffs[pointer].Text = bestEdit 873 if len(bestEquality2) != 0 { 874 diffs[pointer+1].Text = bestEquality2 875 } else { 876 diffs = append(diffs[:pointer+1], diffs[pointer+2:]...) 877 pointer-- 878 } 879 } 880 } 881 pointer++ 882 } 883 884 return diffs 885} 886 887// DiffCleanupEfficiency reduces the number of edits by eliminating operationally trivial equalities. 888func (dmp *DiffMatchPatch) DiffCleanupEfficiency(diffs []Diff) []Diff { 889 changes := false 890 // Stack of indices where equalities are found. 891 type equality struct { 892 data int 893 next *equality 894 } 895 var equalities *equality 896 // Always equal to equalities[equalitiesLength-1][1] 897 lastequality := "" 898 pointer := 0 // Index of current position. 899 // Is there an insertion operation before the last equality. 900 preIns := false 901 // Is there a deletion operation before the last equality. 902 preDel := false 903 // Is there an insertion operation after the last equality. 904 postIns := false 905 // Is there a deletion operation after the last equality. 906 postDel := false 907 for pointer < len(diffs) { 908 if diffs[pointer].Type == DiffEqual { // Equality found. 909 if len(diffs[pointer].Text) < dmp.DiffEditCost && 910 (postIns || postDel) { 911 // Candidate found. 912 equalities = &equality{ 913 data: pointer, 914 next: equalities, 915 } 916 preIns = postIns 917 preDel = postDel 918 lastequality = diffs[pointer].Text 919 } else { 920 // Not a candidate, and can never become one. 921 equalities = nil 922 lastequality = "" 923 } 924 postIns = false 925 postDel = false 926 } else { // An insertion or deletion. 927 if diffs[pointer].Type == DiffDelete { 928 postDel = true 929 } else { 930 postIns = true 931 } 932 933 // Five types to be split: 934 // <ins>A</ins><del>B</del>XY<ins>C</ins><del>D</del> 935 // <ins>A</ins>X<ins>C</ins><del>D</del> 936 // <ins>A</ins><del>B</del>X<ins>C</ins> 937 // <ins>A</del>X<ins>C</ins><del>D</del> 938 // <ins>A</ins><del>B</del>X<del>C</del> 939 var sumPres int 940 if preIns { 941 sumPres++ 942 } 943 if preDel { 944 sumPres++ 945 } 946 if postIns { 947 sumPres++ 948 } 949 if postDel { 950 sumPres++ 951 } 952 if len(lastequality) > 0 && 953 ((preIns && preDel && postIns && postDel) || 954 ((len(lastequality) < dmp.DiffEditCost/2) && sumPres == 3)) { 955 956 insPoint := equalities.data 957 958 // Duplicate record. 959 diffs = splice(diffs, insPoint, 0, Diff{DiffDelete, lastequality}) 960 961 // Change second copy to insert. 962 diffs[insPoint+1].Type = DiffInsert 963 // Throw away the equality we just deleted. 964 equalities = equalities.next 965 lastequality = "" 966 967 if preIns && preDel { 968 // No changes made which could affect previous entry, keep going. 969 postIns = true 970 postDel = true 971 equalities = nil 972 } else { 973 if equalities != nil { 974 equalities = equalities.next 975 } 976 if equalities != nil { 977 pointer = equalities.data 978 } else { 979 pointer = -1 980 } 981 postIns = false 982 postDel = false 983 } 984 changes = true 985 } 986 } 987 pointer++ 988 } 989 990 if changes { 991 diffs = dmp.DiffCleanupMerge(diffs) 992 } 993 994 return diffs 995} 996 997// DiffCleanupMerge reorders and merges like edit sections. Merge equalities. 998// Any edit section can move as long as it doesn't cross an equality. 999func (dmp *DiffMatchPatch) DiffCleanupMerge(diffs []Diff) []Diff { 1000 // Add a dummy entry at the end. 1001 diffs = append(diffs, Diff{DiffEqual, ""}) 1002 pointer := 0 1003 countDelete := 0 1004 countInsert := 0 1005 commonlength := 0 1006 textDelete := []rune(nil) 1007 textInsert := []rune(nil) 1008 1009 for pointer < len(diffs) { 1010 switch diffs[pointer].Type { 1011 case DiffInsert: 1012 countInsert++ 1013 textInsert = append(textInsert, []rune(diffs[pointer].Text)...) 1014 pointer++ 1015 break 1016 case DiffDelete: 1017 countDelete++ 1018 textDelete = append(textDelete, []rune(diffs[pointer].Text)...) 1019 pointer++ 1020 break 1021 case DiffEqual: 1022 // Upon reaching an equality, check for prior redundancies. 1023 if countDelete+countInsert > 1 { 1024 if countDelete != 0 && countInsert != 0 { 1025 // Factor out any common prefixies. 1026 commonlength = commonPrefixLength(textInsert, textDelete) 1027 if commonlength != 0 { 1028 x := pointer - countDelete - countInsert 1029 if x > 0 && diffs[x-1].Type == DiffEqual { 1030 diffs[x-1].Text += string(textInsert[:commonlength]) 1031 } else { 1032 diffs = append([]Diff{Diff{DiffEqual, string(textInsert[:commonlength])}}, diffs...) 1033 pointer++ 1034 } 1035 textInsert = textInsert[commonlength:] 1036 textDelete = textDelete[commonlength:] 1037 } 1038 // Factor out any common suffixies. 1039 commonlength = commonSuffixLength(textInsert, textDelete) 1040 if commonlength != 0 { 1041 insertIndex := len(textInsert) - commonlength 1042 deleteIndex := len(textDelete) - commonlength 1043 diffs[pointer].Text = string(textInsert[insertIndex:]) + diffs[pointer].Text 1044 textInsert = textInsert[:insertIndex] 1045 textDelete = textDelete[:deleteIndex] 1046 } 1047 } 1048 // Delete the offending records and add the merged ones. 1049 if countDelete == 0 { 1050 diffs = splice(diffs, pointer-countInsert, 1051 countDelete+countInsert, 1052 Diff{DiffInsert, string(textInsert)}) 1053 } else if countInsert == 0 { 1054 diffs = splice(diffs, pointer-countDelete, 1055 countDelete+countInsert, 1056 Diff{DiffDelete, string(textDelete)}) 1057 } else { 1058 diffs = splice(diffs, pointer-countDelete-countInsert, 1059 countDelete+countInsert, 1060 Diff{DiffDelete, string(textDelete)}, 1061 Diff{DiffInsert, string(textInsert)}) 1062 } 1063 1064 pointer = pointer - countDelete - countInsert + 1 1065 if countDelete != 0 { 1066 pointer++ 1067 } 1068 if countInsert != 0 { 1069 pointer++ 1070 } 1071 } else if pointer != 0 && diffs[pointer-1].Type == DiffEqual { 1072 // Merge this equality with the previous one. 1073 diffs[pointer-1].Text += diffs[pointer].Text 1074 diffs = append(diffs[:pointer], diffs[pointer+1:]...) 1075 } else { 1076 pointer++ 1077 } 1078 countInsert = 0 1079 countDelete = 0 1080 textDelete = nil 1081 textInsert = nil 1082 break 1083 } 1084 } 1085 1086 if len(diffs[len(diffs)-1].Text) == 0 { 1087 diffs = diffs[0 : len(diffs)-1] // Remove the dummy entry at the end. 1088 } 1089 1090 // Second pass: look for single edits surrounded on both sides by equalities which can be shifted sideways to eliminate an equality. E.g: A<ins>BA</ins>C -> <ins>AB</ins>AC 1091 changes := false 1092 pointer = 1 1093 // Intentionally ignore the first and last element (don't need checking). 1094 for pointer < (len(diffs) - 1) { 1095 if diffs[pointer-1].Type == DiffEqual && 1096 diffs[pointer+1].Type == DiffEqual { 1097 // This is a single edit surrounded by equalities. 1098 if strings.HasSuffix(diffs[pointer].Text, diffs[pointer-1].Text) { 1099 // Shift the edit over the previous equality. 1100 diffs[pointer].Text = diffs[pointer-1].Text + 1101 diffs[pointer].Text[:len(diffs[pointer].Text)-len(diffs[pointer-1].Text)] 1102 diffs[pointer+1].Text = diffs[pointer-1].Text + diffs[pointer+1].Text 1103 diffs = splice(diffs, pointer-1, 1) 1104 changes = true 1105 } else if strings.HasPrefix(diffs[pointer].Text, diffs[pointer+1].Text) { 1106 // Shift the edit over the next equality. 1107 diffs[pointer-1].Text += diffs[pointer+1].Text 1108 diffs[pointer].Text = 1109 diffs[pointer].Text[len(diffs[pointer+1].Text):] + diffs[pointer+1].Text 1110 diffs = splice(diffs, pointer+1, 1) 1111 changes = true 1112 } 1113 } 1114 pointer++ 1115 } 1116 1117 // If shifts were made, the diff needs reordering and another shift sweep. 1118 if changes { 1119 diffs = dmp.DiffCleanupMerge(diffs) 1120 } 1121 1122 return diffs 1123} 1124 1125// DiffXIndex returns the equivalent location in s2. 1126func (dmp *DiffMatchPatch) DiffXIndex(diffs []Diff, loc int) int { 1127 chars1 := 0 1128 chars2 := 0 1129 lastChars1 := 0 1130 lastChars2 := 0 1131 lastDiff := Diff{} 1132 for i := 0; i < len(diffs); i++ { 1133 aDiff := diffs[i] 1134 if aDiff.Type != DiffInsert { 1135 // Equality or deletion. 1136 chars1 += len(aDiff.Text) 1137 } 1138 if aDiff.Type != DiffDelete { 1139 // Equality or insertion. 1140 chars2 += len(aDiff.Text) 1141 } 1142 if chars1 > loc { 1143 // Overshot the location. 1144 lastDiff = aDiff 1145 break 1146 } 1147 lastChars1 = chars1 1148 lastChars2 = chars2 1149 } 1150 if lastDiff.Type == DiffDelete { 1151 // The location was deleted. 1152 return lastChars2 1153 } 1154 // Add the remaining character length. 1155 return lastChars2 + (loc - lastChars1) 1156} 1157 1158// DiffPrettyHtml converts a []Diff into a pretty HTML report. 1159// It is intended as an example from which to write one's own display functions. 1160func (dmp *DiffMatchPatch) DiffPrettyHtml(diffs []Diff) string { 1161 var buff bytes.Buffer 1162 for _, diff := range diffs { 1163 text := strings.Replace(html.EscapeString(diff.Text), "\n", "¶<br>", -1) 1164 switch diff.Type { 1165 case DiffInsert: 1166 _, _ = buff.WriteString("<ins style=\"background:#e6ffe6;\">") 1167 _, _ = buff.WriteString(text) 1168 _, _ = buff.WriteString("</ins>") 1169 case DiffDelete: 1170 _, _ = buff.WriteString("<del style=\"background:#ffe6e6;\">") 1171 _, _ = buff.WriteString(text) 1172 _, _ = buff.WriteString("</del>") 1173 case DiffEqual: 1174 _, _ = buff.WriteString("<span>") 1175 _, _ = buff.WriteString(text) 1176 _, _ = buff.WriteString("</span>") 1177 } 1178 } 1179 return buff.String() 1180} 1181 1182// DiffPrettyText converts a []Diff into a colored text report. 1183func (dmp *DiffMatchPatch) DiffPrettyText(diffs []Diff) string { 1184 var buff bytes.Buffer 1185 for _, diff := range diffs { 1186 text := diff.Text 1187 1188 switch diff.Type { 1189 case DiffInsert: 1190 _, _ = buff.WriteString("\x1b[32m") 1191 _, _ = buff.WriteString(text) 1192 _, _ = buff.WriteString("\x1b[0m") 1193 case DiffDelete: 1194 _, _ = buff.WriteString("\x1b[31m") 1195 _, _ = buff.WriteString(text) 1196 _, _ = buff.WriteString("\x1b[0m") 1197 case DiffEqual: 1198 _, _ = buff.WriteString(text) 1199 } 1200 } 1201 1202 return buff.String() 1203} 1204 1205// DiffText1 computes and returns the source text (all equalities and deletions). 1206func (dmp *DiffMatchPatch) DiffText1(diffs []Diff) string { 1207 //StringBuilder text = new StringBuilder() 1208 var text bytes.Buffer 1209 1210 for _, aDiff := range diffs { 1211 if aDiff.Type != DiffInsert { 1212 _, _ = text.WriteString(aDiff.Text) 1213 } 1214 } 1215 return text.String() 1216} 1217 1218// DiffText2 computes and returns the destination text (all equalities and insertions). 1219func (dmp *DiffMatchPatch) DiffText2(diffs []Diff) string { 1220 var text bytes.Buffer 1221 1222 for _, aDiff := range diffs { 1223 if aDiff.Type != DiffDelete { 1224 _, _ = text.WriteString(aDiff.Text) 1225 } 1226 } 1227 return text.String() 1228} 1229 1230// DiffLevenshtein computes the Levenshtein distance that is the number of inserted, deleted or substituted characters. 1231func (dmp *DiffMatchPatch) DiffLevenshtein(diffs []Diff) int { 1232 levenshtein := 0 1233 insertions := 0 1234 deletions := 0 1235 1236 for _, aDiff := range diffs { 1237 switch aDiff.Type { 1238 case DiffInsert: 1239 insertions += utf8.RuneCountInString(aDiff.Text) 1240 case DiffDelete: 1241 deletions += utf8.RuneCountInString(aDiff.Text) 1242 case DiffEqual: 1243 // A deletion and an insertion is one substitution. 1244 levenshtein += max(insertions, deletions) 1245 insertions = 0 1246 deletions = 0 1247 } 1248 } 1249 1250 levenshtein += max(insertions, deletions) 1251 return levenshtein 1252} 1253 1254// DiffToDelta crushes the diff into an encoded string which describes the operations required to transform text1 into text2. 1255// E.g. =3\t-2\t+ing -> Keep 3 chars, delete 2 chars, insert 'ing'. Operations are tab-separated. Inserted text is escaped using %xx notation. 1256func (dmp *DiffMatchPatch) DiffToDelta(diffs []Diff) string { 1257 var text bytes.Buffer 1258 for _, aDiff := range diffs { 1259 switch aDiff.Type { 1260 case DiffInsert: 1261 _, _ = text.WriteString("+") 1262 _, _ = text.WriteString(strings.Replace(url.QueryEscape(aDiff.Text), "+", " ", -1)) 1263 _, _ = text.WriteString("\t") 1264 break 1265 case DiffDelete: 1266 _, _ = text.WriteString("-") 1267 _, _ = text.WriteString(strconv.Itoa(utf8.RuneCountInString(aDiff.Text))) 1268 _, _ = text.WriteString("\t") 1269 break 1270 case DiffEqual: 1271 _, _ = text.WriteString("=") 1272 _, _ = text.WriteString(strconv.Itoa(utf8.RuneCountInString(aDiff.Text))) 1273 _, _ = text.WriteString("\t") 1274 break 1275 } 1276 } 1277 delta := text.String() 1278 if len(delta) != 0 { 1279 // Strip off trailing tab character. 1280 delta = delta[0 : utf8.RuneCountInString(delta)-1] 1281 delta = unescaper.Replace(delta) 1282 } 1283 return delta 1284} 1285 1286// DiffFromDelta given the original text1, and an encoded string which describes the operations required to transform text1 into text2, comAdde the full diff. 1287func (dmp *DiffMatchPatch) DiffFromDelta(text1 string, delta string) (diffs []Diff, err error) { 1288 i := 0 1289 runes := []rune(text1) 1290 1291 for _, token := range strings.Split(delta, "\t") { 1292 if len(token) == 0 { 1293 // Blank tokens are ok (from a trailing \t). 1294 continue 1295 } 1296 1297 // Each token begins with a one character parameter which specifies the operation of this token (delete, insert, equality). 1298 param := token[1:] 1299 1300 switch op := token[0]; op { 1301 case '+': 1302 // Decode would Diff all "+" to " " 1303 param = strings.Replace(param, "+", "%2b", -1) 1304 param, err = url.QueryUnescape(param) 1305 if err != nil { 1306 return nil, err 1307 } 1308 if !utf8.ValidString(param) { 1309 return nil, fmt.Errorf("invalid UTF-8 token: %q", param) 1310 } 1311 1312 diffs = append(diffs, Diff{DiffInsert, param}) 1313 case '=', '-': 1314 n, err := strconv.ParseInt(param, 10, 0) 1315 if err != nil { 1316 return nil, err 1317 } else if n < 0 { 1318 return nil, errors.New("Negative number in DiffFromDelta: " + param) 1319 } 1320 1321 i += int(n) 1322 // Break out if we are out of bounds, go1.6 can't handle this very well 1323 if i > len(runes) { 1324 break 1325 } 1326 // Remember that string slicing is by byte - we want by rune here. 1327 text := string(runes[i-int(n) : i]) 1328 1329 if op == '=' { 1330 diffs = append(diffs, Diff{DiffEqual, text}) 1331 } else { 1332 diffs = append(diffs, Diff{DiffDelete, text}) 1333 } 1334 default: 1335 // Anything else is an error. 1336 return nil, errors.New("Invalid diff operation in DiffFromDelta: " + string(token[0])) 1337 } 1338 } 1339 1340 if i != len(runes) { 1341 return nil, fmt.Errorf("Delta length (%v) is different from source text length (%v)", i, len(text1)) 1342 } 1343 1344 return diffs, nil 1345} 1346