1#!/usr/local/bin/python3.8
2
3from __future__ import division
4
5"""Diff Match and Patch
6Copyright 2018 The diff-match-patch Authors.
7https://github.com/google/diff-match-patch
8
9Licensed under the Apache License, Version 2.0 (the "License");
10you may not use this file except in compliance with the License.
11You may obtain a copy of the License at
12
13  http://www.apache.org/licenses/LICENSE-2.0
14
15Unless required by applicable law or agreed to in writing, software
16distributed under the License is distributed on an "AS IS" BASIS,
17WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18See the License for the specific language governing permissions and
19limitations under the License.
20"""
21
22"""Functions for diff, match and patch.
23
24Computes the difference between two texts to create a patch.
25Applies the patch onto another text, allowing for errors.
26"""
27
28__author__ = "fraser@google.com (Neil Fraser)"
29
30import re
31import sys
32import time
33import urllib
34
35
36class diff_match_patch:
37    """Class containing the diff, match and patch methods.
38
39  Also contains the behaviour settings.
40  """
41
42    def __init__(self):
43        """Inits a diff_match_patch object with default settings.
44    Redefine these in your program to override the defaults.
45    """
46
47        # Number of seconds to map a diff before giving up (0 for infinity).
48        self.Diff_Timeout = 1.0
49        # Cost of an empty edit operation in terms of edit characters.
50        self.Diff_EditCost = 4
51        # At what point is no match declared (0.0 = perfection, 1.0 = very loose).
52        self.Match_Threshold = 0.5
53        # How far to search for a match (0 = exact location, 1000+ = broad match).
54        # A match this many characters away from the expected location will add
55        # 1.0 to the score (0.0 is a perfect match).
56        self.Match_Distance = 1000
57        # When deleting a large block of text (over ~64 characters), how close do
58        # the contents have to be to match the expected contents. (0.0 = perfection,
59        # 1.0 = very loose).  Note that Match_Threshold controls how closely the
60        # end points of a delete need to match.
61        self.Patch_DeleteThreshold = 0.5
62        # Chunk size for context length.
63        self.Patch_Margin = 4
64
65        # The number of bits in an int.
66        # Python has no maximum, thus to disable patch splitting set to 0.
67        # However to avoid long patches in certain pathological cases, use 32.
68        # Multiple short patches (using native ints) are much faster than long ones.
69        self.Match_MaxBits = 32
70
    #  DIFF FUNCTIONS

    # The data structure representing a diff is an array of tuples:
    # [(DIFF_DELETE, "Hello"), (DIFF_INSERT, "Goodbye"), (DIFF_EQUAL, " world.")]
    # which means: delete "Hello", add "Goodbye" and keep " world."
    DIFF_DELETE = -1  # Operation code: text present only in text1.
    DIFF_INSERT = 1  # Operation code: text present only in text2.
    DIFF_EQUAL = 0  # Operation code: text common to both inputs.
79
80    def diff_main(self, text1, text2, checklines=True, deadline=None):
81        """Find the differences between two texts.  Simplifies the problem by
82      stripping any common prefix or suffix off the texts before diffing.
83
84    Args:
85      text1: Old string to be diffed.
86      text2: New string to be diffed.
87      checklines: Optional speedup flag.  If present and false, then don't run
88        a line-level diff first to identify the changed areas.
89        Defaults to true, which does a faster, slightly less optimal diff.
90      deadline: Optional time when the diff should be complete by.  Used
91        internally for recursive calls.  Users should set DiffTimeout instead.
92
93    Returns:
94      Array of changes.
95    """
96        # Set a deadline by which time the diff must be complete.
97        if deadline == None:
98            # Unlike in most languages, Python counts time in seconds.
99            if self.Diff_Timeout <= 0:
100                deadline = sys.maxint
101            else:
102                deadline = time.time() + self.Diff_Timeout
103
104        # Check for null inputs.
105        if text1 == None or text2 == None:
106            raise ValueError("Null inputs. (diff_main)")
107
108        # Check for equality (speedup).
109        if text1 == text2:
110            if text1:
111                return [(self.DIFF_EQUAL, text1)]
112            return []
113
114        # Trim off common prefix (speedup).
115        commonlength = self.diff_commonPrefix(text1, text2)
116        commonprefix = text1[:commonlength]
117        text1 = text1[commonlength:]
118        text2 = text2[commonlength:]
119
120        # Trim off common suffix (speedup).
121        commonlength = self.diff_commonSuffix(text1, text2)
122        if commonlength == 0:
123            commonsuffix = ""
124        else:
125            commonsuffix = text1[-commonlength:]
126            text1 = text1[:-commonlength]
127            text2 = text2[:-commonlength]
128
129        # Compute the diff on the middle block.
130        diffs = self.diff_compute(text1, text2, checklines, deadline)
131
132        # Restore the prefix and suffix.
133        if commonprefix:
134            diffs[:0] = [(self.DIFF_EQUAL, commonprefix)]
135        if commonsuffix:
136            diffs.append((self.DIFF_EQUAL, commonsuffix))
137        self.diff_cleanupMerge(diffs)
138        return diffs
139
140    def diff_compute(self, text1, text2, checklines, deadline):
141        """Find the differences between two texts.  Assumes that the texts do not
142      have any common prefix or suffix.
143
144    Args:
145      text1: Old string to be diffed.
146      text2: New string to be diffed.
147      checklines: Speedup flag.  If false, then don't run a line-level diff
148        first to identify the changed areas.
149        If true, then run a faster, slightly less optimal diff.
150      deadline: Time when the diff should be complete by.
151
152    Returns:
153      Array of changes.
154    """
155        if not text1:
156            # Just add some text (speedup).
157            return [(self.DIFF_INSERT, text2)]
158
159        if not text2:
160            # Just delete some text (speedup).
161            return [(self.DIFF_DELETE, text1)]
162
163        if len(text1) > len(text2):
164            (longtext, shorttext) = (text1, text2)
165        else:
166            (shorttext, longtext) = (text1, text2)
167        i = longtext.find(shorttext)
168        if i != -1:
169            # Shorter text is inside the longer text (speedup).
170            diffs = [
171                (self.DIFF_INSERT, longtext[:i]),
172                (self.DIFF_EQUAL, shorttext),
173                (self.DIFF_INSERT, longtext[i + len(shorttext) :]),
174            ]
175            # Swap insertions for deletions if diff is reversed.
176            if len(text1) > len(text2):
177                diffs[0] = (self.DIFF_DELETE, diffs[0][1])
178                diffs[2] = (self.DIFF_DELETE, diffs[2][1])
179            return diffs
180
181        if len(shorttext) == 1:
182            # Single character string.
183            # After the previous speedup, the character can't be an equality.
184            return [(self.DIFF_DELETE, text1), (self.DIFF_INSERT, text2)]
185
186        # Check to see if the problem can be split in two.
187        hm = self.diff_halfMatch(text1, text2)
188        if hm:
189            # A half-match was found, sort out the return data.
190            (text1_a, text1_b, text2_a, text2_b, mid_common) = hm
191            # Send both pairs off for separate processing.
192            diffs_a = self.diff_main(text1_a, text2_a, checklines, deadline)
193            diffs_b = self.diff_main(text1_b, text2_b, checklines, deadline)
194            # Merge the results.
195            return diffs_a + [(self.DIFF_EQUAL, mid_common)] + diffs_b
196
197        if checklines and len(text1) > 100 and len(text2) > 100:
198            return self.diff_lineMode(text1, text2, deadline)
199
200        return self.diff_bisect(text1, text2, deadline)
201
202    def diff_lineMode(self, text1, text2, deadline):
203        """Do a quick line-level diff on both strings, then rediff the parts for
204      greater accuracy.
205      This speedup can produce non-minimal diffs.
206
207    Args:
208      text1: Old string to be diffed.
209      text2: New string to be diffed.
210      deadline: Time when the diff should be complete by.
211
212    Returns:
213      Array of changes.
214    """
215
216        # Scan the text on a line-by-line basis first.
217        (text1, text2, linearray) = self.diff_linesToChars(text1, text2)
218
219        diffs = self.diff_main(text1, text2, False, deadline)
220
221        # Convert the diff back to original text.
222        self.diff_charsToLines(diffs, linearray)
223        # Eliminate freak matches (e.g. blank lines)
224        self.diff_cleanupSemantic(diffs)
225
226        # Rediff any replacement blocks, this time character-by-character.
227        # Add a dummy entry at the end.
228        diffs.append((self.DIFF_EQUAL, ""))
229        pointer = 0
230        count_delete = 0
231        count_insert = 0
232        text_delete = ""
233        text_insert = ""
234        while pointer < len(diffs):
235            if diffs[pointer][0] == self.DIFF_INSERT:
236                count_insert += 1
237                text_insert += diffs[pointer][1]
238            elif diffs[pointer][0] == self.DIFF_DELETE:
239                count_delete += 1
240                text_delete += diffs[pointer][1]
241            elif diffs[pointer][0] == self.DIFF_EQUAL:
242                # Upon reaching an equality, check for prior redundancies.
243                if count_delete >= 1 and count_insert >= 1:
244                    # Delete the offending records and add the merged ones.
245                    subDiff = self.diff_main(text_delete, text_insert, False, deadline)
246                    diffs[pointer - count_delete - count_insert : pointer] = subDiff
247                    pointer = pointer - count_delete - count_insert + len(subDiff)
248                count_insert = 0
249                count_delete = 0
250                text_delete = ""
251                text_insert = ""
252
253            pointer += 1
254
255        diffs.pop()  # Remove the dummy entry at the end.
256
257        return diffs
258
259    def diff_bisect(self, text1, text2, deadline):
260        """Find the 'middle snake' of a diff, split the problem in two
261      and return the recursively constructed diff.
262      See Myers 1986 paper: An O(ND) Difference Algorithm and Its Variations.
263
264    Args:
265      text1: Old string to be diffed.
266      text2: New string to be diffed.
267      deadline: Time at which to bail if not yet complete.
268
269    Returns:
270      Array of diff tuples.
271    """
272
273        # Cache the text lengths to prevent multiple calls.
274        text1_length = len(text1)
275        text2_length = len(text2)
276        max_d = (text1_length + text2_length + 1) // 2
277        v_offset = max_d
278        v_length = 2 * max_d
279        v1 = [-1] * v_length
280        v1[v_offset + 1] = 0
281        v2 = v1[:]
282        delta = text1_length - text2_length
283        # If the total number of characters is odd, then the front path will
284        # collide with the reverse path.
285        front = delta % 2 != 0
286        # Offsets for start and end of k loop.
287        # Prevents mapping of space beyond the grid.
288        k1start = 0
289        k1end = 0
290        k2start = 0
291        k2end = 0
292        for d in xrange(max_d):
293            # Bail out if deadline is reached.
294            if time.time() > deadline:
295                break
296
297            # Walk the front path one step.
298            for k1 in xrange(-d + k1start, d + 1 - k1end, 2):
299                k1_offset = v_offset + k1
300                if k1 == -d or (k1 != d and v1[k1_offset - 1] < v1[k1_offset + 1]):
301                    x1 = v1[k1_offset + 1]
302                else:
303                    x1 = v1[k1_offset - 1] + 1
304                y1 = x1 - k1
305                while (
306                    x1 < text1_length and y1 < text2_length and text1[x1] == text2[y1]
307                ):
308                    x1 += 1
309                    y1 += 1
310                v1[k1_offset] = x1
311                if x1 > text1_length:
312                    # Ran off the right of the graph.
313                    k1end += 2
314                elif y1 > text2_length:
315                    # Ran off the bottom of the graph.
316                    k1start += 2
317                elif front:
318                    k2_offset = v_offset + delta - k1
319                    if k2_offset >= 0 and k2_offset < v_length and v2[k2_offset] != -1:
320                        # Mirror x2 onto top-left coordinate system.
321                        x2 = text1_length - v2[k2_offset]
322                        if x1 >= x2:
323                            # Overlap detected.
324                            return self.diff_bisectSplit(text1, text2, x1, y1, deadline)
325
326            # Walk the reverse path one step.
327            for k2 in xrange(-d + k2start, d + 1 - k2end, 2):
328                k2_offset = v_offset + k2
329                if k2 == -d or (k2 != d and v2[k2_offset - 1] < v2[k2_offset + 1]):
330                    x2 = v2[k2_offset + 1]
331                else:
332                    x2 = v2[k2_offset - 1] + 1
333                y2 = x2 - k2
334                while (
335                    x2 < text1_length
336                    and y2 < text2_length
337                    and text1[-x2 - 1] == text2[-y2 - 1]
338                ):
339                    x2 += 1
340                    y2 += 1
341                v2[k2_offset] = x2
342                if x2 > text1_length:
343                    # Ran off the left of the graph.
344                    k2end += 2
345                elif y2 > text2_length:
346                    # Ran off the top of the graph.
347                    k2start += 2
348                elif not front:
349                    k1_offset = v_offset + delta - k2
350                    if k1_offset >= 0 and k1_offset < v_length and v1[k1_offset] != -1:
351                        x1 = v1[k1_offset]
352                        y1 = v_offset + x1 - k1_offset
353                        # Mirror x2 onto top-left coordinate system.
354                        x2 = text1_length - x2
355                        if x1 >= x2:
356                            # Overlap detected.
357                            return self.diff_bisectSplit(text1, text2, x1, y1, deadline)
358
359        # Diff took too long and hit the deadline or
360        # number of diffs equals number of characters, no commonality at all.
361        return [(self.DIFF_DELETE, text1), (self.DIFF_INSERT, text2)]
362
363    def diff_bisectSplit(self, text1, text2, x, y, deadline):
364        """Given the location of the 'middle snake', split the diff in two parts
365    and recurse.
366
367    Args:
368      text1: Old string to be diffed.
369      text2: New string to be diffed.
370      x: Index of split point in text1.
371      y: Index of split point in text2.
372      deadline: Time at which to bail if not yet complete.
373
374    Returns:
375      Array of diff tuples.
376    """
377        text1a = text1[:x]
378        text2a = text2[:y]
379        text1b = text1[x:]
380        text2b = text2[y:]
381
382        # Compute both diffs serially.
383        diffs = self.diff_main(text1a, text2a, False, deadline)
384        diffsb = self.diff_main(text1b, text2b, False, deadline)
385
386        return diffs + diffsb
387
388    def diff_linesToChars(self, text1, text2):
389        """Split two texts into an array of strings.  Reduce the texts to a string
390    of hashes where each Unicode character represents one line.
391
392    Args:
393      text1: First string.
394      text2: Second string.
395
396    Returns:
397      Three element tuple, containing the encoded text1, the encoded text2 and
398      the array of unique strings.  The zeroth element of the array of unique
399      strings is intentionally blank.
400    """
401        lineArray = []  # e.g. lineArray[4] == "Hello\n"
402        lineHash = {}  # e.g. lineHash["Hello\n"] == 4
403
404        # "\x00" is a valid character, but various debuggers don't like it.
405        # So we'll insert a junk entry to avoid generating a null character.
406        lineArray.append("")
407
408        def diff_linesToCharsMunge(text):
409            """Split a text into an array of strings.  Reduce the texts to a string
410      of hashes where each Unicode character represents one line.
411      Modifies linearray and linehash through being a closure.
412
413      Args:
414        text: String to encode.
415
416      Returns:
417        Encoded string.
418      """
419            chars = []
420            # Walk the text, pulling out a substring for each line.
421            # text.split('\n') would would temporarily double our memory footprint.
422            # Modifying text would create many large strings to garbage collect.
423            lineStart = 0
424            lineEnd = -1
425            while lineEnd < len(text) - 1:
426                lineEnd = text.find("\n", lineStart)
427                if lineEnd == -1:
428                    lineEnd = len(text) - 1
429                line = text[lineStart : lineEnd + 1]
430
431                if line in lineHash:
432                    chars.append(unichr(lineHash[line]))
433                else:
434                    if len(lineArray) == maxLines:
435                        # Bail out at 65535 because unichr(65536) throws.
436                        line = text[lineStart:]
437                        lineEnd = len(text)
438                    lineArray.append(line)
439                    lineHash[line] = len(lineArray) - 1
440                    chars.append(unichr(len(lineArray) - 1))
441                lineStart = lineEnd + 1
442            return "".join(chars)
443
444        # Allocate 2/3rds of the space for text1, the rest for text2.
445        maxLines = 40000
446        chars1 = diff_linesToCharsMunge(text1)
447        maxLines = 65535
448        chars2 = diff_linesToCharsMunge(text2)
449        return (chars1, chars2, lineArray)
450
451    def diff_charsToLines(self, diffs, lineArray):
452        """Rehydrate the text in a diff from a string of line hashes to real lines
453    of text.
454
455    Args:
456      diffs: Array of diff tuples.
457      lineArray: Array of unique strings.
458    """
459        for i in xrange(len(diffs)):
460            text = []
461            for char in diffs[i][1]:
462                text.append(lineArray[ord(char)])
463            diffs[i] = (diffs[i][0], "".join(text))
464
465    def diff_commonPrefix(self, text1, text2):
466        """Determine the common prefix of two strings.
467
468    Args:
469      text1: First string.
470      text2: Second string.
471
472    Returns:
473      The number of characters common to the start of each string.
474    """
475        # Quick check for common null cases.
476        if not text1 or not text2 or text1[0] != text2[0]:
477            return 0
478        # Binary search.
479        # Performance analysis: https://neil.fraser.name/news/2007/10/09/
480        pointermin = 0
481        pointermax = min(len(text1), len(text2))
482        pointermid = pointermax
483        pointerstart = 0
484        while pointermin < pointermid:
485            if text1[pointerstart:pointermid] == text2[pointerstart:pointermid]:
486                pointermin = pointermid
487                pointerstart = pointermin
488            else:
489                pointermax = pointermid
490            pointermid = (pointermax - pointermin) // 2 + pointermin
491        return pointermid
492
493    def diff_commonSuffix(self, text1, text2):
494        """Determine the common suffix of two strings.
495
496    Args:
497      text1: First string.
498      text2: Second string.
499
500    Returns:
501      The number of characters common to the end of each string.
502    """
503        # Quick check for common null cases.
504        if not text1 or not text2 or text1[-1] != text2[-1]:
505            return 0
506        # Binary search.
507        # Performance analysis: https://neil.fraser.name/news/2007/10/09/
508        pointermin = 0
509        pointermax = min(len(text1), len(text2))
510        pointermid = pointermax
511        pointerend = 0
512        while pointermin < pointermid:
513            if (
514                text1[-pointermid : len(text1) - pointerend]
515                == text2[-pointermid : len(text2) - pointerend]
516            ):
517                pointermin = pointermid
518                pointerend = pointermin
519            else:
520                pointermax = pointermid
521            pointermid = (pointermax - pointermin) // 2 + pointermin
522        return pointermid
523
524    def diff_commonOverlap(self, text1, text2):
525        """Determine if the suffix of one string is the prefix of another.
526
527    Args:
528      text1 First string.
529      text2 Second string.
530
531    Returns:
532      The number of characters common to the end of the first
533      string and the start of the second string.
534    """
535        # Cache the text lengths to prevent multiple calls.
536        text1_length = len(text1)
537        text2_length = len(text2)
538        # Eliminate the null case.
539        if text1_length == 0 or text2_length == 0:
540            return 0
541        # Truncate the longer string.
542        if text1_length > text2_length:
543            text1 = text1[-text2_length:]
544        elif text1_length < text2_length:
545            text2 = text2[:text1_length]
546        text_length = min(text1_length, text2_length)
547        # Quick check for the worst case.
548        if text1 == text2:
549            return text_length
550
551        # Start by looking for a single character match
552        # and increase length until no match is found.
553        # Performance analysis: https://neil.fraser.name/news/2010/11/04/
554        best = 0
555        length = 1
556        while True:
557            pattern = text1[-length:]
558            found = text2.find(pattern)
559            if found == -1:
560                return best
561            length += found
562            if found == 0 or text1[-length:] == text2[:length]:
563                best = length
564                length += 1
565
566    def diff_halfMatch(self, text1, text2):
567        """Do the two texts share a substring which is at least half the length of
568    the longer text?
569    This speedup can produce non-minimal diffs.
570
571    Args:
572      text1: First string.
573      text2: Second string.
574
575    Returns:
576      Five element Array, containing the prefix of text1, the suffix of text1,
577      the prefix of text2, the suffix of text2 and the common middle.  Or None
578      if there was no match.
579    """
580        if self.Diff_Timeout <= 0:
581            # Don't risk returning a non-optimal diff if we have unlimited time.
582            return None
583        if len(text1) > len(text2):
584            (longtext, shorttext) = (text1, text2)
585        else:
586            (shorttext, longtext) = (text1, text2)
587        if len(longtext) < 4 or len(shorttext) * 2 < len(longtext):
588            return None  # Pointless.
589
590        def diff_halfMatchI(longtext, shorttext, i):
591            """Does a substring of shorttext exist within longtext such that the
592      substring is at least half the length of longtext?
593      Closure, but does not reference any external variables.
594
595      Args:
596        longtext: Longer string.
597        shorttext: Shorter string.
598        i: Start index of quarter length substring within longtext.
599
600      Returns:
601        Five element Array, containing the prefix of longtext, the suffix of
602        longtext, the prefix of shorttext, the suffix of shorttext and the
603        common middle.  Or None if there was no match.
604      """
605            seed = longtext[i : i + len(longtext) // 4]
606            best_common = ""
607            j = shorttext.find(seed)
608            while j != -1:
609                prefixLength = self.diff_commonPrefix(longtext[i:], shorttext[j:])
610                suffixLength = self.diff_commonSuffix(longtext[:i], shorttext[:j])
611                if len(best_common) < suffixLength + prefixLength:
612                    best_common = (
613                        shorttext[j - suffixLength : j]
614                        + shorttext[j : j + prefixLength]
615                    )
616                    best_longtext_a = longtext[: i - suffixLength]
617                    best_longtext_b = longtext[i + prefixLength :]
618                    best_shorttext_a = shorttext[: j - suffixLength]
619                    best_shorttext_b = shorttext[j + prefixLength :]
620                j = shorttext.find(seed, j + 1)
621
622            if len(best_common) * 2 >= len(longtext):
623                return (
624                    best_longtext_a,
625                    best_longtext_b,
626                    best_shorttext_a,
627                    best_shorttext_b,
628                    best_common,
629                )
630            else:
631                return None
632
633        # First check if the second quarter is the seed for a half-match.
634        hm1 = diff_halfMatchI(longtext, shorttext, (len(longtext) + 3) // 4)
635        # Check again based on the third quarter.
636        hm2 = diff_halfMatchI(longtext, shorttext, (len(longtext) + 1) // 2)
637        if not hm1 and not hm2:
638            return None
639        elif not hm2:
640            hm = hm1
641        elif not hm1:
642            hm = hm2
643        else:
644            # Both matched.  Select the longest.
645            if len(hm1[4]) > len(hm2[4]):
646                hm = hm1
647            else:
648                hm = hm2
649
650        # A half-match was found, sort out the return data.
651        if len(text1) > len(text2):
652            (text1_a, text1_b, text2_a, text2_b, mid_common) = hm
653        else:
654            (text2_a, text2_b, text1_a, text1_b, mid_common) = hm
655        return (text1_a, text1_b, text2_a, text2_b, mid_common)
656
    def diff_cleanupSemantic(self, diffs):
        """Reduce the number of edits by eliminating semantically trivial
    equalities.

    An equality no longer than the edits on both sides of it is folded back
    into a delete + insert pair.  A second pass then factors overlaps between
    adjacent deletions and insertions out into equalities.

    Args:
      diffs: Array of diff tuples.  Modified in place.
    """
        changes = False
        equalities = []  # Stack of indices where equalities are found.
        lastEquality = None  # Always equal to diffs[equalities[-1]][1]
        pointer = 0  # Index of current position.
        # Number of chars that changed prior to the equality.
        length_insertions1, length_deletions1 = 0, 0
        # Number of chars that changed after the equality.
        length_insertions2, length_deletions2 = 0, 0
        while pointer < len(diffs):
            if diffs[pointer][0] == self.DIFF_EQUAL:  # Equality found.
                equalities.append(pointer)
                # The "after" counters become the "before" counters of this
                # new equality; the "after" counters restart at zero.
                length_insertions1, length_insertions2 = length_insertions2, 0
                length_deletions1, length_deletions2 = length_deletions2, 0
                lastEquality = diffs[pointer][1]
            else:  # An insertion or deletion.
                if diffs[pointer][0] == self.DIFF_INSERT:
                    length_insertions2 += len(diffs[pointer][1])
                else:
                    length_deletions2 += len(diffs[pointer][1])
                # Eliminate an equality that is smaller or equal to the edits on both
                # sides of it.
                if (
                    lastEquality
                    and (
                        len(lastEquality) <= max(length_insertions1, length_deletions1)
                    )
                    and (
                        len(lastEquality) <= max(length_insertions2, length_deletions2)
                    )
                ):
                    # Duplicate record.
                    diffs.insert(equalities[-1], (self.DIFF_DELETE, lastEquality))
                    # Change second copy to insert.
                    diffs[equalities[-1] + 1] = (
                        self.DIFF_INSERT,
                        diffs[equalities[-1] + 1][1],
                    )
                    # Throw away the equality we just deleted.
                    equalities.pop()
                    # Throw away the previous equality (it needs to be reevaluated).
                    if len(equalities):
                        equalities.pop()
                    if len(equalities):
                        # Rewind to the prior equality and rescan from there.
                        pointer = equalities[-1]
                    else:
                        # No equalities left: restart from the beginning
                        # (the pointer += 1 below lands on index 0).
                        pointer = -1
                    # Reset the counters.
                    length_insertions1, length_deletions1 = 0, 0
                    length_insertions2, length_deletions2 = 0, 0
                    lastEquality = None
                    changes = True
            pointer += 1

        # Normalize the diff.
        if changes:
            self.diff_cleanupMerge(diffs)
        self.diff_cleanupSemanticLossless(diffs)

        # Find any overlaps between deletions and insertions.
        # e.g: <del>abcxxx</del><ins>xxxdef</ins>
        #   -> <del>abc</del>xxx<ins>def</ins>
        # e.g: <del>xxxabc</del><ins>defxxx</ins>
        #   -> <ins>def</ins>xxx<del>abc</del>
        # Only extract an overlap if it is as big as the edit ahead or behind it.
        pointer = 1
        while pointer < len(diffs):
            if (
                diffs[pointer - 1][0] == self.DIFF_DELETE
                and diffs[pointer][0] == self.DIFF_INSERT
            ):
                deletion = diffs[pointer - 1][1]
                insertion = diffs[pointer][1]
                # Check the overlap in both orientations.
                overlap_length1 = self.diff_commonOverlap(deletion, insertion)
                overlap_length2 = self.diff_commonOverlap(insertion, deletion)
                if overlap_length1 >= overlap_length2:
                    if (
                        overlap_length1 >= len(deletion) / 2.0
                        or overlap_length1 >= len(insertion) / 2.0
                    ):
                        # Overlap found.  Insert an equality and trim the surrounding edits.
                        diffs.insert(
                            pointer, (self.DIFF_EQUAL, insertion[:overlap_length1])
                        )
                        diffs[pointer - 1] = (
                            self.DIFF_DELETE,
                            deletion[: len(deletion) - overlap_length1],
                        )
                        diffs[pointer + 1] = (
                            self.DIFF_INSERT,
                            insertion[overlap_length1:],
                        )
                        pointer += 1
                else:
                    if (
                        overlap_length2 >= len(deletion) / 2.0
                        or overlap_length2 >= len(insertion) / 2.0
                    ):
                        # Reverse overlap found.
                        # Insert an equality and swap and trim the surrounding edits.
                        diffs.insert(
                            pointer, (self.DIFF_EQUAL, deletion[:overlap_length2])
                        )
                        diffs[pointer - 1] = (
                            self.DIFF_INSERT,
                            insertion[: len(insertion) - overlap_length2],
                        )
                        diffs[pointer + 1] = (
                            self.DIFF_DELETE,
                            deletion[overlap_length2:],
                        )
                        pointer += 1
                pointer += 1
            pointer += 1
777
    def diff_cleanupSemanticLossless(self, diffs):
        """Look for single edits surrounded on both sides by equalities
    which can be shifted sideways to align the edit to a word boundary.
    e.g: The c<ins>at c</ins>ame. -> The <ins>cat </ins>came.

    Args:
      diffs: Array of diff tuples.
    """

        def diff_cleanupSemanticScore(one, two):
            """Given two strings, compute a score representing whether the
      internal boundary falls on logical boundaries.
      Scores range from 6 (best) to 0 (worst).
      Closure; references only the BLANKLINEEND/BLANKLINESTART regexes
      via self.

      Args:
        one: First string.
        two: Second string.

      Returns:
        The score.
      """
            if not one or not two:
                # Edges are the best.
                return 6

            # Each port of this function behaves slightly differently due to
            # subtle differences in each language's definition of things like
            # 'whitespace'.  Since this function's purpose is largely cosmetic,
            # the choice has been made to use each language's native features
            # rather than force total conformity.
            char1 = one[-1]  # Character immediately before the boundary.
            char2 = two[0]  # Character immediately after the boundary.
            nonAlphaNumeric1 = not char1.isalnum()
            nonAlphaNumeric2 = not char2.isalnum()
            whitespace1 = nonAlphaNumeric1 and char1.isspace()
            whitespace2 = nonAlphaNumeric2 and char2.isspace()
            lineBreak1 = whitespace1 and (char1 == "\r" or char1 == "\n")
            lineBreak2 = whitespace2 and (char2 == "\r" or char2 == "\n")
            blankLine1 = lineBreak1 and self.BLANKLINEEND.search(one)
            blankLine2 = lineBreak2 and self.BLANKLINESTART.match(two)

            if blankLine1 or blankLine2:
                # Five points for blank lines.
                return 5
            elif lineBreak1 or lineBreak2:
                # Four points for line breaks.
                return 4
            elif nonAlphaNumeric1 and not whitespace1 and whitespace2:
                # Three points for end of sentences.
                return 3
            elif whitespace1 or whitespace2:
                # Two points for whitespace.
                return 2
            elif nonAlphaNumeric1 or nonAlphaNumeric2:
                # One point for non-alphanumeric.
                return 1
            return 0

        pointer = 1
        # Intentionally ignore the first and last element (don't need checking).
        while pointer < len(diffs) - 1:
            if (
                diffs[pointer - 1][0] == self.DIFF_EQUAL
                and diffs[pointer + 1][0] == self.DIFF_EQUAL
            ):
                # This is a single edit surrounded by equalities.
                equality1 = diffs[pointer - 1][1]
                edit = diffs[pointer][1]
                equality2 = diffs[pointer + 1][1]

                # First, shift the edit as far left as possible.
                commonOffset = self.diff_commonSuffix(equality1, edit)
                if commonOffset:
                    commonString = edit[-commonOffset:]
                    equality1 = equality1[:-commonOffset]
                    edit = commonString + edit[:-commonOffset]
                    equality2 = commonString + equality2

                # Second, step character by character right, looking for the best fit.
                bestEquality1 = equality1
                bestEdit = edit
                bestEquality2 = equality2
                bestScore = diff_cleanupSemanticScore(
                    equality1, edit
                ) + diff_cleanupSemanticScore(edit, equality2)
                while edit and equality2 and edit[0] == equality2[0]:
                    # Slide the edit one character to the right and rescore.
                    equality1 += edit[0]
                    edit = edit[1:] + equality2[0]
                    equality2 = equality2[1:]
                    score = diff_cleanupSemanticScore(
                        equality1, edit
                    ) + diff_cleanupSemanticScore(edit, equality2)
                    # The >= encourages trailing rather than leading whitespace on edits.
                    if score >= bestScore:
                        bestScore = score
                        bestEquality1 = equality1
                        bestEdit = edit
                        bestEquality2 = equality2

                if diffs[pointer - 1][1] != bestEquality1:
                    # We have an improvement, save it back to the diff.
                    if bestEquality1:
                        diffs[pointer - 1] = (diffs[pointer - 1][0], bestEquality1)
                    else:
                        # The leading equality was entirely absorbed; drop it.
                        del diffs[pointer - 1]
                        pointer -= 1
                    diffs[pointer] = (diffs[pointer][0], bestEdit)
                    if bestEquality2:
                        diffs[pointer + 1] = (diffs[pointer + 1][0], bestEquality2)
                    else:
                        # The trailing equality was entirely absorbed; drop it.
                        del diffs[pointer + 1]
                        pointer -= 1
            pointer += 1
892
    # Define some regex patterns for matching boundaries.
    # Used by diff_cleanupSemanticScore: BLANKLINEEND tests whether a string
    # ends with a blank line; BLANKLINESTART tests whether one starts with one.
    BLANKLINEEND = re.compile(r"\n\r?\n$")
    BLANKLINESTART = re.compile(r"^\r?\n\r?\n")
896
    def diff_cleanupEfficiency(self, diffs):
        """Reduce the number of edits by eliminating operationally trivial
    equalities.

    An equality shorter than Diff_EditCost that is hemmed in by edits is
    split into a delete+insert pair so neighbouring edits can merge.

    Args:
      diffs: Array of diff tuples.
    """
        changes = False
        equalities = []  # Stack of indices where equalities are found.
        lastEquality = None  # Always equal to diffs[equalities[-1]][1]
        pointer = 0  # Index of current position.
        pre_ins = False  # Is there an insertion operation before the last equality.
        pre_del = False  # Is there a deletion operation before the last equality.
        post_ins = False  # Is there an insertion operation after the last equality.
        post_del = False  # Is there a deletion operation after the last equality.
        while pointer < len(diffs):
            if diffs[pointer][0] == self.DIFF_EQUAL:  # Equality found.
                if len(diffs[pointer][1]) < self.Diff_EditCost and (
                    post_ins or post_del
                ):
                    # Candidate found.
                    equalities.append(pointer)
                    pre_ins = post_ins
                    pre_del = post_del
                    lastEquality = diffs[pointer][1]
                else:
                    # Not a candidate, and can never become one.
                    equalities = []
                    lastEquality = None

                post_ins = post_del = False
            else:  # An insertion or deletion.
                if diffs[pointer][0] == self.DIFF_DELETE:
                    post_del = True
                else:
                    post_ins = True

                # Five types to be split:
                # <ins>A</ins><del>B</del>XY<ins>C</ins><del>D</del>
                # <ins>A</ins>X<ins>C</ins><del>D</del>
                # <ins>A</ins><del>B</del>X<ins>C</ins>
                # <del>A</del>X<ins>C</ins><del>D</del>
                # <ins>A</ins><del>B</del>X<del>C</del>

                # Split when all four flags are set, or when the equality is
                # very short (< Diff_EditCost/2) and any three of four are set.
                if lastEquality and (
                    (pre_ins and pre_del and post_ins and post_del)
                    or (
                        (len(lastEquality) < self.Diff_EditCost / 2)
                        and (pre_ins + pre_del + post_ins + post_del) == 3
                    )
                ):
                    # Duplicate record.
                    diffs.insert(equalities[-1], (self.DIFF_DELETE, lastEquality))
                    # Change second copy to insert.
                    diffs[equalities[-1] + 1] = (
                        self.DIFF_INSERT,
                        diffs[equalities[-1] + 1][1],
                    )
                    equalities.pop()  # Throw away the equality we just deleted.
                    lastEquality = None
                    if pre_ins and pre_del:
                        # No changes made which could affect previous entry, keep going.
                        post_ins = post_del = True
                        equalities = []
                    else:
                        if len(equalities):
                            equalities.pop()  # Throw away the previous equality.
                        if len(equalities):
                            # Rewind to the previous candidate and rescan.
                            pointer = equalities[-1]
                        else:
                            pointer = -1
                        post_ins = post_del = False
                    changes = True
            pointer += 1

        if changes:
            self.diff_cleanupMerge(diffs)
974
    def diff_cleanupMerge(self, diffs):
        """Reorder and merge like edit sections.  Merge equalities.
    Any edit section can move as long as it doesn't cross an equality.

    Args:
      diffs: Array of diff tuples.
    """
        diffs.append((self.DIFF_EQUAL, ""))  # Add a dummy entry at the end.
        pointer = 0
        count_delete = 0  # Number of consecutive DIFF_DELETE entries seen.
        count_insert = 0  # Number of consecutive DIFF_INSERT entries seen.
        text_delete = ""  # Concatenated text of those deletions.
        text_insert = ""  # Concatenated text of those insertions.
        while pointer < len(diffs):
            if diffs[pointer][0] == self.DIFF_INSERT:
                count_insert += 1
                text_insert += diffs[pointer][1]
                pointer += 1
            elif diffs[pointer][0] == self.DIFF_DELETE:
                count_delete += 1
                text_delete += diffs[pointer][1]
                pointer += 1
            elif diffs[pointer][0] == self.DIFF_EQUAL:
                # Upon reaching an equality, check for prior redundancies.
                if count_delete + count_insert > 1:
                    if count_delete != 0 and count_insert != 0:
                        # Factor out any common prefixies.
                        commonlength = self.diff_commonPrefix(text_insert, text_delete)
                        if commonlength != 0:
                            # x indexes the entry just before the run of edits.
                            x = pointer - count_delete - count_insert - 1
                            if x >= 0 and diffs[x][0] == self.DIFF_EQUAL:
                                # Append the common prefix to the preceding equality.
                                diffs[x] = (
                                    diffs[x][0],
                                    diffs[x][1] + text_insert[:commonlength],
                                )
                            else:
                                # No preceding equality; create one at the front.
                                diffs.insert(
                                    0, (self.DIFF_EQUAL, text_insert[:commonlength])
                                )
                                pointer += 1
                            text_insert = text_insert[commonlength:]
                            text_delete = text_delete[commonlength:]
                        # Factor out any common suffixies.
                        commonlength = self.diff_commonSuffix(text_insert, text_delete)
                        if commonlength != 0:
                            # Prepend the common suffix to the equality ending the run.
                            diffs[pointer] = (
                                diffs[pointer][0],
                                text_insert[-commonlength:] + diffs[pointer][1],
                            )
                            text_insert = text_insert[:-commonlength]
                            text_delete = text_delete[:-commonlength]
                    # Delete the offending records and add the merged ones.
                    new_ops = []
                    if len(text_delete) != 0:
                        new_ops.append((self.DIFF_DELETE, text_delete))
                    if len(text_insert) != 0:
                        new_ops.append((self.DIFF_INSERT, text_insert))
                    pointer -= count_delete + count_insert
                    diffs[pointer : pointer + count_delete + count_insert] = new_ops
                    pointer += len(new_ops) + 1
                elif pointer != 0 and diffs[pointer - 1][0] == self.DIFF_EQUAL:
                    # Merge this equality with the previous one.
                    diffs[pointer - 1] = (
                        diffs[pointer - 1][0],
                        diffs[pointer - 1][1] + diffs[pointer][1],
                    )
                    del diffs[pointer]
                else:
                    pointer += 1

                # Reset the accumulators for the next run of edits.
                count_insert = 0
                count_delete = 0
                text_delete = ""
                text_insert = ""

        if diffs[-1][1] == "":
            diffs.pop()  # Remove the dummy entry at the end.

        # Second pass: look for single edits surrounded on both sides by equalities
        # which can be shifted sideways to eliminate an equality.
        # e.g: A<ins>BA</ins>C -> <ins>AB</ins>AC
        changes = False
        pointer = 1
        # Intentionally ignore the first and last element (don't need checking).
        while pointer < len(diffs) - 1:
            if (
                diffs[pointer - 1][0] == self.DIFF_EQUAL
                and diffs[pointer + 1][0] == self.DIFF_EQUAL
            ):
                # This is a single edit surrounded by equalities.
                if diffs[pointer][1].endswith(diffs[pointer - 1][1]):
                    # Shift the edit over the previous equality.
                    if diffs[pointer - 1][1] != "":
                        diffs[pointer] = (
                            diffs[pointer][0],
                            diffs[pointer - 1][1]
                            + diffs[pointer][1][: -len(diffs[pointer - 1][1])],
                        )
                        diffs[pointer + 1] = (
                            diffs[pointer + 1][0],
                            diffs[pointer - 1][1] + diffs[pointer + 1][1],
                        )
                    del diffs[pointer - 1]
                    changes = True
                elif diffs[pointer][1].startswith(diffs[pointer + 1][1]):
                    # Shift the edit over the next equality.
                    diffs[pointer - 1] = (
                        diffs[pointer - 1][0],
                        diffs[pointer - 1][1] + diffs[pointer + 1][1],
                    )
                    diffs[pointer] = (
                        diffs[pointer][0],
                        diffs[pointer][1][len(diffs[pointer + 1][1]) :]
                        + diffs[pointer + 1][1],
                    )
                    del diffs[pointer + 1]
                    changes = True
            pointer += 1

        # If shifts were made, the diff needs reordering and another shift sweep.
        if changes:
            self.diff_cleanupMerge(diffs)
1097
1098    def diff_xIndex(self, diffs, loc):
1099        """loc is a location in text1, compute and return the equivalent location
1100    in text2.  e.g. "The cat" vs "The big cat", 1->1, 5->8
1101
1102    Args:
1103      diffs: Array of diff tuples.
1104      loc: Location within text1.
1105
1106    Returns:
1107      Location within text2.
1108    """
1109        chars1 = 0
1110        chars2 = 0
1111        last_chars1 = 0
1112        last_chars2 = 0
1113        for x in xrange(len(diffs)):
1114            (op, text) = diffs[x]
1115            if op != self.DIFF_INSERT:  # Equality or deletion.
1116                chars1 += len(text)
1117            if op != self.DIFF_DELETE:  # Equality or insertion.
1118                chars2 += len(text)
1119            if chars1 > loc:  # Overshot the location.
1120                break
1121            last_chars1 = chars1
1122            last_chars2 = chars2
1123
1124        if len(diffs) != x and diffs[x][0] == self.DIFF_DELETE:
1125            # The location was deleted.
1126            return last_chars2
1127        # Add the remaining len(character).
1128        return last_chars2 + (loc - last_chars1)
1129
1130    def diff_prettyHtml(self, diffs):
1131        """Convert a diff array into a pretty HTML report.
1132
1133    Args:
1134      diffs: Array of diff tuples.
1135
1136    Returns:
1137      HTML representation.
1138    """
1139        html = []
1140        for (op, data) in diffs:
1141            text = (
1142                data.replace("&", "&amp;")
1143                .replace("<", "&lt;")
1144                .replace(">", "&gt;")
1145                .replace("\n", "&para;<br>")
1146            )
1147            if op == self.DIFF_INSERT:
1148                html.append('<ins style="background:#e6ffe6;">%s</ins>' % text)
1149            elif op == self.DIFF_DELETE:
1150                html.append('<del style="background:#ffe6e6;">%s</del>' % text)
1151            elif op == self.DIFF_EQUAL:
1152                html.append("<span>%s</span>" % text)
1153        return "".join(html)
1154
1155    def diff_text1(self, diffs):
1156        """Compute and return the source text (all equalities and deletions).
1157
1158    Args:
1159      diffs: Array of diff tuples.
1160
1161    Returns:
1162      Source text.
1163    """
1164        text = []
1165        for (op, data) in diffs:
1166            if op != self.DIFF_INSERT:
1167                text.append(data)
1168        return "".join(text)
1169
1170    def diff_text2(self, diffs):
1171        """Compute and return the destination text (all equalities and insertions).
1172
1173    Args:
1174      diffs: Array of diff tuples.
1175
1176    Returns:
1177      Destination text.
1178    """
1179        text = []
1180        for (op, data) in diffs:
1181            if op != self.DIFF_DELETE:
1182                text.append(data)
1183        return "".join(text)
1184
1185    def diff_levenshtein(self, diffs):
1186        """Compute the Levenshtein distance; the number of inserted, deleted or
1187    substituted characters.
1188
1189    Args:
1190      diffs: Array of diff tuples.
1191
1192    Returns:
1193      Number of changes.
1194    """
1195        levenshtein = 0
1196        insertions = 0
1197        deletions = 0
1198        for (op, data) in diffs:
1199            if op == self.DIFF_INSERT:
1200                insertions += len(data)
1201            elif op == self.DIFF_DELETE:
1202                deletions += len(data)
1203            elif op == self.DIFF_EQUAL:
1204                # A deletion and an insertion is one substitution.
1205                levenshtein += max(insertions, deletions)
1206                insertions = 0
1207                deletions = 0
1208        levenshtein += max(insertions, deletions)
1209        return levenshtein
1210
1211    def diff_toDelta(self, diffs):
1212        """Crush the diff into an encoded string which describes the operations
1213    required to transform text1 into text2.
1214    E.g. =3\t-2\t+ing  -> Keep 3 chars, delete 2 chars, insert 'ing'.
1215    Operations are tab-separated.  Inserted text is escaped using %xx notation.
1216
1217    Args:
1218      diffs: Array of diff tuples.
1219
1220    Returns:
1221      Delta text.
1222    """
1223        text = []
1224        for (op, data) in diffs:
1225            if op == self.DIFF_INSERT:
1226                # High ascii will raise UnicodeDecodeError.  Use Unicode instead.
1227                data = data.encode("utf-8")
1228                text.append("+" + urllib.quote(data, "!~*'();/?:@&=+$,# "))
1229            elif op == self.DIFF_DELETE:
1230                text.append("-%d" % len(data))
1231            elif op == self.DIFF_EQUAL:
1232                text.append("=%d" % len(data))
1233        return "\t".join(text)
1234
1235    def diff_fromDelta(self, text1, delta):
1236        """Given the original text1, and an encoded string which describes the
1237    operations required to transform text1 into text2, compute the full diff.
1238
1239    Args:
1240      text1: Source string for the diff.
1241      delta: Delta text.
1242
1243    Returns:
1244      Array of diff tuples.
1245
1246    Raises:
1247      ValueError: If invalid input.
1248    """
1249        if type(delta) == unicode:
1250            # Deltas should be composed of a subset of ascii chars, Unicode not
1251            # required.  If this encode raises UnicodeEncodeError, delta is invalid.
1252            delta = delta.encode("ascii")
1253        diffs = []
1254        pointer = 0  # Cursor in text1
1255        tokens = delta.split("\t")
1256        for token in tokens:
1257            if token == "":
1258                # Blank tokens are ok (from a trailing \t).
1259                continue
1260            # Each token begins with a one character parameter which specifies the
1261            # operation of this token (delete, insert, equality).
1262            param = token[1:]
1263            if token[0] == "+":
1264                param = urllib.unquote(param).decode("utf-8")
1265                diffs.append((self.DIFF_INSERT, param))
1266            elif token[0] == "-" or token[0] == "=":
1267                try:
1268                    n = int(param)
1269                except ValueError:
1270                    raise ValueError("Invalid number in diff_fromDelta: " + param)
1271                if n < 0:
1272                    raise ValueError("Negative number in diff_fromDelta: " + param)
1273                text = text1[pointer : pointer + n]
1274                pointer += n
1275                if token[0] == "=":
1276                    diffs.append((self.DIFF_EQUAL, text))
1277                else:
1278                    diffs.append((self.DIFF_DELETE, text))
1279            else:
1280                # Anything else is an error.
1281                raise ValueError(
1282                    "Invalid diff operation in diff_fromDelta: " + token[0]
1283                )
1284        if pointer != len(text1):
1285            raise ValueError(
1286                "Delta length (%d) does not equal source text length (%d)."
1287                % (pointer, len(text1))
1288            )
1289        return diffs
1290
1291    #  MATCH FUNCTIONS
1292
1293    def match_main(self, text, pattern, loc):
1294        """Locate the best instance of 'pattern' in 'text' near 'loc'.
1295
1296    Args:
1297      text: The text to search.
1298      pattern: The pattern to search for.
1299      loc: The location to search around.
1300
1301    Returns:
1302      Best match index or -1.
1303    """
1304        # Check for null inputs.
1305        if text == None or pattern == None:
1306            raise ValueError("Null inputs. (match_main)")
1307
1308        loc = max(0, min(loc, len(text)))
1309        if text == pattern:
1310            # Shortcut (potentially not guaranteed by the algorithm)
1311            return 0
1312        elif not text:
1313            # Nothing to match.
1314            return -1
1315        elif text[loc : loc + len(pattern)] == pattern:
1316            # Perfect match at the perfect spot!  (Includes case of null pattern)
1317            return loc
1318        else:
1319            # Do a fuzzy compare.
1320            match = self.match_bitap(text, pattern, loc)
1321            return match
1322
1323    def match_bitap(self, text, pattern, loc):
1324        """Locate the best instance of 'pattern' in 'text' near 'loc' using the
1325    Bitap algorithm.
1326
1327    Args:
1328      text: The text to search.
1329      pattern: The pattern to search for.
1330      loc: The location to search around.
1331
1332    Returns:
1333      Best match index or -1.
1334    """
1335        # Python doesn't have a maxint limit, so ignore this check.
1336        # if self.Match_MaxBits != 0 and len(pattern) > self.Match_MaxBits:
1337        #  raise ValueError("Pattern too long for this application.")
1338
1339        # Initialise the alphabet.
1340        s = self.match_alphabet(pattern)
1341
1342        def match_bitapScore(e, x):
1343            """Compute and return the score for a match with e errors and x location.
1344      Accesses loc and pattern through being a closure.
1345
1346      Args:
1347        e: Number of errors in match.
1348        x: Location of match.
1349
1350      Returns:
1351        Overall score for match (0.0 = good, 1.0 = bad).
1352      """
1353            accuracy = float(e) / len(pattern)
1354            proximity = abs(loc - x)
1355            if not self.Match_Distance:
1356                # Dodge divide by zero error.
1357                return proximity and 1.0 or accuracy
1358            return accuracy + (proximity / float(self.Match_Distance))
1359
1360        # Highest score beyond which we give up.
1361        score_threshold = self.Match_Threshold
1362        # Is there a nearby exact match? (speedup)
1363        best_loc = text.find(pattern, loc)
1364        if best_loc != -1:
1365            score_threshold = min(match_bitapScore(0, best_loc), score_threshold)
1366            # What about in the other direction? (speedup)
1367            best_loc = text.rfind(pattern, loc + len(pattern))
1368            if best_loc != -1:
1369                score_threshold = min(match_bitapScore(0, best_loc), score_threshold)
1370
1371        # Initialise the bit arrays.
1372        matchmask = 1 << (len(pattern) - 1)
1373        best_loc = -1
1374
1375        bin_max = len(pattern) + len(text)
1376        # Empty initialization added to appease pychecker.
1377        last_rd = None
1378        for d in xrange(len(pattern)):
1379            # Scan for the best match each iteration allows for one more error.
1380            # Run a binary search to determine how far from 'loc' we can stray at
1381            # this error level.
1382            bin_min = 0
1383            bin_mid = bin_max
1384            while bin_min < bin_mid:
1385                if match_bitapScore(d, loc + bin_mid) <= score_threshold:
1386                    bin_min = bin_mid
1387                else:
1388                    bin_max = bin_mid
1389                bin_mid = (bin_max - bin_min) // 2 + bin_min
1390
1391            # Use the result from this iteration as the maximum for the next.
1392            bin_max = bin_mid
1393            start = max(1, loc - bin_mid + 1)
1394            finish = min(loc + bin_mid, len(text)) + len(pattern)
1395
1396            rd = [0] * (finish + 2)
1397            rd[finish + 1] = (1 << d) - 1
1398            for j in xrange(finish, start - 1, -1):
1399                if len(text) <= j - 1:
1400                    # Out of range.
1401                    charMatch = 0
1402                else:
1403                    charMatch = s.get(text[j - 1], 0)
1404                if d == 0:  # First pass: exact match.
1405                    rd[j] = ((rd[j + 1] << 1) | 1) & charMatch
1406                else:  # Subsequent passes: fuzzy match.
1407                    rd[j] = (
1408                        (((rd[j + 1] << 1) | 1) & charMatch)
1409                        | (((last_rd[j + 1] | last_rd[j]) << 1) | 1)
1410                        | last_rd[j + 1]
1411                    )
1412                if rd[j] & matchmask:
1413                    score = match_bitapScore(d, j - 1)
1414                    # This match will almost certainly be better than any existing match.
1415                    # But check anyway.
1416                    if score <= score_threshold:
1417                        # Told you so.
1418                        score_threshold = score
1419                        best_loc = j - 1
1420                        if best_loc > loc:
1421                            # When passing loc, don't exceed our current distance from loc.
1422                            start = max(1, 2 * loc - best_loc)
1423                        else:
1424                            # Already passed loc, downhill from here on in.
1425                            break
1426            # No hope for a (better) match at greater error levels.
1427            if match_bitapScore(d + 1, loc) > score_threshold:
1428                break
1429            last_rd = rd
1430        return best_loc
1431
1432    def match_alphabet(self, pattern):
1433        """Initialise the alphabet for the Bitap algorithm.
1434
1435    Args:
1436      pattern: The text to encode.
1437
1438    Returns:
1439      Hash of character locations.
1440    """
1441        s = {}
1442        for char in pattern:
1443            s[char] = 0
1444        for i in xrange(len(pattern)):
1445            s[pattern[i]] |= 1 << (len(pattern) - i - 1)
1446        return s
1447
1448    #  PATCH FUNCTIONS
1449
1450    def patch_addContext(self, patch, text):
1451        """Increase the context until it is unique,
1452    but don't let the pattern expand beyond Match_MaxBits.
1453
1454    Args:
1455      patch: The patch to grow.
1456      text: Source text.
1457    """
1458        if len(text) == 0:
1459            return
1460        pattern = text[patch.start2 : patch.start2 + patch.length1]
1461        padding = 0
1462
1463        # Look for the first and last matches of pattern in text.  If two different
1464        # matches are found, increase the pattern length.
1465        while text.find(pattern) != text.rfind(pattern) and (
1466            self.Match_MaxBits == 0
1467            or len(pattern) < self.Match_MaxBits - self.Patch_Margin - self.Patch_Margin
1468        ):
1469            padding += self.Patch_Margin
1470            pattern = text[
1471                max(0, patch.start2 - padding) : patch.start2 + patch.length1 + padding
1472            ]
1473        # Add one chunk for good luck.
1474        padding += self.Patch_Margin
1475
1476        # Add the prefix.
1477        prefix = text[max(0, patch.start2 - padding) : patch.start2]
1478        if prefix:
1479            patch.diffs[:0] = [(self.DIFF_EQUAL, prefix)]
1480        # Add the suffix.
1481        suffix = text[
1482            patch.start2 + patch.length1 : patch.start2 + patch.length1 + padding
1483        ]
1484        if suffix:
1485            patch.diffs.append((self.DIFF_EQUAL, suffix))
1486
1487        # Roll back the start points.
1488        patch.start1 -= len(prefix)
1489        patch.start2 -= len(prefix)
1490        # Extend lengths.
1491        patch.length1 += len(prefix) + len(suffix)
1492        patch.length2 += len(prefix) + len(suffix)
1493
1494    def patch_make(self, a, b=None, c=None):
1495        """Compute a list of patches to turn text1 into text2.
1496    Use diffs if provided, otherwise compute it ourselves.
1497    There are four ways to call this function, depending on what data is
1498    available to the caller:
1499    Method 1:
1500    a = text1, b = text2
1501    Method 2:
1502    a = diffs
1503    Method 3 (optimal):
1504    a = text1, b = diffs
1505    Method 4 (deprecated, use method 3):
1506    a = text1, b = text2, c = diffs
1507
1508    Args:
1509      a: text1 (methods 1,3,4) or Array of diff tuples for text1 to
1510          text2 (method 2).
1511      b: text2 (methods 1,4) or Array of diff tuples for text1 to
1512          text2 (method 3) or undefined (method 2).
1513      c: Array of diff tuples for text1 to text2 (method 4) or
1514          undefined (methods 1,2,3).
1515
1516    Returns:
1517      Array of Patch objects.
1518    """
1519        text1 = None
1520        diffs = None
1521        # Note that texts may arrive as 'str' or 'unicode'.
1522        if isinstance(a, basestring) and isinstance(b, basestring) and c is None:
1523            # Method 1: text1, text2
1524            # Compute diffs from text1 and text2.
1525            text1 = a
1526            diffs = self.diff_main(text1, b, True)
1527            if len(diffs) > 2:
1528                self.diff_cleanupSemantic(diffs)
1529                self.diff_cleanupEfficiency(diffs)
1530        elif isinstance(a, list) and b is None and c is None:
1531            # Method 2: diffs
1532            # Compute text1 from diffs.
1533            diffs = a
1534            text1 = self.diff_text1(diffs)
1535        elif isinstance(a, basestring) and isinstance(b, list) and c is None:
1536            # Method 3: text1, diffs
1537            text1 = a
1538            diffs = b
1539        elif (
1540            isinstance(a, basestring)
1541            and isinstance(b, basestring)
1542            and isinstance(c, list)
1543        ):
1544            # Method 4: text1, text2, diffs
1545            # text2 is not used.
1546            text1 = a
1547            diffs = c
1548        else:
1549            raise ValueError("Unknown call format to patch_make.")
1550
1551        if not diffs:
1552            return []  # Get rid of the None case.
1553        patches = []
1554        patch = patch_obj()
1555        char_count1 = 0  # Number of characters into the text1 string.
1556        char_count2 = 0  # Number of characters into the text2 string.
1557        prepatch_text = text1  # Recreate the patches to determine context info.
1558        postpatch_text = text1
1559        for x in xrange(len(diffs)):
1560            (diff_type, diff_text) = diffs[x]
1561            if len(patch.diffs) == 0 and diff_type != self.DIFF_EQUAL:
1562                # A new patch starts here.
1563                patch.start1 = char_count1
1564                patch.start2 = char_count2
1565            if diff_type == self.DIFF_INSERT:
1566                # Insertion
1567                patch.diffs.append(diffs[x])
1568                patch.length2 += len(diff_text)
1569                postpatch_text = (
1570                    postpatch_text[:char_count2]
1571                    + diff_text
1572                    + postpatch_text[char_count2:]
1573                )
1574            elif diff_type == self.DIFF_DELETE:
1575                # Deletion.
1576                patch.length1 += len(diff_text)
1577                patch.diffs.append(diffs[x])
1578                postpatch_text = (
1579                    postpatch_text[:char_count2]
1580                    + postpatch_text[char_count2 + len(diff_text) :]
1581                )
1582            elif (
1583                diff_type == self.DIFF_EQUAL
1584                and len(diff_text) <= 2 * self.Patch_Margin
1585                and len(patch.diffs) != 0
1586                and len(diffs) != x + 1
1587            ):
1588                # Small equality inside a patch.
1589                patch.diffs.append(diffs[x])
1590                patch.length1 += len(diff_text)
1591                patch.length2 += len(diff_text)
1592
1593            if diff_type == self.DIFF_EQUAL and len(diff_text) >= 2 * self.Patch_Margin:
1594                # Time for a new patch.
1595                if len(patch.diffs) != 0:
1596                    self.patch_addContext(patch, prepatch_text)
1597                    patches.append(patch)
1598                    patch = patch_obj()
1599                    # Unlike Unidiff, our patch lists have a rolling context.
1600                    # https://github.com/google/diff-match-patch/wiki/Unidiff
1601                    # Update prepatch text & pos to reflect the application of the
1602                    # just completed patch.
1603                    prepatch_text = postpatch_text
1604                    char_count1 = char_count2
1605
1606            # Update the current character count.
1607            if diff_type != self.DIFF_INSERT:
1608                char_count1 += len(diff_text)
1609            if diff_type != self.DIFF_DELETE:
1610                char_count2 += len(diff_text)
1611
1612        # Pick up the leftover patch if not empty.
1613        if len(patch.diffs) != 0:
1614            self.patch_addContext(patch, prepatch_text)
1615            patches.append(patch)
1616        return patches
1617
1618    def patch_deepCopy(self, patches):
1619        """Given an array of patches, return another array that is identical.
1620
1621    Args:
1622      patches: Array of Patch objects.
1623
1624    Returns:
1625      Array of Patch objects.
1626    """
1627        patchesCopy = []
1628        for patch in patches:
1629            patchCopy = patch_obj()
1630            # No need to deep copy the tuples since they are immutable.
1631            patchCopy.diffs = patch.diffs[:]
1632            patchCopy.start1 = patch.start1
1633            patchCopy.start2 = patch.start2
1634            patchCopy.length1 = patch.length1
1635            patchCopy.length2 = patch.length2
1636            patchesCopy.append(patchCopy)
1637        return patchesCopy
1638
    def patch_apply(self, patches, text):
        """Merge a set of patches onto the text.  Return a patched text, as well
    as a list of true/false values indicating which patches were applied.

    Args:
      patches: Array of Patch objects.
      text: Old text.

    Returns:
      Two element Array, containing the new text and an array of boolean values.
    """
        if not patches:
            return (text, [])

        # Deep copy the patches so that no changes are made to originals.
        patches = self.patch_deepCopy(patches)

        # Pad the text on both sides so that patches anchored at the very start
        # or end still have context characters to match against.  The same
        # padding string is stripped off again before returning.
        nullPadding = self.patch_addPadding(patches)
        text = nullPadding + text + nullPadding
        self.patch_splitMax(patches)

        # delta keeps track of the offset between the expected and actual location
        # of the previous patch.  If there are patches expected at positions 10 and
        # 20, but the first patch was found at 12, delta is 2 and the second patch
        # has an effective expected position of 22.
        delta = 0
        results = []
        for patch in patches:
            expected_loc = patch.start2 + delta
            # text1 is the pre-patch text this patch expects to find.
            text1 = self.diff_text1(patch.diffs)
            end_loc = -1
            if len(text1) > self.Match_MaxBits:
                # patch_splitMax will only provide an oversized pattern in the case of
                # a monster delete.  Match the head and tail of the pattern
                # separately, since match_main is limited to Match_MaxBits.
                start_loc = self.match_main(
                    text, text1[: self.Match_MaxBits], expected_loc
                )
                if start_loc != -1:
                    end_loc = self.match_main(
                        text,
                        text1[-self.Match_MaxBits :],
                        expected_loc + len(text1) - self.Match_MaxBits,
                    )
                    if end_loc == -1 or start_loc >= end_loc:
                        # Can't find valid trailing context.  Drop this patch.
                        start_loc = -1
            else:
                start_loc = self.match_main(text, text1, expected_loc)
            if start_loc == -1:
                # No match found.  :(
                results.append(False)
                # Subtract the delta for this failed patch from subsequent patches.
                delta -= patch.length2 - patch.length1
            else:
                # Found a match.  :)
                results.append(True)
                delta = start_loc - expected_loc
                # text2 is the actual (possibly imperfectly matching) slice of
                # the current text that this patch will be applied against.
                if end_loc == -1:
                    text2 = text[start_loc : start_loc + len(text1)]
                else:
                    text2 = text[start_loc : end_loc + self.Match_MaxBits]
                if text1 == text2:
                    # Perfect match, just shove the replacement text in.
                    text = (
                        text[:start_loc]
                        + self.diff_text2(patch.diffs)
                        + text[start_loc + len(text1) :]
                    )
                else:
                    # Imperfect match.
                    # Run a diff to get a framework of equivalent indices.
                    diffs = self.diff_main(text1, text2, False)
                    if (
                        len(text1) > self.Match_MaxBits
                        and self.diff_levenshtein(diffs) / float(len(text1))
                        > self.Patch_DeleteThreshold
                    ):
                        # The end points match, but the content is unacceptably bad.
                        results[-1] = False
                    else:
                        self.diff_cleanupSemanticLossless(diffs)
                        # index1 walks the expected text; diff_xIndex translates
                        # it into index2, the equivalent offset in the actual text.
                        index1 = 0
                        for (op, data) in patch.diffs:
                            if op != self.DIFF_EQUAL:
                                index2 = self.diff_xIndex(diffs, index1)
                            if op == self.DIFF_INSERT:  # Insertion
                                text = (
                                    text[: start_loc + index2]
                                    + data
                                    + text[start_loc + index2 :]
                                )
                            elif op == self.DIFF_DELETE:  # Deletion
                                text = (
                                    text[: start_loc + index2]
                                    + text[
                                        start_loc
                                        + self.diff_xIndex(diffs, index1 + len(data)) :
                                    ]
                                )
                            if op != self.DIFF_DELETE:
                                index1 += len(data)
        # Strip the padding off.
        text = text[len(nullPadding) : -len(nullPadding)]
        return (text, results)
1743
1744    def patch_addPadding(self, patches):
1745        """Add some padding on text start and end so that edges can match
1746    something.  Intended to be called only from within patch_apply.
1747
1748    Args:
1749      patches: Array of Patch objects.
1750
1751    Returns:
1752      The padding string added to each side.
1753    """
1754        paddingLength = self.Patch_Margin
1755        nullPadding = ""
1756        for x in xrange(1, paddingLength + 1):
1757            nullPadding += chr(x)
1758
1759        # Bump all the patches forward.
1760        for patch in patches:
1761            patch.start1 += paddingLength
1762            patch.start2 += paddingLength
1763
1764        # Add some padding on start of first diff.
1765        patch = patches[0]
1766        diffs = patch.diffs
1767        if not diffs or diffs[0][0] != self.DIFF_EQUAL:
1768            # Add nullPadding equality.
1769            diffs.insert(0, (self.DIFF_EQUAL, nullPadding))
1770            patch.start1 -= paddingLength  # Should be 0.
1771            patch.start2 -= paddingLength  # Should be 0.
1772            patch.length1 += paddingLength
1773            patch.length2 += paddingLength
1774        elif paddingLength > len(diffs[0][1]):
1775            # Grow first equality.
1776            extraLength = paddingLength - len(diffs[0][1])
1777            newText = nullPadding[len(diffs[0][1]) :] + diffs[0][1]
1778            diffs[0] = (diffs[0][0], newText)
1779            patch.start1 -= extraLength
1780            patch.start2 -= extraLength
1781            patch.length1 += extraLength
1782            patch.length2 += extraLength
1783
1784        # Add some padding on end of last diff.
1785        patch = patches[-1]
1786        diffs = patch.diffs
1787        if not diffs or diffs[-1][0] != self.DIFF_EQUAL:
1788            # Add nullPadding equality.
1789            diffs.append((self.DIFF_EQUAL, nullPadding))
1790            patch.length1 += paddingLength
1791            patch.length2 += paddingLength
1792        elif paddingLength > len(diffs[-1][1]):
1793            # Grow last equality.
1794            extraLength = paddingLength - len(diffs[-1][1])
1795            newText = diffs[-1][1] + nullPadding[:extraLength]
1796            diffs[-1] = (diffs[-1][0], newText)
1797            patch.length1 += extraLength
1798            patch.length2 += extraLength
1799
1800        return nullPadding
1801
1802    def patch_splitMax(self, patches):
1803        """Look through the patches and break up any which are longer than the
1804    maximum limit of the match algorithm.
1805    Intended to be called only from within patch_apply.
1806
1807    Args:
1808      patches: Array of Patch objects.
1809    """
1810        patch_size = self.Match_MaxBits
1811        if patch_size == 0:
1812            # Python has the option of not splitting strings due to its ability
1813            # to handle integers of arbitrary precision.
1814            return
1815        for x in xrange(len(patches)):
1816            if patches[x].length1 <= patch_size:
1817                continue
1818            bigpatch = patches[x]
1819            # Remove the big old patch.
1820            del patches[x]
1821            x -= 1
1822            start1 = bigpatch.start1
1823            start2 = bigpatch.start2
1824            precontext = ""
1825            while len(bigpatch.diffs) != 0:
1826                # Create one of several smaller patches.
1827                patch = patch_obj()
1828                empty = True
1829                patch.start1 = start1 - len(precontext)
1830                patch.start2 = start2 - len(precontext)
1831                if precontext:
1832                    patch.length1 = patch.length2 = len(precontext)
1833                    patch.diffs.append((self.DIFF_EQUAL, precontext))
1834
1835                while (
1836                    len(bigpatch.diffs) != 0
1837                    and patch.length1 < patch_size - self.Patch_Margin
1838                ):
1839                    (diff_type, diff_text) = bigpatch.diffs[0]
1840                    if diff_type == self.DIFF_INSERT:
1841                        # Insertions are harmless.
1842                        patch.length2 += len(diff_text)
1843                        start2 += len(diff_text)
1844                        patch.diffs.append(bigpatch.diffs.pop(0))
1845                        empty = False
1846                    elif (
1847                        diff_type == self.DIFF_DELETE
1848                        and len(patch.diffs) == 1
1849                        and patch.diffs[0][0] == self.DIFF_EQUAL
1850                        and len(diff_text) > 2 * patch_size
1851                    ):
1852                        # This is a large deletion.  Let it pass in one chunk.
1853                        patch.length1 += len(diff_text)
1854                        start1 += len(diff_text)
1855                        empty = False
1856                        patch.diffs.append((diff_type, diff_text))
1857                        del bigpatch.diffs[0]
1858                    else:
1859                        # Deletion or equality.  Only take as much as we can stomach.
1860                        diff_text = diff_text[
1861                            : patch_size - patch.length1 - self.Patch_Margin
1862                        ]
1863                        patch.length1 += len(diff_text)
1864                        start1 += len(diff_text)
1865                        if diff_type == self.DIFF_EQUAL:
1866                            patch.length2 += len(diff_text)
1867                            start2 += len(diff_text)
1868                        else:
1869                            empty = False
1870
1871                        patch.diffs.append((diff_type, diff_text))
1872                        if diff_text == bigpatch.diffs[0][1]:
1873                            del bigpatch.diffs[0]
1874                        else:
1875                            bigpatch.diffs[0] = (
1876                                bigpatch.diffs[0][0],
1877                                bigpatch.diffs[0][1][len(diff_text) :],
1878                            )
1879
1880                # Compute the head context for the next patch.
1881                precontext = self.diff_text2(patch.diffs)
1882                precontext = precontext[-self.Patch_Margin :]
1883                # Append the end context for this patch.
1884                postcontext = self.diff_text1(bigpatch.diffs)[: self.Patch_Margin]
1885                if postcontext:
1886                    patch.length1 += len(postcontext)
1887                    patch.length2 += len(postcontext)
1888                    if len(patch.diffs) != 0 and patch.diffs[-1][0] == self.DIFF_EQUAL:
1889                        patch.diffs[-1] = (
1890                            self.DIFF_EQUAL,
1891                            patch.diffs[-1][1] + postcontext,
1892                        )
1893                    else:
1894                        patch.diffs.append((self.DIFF_EQUAL, postcontext))
1895
1896                if not empty:
1897                    x += 1
1898                    patches.insert(x, patch)
1899
1900    def patch_toText(self, patches):
1901        """Take a list of patches and return a textual representation.
1902
1903    Args:
1904      patches: Array of Patch objects.
1905
1906    Returns:
1907      Text representation of patches.
1908    """
1909        text = []
1910        for patch in patches:
1911            text.append(str(patch))
1912        return "".join(text)
1913
1914    def patch_fromText(self, textline):
1915        """Parse a textual representation of patches and return a list of patch
1916    objects.
1917
1918    Args:
1919      textline: Text representation of patches.
1920
1921    Returns:
1922      Array of Patch objects.
1923
1924    Raises:
1925      ValueError: If invalid input.
1926    """
1927        if type(textline) == unicode:
1928            # Patches should be composed of a subset of ascii chars, Unicode not
1929            # required.  If this encode raises UnicodeEncodeError, patch is invalid.
1930            textline = textline.encode("ascii")
1931        patches = []
1932        if not textline:
1933            return patches
1934        text = textline.split("\n")
1935        while len(text) != 0:
1936            m = re.match("^@@ -(\d+),?(\d*) \+(\d+),?(\d*) @@$", text[0])
1937            if not m:
1938                raise ValueError("Invalid patch string: " + text[0])
1939            patch = patch_obj()
1940            patches.append(patch)
1941            patch.start1 = int(m.group(1))
1942            if m.group(2) == "":
1943                patch.start1 -= 1
1944                patch.length1 = 1
1945            elif m.group(2) == "0":
1946                patch.length1 = 0
1947            else:
1948                patch.start1 -= 1
1949                patch.length1 = int(m.group(2))
1950
1951            patch.start2 = int(m.group(3))
1952            if m.group(4) == "":
1953                patch.start2 -= 1
1954                patch.length2 = 1
1955            elif m.group(4) == "0":
1956                patch.length2 = 0
1957            else:
1958                patch.start2 -= 1
1959                patch.length2 = int(m.group(4))
1960
1961            del text[0]
1962
1963            while len(text) != 0:
1964                if text[0]:
1965                    sign = text[0][0]
1966                else:
1967                    sign = ""
1968                line = urllib.unquote(text[0][1:])
1969                line = line.decode("utf-8")
1970                if sign == "+":
1971                    # Insertion.
1972                    patch.diffs.append((self.DIFF_INSERT, line))
1973                elif sign == "-":
1974                    # Deletion.
1975                    patch.diffs.append((self.DIFF_DELETE, line))
1976                elif sign == " ":
1977                    # Minor equality.
1978                    patch.diffs.append((self.DIFF_EQUAL, line))
1979                elif sign == "@":
1980                    # Start of next patch.
1981                    break
1982                elif sign == "":
1983                    # Blank line?  Whatever.
1984                    pass
1985                else:
1986                    # WTF?
1987                    raise ValueError("Invalid patch mode: '%s'\n%s" % (sign, line))
1988                del text[0]
1989        return patches
1990
1991
class patch_obj:
    """Class representing one patch operation.
  """

    def __init__(self):
        """Initializes with an empty list of diffs.
    """
        self.diffs = []  # List of (op, text) diff tuples making up this patch.
        self.start1 = None  # Start offset of the patch in text1.
        self.start2 = None  # Start offset of the patch in text2.
        self.length1 = 0  # Length of the patch's span in text1.
        self.length2 = 0  # Length of the patch's span in text2.

    def __str__(self):
        """Emulate GNU diff's format.
    Header: @@ -382,8 +481,9 @@
    Indices are printed as 1-based, not 0-based.

    Returns:
      The GNU diff string.
    """
        # Local import: the file-level "import urllib" does not expose the
        # urllib.parse submodule on Python 3.
        import urllib.parse

        if self.length1 == 0:
            coords1 = str(self.start1) + ",0"
        elif self.length1 == 1:
            coords1 = str(self.start1 + 1)
        else:
            coords1 = str(self.start1 + 1) + "," + str(self.length1)
        if self.length2 == 0:
            coords2 = str(self.start2) + ",0"
        elif self.length2 == 1:
            coords2 = str(self.start2 + 1)
        else:
            coords2 = str(self.start2 + 1) + "," + str(self.length2)
        text = ["@@ -", coords1, " +", coords2, " @@\n"]
        # Escape the body of the patch with %xx notation.
        for (op, data) in self.diffs:
            if op == diff_match_patch.DIFF_INSERT:
                text.append("+")
            elif op == diff_match_patch.DIFF_DELETE:
                text.append("-")
            elif op == diff_match_patch.DIFF_EQUAL:
                text.append(" ")
            # Encode to UTF-8 first so non-ASCII characters are percent-encoded
            # byte-by-byte, matching the other diff-match-patch ports.
            data = data.encode("utf-8")
            text.append(urllib.parse.quote(data, "!~*'();/?:@&=+$,# ") + "\n")
        return "".join(text)
2038