1 /****************************************************************************
2 **
3 ** Copyright (C) 2016 The Qt Company Ltd.
4 ** Contact: https://www.qt.io/licensing/
5 **
6 ** This file is part of Qt Creator.
7 **
8 ** Commercial License Usage
9 ** Licensees holding valid commercial Qt licenses may use this file in
10 ** accordance with the commercial license agreement provided with the
11 ** Software or, alternatively, in accordance with the terms contained in
12 ** a written agreement between you and The Qt Company. For licensing terms
13 ** and conditions see https://www.qt.io/terms-conditions. For further
14 ** information use the contact form at https://www.qt.io/contact-us.
15 **
16 ** GNU General Public License Usage
17 ** Alternatively, this file may be used under the terms of the GNU
18 ** General Public License version 3 as published by the Free Software
19 ** Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT
20 ** included in the packaging of this file. Please review the following
21 ** information to ensure the GNU General Public License requirements will
22 ** be met: https://www.gnu.org/licenses/gpl-3.0.html.
23 **
24 ****************************************************************************/
25 
26 /*
27 The main algorithm "diffMyers()" is based on "An O(ND) Difference Algorithm
28 and Its Variations" by Eugene W. Myers: http://www.xmailserver.org/diff2.pdf
29 
30 Preprocessing and postprocessing functions inspired by "Diff Strategies"
31 publication by Neil Fraser: http://neil.fraser.name/writing/diff/
32 */
33 
34 #include "differ.h"
35 
36 #include <QList>
37 #include <QRegularExpression>
38 #include <QStringList>
39 #include <QMap>
40 #include <QPair>
41 #include <QCoreApplication>
42 #include <QFutureInterfaceBase>
43 
44 namespace Utils {
45 
commonPrefix(const QString & text1,const QString & text2)46 static int commonPrefix(const QString &text1, const QString &text2)
47 {
48     int i = 0;
49     const int text1Count = text1.count();
50     const int text2Count = text2.count();
51     const int maxCount = qMin(text1Count, text2Count);
52     while (i < maxCount) {
53         if (text1.at(i) != text2.at(i))
54             break;
55         i++;
56     }
57     return i;
58 }
59 
commonSuffix(const QString & text1,const QString & text2)60 static int commonSuffix(const QString &text1, const QString &text2)
61 {
62     int i = 0;
63     const int text1Count = text1.count();
64     const int text2Count = text2.count();
65     const int maxCount = qMin(text1Count, text2Count);
66     while (i < maxCount) {
67         if (text1.at(text1Count - i - 1) != text2.at(text2Count - i - 1))
68             break;
69         i++;
70     }
71     return i;
72 }
73 
commonOverlap(const QString & text1,const QString & text2)74 static int commonOverlap(const QString &text1, const QString &text2)
75 {
76     int i = 0;
77     const int text1Count = text1.count();
78     const int text2Count = text2.count();
79     const int maxCount = qMin(text1Count, text2Count);
80     while (i < maxCount) {
81         if (QStringView(text1).mid(text1Count - maxCount + i) == QStringView(text2).left(maxCount - i))
82             return maxCount - i;
83         i++;
84     }
85     return 0;
86 }
87 
decode(const QList<Diff> & diffList,const QStringList & lines)88 static QList<Diff> decode(const QList<Diff> &diffList, const QStringList &lines)
89 {
90     QList<Diff> newDiffList;
91     newDiffList.reserve(diffList.count());
92     for (const Diff &diff : diffList) {
93         QString text;
94         for (QChar c : diff.text) {
95             const int idx = static_cast<ushort>(c.unicode());
96             text += lines.value(idx);
97         }
98         newDiffList.append({diff.command, text});
99     }
100     return newDiffList;
101 }
102 
squashEqualities(const QList<Diff> & diffList)103 static QList<Diff> squashEqualities(const QList<Diff> &diffList)
104 {
105     if (diffList.count() < 3) // we need at least 3 items
106         return diffList;
107 
108     QList<Diff> newDiffList;
109     Diff prevDiff = diffList.at(0);
110     Diff thisDiff = diffList.at(1);
111     Diff nextDiff = diffList.at(2);
112     int i = 2;
113     while (i < diffList.count()) {
114         if (prevDiff.command == Diff::Equal
115                 && nextDiff.command == Diff::Equal) {
116             if (thisDiff.text.endsWith(prevDiff.text)) {
117                 thisDiff.text = prevDiff.text
118                         + thisDiff.text.left(thisDiff.text.count()
119                         - prevDiff.text.count());
120                 nextDiff.text = prevDiff.text + nextDiff.text;
121             } else if (thisDiff.text.startsWith(nextDiff.text)) {
122                 prevDiff.text += nextDiff.text;
123                 thisDiff.text = thisDiff.text.mid(nextDiff.text.count())
124                         + nextDiff.text;
125                 i++;
126                 if (i < diffList.count())
127                     nextDiff = diffList.at(i);
128                 newDiffList.append(prevDiff);
129             } else {
130                 newDiffList.append(prevDiff);
131             }
132         } else {
133             newDiffList.append(prevDiff);
134         }
135         prevDiff = thisDiff;
136         thisDiff = nextDiff;
137         i++;
138         if (i < diffList.count())
139             nextDiff = diffList.at(i);
140     }
141     newDiffList.append(prevDiff);
142     if (i == diffList.count())
143         newDiffList.append(thisDiff);
144     return newDiffList;
145 }
146 
cleanupOverlaps(const QList<Diff> & diffList)147 static QList<Diff> cleanupOverlaps(const QList<Diff> &diffList)
148 {
149     // Find overlaps between deletions and insetions.
150     // The "diffList" already contains at most one deletion and
151     // one insertion between two equalities, in this order.
152     // Eliminate overlaps, e.g.:
153     // DEL(ABCXXXX), INS(XXXXDEF) -> DEL(ABC), EQ(XXXX), INS(DEF)
154     // DEL(XXXXABC), INS(DEFXXXX) -> INS(DEF), EQ(XXXX), DEL(ABC)
155     QList<Diff> newDiffList;
156     int i = 0;
157     while (i < diffList.count()) {
158         Diff thisDiff = diffList.at(i);
159         Diff nextDiff = i < diffList.count() - 1
160                 ? diffList.at(i + 1)
161                 : Diff(Diff::Equal);
162         if (thisDiff.command == Diff::Delete
163                 && nextDiff.command == Diff::Insert) {
164             const int delInsOverlap = commonOverlap(thisDiff.text, nextDiff.text);
165             const int insDelOverlap = commonOverlap(nextDiff.text, thisDiff.text);
166             if (delInsOverlap >= insDelOverlap) {
167                 if (delInsOverlap > thisDiff.text.count() / 2
168                         || delInsOverlap > nextDiff.text.count() / 2) {
169                     thisDiff.text = thisDiff.text.left(thisDiff.text.count() - delInsOverlap);
170                     const Diff equality(Diff::Equal, nextDiff.text.left(delInsOverlap));
171                     nextDiff.text = nextDiff.text.mid(delInsOverlap);
172                     newDiffList.append(thisDiff);
173                     newDiffList.append(equality);
174                     newDiffList.append(nextDiff);
175                 } else {
176                     newDiffList.append(thisDiff);
177                     newDiffList.append(nextDiff);
178                 }
179             } else {
180                 if (insDelOverlap > thisDiff.text.count() / 2
181                         || insDelOverlap > nextDiff.text.count() / 2) {
182                     nextDiff.text = nextDiff.text.left(nextDiff.text.count() - insDelOverlap);
183                     const Diff equality(Diff::Equal, thisDiff.text.left(insDelOverlap));
184                     thisDiff.text = thisDiff.text.mid(insDelOverlap);
185                     newDiffList.append(nextDiff);
186                     newDiffList.append(equality);
187                     newDiffList.append(thisDiff);
188                 } else {
189                     newDiffList.append(thisDiff);
190                     newDiffList.append(nextDiff);
191                 }
192             }
193             i += 2;
194         } else {
195             newDiffList.append(thisDiff);
196             i++;
197         }
198     }
199     return newDiffList;
200 }
201 
cleanupSemanticsScore(const QString & text1,const QString & text2)202 static int cleanupSemanticsScore(const QString &text1, const QString &text2)
203 {
204     const QRegularExpression blankLineEnd("\\n\\r?\\n$");
205     const QRegularExpression blankLineStart("^\\r?\\n\\r?\\n");
206     const QRegularExpression sentenceEnd("\\. $");
207 
208     if (text1.isEmpty() || text2.isEmpty()) // Edges
209         return 6;
210 
211     const QChar char1 = text1[text1.count() - 1];
212     const QChar char2 = text2[0];
213     const bool nonAlphaNumeric1 = !char1.isLetterOrNumber();
214     const bool nonAlphaNumeric2 = !char2.isLetterOrNumber();
215     const bool whitespace1 = nonAlphaNumeric1 && char1.isSpace();
216     const bool whitespace2 = nonAlphaNumeric2 && char2.isSpace();
217     const bool lineBreak1 = whitespace1 && char1.category() == QChar::Other_Control;
218     const bool lineBreak2 = whitespace2 && char2.category() == QChar::Other_Control;
219     const bool blankLine1 = lineBreak1 && blankLineEnd.match(text1).hasMatch();
220     const bool blankLine2 = lineBreak2 && blankLineStart.match(text2).hasMatch();
221 
222     if (blankLine1 || blankLine2) // Blank lines
223       return 5;
224     if (lineBreak1 || lineBreak2) // Line breaks
225       return 4;
226     if (sentenceEnd.match(text1).hasMatch()) // End of sentence
227       return 3;
228     if (whitespace1 || whitespace2) // Whitespaces
229       return 2;
230     if (nonAlphaNumeric1 || nonAlphaNumeric2) // Non-alphanumerics
231       return 1;
232 
233     return 0;
234 }
235 
isWhitespace(const QChar & c)236 static bool isWhitespace(const QChar &c)
237 {
238     return c == ' ' || c == '\t';
239 }
240 
isNewLine(const QChar & c)241 static bool isNewLine(const QChar &c)
242 {
243     return c == '\n';
244 }
245 
246 /*
247  * Splits the diffList into left and right diff lists.
248  * The left diff list contains the original (insertions removed),
249  * while the right diff list contains the destination (deletions removed).
250  */
splitDiffList(const QList<Diff> & diffList,QList<Diff> * leftDiffList,QList<Diff> * rightDiffList)251 void Differ::splitDiffList(const QList<Diff> &diffList,
252                           QList<Diff> *leftDiffList,
253                           QList<Diff> *rightDiffList)
254 {
255     if (!leftDiffList || !rightDiffList)
256         return;
257 
258     leftDiffList->clear();
259     rightDiffList->clear();
260 
261     for (const Diff &diff : diffList) {
262         if (diff.command != Diff::Delete)
263             rightDiffList->append(diff);
264         if (diff.command != Diff::Insert)
265             leftDiffList->append(diff);
266     }
267 }
268 
269 /*
270  * Prerequisites:
271  * input should be only the left or right list of diffs, not a mix of both.
272  *
273  * Moves whitespace characters from Diff::Delete or Diff::Insert into
274  * surrounding Diff::Equal, if possible.
275  * It may happen, that some Diff::Delete of Diff::Insert will disappear.
276  */
moveWhitespaceIntoEqualities(const QList<Diff> & input)277 QList<Diff> Differ::moveWhitespaceIntoEqualities(const QList<Diff> &input)
278 {
279     QList<Diff> output = input;
280 
281     for (int i = 0; i < output.count(); i++) {
282         Diff diff = output[i];
283 
284         if (diff.command != Diff::Equal) {
285             if (i > 0) { // check previous equality
286                 Diff &previousDiff = output[i - 1];
287                 const int previousDiffCount = previousDiff.text.count();
288                 if (previousDiff.command == Diff::Equal
289                         && previousDiffCount
290                         && isWhitespace(previousDiff.text.at(previousDiffCount - 1))) {
291                     // previous diff ends with whitespace
292                     int j = 0;
293                     while (j < diff.text.count()) {
294                         if (!isWhitespace(diff.text.at(j)))
295                             break;
296                         ++j;
297                     }
298                     if (j > 0) {
299                         // diff starts with j whitespaces, move them to the previous diff
300                         previousDiff.text.append(diff.text.left(j));
301                         diff.text = diff.text.mid(j);
302                     }
303                 }
304             }
305             if (i < output.count() - 1) { // check next equality
306                 const int diffCount = diff.text.count();
307                 Diff &nextDiff = output[i + 1];
308                 const int nextDiffCount = nextDiff.text.count();
309                 if (nextDiff.command == Diff::Equal
310                         && nextDiffCount
311                         && (isWhitespace(nextDiff.text.at(0)) || isNewLine(nextDiff.text.at(0)))) {
312                     // next diff starts with whitespace or with newline
313                     int j = 0;
314                     while (j < diffCount) {
315                         if (!isWhitespace(diff.text.at(diffCount - j - 1)))
316                             break;
317                         ++j;
318                     }
319                     if (j > 0) {
320                         // diff ends with j whitespaces, move them to the next diff
321                         nextDiff.text.prepend(diff.text.mid(diffCount - j));
322                         diff.text = diff.text.left(diffCount - j);
323                     }
324                 }
325             }
326             // remove diff if empty
327             if (diff.text.isEmpty()) {
328                 output.removeAt(i);
329                 --i;
330             } else {
331                 output[i] = diff;
332             }
333         }
334     }
335     return output;
336 }
337 
338 /*
339  * Encodes any sentence of whitespaces with one simple space.
340  *
341  * The mapping is returned by codeMap argument, which contains
342  * the position in output string of encoded whitespace character
343  * and it's corresponding original sequence of whitespace characters.
344  *
345  * The returned string contains encoded version of the input string.
346  */
encodeReducedWhitespace(const QString & input,QMap<int,QString> * codeMap)347 static QString encodeReducedWhitespace(const QString &input,
348                                        QMap<int, QString> *codeMap)
349 {
350     QString output;
351     if (!codeMap)
352         return output;
353 
354     int inputIndex = 0;
355     int outputIndex = 0;
356     while (inputIndex < input.count()) {
357         QChar c = input.at(inputIndex);
358 
359         if (isWhitespace(c)) {
360             output.append(' ');
361             codeMap->insert(outputIndex, QString(c));
362             ++inputIndex;
363 
364             while (inputIndex < input.count()) {
365                 QChar reducedChar = input.at(inputIndex);
366 
367                 if (!isWhitespace(reducedChar))
368                     break;
369 
370                 (*codeMap)[outputIndex].append(reducedChar);
371                 ++inputIndex;
372             }
373         } else {
374             output.append(c);
375             ++inputIndex;
376         }
377         ++outputIndex;
378     }
379     return output;
380 }
381 
382 /*
383  * This is corresponding function to encodeReducedWhitespace().
384  *
385  * The input argument contains version encoded with codeMap,
386  * the returned value contains decoded diff list.
387  */
decodeReducedWhitespace(const QList<Diff> & input,const QMap<int,QString> & codeMap)388 static QList<Diff> decodeReducedWhitespace(const QList<Diff> &input,
389                                            const QMap<int, QString> &codeMap)
390 {
391     QList<Diff> output;
392 
393     int counter = 0;
394     auto it = codeMap.constBegin();
395     const auto itEnd = codeMap.constEnd();
396     for (Diff diff : input) {
397         const int diffCount = diff.text.count();
398         while ((it != itEnd) && (it.key() < counter + diffCount)) {
399             const int reversePosition = diffCount + counter - it.key();
400             const int updatedDiffCount = diff.text.count();
401             diff.text.replace(updatedDiffCount - reversePosition, 1, it.value());
402             ++it;
403         }
404         output.append(diff);
405         counter += diffCount;
406     }
407     return output;
408 }
409 
410 /*
411  * Prerequisites:
412  * leftDiff is expected to be Diff::Delete and rightDiff is expected to be Diff::Insert.
413  *
414  * Encode any sentence of whitespaces with simple space (inside leftDiff and rightDiff),
415  * diff without cleanup,
416  * split diffs,
417  * decode.
418  */
diffWithWhitespaceReduced(const QString & leftInput,const QString & rightInput,QList<Diff> * leftOutput,QList<Diff> * rightOutput)419 void Differ::diffWithWhitespaceReduced(const QString &leftInput,
420                                        const QString &rightInput,
421                                        QList<Diff> *leftOutput,
422                                        QList<Diff> *rightOutput)
423 {
424     if (!leftOutput || !rightOutput)
425         return;
426 
427     leftOutput->clear();
428     rightOutput->clear();
429 
430     QMap<int, QString> leftCodeMap;
431     QMap<int, QString> rightCodeMap;
432     const QString &leftString = encodeReducedWhitespace(leftInput, &leftCodeMap);
433     const QString &rightString = encodeReducedWhitespace(rightInput, &rightCodeMap);
434 
435     Differ differ;
436     QList<Diff> diffList = differ.diff(leftString, rightString);
437 
438     QList<Diff> leftDiffList;
439     QList<Diff> rightDiffList;
440     Differ::splitDiffList(diffList, &leftDiffList, &rightDiffList);
441 
442     *leftOutput = decodeReducedWhitespace(leftDiffList, leftCodeMap);
443     *rightOutput = decodeReducedWhitespace(rightDiffList, rightCodeMap);
444 }
445 
446 /*
447  * Prerequisites:
448  * leftDiff is expected to be Diff::Delete and rightDiff is expected to be Diff::Insert.
449  *
450  * Encode any sentence of whitespaces with simple space (inside leftDiff and rightDiff),
451  * diff without cleanup,
452  * split diffs,
453  * decode.
454  */
unifiedDiffWithWhitespaceReduced(const QString & leftInput,const QString & rightInput,QList<Diff> * leftOutput,QList<Diff> * rightOutput)455 void Differ::unifiedDiffWithWhitespaceReduced(const QString &leftInput,
456                                        const QString &rightInput,
457                                        QList<Diff> *leftOutput,
458                                        QList<Diff> *rightOutput)
459 {
460     if (!leftOutput || !rightOutput)
461         return;
462 
463     leftOutput->clear();
464     rightOutput->clear();
465 
466     QMap<int, QString> leftCodeMap;
467     QMap<int, QString> rightCodeMap;
468     const QString &leftString = encodeReducedWhitespace(leftInput, &leftCodeMap);
469     const QString &rightString = encodeReducedWhitespace(rightInput, &rightCodeMap);
470 
471     Differ differ;
472     const QList<Diff> &diffList = differ.unifiedDiff(leftString, rightString);
473 
474     QList<Diff> leftDiffList;
475     QList<Diff> rightDiffList;
476     Differ::splitDiffList(diffList, &leftDiffList, &rightDiffList);
477 
478     *leftOutput = decodeReducedWhitespace(leftDiffList, leftCodeMap);
479     *rightOutput = decodeReducedWhitespace(rightDiffList, rightCodeMap);
480 }
481 
482 /*
483  * Prerequisites:
484  * leftEquality and rightEquality needs to be equal. They may differ only with
485  * whitespaces character (count and kind).
486  *
487  * Replaces any corresponding sentence of whitespaces inside left and right equality
488  * with space characters. The number of space characters inside
489  * replaced sequence depends on the longest sequence of whitespace characters
490  * either in left or right equlity.
491  *
492  * E.g., when:
493  * leftEquality:   "a   b     c" (3 whitespace characters, 5 whitespace characters)
494  * rightEquality:  "a /tb  /t    c" (2 whitespace characters, 7 whitespace characters)
495  * then:
496  * returned value: "a   b       c" (3 space characters, 7 space characters)
497  *
498  * The returned code maps contains the info about the encoding done.
499  * The key on the map is the position of encoding inside the output string,
500  * and the value, which is a pair of int and string,
501  * describes how many characters were encoded in the output string
502  * and what was the original whitespace sequence in the original
503  * For the above example it would be:
504  *
505  * leftCodeMap:  <1, <3, "   "> >
506  *               <5, <7, "     "> >
507  * rightCodeMap: <1, <3, " /t"> >
508  *               <5, <7, "  /t    "> >
509  *
510  */
encodeExpandedWhitespace(const QString & leftEquality,const QString & rightEquality,QMap<int,QPair<int,QString>> * leftCodeMap,QMap<int,QPair<int,QString>> * rightCodeMap,bool * ok)511 static QString encodeExpandedWhitespace(const QString &leftEquality,
512                                         const QString &rightEquality,
513                                         QMap<int, QPair<int, QString> > *leftCodeMap,
514                                         QMap<int, QPair<int, QString> > *rightCodeMap,
515                                         bool *ok)
516 {
517     if (ok)
518         *ok = false;
519 
520     if (!leftCodeMap || !rightCodeMap)
521         return QString();
522 
523     leftCodeMap->clear();
524     rightCodeMap->clear();
525     QString output;
526 
527     const int leftCount = leftEquality.count();
528     const int rightCount = rightEquality.count();
529     int leftIndex = 0;
530     int rightIndex = 0;
531     while (leftIndex < leftCount && rightIndex < rightCount) {
532         QString leftWhitespaces;
533         QString rightWhitespaces;
534         while (leftIndex < leftCount && isWhitespace(leftEquality.at(leftIndex))) {
535             leftWhitespaces.append(leftEquality.at(leftIndex));
536             ++leftIndex;
537         }
538         while (rightIndex < rightCount && isWhitespace(rightEquality.at(rightIndex))) {
539             rightWhitespaces.append(rightEquality.at(rightIndex));
540             ++rightIndex;
541         }
542 
543         if (leftIndex < leftCount && rightIndex < rightCount) {
544             if (leftEquality.at(leftIndex) != rightEquality.at(rightIndex))
545                 return QString(); // equalities broken
546 
547         } else if (leftIndex == leftCount && rightIndex == rightCount) {
548             ; // do nothing, the last iteration
549         } else {
550             return QString(); // equalities broken
551         }
552 
553         if (leftWhitespaces.isEmpty() ^ rightWhitespaces.isEmpty()) {
554             // there must be at least 1 corresponding whitespace, equalities broken
555             return QString();
556         }
557 
558         if (!leftWhitespaces.isEmpty() && !rightWhitespaces.isEmpty()) {
559             const int replacementPosition = output.count();
560             const int replacementSize = qMax(leftWhitespaces.count(), rightWhitespaces.count());
561             const QString replacement(replacementSize, ' ');
562             leftCodeMap->insert(replacementPosition,
563                                 qMakePair(replacementSize, leftWhitespaces));
564             rightCodeMap->insert(replacementPosition,
565                                  qMakePair(replacementSize, rightWhitespaces));
566             output.append(replacement);
567         }
568 
569         if (leftIndex < leftCount)
570             output.append(leftEquality.at(leftIndex)); // add common character
571 
572         ++leftIndex;
573         ++rightIndex;
574     }
575 
576     if (ok)
577         *ok = true;
578 
579     return output;
580 }
581 
582 /*
583  * This is corresponding function to encodeExpandedWhitespace().
584  *
585  * The input argument contains version encoded with codeMap,
586  * the returned value contains decoded diff list.
587  */
decodeExpandedWhitespace(const QList<Diff> & input,const QMap<int,QPair<int,QString>> & codeMap,bool * ok)588 static QList<Diff> decodeExpandedWhitespace(const QList<Diff> &input,
589                                             const QMap<int, QPair<int, QString> > &codeMap,
590                                             bool *ok)
591 {
592     if (ok)
593         *ok = false;
594 
595     QList<Diff> output;
596 
597     int counter = 0;
598     auto it = codeMap.constBegin();
599     const auto itEnd = codeMap.constEnd();
600     for (Diff diff : input) {
601         const int diffCount = diff.text.count();
602         while ((it != itEnd) && (it.key() < counter + diffCount)) {
603             const int replacementSize = it.value().first;
604             const int reversePosition = diffCount + counter - it.key();
605             if (reversePosition < replacementSize)
606                 return QList<Diff>(); // replacement exceeds one Diff
607             const QString replacement = it.value().second;
608             const int updatedDiffCount = diff.text.count();
609             diff.text.replace(updatedDiffCount - reversePosition,
610                               replacementSize, replacement);
611             ++it;
612         }
613         output.append(diff);
614         counter += diffCount;
615     }
616 
617     if (ok)
618         *ok = true;
619 
620     return output;
621 }
622 
623 /*
624  * Prerequisites:
625  * leftInput and rightInput should contain the same number of equalities,
626  * equalities should differ only in whitespaces.
627  *
628  * Encodes any sentence of whitespace characters in equalities only
629  * with the maximal number of corresponding whitespace characters
630  * (inside leftInput and rightInput), so that the leftInput and rightInput
631  * can be merged together again,
632  * diff merged sequence with cleanup,
633  * decode.
634  */
diffWithWhitespaceExpandedInEqualities(const QList<Diff> & leftInput,const QList<Diff> & rightInput,QList<Diff> * leftOutput,QList<Diff> * rightOutput)635 static bool diffWithWhitespaceExpandedInEqualities(const QList<Diff> &leftInput,
636                                                    const QList<Diff> &rightInput,
637                                                    QList<Diff> *leftOutput,
638                                                    QList<Diff> *rightOutput)
639 {
640     if (!leftOutput || !rightOutput)
641         return false;
642 
643     leftOutput->clear();
644     rightOutput->clear();
645 
646     const int leftCount = leftInput.count();
647     const int rightCount = rightInput.count();
648     int l = 0;
649     int r = 0;
650 
651     QString leftText;
652     QString rightText;
653 
654     QMap<int, QPair<int, QString> > commonLeftCodeMap;
655     QMap<int, QPair<int, QString> > commonRightCodeMap;
656 
657     while (l <= leftCount && r <= rightCount) {
658         const Diff leftDiff = l < leftCount ? leftInput.at(l) : Diff(Diff::Equal);
659         const Diff rightDiff = r < rightCount ? rightInput.at(r) : Diff(Diff::Equal);
660 
661         if (leftDiff.command == Diff::Equal && rightDiff.command == Diff::Equal) {
662             QMap<int, QPair<int, QString> > leftCodeMap;
663             QMap<int, QPair<int, QString> > rightCodeMap;
664 
665             bool ok = false;
666             const QString &commonEquality = encodeExpandedWhitespace(leftDiff.text,
667                                           rightDiff.text,
668                                           &leftCodeMap,
669                                           &rightCodeMap,
670                                           &ok);
671             if (!ok)
672                 return false;
673 
674             // join code map positions with common maps
675             for (auto it = leftCodeMap.cbegin(), end = leftCodeMap.cend(); it != end; ++it)
676                 commonLeftCodeMap.insert(leftText.count() + it.key(), it.value());
677 
678             for (auto it = rightCodeMap.cbegin(), end = rightCodeMap.cend(); it != end; ++it)
679                 commonRightCodeMap.insert(rightText.count() + it.key(), it.value());
680 
681             leftText.append(commonEquality);
682             rightText.append(commonEquality);
683 
684             ++l;
685             ++r;
686         }
687 
688         if (leftDiff.command != Diff::Equal) {
689             leftText.append(leftDiff.text);
690             ++l;
691         }
692         if (rightDiff.command != Diff::Equal) {
693             rightText.append(rightDiff.text);
694             ++r;
695         }
696     }
697 
698     Differ differ;
699     const QList<Diff> &diffList = Differ::cleanupSemantics(
700                 differ.diff(leftText, rightText));
701 
702     QList<Diff> leftDiffList;
703     QList<Diff> rightDiffList;
704     Differ::splitDiffList(diffList, &leftDiffList, &rightDiffList);
705 
706     leftDiffList = Differ::moveWhitespaceIntoEqualities(leftDiffList);
707     rightDiffList = Differ::moveWhitespaceIntoEqualities(rightDiffList);
708 
709     bool ok = false;
710     *leftOutput = decodeExpandedWhitespace(leftDiffList,
711                                            commonLeftCodeMap, &ok);
712     if (!ok)
713         return false;
714     *rightOutput = decodeExpandedWhitespace(rightDiffList,
715                                             commonRightCodeMap, &ok);
716     return ok;
717 }
718 
appendWithEqualitiesSquashed(const QList<Diff> & leftInput,const QList<Diff> & rightInput,QList<Diff> * leftOutput,QList<Diff> * rightOutput)719 static void appendWithEqualitiesSquashed(const QList<Diff> &leftInput,
720                              const QList<Diff> &rightInput,
721                              QList<Diff> *leftOutput,
722                              QList<Diff> *rightOutput)
723 {
724     if (!leftInput.isEmpty()
725             && !rightInput.isEmpty()
726             && !leftOutput->isEmpty()
727             && !rightOutput->isEmpty()
728             && leftInput.first().command == Diff::Equal
729             && rightInput.first().command == Diff::Equal
730             && leftOutput->last().command == Diff::Equal
731             && rightOutput->last().command == Diff::Equal) {
732         leftOutput->last().text += leftInput.first().text;
733         rightOutput->last().text += rightInput.first().text;
734         leftOutput->append(leftInput.mid(1));
735         rightOutput->append(rightInput.mid(1));
736         return;
737     }
738     leftOutput->append(leftInput);
739     rightOutput->append(rightInput);
740 }
741 
742 /*
743  * Prerequisites:
744  * leftInput cannot contain insertions, while right input cannot contain deletions.
745  * The number of equalities on leftInput and rightInput lists should be the same.
746  * Deletions and insertions need to be merged.
747  *
748  * For each corresponding insertion / deletion pair:
749  * - diffWithWhitespaceReduced(): rediff them separately with whitespace reduced
750  *                            (new equalities may appear)
751  * - moveWhitespaceIntoEqualities(): move whitespace into new equalities
752  * - diffWithWhitespaceExpandedInEqualities(): expand whitespace inside new
753  *                            equalities only and rediff with cleanup
754  */
ignoreWhitespaceBetweenEqualities(const QList<Diff> & leftInput,const QList<Diff> & rightInput,QList<Diff> * leftOutput,QList<Diff> * rightOutput)755 void Differ::ignoreWhitespaceBetweenEqualities(const QList<Diff> &leftInput,
756                                   const QList<Diff> &rightInput,
757                                   QList<Diff> *leftOutput,
758                                   QList<Diff> *rightOutput)
759 {
760     if (!leftOutput || !rightOutput)
761         return;
762 
763     leftOutput->clear();
764     rightOutput->clear();
765 
766     const int leftCount = leftInput.count();
767     const int rightCount = rightInput.count();
768     int l = 0;
769     int r = 0;
770 
771     while (l <= leftCount && r <= rightCount) {
772         const Diff leftDiff = l < leftCount
773                 ? leftInput.at(l)
774                 : Diff(Diff::Equal);
775         const Diff rightDiff = r < rightCount
776                 ? rightInput.at(r)
777                 : Diff(Diff::Equal);
778 
779         if (leftDiff.command == Diff::Equal && rightDiff.command == Diff::Equal) {
780             const Diff previousLeftDiff = l > 0 ? leftInput.at(l - 1) : Diff(Diff::Equal);
781             const Diff previousRightDiff = r > 0 ? rightInput.at(r - 1) : Diff(Diff::Equal);
782 
783             if (previousLeftDiff.command == Diff::Delete
784                     && previousRightDiff.command == Diff::Insert) {
785                 QList<Diff> outputLeftDiffList;
786                 QList<Diff> outputRightDiffList;
787 
788                 QList<Diff> reducedLeftDiffList;
789                 QList<Diff> reducedRightDiffList;
790                 diffWithWhitespaceReduced(previousLeftDiff.text,
791                                            previousRightDiff.text,
792                                            &reducedLeftDiffList,
793                                            &reducedRightDiffList);
794 
795                 reducedLeftDiffList = moveWhitespaceIntoEqualities(reducedLeftDiffList);
796                 reducedRightDiffList = moveWhitespaceIntoEqualities(reducedRightDiffList);
797 
798                 QList<Diff> cleanedLeftDiffList;
799                 QList<Diff> cleanedRightDiffList;
800                 if (diffWithWhitespaceExpandedInEqualities(reducedLeftDiffList,
801                                                            reducedRightDiffList,
802                                                            &cleanedLeftDiffList,
803                                                            &cleanedRightDiffList)) {
804                     outputLeftDiffList = cleanedLeftDiffList;
805                     outputRightDiffList = cleanedRightDiffList;
806                 } else {
807                     outputLeftDiffList = reducedLeftDiffList;
808                     outputRightDiffList = reducedRightDiffList;
809                 }
810 
811                 appendWithEqualitiesSquashed(outputLeftDiffList,
812                                              outputRightDiffList,
813                                              leftOutput,
814                                              rightOutput);
815             } else if (previousLeftDiff.command == Diff::Delete) {
816                 leftOutput->append(previousLeftDiff);
817             } else if (previousRightDiff.command == Diff::Insert) {
818                 rightOutput->append(previousRightDiff);
819             }
820 
821             QList<Diff> leftEquality;
822             QList<Diff> rightEquality;
823             if (l < leftCount)
824                 leftEquality.append(leftDiff);
825             if (r < rightCount)
826                 rightEquality.append(rightDiff);
827 
828             appendWithEqualitiesSquashed(leftEquality,
829                                          rightEquality,
830                                          leftOutput,
831                                          rightOutput);
832 
833             ++l;
834             ++r;
835         }
836 
837         if (leftDiff.command != Diff::Equal)
838             ++l;
839         if (rightDiff.command != Diff::Equal)
840             ++r;
841     }
842 }
843 
844 /*
845  * Prerequisites:
846  * leftInput cannot contain insertions, while right input cannot contain deletions.
847  * The number of equalities on leftInput and rightInput lists should be the same.
848  * Deletions and insertions need to be merged.
849  *
850  * For each corresponding insertion / deletion pair do just diff and merge equalities
851  */
diffBetweenEqualities(const QList<Diff> & leftInput,const QList<Diff> & rightInput,QList<Diff> * leftOutput,QList<Diff> * rightOutput)852 void Differ::diffBetweenEqualities(const QList<Diff> &leftInput,
853                                    const QList<Diff> &rightInput,
854                                    QList<Diff> *leftOutput,
855                                    QList<Diff> *rightOutput)
856 {
857     if (!leftOutput || !rightOutput)
858         return;
859 
860     leftOutput->clear();
861     rightOutput->clear();
862 
863     const int leftCount = leftInput.count();
864     const int rightCount = rightInput.count();
865     int l = 0;
866     int r = 0;
867 
868     while (l <= leftCount && r <= rightCount) {
869         const Diff leftDiff = l < leftCount
870                 ? leftInput.at(l)
871                 : Diff(Diff::Equal);
872         const Diff rightDiff = r < rightCount
873                 ? rightInput.at(r)
874                 : Diff(Diff::Equal);
875 
876         if (leftDiff.command == Diff::Equal && rightDiff.command == Diff::Equal) {
877             const Diff previousLeftDiff = l > 0 ? leftInput.at(l - 1) : Diff(Diff::Equal);
878             const Diff previousRightDiff = r > 0 ? rightInput.at(r - 1) : Diff(Diff::Equal);
879 
880             if (previousLeftDiff.command == Diff::Delete
881                     && previousRightDiff.command == Diff::Insert) {
882                 Differ differ;
883                 differ.setDiffMode(Differ::CharMode);
884                 const QList<Diff> commonOutput = cleanupSemantics(
885                             differ.diff(previousLeftDiff.text, previousRightDiff.text));
886 
887                 QList<Diff> outputLeftDiffList;
888                 QList<Diff> outputRightDiffList;
889 
890                 Differ::splitDiffList(commonOutput, &outputLeftDiffList,
891                                       &outputRightDiffList);
892 
893                 appendWithEqualitiesSquashed(outputLeftDiffList,
894                                              outputRightDiffList,
895                                              leftOutput,
896                                              rightOutput);
897             } else if (previousLeftDiff.command == Diff::Delete) {
898                 leftOutput->append(previousLeftDiff);
899             } else if (previousRightDiff.command == Diff::Insert) {
900                 rightOutput->append(previousRightDiff);
901             }
902 
903             QList<Diff> leftEquality;
904             QList<Diff> rightEquality;
905             if (l < leftCount)
906                 leftEquality.append(leftDiff);
907             if (r < rightCount)
908                 rightEquality.append(rightDiff);
909 
910             appendWithEqualitiesSquashed(leftEquality,
911                                          rightEquality,
912                                          leftOutput,
913                                          rightOutput);
914 
915             ++l;
916             ++r;
917         }
918 
919         if (leftDiff.command != Diff::Equal)
920             ++l;
921         if (rightDiff.command != Diff::Equal)
922             ++r;
923     }
924 }
925 
926 ///////////////
927 
Diff(Command com,const QString & txt)928 Diff::Diff(Command com, const QString &txt) :
929     command(com),
930     text(txt)
931 {
932 }
933 
operator ==(const Diff & other) const934 bool Diff::operator==(const Diff &other) const
935 {
936      return command == other.command && text == other.text;
937 }
938 
operator !=(const Diff & other) const939 bool Diff::operator!=(const Diff &other) const
940 {
941      return !(operator == (other));
942 }
943 
commandString(Command com)944 QString Diff::commandString(Command com)
945 {
946     if (com == Delete)
947         return QCoreApplication::translate("Diff", "Delete");
948     else if (com == Insert)
949         return QCoreApplication::translate("Diff", "Insert");
950     return QCoreApplication::translate("Diff", "Equal");
951 }
952 
toString() const953 QString Diff::toString() const
954 {
955     QString prettyText = text;
956     // Replace linebreaks with pretty char
957     prettyText.replace('\n', '\xb6');
958     return commandString(command) + " \"" + prettyText + "\"";
959 }
960 
961 ///////////////
962 
Differ(QFutureInterfaceBase * jobController)963 Differ::Differ(QFutureInterfaceBase *jobController)
964     : m_jobController(jobController)
965 {
966 
967 }
968 
diff(const QString & text1,const QString & text2)969 QList<Diff> Differ::diff(const QString &text1, const QString &text2)
970 {
971     m_currentDiffMode = m_diffMode;
972     return merge(preprocess1AndDiff(text1, text2));
973 }
974 
unifiedDiff(const QString & text1,const QString & text2)975 QList<Diff> Differ::unifiedDiff(const QString &text1, const QString &text2)
976 {
977     QString encodedText1;
978     QString encodedText2;
979     QStringList subtexts = encode(text1, text2, &encodedText1, &encodedText2);
980 
981     const DiffMode diffMode = m_currentDiffMode;
982     m_currentDiffMode = CharMode;
983 
984     // Each different subtext is a separate symbol
985     // process these symbols as text with bigger alphabet
986     QList<Diff> diffList = merge(preprocess1AndDiff(encodedText1, encodedText2));
987 
988     diffList = decode(diffList, subtexts);
989     m_currentDiffMode = diffMode;
990     return diffList;
991 }
992 
setDiffMode(Differ::DiffMode mode)993 void Differ::setDiffMode(Differ::DiffMode mode)
994 {
995     m_diffMode = mode;
996 }
997 
diffMode() const998 Differ::DiffMode Differ::diffMode() const
999 {
1000     return m_diffMode;
1001 }
1002 
preprocess1AndDiff(const QString & text1,const QString & text2)1003 QList<Diff> Differ::preprocess1AndDiff(const QString &text1, const QString &text2)
1004 {
1005     if (text1.isNull() && text2.isNull())
1006         return QList<Diff>();
1007 
1008     if (text1 == text2) {
1009         QList<Diff> diffList;
1010         if (!text1.isEmpty())
1011             diffList.append(Diff(Diff::Equal, text1));
1012         return diffList;
1013     }
1014 
1015     QString newText1 = text1;
1016     QString newText2 = text2;
1017     QString prefix;
1018     QString suffix;
1019     const int prefixCount = commonPrefix(text1, text2);
1020     if (prefixCount) {
1021         prefix = text1.left(prefixCount);
1022         newText1 = text1.mid(prefixCount);
1023         newText2 = text2.mid(prefixCount);
1024     }
1025     const int suffixCount = commonSuffix(newText1, newText2);
1026     if (suffixCount) {
1027         suffix = newText1.right(suffixCount);
1028         newText1 = newText1.left(newText1.count() - suffixCount);
1029         newText2 = newText2.left(newText2.count() - suffixCount);
1030     }
1031     QList<Diff> diffList = preprocess2AndDiff(newText1, newText2);
1032     if (prefixCount)
1033         diffList.prepend(Diff(Diff::Equal, prefix));
1034     if (suffixCount)
1035         diffList.append(Diff(Diff::Equal, suffix));
1036     return diffList;
1037 }
1038 
preprocess2AndDiff(const QString & text1,const QString & text2)1039 QList<Diff> Differ::preprocess2AndDiff(const QString &text1, const QString &text2)
1040 {
1041     QList<Diff> diffList;
1042 
1043     if (text1.isEmpty()) {
1044         diffList.append(Diff(Diff::Insert, text2));
1045         return diffList;
1046     }
1047 
1048     if (text2.isEmpty()) {
1049         diffList.append(Diff(Diff::Delete, text1));
1050         return diffList;
1051     }
1052 
1053     if (text1.count() != text2.count())
1054     {
1055         const QString longtext = text1.count() > text2.count() ? text1 : text2;
1056         const QString shorttext = text1.count() > text2.count() ? text2 : text1;
1057         const int i = longtext.indexOf(shorttext);
1058         if (i != -1) {
1059             const Diff::Command command = (text1.count() > text2.count())
1060                     ? Diff::Delete : Diff::Insert;
1061             diffList.append(Diff(command, longtext.left(i)));
1062             diffList.append(Diff(Diff::Equal, shorttext));
1063             diffList.append(Diff(command, longtext.mid(i + shorttext.count())));
1064             return diffList;
1065         }
1066 
1067         if (shorttext.count() == 1) {
1068             diffList.append(Diff(Diff::Delete, text1));
1069             diffList.append(Diff(Diff::Insert, text2));
1070             return diffList;
1071         }
1072     }
1073 
1074     if (m_currentDiffMode != Differ::CharMode && text1.count() > 80 && text2.count() > 80)
1075         return diffNonCharMode(text1, text2);
1076 
1077     return diffMyers(text1, text2);
1078 }
1079 
diffMyers(const QString & text1,const QString & text2)1080 QList<Diff> Differ::diffMyers(const QString &text1, const QString &text2)
1081 {
1082     const int n = text1.count();
1083     const int m = text2.count();
1084     const bool odd = (n + m) % 2;
1085     const int D = odd ? (n + m) / 2 + 1 : (n + m) / 2;
1086     const int delta = n - m;
1087     const int vShift = D;
1088     int *forwardV = new int[2 * D + 1]; // free me
1089     int *reverseV = new int[2 * D + 1]; // free me
1090     for (int i = 0; i <= 2 * D; i++) {
1091         forwardV[i] = -1;
1092         reverseV[i] = -1;
1093     }
1094     forwardV[vShift + 1] = 0;
1095     reverseV[vShift + 1] = 0;
1096     int kMinForward = -D;
1097     int kMaxForward = D;
1098     int kMinReverse = -D;
1099     int kMaxReverse = D;
1100     for (int d = 0; d <= D; d++) {
1101         if (m_jobController && m_jobController->isCanceled()) {
1102             delete [] forwardV;
1103             delete [] reverseV;
1104             return QList<Diff>();
1105         }
1106         // going forward
1107         for (int k = qMax(-d, kMinForward + qAbs(d + kMinForward) % 2);
1108              k <= qMin(d, kMaxForward - qAbs(d + kMaxForward) % 2);
1109              k = k + 2) {
1110             int x;
1111             if (k == -d || (k < d && forwardV[k + vShift - 1] < forwardV[k + vShift + 1]))
1112                 x = forwardV[k + vShift + 1]; // copy vertically from diagonal k + 1, y increases, y may exceed the graph
1113             else
1114                 x = forwardV[k + vShift - 1] + 1; // copy horizontally from diagonal k - 1, x increases, x may exceed the graph
1115             int y = x - k;
1116 
1117             if (x > n) {
1118                 kMaxForward = k - 1; // we are beyond the graph (right border), don't check diagonals >= current k anymore
1119             } else if (y > m) {
1120                 kMinForward = k + 1; // we are beyond the graph (bottom border), don't check diagonals <= current k anymore
1121             } else {
1122                 // find snake
1123                 while (x < n && y < m) {
1124                     if (text1.at(x) != text2.at(y))
1125                         break;
1126                     x++;
1127                     y++;
1128                 }
1129                 forwardV[k + vShift] = x;
1130                 if (odd) { // check if overlap
1131                     if (k >= delta - (d - 1) && k <= delta + (d - 1)) {
1132                         if (n - reverseV[delta - k + vShift] <= x) {
1133                             delete [] forwardV;
1134                             delete [] reverseV;
1135                             return diffMyersSplit(text1, x, text2, y);
1136                         }
1137                     }
1138                 }
1139             }
1140         }
1141         // in reverse direction
1142         for (int k = qMax(-d, kMinReverse + qAbs(d + kMinReverse) % 2);
1143              k <= qMin(d, kMaxReverse - qAbs(d + kMaxReverse) % 2);
1144              k = k + 2) {
1145             int x;
1146             if (k == -d || (k < d && reverseV[k + vShift - 1] < reverseV[k + vShift + 1]))
1147                 x = reverseV[k + vShift + 1];
1148             else
1149                 x = reverseV[k + vShift - 1] + 1;
1150             int y = x - k;
1151 
1152             if (x > n) {
1153                 kMaxReverse = k - 1; // we are beyond the graph (right border), don't check diagonals >= current k anymore
1154             } else if (y > m) {
1155                 kMinReverse = k + 1; // we are beyond the graph (bottom border), don't check diagonals <= current k anymore
1156             } else {
1157                 // find snake
1158                 while (x < n && y < m) {
1159                     if (text1.at(n - x - 1) != text2.at(m - y - 1))
1160                         break;
1161                     x++;
1162                     y++;
1163                 }
1164                 reverseV[k + vShift] = x;
1165                 if (!odd) { // check if overlap
1166                     if (k >= delta - d && k <= delta + d) {
1167                         if (n - forwardV[delta - k + vShift] <= x) {
1168                             delete [] forwardV;
1169                             delete [] reverseV;
1170                             return diffMyersSplit(text1, n - x, text2, m - x + k);
1171                         }
1172                     }
1173                 }
1174             }
1175         }
1176     }
1177     delete [] forwardV;
1178     delete [] reverseV;
1179 
1180     // Completely different
1181     QList<Diff> diffList;
1182     diffList.append(Diff(Diff::Delete, text1));
1183     diffList.append(Diff(Diff::Insert, text2));
1184     return diffList;
1185 }
1186 
diffMyersSplit(const QString & text1,int x,const QString & text2,int y)1187 QList<Diff> Differ::diffMyersSplit(
1188         const QString &text1, int x,
1189         const QString &text2, int y)
1190 {
1191     const QString text11 = text1.left(x);
1192     const QString text12 = text1.mid(x);
1193     const QString text21 = text2.left(y);
1194     const QString text22 = text2.mid(y);
1195 
1196     const QList<Diff> &diffList1 = preprocess1AndDiff(text11, text21);
1197     const QList<Diff> &diffList2 = preprocess1AndDiff(text12, text22);
1198     return diffList1 + diffList2;
1199 }
1200 
diffNonCharMode(const QString & text1,const QString & text2)1201 QList<Diff> Differ::diffNonCharMode(const QString &text1, const QString &text2)
1202 {
1203     QString encodedText1;
1204     QString encodedText2;
1205     QStringList subtexts = encode(text1, text2, &encodedText1, &encodedText2);
1206 
1207     DiffMode diffMode = m_currentDiffMode;
1208     m_currentDiffMode = CharMode;
1209 
1210     // Each different subtext is a separate symbol
1211     // process these symbols as text with bigger alphabet
1212     QList<Diff> diffList = preprocess1AndDiff(encodedText1, encodedText2);
1213 
1214     diffList = decode(diffList, subtexts);
1215 
1216     QString lastDelete;
1217     QString lastInsert;
1218     QList<Diff> newDiffList;
1219     if (m_jobController) {
1220         m_jobController->setProgressRange(0, diffList.count());
1221         m_jobController->setProgressValue(0);
1222     }
1223     for (int i = 0; i <= diffList.count(); i++) {
1224         if (m_jobController) {
1225             if (m_jobController->isCanceled()) {
1226                 m_currentDiffMode = diffMode;
1227                 return QList<Diff>();
1228             }
1229             m_jobController->setProgressValue(i + 1);
1230         }
1231         const Diff diffItem = i < diffList.count()
1232                   ? diffList.at(i)
1233                   : Diff(Diff::Equal); // dummy, ensure we process to the end
1234                                        // even when diffList doesn't end with equality
1235         if (diffItem.command == Diff::Delete) {
1236             lastDelete += diffItem.text;
1237         } else if (diffItem.command == Diff::Insert) {
1238             lastInsert += diffItem.text;
1239         } else { // Diff::Equal
1240             if (!(lastDelete.isEmpty() && lastInsert.isEmpty())) {
1241                 // Rediff here on char basis
1242                 newDiffList += preprocess1AndDiff(lastDelete, lastInsert);
1243 
1244                 lastDelete.clear();
1245                 lastInsert.clear();
1246             }
1247             newDiffList.append(diffItem);
1248         }
1249     }
1250 
1251     m_currentDiffMode = diffMode;
1252     return newDiffList;
1253 }
1254 
encode(const QString & text1,const QString & text2,QString * encodedText1,QString * encodedText2)1255 QStringList Differ::encode(const QString &text1,
1256                                   const QString &text2,
1257                                   QString *encodedText1,
1258                                   QString *encodedText2)
1259 {
1260     QStringList lines;
1261     lines.append(QString()); // don't use code: 0
1262     QMap<QString, int> lineToCode;
1263 
1264     *encodedText1 = encode(text1, &lines, &lineToCode);
1265     *encodedText2 = encode(text2, &lines, &lineToCode);
1266 
1267     return lines;
1268 }
1269 
findSubtextEnd(const QString & text,int subtextStart)1270 int Differ::findSubtextEnd(const QString &text,
1271                                   int subtextStart)
1272 {
1273     if (m_currentDiffMode == Differ::LineMode) {
1274         int subtextEnd = text.indexOf('\n', subtextStart);
1275         if (subtextEnd == -1)
1276             subtextEnd = text.count() - 1;
1277         return ++subtextEnd;
1278     } else if (m_currentDiffMode == Differ::WordMode) {
1279         if (!text.at(subtextStart).isLetter())
1280             return subtextStart + 1;
1281         int i = subtextStart + 1;
1282 
1283         const int count = text.count();
1284         while (i < count && text.at(i).isLetter())
1285             i++;
1286         return i;
1287     }
1288     return subtextStart + 1; // CharMode
1289 }
1290 
encode(const QString & text,QStringList * lines,QMap<QString,int> * lineToCode)1291 QString Differ::encode(const QString &text,
1292                               QStringList *lines,
1293                               QMap<QString, int> *lineToCode)
1294 {
1295     int subtextStart = 0;
1296     int subtextEnd = -1;
1297     QString codes;
1298     while (subtextEnd < text.count()) {
1299         subtextEnd = findSubtextEnd(text, subtextStart);
1300         const QString line = text.mid(subtextStart, subtextEnd - subtextStart);
1301         subtextStart = subtextEnd;
1302 
1303         if (lineToCode->contains(line)) {
1304             codes += QChar(static_cast<ushort>(lineToCode->value(line)));
1305         } else {
1306             lines->append(line);
1307             lineToCode->insert(line, lines->count() - 1);
1308             codes += QChar(static_cast<ushort>(lines->count() - 1));
1309         }
1310     }
1311     return codes;
1312 }
1313 
merge(const QList<Diff> & diffList)1314 QList<Diff> Differ::merge(const QList<Diff> &diffList)
1315 {
1316     QString lastDelete;
1317     QString lastInsert;
1318     QList<Diff> newDiffList;
1319     for (int i = 0; i <= diffList.count(); i++) {
1320         Diff diff = i < diffList.count()
1321                   ? diffList.at(i)
1322                   : Diff(Diff::Equal); // dummy, ensure we process to the end
1323                                        // even when diffList doesn't end with equality
1324         if (diff.command == Diff::Delete) {
1325             lastDelete += diff.text;
1326         } else if (diff.command == Diff::Insert) {
1327             lastInsert += diff.text;
1328         } else { // Diff::Equal
1329             if (!(lastDelete.isEmpty() && lastInsert.isEmpty())) {
1330 
1331                 // common prefix
1332                 const int prefixCount = commonPrefix(lastDelete, lastInsert);
1333                 if (prefixCount) {
1334                     const QString prefix = lastDelete.left(prefixCount);
1335                     lastDelete = lastDelete.mid(prefixCount);
1336                     lastInsert = lastInsert.mid(prefixCount);
1337 
1338                     if (!newDiffList.isEmpty()
1339                             && newDiffList.last().command == Diff::Equal) {
1340                         newDiffList.last().text += prefix;
1341                     } else {
1342                         newDiffList.append(Diff(Diff::Equal, prefix));
1343                     }
1344                 }
1345 
1346                 // common suffix
1347                 const int suffixCount = commonSuffix(lastDelete, lastInsert);
1348                 if (suffixCount) {
1349                     const QString suffix = lastDelete.right(suffixCount);
1350                     lastDelete = lastDelete.left(lastDelete.count() - suffixCount);
1351                     lastInsert = lastInsert.left(lastInsert.count() - suffixCount);
1352 
1353                     diff.text.prepend(suffix);
1354                 }
1355 
1356                 // append delete / insert / equal
1357                 if (!lastDelete.isEmpty())
1358                     newDiffList.append(Diff(Diff::Delete, lastDelete));
1359                 if (!lastInsert.isEmpty())
1360                     newDiffList.append(Diff(Diff::Insert, lastInsert));
1361                 if (!diff.text.isEmpty())
1362                     newDiffList.append(diff);
1363                 lastDelete.clear();
1364                 lastInsert.clear();
1365             } else { // join with last equal diff
1366                 if (!newDiffList.isEmpty()
1367                         && newDiffList.last().command == Diff::Equal) {
1368                     newDiffList.last().text += diff.text;
1369                 } else {
1370                     if (!diff.text.isEmpty())
1371                         newDiffList.append(diff);
1372                 }
1373             }
1374         }
1375     }
1376 
1377     QList<Diff> squashedDiffList = squashEqualities(newDiffList);
1378     if (squashedDiffList.count() != newDiffList.count())
1379         return merge(squashedDiffList);
1380 
1381     return squashedDiffList;
1382 }
1383 
1384 struct EqualityData
1385 {
1386     int equalityIndex;
1387     int textCount;
1388     int deletesBefore;
1389     int insertsBefore;
1390     int deletesAfter;
1391     int insertsAfter;
1392 };
1393 
cleanupSemantics(const QList<Diff> & diffList)1394 QList<Diff> Differ::cleanupSemantics(const QList<Diff> &diffList)
1395 {
1396     int deletes = 0;
1397     int inserts = 0;
1398     // equality index, equality data
1399     QList<EqualityData> equalities;
1400     for (int i = 0; i <= diffList.count(); i++) {
1401         const Diff diff = i < diffList.count()
1402                   ? diffList.at(i)
1403                   : Diff(Diff::Equal); // dummy, ensure we process to the end
1404                                        // even when diffList doesn't end with equality
1405         if (diff.command == Diff::Equal) {
1406             if (!equalities.isEmpty()) {
1407                 EqualityData &previousData = equalities.last();
1408                 previousData.deletesAfter = deletes;
1409                 previousData.insertsAfter = inserts;
1410             }
1411             if (i < diffList.count()) { // don't insert dummy
1412                 EqualityData data;
1413                 data.equalityIndex = i;
1414                 data.textCount = diff.text.count();
1415                 data.deletesBefore = deletes;
1416                 data.insertsBefore = inserts;
1417                 equalities.append(data);
1418 
1419                 deletes = 0;
1420                 inserts = 0;
1421             }
1422         } else {
1423             if (diff.command == Diff::Delete)
1424                 deletes += diff.text.count();
1425             else if (diff.command == Diff::Insert)
1426                 inserts += diff.text.count();
1427         }
1428     }
1429 
1430     QMap<int, bool> equalitiesToBeSplit;
1431     int i = 0;
1432     while (i < equalities.count()) {
1433         const EqualityData &data = equalities.at(i);
1434         if (data.textCount <= qMax(data.deletesBefore, data.insertsBefore)
1435                 && data.textCount <= qMax(data.deletesAfter, data.insertsAfter)) {
1436             if (i > 0) {
1437                 EqualityData &previousData = equalities[i - 1];
1438                 previousData.deletesAfter += data.textCount + data.deletesAfter;
1439                 previousData.insertsAfter += data.textCount + data.insertsAfter;
1440             }
1441             if (i < equalities.count() - 1) {
1442                 EqualityData &nextData = equalities[i + 1];
1443                 nextData.deletesBefore += data.textCount + data.deletesBefore;
1444                 nextData.insertsBefore += data.textCount + data.insertsBefore;
1445             }
1446             equalitiesToBeSplit.insert(data.equalityIndex, true);
1447             equalities.removeAt(i);
1448             if (i > 0)
1449                 i--; // reexamine previous equality
1450         } else {
1451             i++;
1452         }
1453     }
1454 
1455     QList<Diff> newDiffList;
1456     for (int i = 0; i < diffList.count(); i++) {
1457         const Diff &diff = diffList.at(i);
1458         if (equalitiesToBeSplit.contains(i)) {
1459             newDiffList.append(Diff(Diff::Delete, diff.text));
1460             newDiffList.append(Diff(Diff::Insert, diff.text));
1461         } else {
1462             newDiffList.append(diff);
1463         }
1464     }
1465 
1466     return cleanupOverlaps(merge(cleanupSemanticsLossless(merge(newDiffList))));
1467 }
1468 
cleanupSemanticsLossless(const QList<Diff> & diffList)1469 QList<Diff> Differ::cleanupSemanticsLossless(const QList<Diff> &diffList)
1470 {
1471     if (diffList.count() < 3) // we need at least 3 items
1472         return diffList;
1473 
1474     QList<Diff> newDiffList;
1475     Diff prevDiff = diffList.at(0);
1476     Diff thisDiff = diffList.at(1);
1477     Diff nextDiff = diffList.at(2);
1478     int i = 2;
1479     while (i < diffList.count()) {
1480         if (prevDiff.command == Diff::Equal
1481                 && nextDiff.command == Diff::Equal) {
1482 
1483             // Single edit surrounded by equalities
1484             QString equality1 = prevDiff.text;
1485             QString edit = thisDiff.text;
1486             QString equality2 = nextDiff.text;
1487 
1488             // Shift the edit as far left as possible
1489             const int suffixCount = commonSuffix(equality1, edit);
1490             if (suffixCount) {
1491                 const QString commonString = edit.mid(edit.count() - suffixCount);
1492                 equality1 = equality1.left(equality1.count() - suffixCount);
1493                 edit = commonString + edit.left(edit.count() - suffixCount);
1494                 equality2 = commonString + equality2;
1495             }
1496 
1497             // Step char by char right, looking for the best score
1498             QString bestEquality1 = equality1;
1499             QString bestEdit = edit;
1500             QString bestEquality2 = equality2;
1501             int bestScore = cleanupSemanticsScore(equality1, edit)
1502                     + cleanupSemanticsScore(edit, equality2);
1503 
1504             while (!edit.isEmpty() && !equality2.isEmpty()
1505                    && edit.at(0) == equality2.at(0)) {
1506                 equality1 += edit.at(0);
1507                 edit = edit.mid(1) + equality2.at(0);
1508                 equality2 = equality2.mid(1);
1509                 const int score = cleanupSemanticsScore(equality1, edit)
1510                         + cleanupSemanticsScore(edit, equality2);
1511                 if (score >= bestScore) {
1512                     bestEquality1 = equality1;
1513                     bestEdit = edit;
1514                     bestEquality2 = equality2;
1515                     bestScore = score;
1516                 }
1517             }
1518             prevDiff.text = bestEquality1;
1519             thisDiff.text = bestEdit;
1520             nextDiff.text = bestEquality2;
1521 
1522             if (!bestEquality1.isEmpty())
1523                 newDiffList.append(prevDiff); // append modified equality1
1524             if (bestEquality2.isEmpty()) {
1525                 i++;
1526                 if (i < diffList.count())
1527                     nextDiff = diffList.at(i); // omit equality2
1528             }
1529         } else {
1530             newDiffList.append(prevDiff); // append prevDiff
1531         }
1532         prevDiff = thisDiff;
1533         thisDiff = nextDiff;
1534         i++;
1535         if (i < diffList.count())
1536             nextDiff = diffList.at(i);
1537     }
1538     newDiffList.append(prevDiff);
1539     if (i == diffList.count())
1540         newDiffList.append(thisDiff);
1541     return newDiffList;
1542 }
1543 
1544 } // namespace Utils
1545