1 /****************************************************************************
2 **
3 ** Copyright (C) 2016 The Qt Company Ltd.
4 ** Contact: https://www.qt.io/licensing/
5 **
6 ** This file is part of Qt Creator.
7 **
8 ** Commercial License Usage
9 ** Licensees holding valid commercial Qt licenses may use this file in
10 ** accordance with the commercial license agreement provided with the
11 ** Software or, alternatively, in accordance with the terms contained in
12 ** a written agreement between you and The Qt Company. For licensing terms
13 ** and conditions see https://www.qt.io/terms-conditions. For further
14 ** information use the contact form at https://www.qt.io/contact-us.
15 **
16 ** GNU General Public License Usage
17 ** Alternatively, this file may be used under the terms of the GNU
18 ** General Public License version 3 as published by the Free Software
19 ** Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT
20 ** included in the packaging of this file. Please review the following
21 ** information to ensure the GNU General Public License requirements will
22 ** be met: https://www.gnu.org/licenses/gpl-3.0.html.
23 **
24 ****************************************************************************/
25
26 /*
27 The main algorithm "diffMyers()" is based on "An O(ND) Difference Algorithm
28 and Its Variations" by Eugene W. Myers: http://www.xmailserver.org/diff2.pdf
29
30 Preprocessing and postprocessing functions inspired by "Diff Strategies"
31 publication by Neil Fraser: http://neil.fraser.name/writing/diff/
32 */
33
34 #include "differ.h"
35
36 #include <QList>
37 #include <QRegularExpression>
38 #include <QStringList>
39 #include <QMap>
40 #include <QPair>
41 #include <QCoreApplication>
42 #include <QFutureInterfaceBase>
43
44 namespace Utils {
45
commonPrefix(const QString & text1,const QString & text2)46 static int commonPrefix(const QString &text1, const QString &text2)
47 {
48 int i = 0;
49 const int text1Count = text1.count();
50 const int text2Count = text2.count();
51 const int maxCount = qMin(text1Count, text2Count);
52 while (i < maxCount) {
53 if (text1.at(i) != text2.at(i))
54 break;
55 i++;
56 }
57 return i;
58 }
59
commonSuffix(const QString & text1,const QString & text2)60 static int commonSuffix(const QString &text1, const QString &text2)
61 {
62 int i = 0;
63 const int text1Count = text1.count();
64 const int text2Count = text2.count();
65 const int maxCount = qMin(text1Count, text2Count);
66 while (i < maxCount) {
67 if (text1.at(text1Count - i - 1) != text2.at(text2Count - i - 1))
68 break;
69 i++;
70 }
71 return i;
72 }
73
commonOverlap(const QString & text1,const QString & text2)74 static int commonOverlap(const QString &text1, const QString &text2)
75 {
76 int i = 0;
77 const int text1Count = text1.count();
78 const int text2Count = text2.count();
79 const int maxCount = qMin(text1Count, text2Count);
80 while (i < maxCount) {
81 if (QStringView(text1).mid(text1Count - maxCount + i) == QStringView(text2).left(maxCount - i))
82 return maxCount - i;
83 i++;
84 }
85 return 0;
86 }
87
decode(const QList<Diff> & diffList,const QStringList & lines)88 static QList<Diff> decode(const QList<Diff> &diffList, const QStringList &lines)
89 {
90 QList<Diff> newDiffList;
91 newDiffList.reserve(diffList.count());
92 for (const Diff &diff : diffList) {
93 QString text;
94 for (QChar c : diff.text) {
95 const int idx = static_cast<ushort>(c.unicode());
96 text += lines.value(idx);
97 }
98 newDiffList.append({diff.command, text});
99 }
100 return newDiffList;
101 }
102
squashEqualities(const QList<Diff> & diffList)103 static QList<Diff> squashEqualities(const QList<Diff> &diffList)
104 {
105 if (diffList.count() < 3) // we need at least 3 items
106 return diffList;
107
108 QList<Diff> newDiffList;
109 Diff prevDiff = diffList.at(0);
110 Diff thisDiff = diffList.at(1);
111 Diff nextDiff = diffList.at(2);
112 int i = 2;
113 while (i < diffList.count()) {
114 if (prevDiff.command == Diff::Equal
115 && nextDiff.command == Diff::Equal) {
116 if (thisDiff.text.endsWith(prevDiff.text)) {
117 thisDiff.text = prevDiff.text
118 + thisDiff.text.left(thisDiff.text.count()
119 - prevDiff.text.count());
120 nextDiff.text = prevDiff.text + nextDiff.text;
121 } else if (thisDiff.text.startsWith(nextDiff.text)) {
122 prevDiff.text += nextDiff.text;
123 thisDiff.text = thisDiff.text.mid(nextDiff.text.count())
124 + nextDiff.text;
125 i++;
126 if (i < diffList.count())
127 nextDiff = diffList.at(i);
128 newDiffList.append(prevDiff);
129 } else {
130 newDiffList.append(prevDiff);
131 }
132 } else {
133 newDiffList.append(prevDiff);
134 }
135 prevDiff = thisDiff;
136 thisDiff = nextDiff;
137 i++;
138 if (i < diffList.count())
139 nextDiff = diffList.at(i);
140 }
141 newDiffList.append(prevDiff);
142 if (i == diffList.count())
143 newDiffList.append(thisDiff);
144 return newDiffList;
145 }
146
cleanupOverlaps(const QList<Diff> & diffList)147 static QList<Diff> cleanupOverlaps(const QList<Diff> &diffList)
148 {
149 // Find overlaps between deletions and insetions.
150 // The "diffList" already contains at most one deletion and
151 // one insertion between two equalities, in this order.
152 // Eliminate overlaps, e.g.:
153 // DEL(ABCXXXX), INS(XXXXDEF) -> DEL(ABC), EQ(XXXX), INS(DEF)
154 // DEL(XXXXABC), INS(DEFXXXX) -> INS(DEF), EQ(XXXX), DEL(ABC)
155 QList<Diff> newDiffList;
156 int i = 0;
157 while (i < diffList.count()) {
158 Diff thisDiff = diffList.at(i);
159 Diff nextDiff = i < diffList.count() - 1
160 ? diffList.at(i + 1)
161 : Diff(Diff::Equal);
162 if (thisDiff.command == Diff::Delete
163 && nextDiff.command == Diff::Insert) {
164 const int delInsOverlap = commonOverlap(thisDiff.text, nextDiff.text);
165 const int insDelOverlap = commonOverlap(nextDiff.text, thisDiff.text);
166 if (delInsOverlap >= insDelOverlap) {
167 if (delInsOverlap > thisDiff.text.count() / 2
168 || delInsOverlap > nextDiff.text.count() / 2) {
169 thisDiff.text = thisDiff.text.left(thisDiff.text.count() - delInsOverlap);
170 const Diff equality(Diff::Equal, nextDiff.text.left(delInsOverlap));
171 nextDiff.text = nextDiff.text.mid(delInsOverlap);
172 newDiffList.append(thisDiff);
173 newDiffList.append(equality);
174 newDiffList.append(nextDiff);
175 } else {
176 newDiffList.append(thisDiff);
177 newDiffList.append(nextDiff);
178 }
179 } else {
180 if (insDelOverlap > thisDiff.text.count() / 2
181 || insDelOverlap > nextDiff.text.count() / 2) {
182 nextDiff.text = nextDiff.text.left(nextDiff.text.count() - insDelOverlap);
183 const Diff equality(Diff::Equal, thisDiff.text.left(insDelOverlap));
184 thisDiff.text = thisDiff.text.mid(insDelOverlap);
185 newDiffList.append(nextDiff);
186 newDiffList.append(equality);
187 newDiffList.append(thisDiff);
188 } else {
189 newDiffList.append(thisDiff);
190 newDiffList.append(nextDiff);
191 }
192 }
193 i += 2;
194 } else {
195 newDiffList.append(thisDiff);
196 i++;
197 }
198 }
199 return newDiffList;
200 }
201
cleanupSemanticsScore(const QString & text1,const QString & text2)202 static int cleanupSemanticsScore(const QString &text1, const QString &text2)
203 {
204 const QRegularExpression blankLineEnd("\\n\\r?\\n$");
205 const QRegularExpression blankLineStart("^\\r?\\n\\r?\\n");
206 const QRegularExpression sentenceEnd("\\. $");
207
208 if (text1.isEmpty() || text2.isEmpty()) // Edges
209 return 6;
210
211 const QChar char1 = text1[text1.count() - 1];
212 const QChar char2 = text2[0];
213 const bool nonAlphaNumeric1 = !char1.isLetterOrNumber();
214 const bool nonAlphaNumeric2 = !char2.isLetterOrNumber();
215 const bool whitespace1 = nonAlphaNumeric1 && char1.isSpace();
216 const bool whitespace2 = nonAlphaNumeric2 && char2.isSpace();
217 const bool lineBreak1 = whitespace1 && char1.category() == QChar::Other_Control;
218 const bool lineBreak2 = whitespace2 && char2.category() == QChar::Other_Control;
219 const bool blankLine1 = lineBreak1 && blankLineEnd.match(text1).hasMatch();
220 const bool blankLine2 = lineBreak2 && blankLineStart.match(text2).hasMatch();
221
222 if (blankLine1 || blankLine2) // Blank lines
223 return 5;
224 if (lineBreak1 || lineBreak2) // Line breaks
225 return 4;
226 if (sentenceEnd.match(text1).hasMatch()) // End of sentence
227 return 3;
228 if (whitespace1 || whitespace2) // Whitespaces
229 return 2;
230 if (nonAlphaNumeric1 || nonAlphaNumeric2) // Non-alphanumerics
231 return 1;
232
233 return 0;
234 }
235
isWhitespace(const QChar & c)236 static bool isWhitespace(const QChar &c)
237 {
238 return c == ' ' || c == '\t';
239 }
240
isNewLine(const QChar & c)241 static bool isNewLine(const QChar &c)
242 {
243 return c == '\n';
244 }
245
246 /*
247 * Splits the diffList into left and right diff lists.
248 * The left diff list contains the original (insertions removed),
249 * while the right diff list contains the destination (deletions removed).
250 */
splitDiffList(const QList<Diff> & diffList,QList<Diff> * leftDiffList,QList<Diff> * rightDiffList)251 void Differ::splitDiffList(const QList<Diff> &diffList,
252 QList<Diff> *leftDiffList,
253 QList<Diff> *rightDiffList)
254 {
255 if (!leftDiffList || !rightDiffList)
256 return;
257
258 leftDiffList->clear();
259 rightDiffList->clear();
260
261 for (const Diff &diff : diffList) {
262 if (diff.command != Diff::Delete)
263 rightDiffList->append(diff);
264 if (diff.command != Diff::Insert)
265 leftDiffList->append(diff);
266 }
267 }
268
269 /*
270 * Prerequisites:
271 * input should be only the left or right list of diffs, not a mix of both.
272 *
273 * Moves whitespace characters from Diff::Delete or Diff::Insert into
274 * surrounding Diff::Equal, if possible.
275 * It may happen, that some Diff::Delete of Diff::Insert will disappear.
276 */
moveWhitespaceIntoEqualities(const QList<Diff> & input)277 QList<Diff> Differ::moveWhitespaceIntoEqualities(const QList<Diff> &input)
278 {
279 QList<Diff> output = input;
280
281 for (int i = 0; i < output.count(); i++) {
282 Diff diff = output[i];
283
284 if (diff.command != Diff::Equal) {
285 if (i > 0) { // check previous equality
286 Diff &previousDiff = output[i - 1];
287 const int previousDiffCount = previousDiff.text.count();
288 if (previousDiff.command == Diff::Equal
289 && previousDiffCount
290 && isWhitespace(previousDiff.text.at(previousDiffCount - 1))) {
291 // previous diff ends with whitespace
292 int j = 0;
293 while (j < diff.text.count()) {
294 if (!isWhitespace(diff.text.at(j)))
295 break;
296 ++j;
297 }
298 if (j > 0) {
299 // diff starts with j whitespaces, move them to the previous diff
300 previousDiff.text.append(diff.text.left(j));
301 diff.text = diff.text.mid(j);
302 }
303 }
304 }
305 if (i < output.count() - 1) { // check next equality
306 const int diffCount = diff.text.count();
307 Diff &nextDiff = output[i + 1];
308 const int nextDiffCount = nextDiff.text.count();
309 if (nextDiff.command == Diff::Equal
310 && nextDiffCount
311 && (isWhitespace(nextDiff.text.at(0)) || isNewLine(nextDiff.text.at(0)))) {
312 // next diff starts with whitespace or with newline
313 int j = 0;
314 while (j < diffCount) {
315 if (!isWhitespace(diff.text.at(diffCount - j - 1)))
316 break;
317 ++j;
318 }
319 if (j > 0) {
320 // diff ends with j whitespaces, move them to the next diff
321 nextDiff.text.prepend(diff.text.mid(diffCount - j));
322 diff.text = diff.text.left(diffCount - j);
323 }
324 }
325 }
326 // remove diff if empty
327 if (diff.text.isEmpty()) {
328 output.removeAt(i);
329 --i;
330 } else {
331 output[i] = diff;
332 }
333 }
334 }
335 return output;
336 }
337
338 /*
339 * Encodes any sentence of whitespaces with one simple space.
340 *
341 * The mapping is returned by codeMap argument, which contains
342 * the position in output string of encoded whitespace character
343 * and it's corresponding original sequence of whitespace characters.
344 *
345 * The returned string contains encoded version of the input string.
346 */
encodeReducedWhitespace(const QString & input,QMap<int,QString> * codeMap)347 static QString encodeReducedWhitespace(const QString &input,
348 QMap<int, QString> *codeMap)
349 {
350 QString output;
351 if (!codeMap)
352 return output;
353
354 int inputIndex = 0;
355 int outputIndex = 0;
356 while (inputIndex < input.count()) {
357 QChar c = input.at(inputIndex);
358
359 if (isWhitespace(c)) {
360 output.append(' ');
361 codeMap->insert(outputIndex, QString(c));
362 ++inputIndex;
363
364 while (inputIndex < input.count()) {
365 QChar reducedChar = input.at(inputIndex);
366
367 if (!isWhitespace(reducedChar))
368 break;
369
370 (*codeMap)[outputIndex].append(reducedChar);
371 ++inputIndex;
372 }
373 } else {
374 output.append(c);
375 ++inputIndex;
376 }
377 ++outputIndex;
378 }
379 return output;
380 }
381
382 /*
383 * This is corresponding function to encodeReducedWhitespace().
384 *
385 * The input argument contains version encoded with codeMap,
386 * the returned value contains decoded diff list.
387 */
decodeReducedWhitespace(const QList<Diff> & input,const QMap<int,QString> & codeMap)388 static QList<Diff> decodeReducedWhitespace(const QList<Diff> &input,
389 const QMap<int, QString> &codeMap)
390 {
391 QList<Diff> output;
392
393 int counter = 0;
394 auto it = codeMap.constBegin();
395 const auto itEnd = codeMap.constEnd();
396 for (Diff diff : input) {
397 const int diffCount = diff.text.count();
398 while ((it != itEnd) && (it.key() < counter + diffCount)) {
399 const int reversePosition = diffCount + counter - it.key();
400 const int updatedDiffCount = diff.text.count();
401 diff.text.replace(updatedDiffCount - reversePosition, 1, it.value());
402 ++it;
403 }
404 output.append(diff);
405 counter += diffCount;
406 }
407 return output;
408 }
409
410 /*
411 * Prerequisites:
412 * leftDiff is expected to be Diff::Delete and rightDiff is expected to be Diff::Insert.
413 *
414 * Encode any sentence of whitespaces with simple space (inside leftDiff and rightDiff),
415 * diff without cleanup,
416 * split diffs,
417 * decode.
418 */
diffWithWhitespaceReduced(const QString & leftInput,const QString & rightInput,QList<Diff> * leftOutput,QList<Diff> * rightOutput)419 void Differ::diffWithWhitespaceReduced(const QString &leftInput,
420 const QString &rightInput,
421 QList<Diff> *leftOutput,
422 QList<Diff> *rightOutput)
423 {
424 if (!leftOutput || !rightOutput)
425 return;
426
427 leftOutput->clear();
428 rightOutput->clear();
429
430 QMap<int, QString> leftCodeMap;
431 QMap<int, QString> rightCodeMap;
432 const QString &leftString = encodeReducedWhitespace(leftInput, &leftCodeMap);
433 const QString &rightString = encodeReducedWhitespace(rightInput, &rightCodeMap);
434
435 Differ differ;
436 QList<Diff> diffList = differ.diff(leftString, rightString);
437
438 QList<Diff> leftDiffList;
439 QList<Diff> rightDiffList;
440 Differ::splitDiffList(diffList, &leftDiffList, &rightDiffList);
441
442 *leftOutput = decodeReducedWhitespace(leftDiffList, leftCodeMap);
443 *rightOutput = decodeReducedWhitespace(rightDiffList, rightCodeMap);
444 }
445
446 /*
447 * Prerequisites:
448 * leftDiff is expected to be Diff::Delete and rightDiff is expected to be Diff::Insert.
449 *
450 * Encode any sentence of whitespaces with simple space (inside leftDiff and rightDiff),
451 * diff without cleanup,
452 * split diffs,
453 * decode.
454 */
unifiedDiffWithWhitespaceReduced(const QString & leftInput,const QString & rightInput,QList<Diff> * leftOutput,QList<Diff> * rightOutput)455 void Differ::unifiedDiffWithWhitespaceReduced(const QString &leftInput,
456 const QString &rightInput,
457 QList<Diff> *leftOutput,
458 QList<Diff> *rightOutput)
459 {
460 if (!leftOutput || !rightOutput)
461 return;
462
463 leftOutput->clear();
464 rightOutput->clear();
465
466 QMap<int, QString> leftCodeMap;
467 QMap<int, QString> rightCodeMap;
468 const QString &leftString = encodeReducedWhitespace(leftInput, &leftCodeMap);
469 const QString &rightString = encodeReducedWhitespace(rightInput, &rightCodeMap);
470
471 Differ differ;
472 const QList<Diff> &diffList = differ.unifiedDiff(leftString, rightString);
473
474 QList<Diff> leftDiffList;
475 QList<Diff> rightDiffList;
476 Differ::splitDiffList(diffList, &leftDiffList, &rightDiffList);
477
478 *leftOutput = decodeReducedWhitespace(leftDiffList, leftCodeMap);
479 *rightOutput = decodeReducedWhitespace(rightDiffList, rightCodeMap);
480 }
481
482 /*
483 * Prerequisites:
484 * leftEquality and rightEquality needs to be equal. They may differ only with
485 * whitespaces character (count and kind).
486 *
487 * Replaces any corresponding sentence of whitespaces inside left and right equality
488 * with space characters. The number of space characters inside
489 * replaced sequence depends on the longest sequence of whitespace characters
490 * either in left or right equlity.
491 *
492 * E.g., when:
493 * leftEquality: "a b c" (3 whitespace characters, 5 whitespace characters)
494 * rightEquality: "a /tb /t c" (2 whitespace characters, 7 whitespace characters)
495 * then:
496 * returned value: "a b c" (3 space characters, 7 space characters)
497 *
498 * The returned code maps contains the info about the encoding done.
499 * The key on the map is the position of encoding inside the output string,
500 * and the value, which is a pair of int and string,
501 * describes how many characters were encoded in the output string
502 * and what was the original whitespace sequence in the original
503 * For the above example it would be:
504 *
505 * leftCodeMap: <1, <3, " "> >
506 * <5, <7, " "> >
507 * rightCodeMap: <1, <3, " /t"> >
508 * <5, <7, " /t "> >
509 *
510 */
encodeExpandedWhitespace(const QString & leftEquality,const QString & rightEquality,QMap<int,QPair<int,QString>> * leftCodeMap,QMap<int,QPair<int,QString>> * rightCodeMap,bool * ok)511 static QString encodeExpandedWhitespace(const QString &leftEquality,
512 const QString &rightEquality,
513 QMap<int, QPair<int, QString> > *leftCodeMap,
514 QMap<int, QPair<int, QString> > *rightCodeMap,
515 bool *ok)
516 {
517 if (ok)
518 *ok = false;
519
520 if (!leftCodeMap || !rightCodeMap)
521 return QString();
522
523 leftCodeMap->clear();
524 rightCodeMap->clear();
525 QString output;
526
527 const int leftCount = leftEquality.count();
528 const int rightCount = rightEquality.count();
529 int leftIndex = 0;
530 int rightIndex = 0;
531 while (leftIndex < leftCount && rightIndex < rightCount) {
532 QString leftWhitespaces;
533 QString rightWhitespaces;
534 while (leftIndex < leftCount && isWhitespace(leftEquality.at(leftIndex))) {
535 leftWhitespaces.append(leftEquality.at(leftIndex));
536 ++leftIndex;
537 }
538 while (rightIndex < rightCount && isWhitespace(rightEquality.at(rightIndex))) {
539 rightWhitespaces.append(rightEquality.at(rightIndex));
540 ++rightIndex;
541 }
542
543 if (leftIndex < leftCount && rightIndex < rightCount) {
544 if (leftEquality.at(leftIndex) != rightEquality.at(rightIndex))
545 return QString(); // equalities broken
546
547 } else if (leftIndex == leftCount && rightIndex == rightCount) {
548 ; // do nothing, the last iteration
549 } else {
550 return QString(); // equalities broken
551 }
552
553 if (leftWhitespaces.isEmpty() ^ rightWhitespaces.isEmpty()) {
554 // there must be at least 1 corresponding whitespace, equalities broken
555 return QString();
556 }
557
558 if (!leftWhitespaces.isEmpty() && !rightWhitespaces.isEmpty()) {
559 const int replacementPosition = output.count();
560 const int replacementSize = qMax(leftWhitespaces.count(), rightWhitespaces.count());
561 const QString replacement(replacementSize, ' ');
562 leftCodeMap->insert(replacementPosition,
563 qMakePair(replacementSize, leftWhitespaces));
564 rightCodeMap->insert(replacementPosition,
565 qMakePair(replacementSize, rightWhitespaces));
566 output.append(replacement);
567 }
568
569 if (leftIndex < leftCount)
570 output.append(leftEquality.at(leftIndex)); // add common character
571
572 ++leftIndex;
573 ++rightIndex;
574 }
575
576 if (ok)
577 *ok = true;
578
579 return output;
580 }
581
582 /*
583 * This is corresponding function to encodeExpandedWhitespace().
584 *
585 * The input argument contains version encoded with codeMap,
586 * the returned value contains decoded diff list.
587 */
decodeExpandedWhitespace(const QList<Diff> & input,const QMap<int,QPair<int,QString>> & codeMap,bool * ok)588 static QList<Diff> decodeExpandedWhitespace(const QList<Diff> &input,
589 const QMap<int, QPair<int, QString> > &codeMap,
590 bool *ok)
591 {
592 if (ok)
593 *ok = false;
594
595 QList<Diff> output;
596
597 int counter = 0;
598 auto it = codeMap.constBegin();
599 const auto itEnd = codeMap.constEnd();
600 for (Diff diff : input) {
601 const int diffCount = diff.text.count();
602 while ((it != itEnd) && (it.key() < counter + diffCount)) {
603 const int replacementSize = it.value().first;
604 const int reversePosition = diffCount + counter - it.key();
605 if (reversePosition < replacementSize)
606 return QList<Diff>(); // replacement exceeds one Diff
607 const QString replacement = it.value().second;
608 const int updatedDiffCount = diff.text.count();
609 diff.text.replace(updatedDiffCount - reversePosition,
610 replacementSize, replacement);
611 ++it;
612 }
613 output.append(diff);
614 counter += diffCount;
615 }
616
617 if (ok)
618 *ok = true;
619
620 return output;
621 }
622
623 /*
624 * Prerequisites:
625 * leftInput and rightInput should contain the same number of equalities,
626 * equalities should differ only in whitespaces.
627 *
628 * Encodes any sentence of whitespace characters in equalities only
629 * with the maximal number of corresponding whitespace characters
630 * (inside leftInput and rightInput), so that the leftInput and rightInput
631 * can be merged together again,
632 * diff merged sequence with cleanup,
633 * decode.
634 */
diffWithWhitespaceExpandedInEqualities(const QList<Diff> & leftInput,const QList<Diff> & rightInput,QList<Diff> * leftOutput,QList<Diff> * rightOutput)635 static bool diffWithWhitespaceExpandedInEqualities(const QList<Diff> &leftInput,
636 const QList<Diff> &rightInput,
637 QList<Diff> *leftOutput,
638 QList<Diff> *rightOutput)
639 {
640 if (!leftOutput || !rightOutput)
641 return false;
642
643 leftOutput->clear();
644 rightOutput->clear();
645
646 const int leftCount = leftInput.count();
647 const int rightCount = rightInput.count();
648 int l = 0;
649 int r = 0;
650
651 QString leftText;
652 QString rightText;
653
654 QMap<int, QPair<int, QString> > commonLeftCodeMap;
655 QMap<int, QPair<int, QString> > commonRightCodeMap;
656
657 while (l <= leftCount && r <= rightCount) {
658 const Diff leftDiff = l < leftCount ? leftInput.at(l) : Diff(Diff::Equal);
659 const Diff rightDiff = r < rightCount ? rightInput.at(r) : Diff(Diff::Equal);
660
661 if (leftDiff.command == Diff::Equal && rightDiff.command == Diff::Equal) {
662 QMap<int, QPair<int, QString> > leftCodeMap;
663 QMap<int, QPair<int, QString> > rightCodeMap;
664
665 bool ok = false;
666 const QString &commonEquality = encodeExpandedWhitespace(leftDiff.text,
667 rightDiff.text,
668 &leftCodeMap,
669 &rightCodeMap,
670 &ok);
671 if (!ok)
672 return false;
673
674 // join code map positions with common maps
675 for (auto it = leftCodeMap.cbegin(), end = leftCodeMap.cend(); it != end; ++it)
676 commonLeftCodeMap.insert(leftText.count() + it.key(), it.value());
677
678 for (auto it = rightCodeMap.cbegin(), end = rightCodeMap.cend(); it != end; ++it)
679 commonRightCodeMap.insert(rightText.count() + it.key(), it.value());
680
681 leftText.append(commonEquality);
682 rightText.append(commonEquality);
683
684 ++l;
685 ++r;
686 }
687
688 if (leftDiff.command != Diff::Equal) {
689 leftText.append(leftDiff.text);
690 ++l;
691 }
692 if (rightDiff.command != Diff::Equal) {
693 rightText.append(rightDiff.text);
694 ++r;
695 }
696 }
697
698 Differ differ;
699 const QList<Diff> &diffList = Differ::cleanupSemantics(
700 differ.diff(leftText, rightText));
701
702 QList<Diff> leftDiffList;
703 QList<Diff> rightDiffList;
704 Differ::splitDiffList(diffList, &leftDiffList, &rightDiffList);
705
706 leftDiffList = Differ::moveWhitespaceIntoEqualities(leftDiffList);
707 rightDiffList = Differ::moveWhitespaceIntoEqualities(rightDiffList);
708
709 bool ok = false;
710 *leftOutput = decodeExpandedWhitespace(leftDiffList,
711 commonLeftCodeMap, &ok);
712 if (!ok)
713 return false;
714 *rightOutput = decodeExpandedWhitespace(rightDiffList,
715 commonRightCodeMap, &ok);
716 return ok;
717 }
718
appendWithEqualitiesSquashed(const QList<Diff> & leftInput,const QList<Diff> & rightInput,QList<Diff> * leftOutput,QList<Diff> * rightOutput)719 static void appendWithEqualitiesSquashed(const QList<Diff> &leftInput,
720 const QList<Diff> &rightInput,
721 QList<Diff> *leftOutput,
722 QList<Diff> *rightOutput)
723 {
724 if (!leftInput.isEmpty()
725 && !rightInput.isEmpty()
726 && !leftOutput->isEmpty()
727 && !rightOutput->isEmpty()
728 && leftInput.first().command == Diff::Equal
729 && rightInput.first().command == Diff::Equal
730 && leftOutput->last().command == Diff::Equal
731 && rightOutput->last().command == Diff::Equal) {
732 leftOutput->last().text += leftInput.first().text;
733 rightOutput->last().text += rightInput.first().text;
734 leftOutput->append(leftInput.mid(1));
735 rightOutput->append(rightInput.mid(1));
736 return;
737 }
738 leftOutput->append(leftInput);
739 rightOutput->append(rightInput);
740 }
741
742 /*
743 * Prerequisites:
744 * leftInput cannot contain insertions, while right input cannot contain deletions.
745 * The number of equalities on leftInput and rightInput lists should be the same.
746 * Deletions and insertions need to be merged.
747 *
748 * For each corresponding insertion / deletion pair:
749 * - diffWithWhitespaceReduced(): rediff them separately with whitespace reduced
750 * (new equalities may appear)
751 * - moveWhitespaceIntoEqualities(): move whitespace into new equalities
752 * - diffWithWhitespaceExpandedInEqualities(): expand whitespace inside new
753 * equalities only and rediff with cleanup
754 */
ignoreWhitespaceBetweenEqualities(const QList<Diff> & leftInput,const QList<Diff> & rightInput,QList<Diff> * leftOutput,QList<Diff> * rightOutput)755 void Differ::ignoreWhitespaceBetweenEqualities(const QList<Diff> &leftInput,
756 const QList<Diff> &rightInput,
757 QList<Diff> *leftOutput,
758 QList<Diff> *rightOutput)
759 {
760 if (!leftOutput || !rightOutput)
761 return;
762
763 leftOutput->clear();
764 rightOutput->clear();
765
766 const int leftCount = leftInput.count();
767 const int rightCount = rightInput.count();
768 int l = 0;
769 int r = 0;
770
771 while (l <= leftCount && r <= rightCount) {
772 const Diff leftDiff = l < leftCount
773 ? leftInput.at(l)
774 : Diff(Diff::Equal);
775 const Diff rightDiff = r < rightCount
776 ? rightInput.at(r)
777 : Diff(Diff::Equal);
778
779 if (leftDiff.command == Diff::Equal && rightDiff.command == Diff::Equal) {
780 const Diff previousLeftDiff = l > 0 ? leftInput.at(l - 1) : Diff(Diff::Equal);
781 const Diff previousRightDiff = r > 0 ? rightInput.at(r - 1) : Diff(Diff::Equal);
782
783 if (previousLeftDiff.command == Diff::Delete
784 && previousRightDiff.command == Diff::Insert) {
785 QList<Diff> outputLeftDiffList;
786 QList<Diff> outputRightDiffList;
787
788 QList<Diff> reducedLeftDiffList;
789 QList<Diff> reducedRightDiffList;
790 diffWithWhitespaceReduced(previousLeftDiff.text,
791 previousRightDiff.text,
792 &reducedLeftDiffList,
793 &reducedRightDiffList);
794
795 reducedLeftDiffList = moveWhitespaceIntoEqualities(reducedLeftDiffList);
796 reducedRightDiffList = moveWhitespaceIntoEqualities(reducedRightDiffList);
797
798 QList<Diff> cleanedLeftDiffList;
799 QList<Diff> cleanedRightDiffList;
800 if (diffWithWhitespaceExpandedInEqualities(reducedLeftDiffList,
801 reducedRightDiffList,
802 &cleanedLeftDiffList,
803 &cleanedRightDiffList)) {
804 outputLeftDiffList = cleanedLeftDiffList;
805 outputRightDiffList = cleanedRightDiffList;
806 } else {
807 outputLeftDiffList = reducedLeftDiffList;
808 outputRightDiffList = reducedRightDiffList;
809 }
810
811 appendWithEqualitiesSquashed(outputLeftDiffList,
812 outputRightDiffList,
813 leftOutput,
814 rightOutput);
815 } else if (previousLeftDiff.command == Diff::Delete) {
816 leftOutput->append(previousLeftDiff);
817 } else if (previousRightDiff.command == Diff::Insert) {
818 rightOutput->append(previousRightDiff);
819 }
820
821 QList<Diff> leftEquality;
822 QList<Diff> rightEquality;
823 if (l < leftCount)
824 leftEquality.append(leftDiff);
825 if (r < rightCount)
826 rightEquality.append(rightDiff);
827
828 appendWithEqualitiesSquashed(leftEquality,
829 rightEquality,
830 leftOutput,
831 rightOutput);
832
833 ++l;
834 ++r;
835 }
836
837 if (leftDiff.command != Diff::Equal)
838 ++l;
839 if (rightDiff.command != Diff::Equal)
840 ++r;
841 }
842 }
843
844 /*
845 * Prerequisites:
846 * leftInput cannot contain insertions, while right input cannot contain deletions.
847 * The number of equalities on leftInput and rightInput lists should be the same.
848 * Deletions and insertions need to be merged.
849 *
850 * For each corresponding insertion / deletion pair do just diff and merge equalities
851 */
diffBetweenEqualities(const QList<Diff> & leftInput,const QList<Diff> & rightInput,QList<Diff> * leftOutput,QList<Diff> * rightOutput)852 void Differ::diffBetweenEqualities(const QList<Diff> &leftInput,
853 const QList<Diff> &rightInput,
854 QList<Diff> *leftOutput,
855 QList<Diff> *rightOutput)
856 {
857 if (!leftOutput || !rightOutput)
858 return;
859
860 leftOutput->clear();
861 rightOutput->clear();
862
863 const int leftCount = leftInput.count();
864 const int rightCount = rightInput.count();
865 int l = 0;
866 int r = 0;
867
868 while (l <= leftCount && r <= rightCount) {
869 const Diff leftDiff = l < leftCount
870 ? leftInput.at(l)
871 : Diff(Diff::Equal);
872 const Diff rightDiff = r < rightCount
873 ? rightInput.at(r)
874 : Diff(Diff::Equal);
875
876 if (leftDiff.command == Diff::Equal && rightDiff.command == Diff::Equal) {
877 const Diff previousLeftDiff = l > 0 ? leftInput.at(l - 1) : Diff(Diff::Equal);
878 const Diff previousRightDiff = r > 0 ? rightInput.at(r - 1) : Diff(Diff::Equal);
879
880 if (previousLeftDiff.command == Diff::Delete
881 && previousRightDiff.command == Diff::Insert) {
882 Differ differ;
883 differ.setDiffMode(Differ::CharMode);
884 const QList<Diff> commonOutput = cleanupSemantics(
885 differ.diff(previousLeftDiff.text, previousRightDiff.text));
886
887 QList<Diff> outputLeftDiffList;
888 QList<Diff> outputRightDiffList;
889
890 Differ::splitDiffList(commonOutput, &outputLeftDiffList,
891 &outputRightDiffList);
892
893 appendWithEqualitiesSquashed(outputLeftDiffList,
894 outputRightDiffList,
895 leftOutput,
896 rightOutput);
897 } else if (previousLeftDiff.command == Diff::Delete) {
898 leftOutput->append(previousLeftDiff);
899 } else if (previousRightDiff.command == Diff::Insert) {
900 rightOutput->append(previousRightDiff);
901 }
902
903 QList<Diff> leftEquality;
904 QList<Diff> rightEquality;
905 if (l < leftCount)
906 leftEquality.append(leftDiff);
907 if (r < rightCount)
908 rightEquality.append(rightDiff);
909
910 appendWithEqualitiesSquashed(leftEquality,
911 rightEquality,
912 leftOutput,
913 rightOutput);
914
915 ++l;
916 ++r;
917 }
918
919 if (leftDiff.command != Diff::Equal)
920 ++l;
921 if (rightDiff.command != Diff::Equal)
922 ++r;
923 }
924 }
925
926 ///////////////
927
Diff(Command com,const QString & txt)928 Diff::Diff(Command com, const QString &txt) :
929 command(com),
930 text(txt)
931 {
932 }
933
operator ==(const Diff & other) const934 bool Diff::operator==(const Diff &other) const
935 {
936 return command == other.command && text == other.text;
937 }
938
operator !=(const Diff & other) const939 bool Diff::operator!=(const Diff &other) const
940 {
941 return !(operator == (other));
942 }
943
commandString(Command com)944 QString Diff::commandString(Command com)
945 {
946 if (com == Delete)
947 return QCoreApplication::translate("Diff", "Delete");
948 else if (com == Insert)
949 return QCoreApplication::translate("Diff", "Insert");
950 return QCoreApplication::translate("Diff", "Equal");
951 }
952
toString() const953 QString Diff::toString() const
954 {
955 QString prettyText = text;
956 // Replace linebreaks with pretty char
957 prettyText.replace('\n', '\xb6');
958 return commandString(command) + " \"" + prettyText + "\"";
959 }
960
961 ///////////////
962
Differ(QFutureInterfaceBase * jobController)963 Differ::Differ(QFutureInterfaceBase *jobController)
964 : m_jobController(jobController)
965 {
966
967 }
968
diff(const QString & text1,const QString & text2)969 QList<Diff> Differ::diff(const QString &text1, const QString &text2)
970 {
971 m_currentDiffMode = m_diffMode;
972 return merge(preprocess1AndDiff(text1, text2));
973 }
974
unifiedDiff(const QString & text1,const QString & text2)975 QList<Diff> Differ::unifiedDiff(const QString &text1, const QString &text2)
976 {
977 QString encodedText1;
978 QString encodedText2;
979 QStringList subtexts = encode(text1, text2, &encodedText1, &encodedText2);
980
981 const DiffMode diffMode = m_currentDiffMode;
982 m_currentDiffMode = CharMode;
983
984 // Each different subtext is a separate symbol
985 // process these symbols as text with bigger alphabet
986 QList<Diff> diffList = merge(preprocess1AndDiff(encodedText1, encodedText2));
987
988 diffList = decode(diffList, subtexts);
989 m_currentDiffMode = diffMode;
990 return diffList;
991 }
992
setDiffMode(Differ::DiffMode mode)993 void Differ::setDiffMode(Differ::DiffMode mode)
994 {
995 m_diffMode = mode;
996 }
997
diffMode() const998 Differ::DiffMode Differ::diffMode() const
999 {
1000 return m_diffMode;
1001 }
1002
preprocess1AndDiff(const QString & text1,const QString & text2)1003 QList<Diff> Differ::preprocess1AndDiff(const QString &text1, const QString &text2)
1004 {
1005 if (text1.isNull() && text2.isNull())
1006 return QList<Diff>();
1007
1008 if (text1 == text2) {
1009 QList<Diff> diffList;
1010 if (!text1.isEmpty())
1011 diffList.append(Diff(Diff::Equal, text1));
1012 return diffList;
1013 }
1014
1015 QString newText1 = text1;
1016 QString newText2 = text2;
1017 QString prefix;
1018 QString suffix;
1019 const int prefixCount = commonPrefix(text1, text2);
1020 if (prefixCount) {
1021 prefix = text1.left(prefixCount);
1022 newText1 = text1.mid(prefixCount);
1023 newText2 = text2.mid(prefixCount);
1024 }
1025 const int suffixCount = commonSuffix(newText1, newText2);
1026 if (suffixCount) {
1027 suffix = newText1.right(suffixCount);
1028 newText1 = newText1.left(newText1.count() - suffixCount);
1029 newText2 = newText2.left(newText2.count() - suffixCount);
1030 }
1031 QList<Diff> diffList = preprocess2AndDiff(newText1, newText2);
1032 if (prefixCount)
1033 diffList.prepend(Diff(Diff::Equal, prefix));
1034 if (suffixCount)
1035 diffList.append(Diff(Diff::Equal, suffix));
1036 return diffList;
1037 }
1038
preprocess2AndDiff(const QString & text1,const QString & text2)1039 QList<Diff> Differ::preprocess2AndDiff(const QString &text1, const QString &text2)
1040 {
1041 QList<Diff> diffList;
1042
1043 if (text1.isEmpty()) {
1044 diffList.append(Diff(Diff::Insert, text2));
1045 return diffList;
1046 }
1047
1048 if (text2.isEmpty()) {
1049 diffList.append(Diff(Diff::Delete, text1));
1050 return diffList;
1051 }
1052
1053 if (text1.count() != text2.count())
1054 {
1055 const QString longtext = text1.count() > text2.count() ? text1 : text2;
1056 const QString shorttext = text1.count() > text2.count() ? text2 : text1;
1057 const int i = longtext.indexOf(shorttext);
1058 if (i != -1) {
1059 const Diff::Command command = (text1.count() > text2.count())
1060 ? Diff::Delete : Diff::Insert;
1061 diffList.append(Diff(command, longtext.left(i)));
1062 diffList.append(Diff(Diff::Equal, shorttext));
1063 diffList.append(Diff(command, longtext.mid(i + shorttext.count())));
1064 return diffList;
1065 }
1066
1067 if (shorttext.count() == 1) {
1068 diffList.append(Diff(Diff::Delete, text1));
1069 diffList.append(Diff(Diff::Insert, text2));
1070 return diffList;
1071 }
1072 }
1073
1074 if (m_currentDiffMode != Differ::CharMode && text1.count() > 80 && text2.count() > 80)
1075 return diffNonCharMode(text1, text2);
1076
1077 return diffMyers(text1, text2);
1078 }
1079
diffMyers(const QString & text1,const QString & text2)1080 QList<Diff> Differ::diffMyers(const QString &text1, const QString &text2)
1081 {
1082 const int n = text1.count();
1083 const int m = text2.count();
1084 const bool odd = (n + m) % 2;
1085 const int D = odd ? (n + m) / 2 + 1 : (n + m) / 2;
1086 const int delta = n - m;
1087 const int vShift = D;
1088 int *forwardV = new int[2 * D + 1]; // free me
1089 int *reverseV = new int[2 * D + 1]; // free me
1090 for (int i = 0; i <= 2 * D; i++) {
1091 forwardV[i] = -1;
1092 reverseV[i] = -1;
1093 }
1094 forwardV[vShift + 1] = 0;
1095 reverseV[vShift + 1] = 0;
1096 int kMinForward = -D;
1097 int kMaxForward = D;
1098 int kMinReverse = -D;
1099 int kMaxReverse = D;
1100 for (int d = 0; d <= D; d++) {
1101 if (m_jobController && m_jobController->isCanceled()) {
1102 delete [] forwardV;
1103 delete [] reverseV;
1104 return QList<Diff>();
1105 }
1106 // going forward
1107 for (int k = qMax(-d, kMinForward + qAbs(d + kMinForward) % 2);
1108 k <= qMin(d, kMaxForward - qAbs(d + kMaxForward) % 2);
1109 k = k + 2) {
1110 int x;
1111 if (k == -d || (k < d && forwardV[k + vShift - 1] < forwardV[k + vShift + 1]))
1112 x = forwardV[k + vShift + 1]; // copy vertically from diagonal k + 1, y increases, y may exceed the graph
1113 else
1114 x = forwardV[k + vShift - 1] + 1; // copy horizontally from diagonal k - 1, x increases, x may exceed the graph
1115 int y = x - k;
1116
1117 if (x > n) {
1118 kMaxForward = k - 1; // we are beyond the graph (right border), don't check diagonals >= current k anymore
1119 } else if (y > m) {
1120 kMinForward = k + 1; // we are beyond the graph (bottom border), don't check diagonals <= current k anymore
1121 } else {
1122 // find snake
1123 while (x < n && y < m) {
1124 if (text1.at(x) != text2.at(y))
1125 break;
1126 x++;
1127 y++;
1128 }
1129 forwardV[k + vShift] = x;
1130 if (odd) { // check if overlap
1131 if (k >= delta - (d - 1) && k <= delta + (d - 1)) {
1132 if (n - reverseV[delta - k + vShift] <= x) {
1133 delete [] forwardV;
1134 delete [] reverseV;
1135 return diffMyersSplit(text1, x, text2, y);
1136 }
1137 }
1138 }
1139 }
1140 }
1141 // in reverse direction
1142 for (int k = qMax(-d, kMinReverse + qAbs(d + kMinReverse) % 2);
1143 k <= qMin(d, kMaxReverse - qAbs(d + kMaxReverse) % 2);
1144 k = k + 2) {
1145 int x;
1146 if (k == -d || (k < d && reverseV[k + vShift - 1] < reverseV[k + vShift + 1]))
1147 x = reverseV[k + vShift + 1];
1148 else
1149 x = reverseV[k + vShift - 1] + 1;
1150 int y = x - k;
1151
1152 if (x > n) {
1153 kMaxReverse = k - 1; // we are beyond the graph (right border), don't check diagonals >= current k anymore
1154 } else if (y > m) {
1155 kMinReverse = k + 1; // we are beyond the graph (bottom border), don't check diagonals <= current k anymore
1156 } else {
1157 // find snake
1158 while (x < n && y < m) {
1159 if (text1.at(n - x - 1) != text2.at(m - y - 1))
1160 break;
1161 x++;
1162 y++;
1163 }
1164 reverseV[k + vShift] = x;
1165 if (!odd) { // check if overlap
1166 if (k >= delta - d && k <= delta + d) {
1167 if (n - forwardV[delta - k + vShift] <= x) {
1168 delete [] forwardV;
1169 delete [] reverseV;
1170 return diffMyersSplit(text1, n - x, text2, m - x + k);
1171 }
1172 }
1173 }
1174 }
1175 }
1176 }
1177 delete [] forwardV;
1178 delete [] reverseV;
1179
1180 // Completely different
1181 QList<Diff> diffList;
1182 diffList.append(Diff(Diff::Delete, text1));
1183 diffList.append(Diff(Diff::Insert, text2));
1184 return diffList;
1185 }
1186
diffMyersSplit(const QString & text1,int x,const QString & text2,int y)1187 QList<Diff> Differ::diffMyersSplit(
1188 const QString &text1, int x,
1189 const QString &text2, int y)
1190 {
1191 const QString text11 = text1.left(x);
1192 const QString text12 = text1.mid(x);
1193 const QString text21 = text2.left(y);
1194 const QString text22 = text2.mid(y);
1195
1196 const QList<Diff> &diffList1 = preprocess1AndDiff(text11, text21);
1197 const QList<Diff> &diffList2 = preprocess1AndDiff(text12, text22);
1198 return diffList1 + diffList2;
1199 }
1200
diffNonCharMode(const QString & text1,const QString & text2)1201 QList<Diff> Differ::diffNonCharMode(const QString &text1, const QString &text2)
1202 {
1203 QString encodedText1;
1204 QString encodedText2;
1205 QStringList subtexts = encode(text1, text2, &encodedText1, &encodedText2);
1206
1207 DiffMode diffMode = m_currentDiffMode;
1208 m_currentDiffMode = CharMode;
1209
1210 // Each different subtext is a separate symbol
1211 // process these symbols as text with bigger alphabet
1212 QList<Diff> diffList = preprocess1AndDiff(encodedText1, encodedText2);
1213
1214 diffList = decode(diffList, subtexts);
1215
1216 QString lastDelete;
1217 QString lastInsert;
1218 QList<Diff> newDiffList;
1219 if (m_jobController) {
1220 m_jobController->setProgressRange(0, diffList.count());
1221 m_jobController->setProgressValue(0);
1222 }
1223 for (int i = 0; i <= diffList.count(); i++) {
1224 if (m_jobController) {
1225 if (m_jobController->isCanceled()) {
1226 m_currentDiffMode = diffMode;
1227 return QList<Diff>();
1228 }
1229 m_jobController->setProgressValue(i + 1);
1230 }
1231 const Diff diffItem = i < diffList.count()
1232 ? diffList.at(i)
1233 : Diff(Diff::Equal); // dummy, ensure we process to the end
1234 // even when diffList doesn't end with equality
1235 if (diffItem.command == Diff::Delete) {
1236 lastDelete += diffItem.text;
1237 } else if (diffItem.command == Diff::Insert) {
1238 lastInsert += diffItem.text;
1239 } else { // Diff::Equal
1240 if (!(lastDelete.isEmpty() && lastInsert.isEmpty())) {
1241 // Rediff here on char basis
1242 newDiffList += preprocess1AndDiff(lastDelete, lastInsert);
1243
1244 lastDelete.clear();
1245 lastInsert.clear();
1246 }
1247 newDiffList.append(diffItem);
1248 }
1249 }
1250
1251 m_currentDiffMode = diffMode;
1252 return newDiffList;
1253 }
1254
encode(const QString & text1,const QString & text2,QString * encodedText1,QString * encodedText2)1255 QStringList Differ::encode(const QString &text1,
1256 const QString &text2,
1257 QString *encodedText1,
1258 QString *encodedText2)
1259 {
1260 QStringList lines;
1261 lines.append(QString()); // don't use code: 0
1262 QMap<QString, int> lineToCode;
1263
1264 *encodedText1 = encode(text1, &lines, &lineToCode);
1265 *encodedText2 = encode(text2, &lines, &lineToCode);
1266
1267 return lines;
1268 }
1269
findSubtextEnd(const QString & text,int subtextStart)1270 int Differ::findSubtextEnd(const QString &text,
1271 int subtextStart)
1272 {
1273 if (m_currentDiffMode == Differ::LineMode) {
1274 int subtextEnd = text.indexOf('\n', subtextStart);
1275 if (subtextEnd == -1)
1276 subtextEnd = text.count() - 1;
1277 return ++subtextEnd;
1278 } else if (m_currentDiffMode == Differ::WordMode) {
1279 if (!text.at(subtextStart).isLetter())
1280 return subtextStart + 1;
1281 int i = subtextStart + 1;
1282
1283 const int count = text.count();
1284 while (i < count && text.at(i).isLetter())
1285 i++;
1286 return i;
1287 }
1288 return subtextStart + 1; // CharMode
1289 }
1290
encode(const QString & text,QStringList * lines,QMap<QString,int> * lineToCode)1291 QString Differ::encode(const QString &text,
1292 QStringList *lines,
1293 QMap<QString, int> *lineToCode)
1294 {
1295 int subtextStart = 0;
1296 int subtextEnd = -1;
1297 QString codes;
1298 while (subtextEnd < text.count()) {
1299 subtextEnd = findSubtextEnd(text, subtextStart);
1300 const QString line = text.mid(subtextStart, subtextEnd - subtextStart);
1301 subtextStart = subtextEnd;
1302
1303 if (lineToCode->contains(line)) {
1304 codes += QChar(static_cast<ushort>(lineToCode->value(line)));
1305 } else {
1306 lines->append(line);
1307 lineToCode->insert(line, lines->count() - 1);
1308 codes += QChar(static_cast<ushort>(lines->count() - 1));
1309 }
1310 }
1311 return codes;
1312 }
1313
merge(const QList<Diff> & diffList)1314 QList<Diff> Differ::merge(const QList<Diff> &diffList)
1315 {
1316 QString lastDelete;
1317 QString lastInsert;
1318 QList<Diff> newDiffList;
1319 for (int i = 0; i <= diffList.count(); i++) {
1320 Diff diff = i < diffList.count()
1321 ? diffList.at(i)
1322 : Diff(Diff::Equal); // dummy, ensure we process to the end
1323 // even when diffList doesn't end with equality
1324 if (diff.command == Diff::Delete) {
1325 lastDelete += diff.text;
1326 } else if (diff.command == Diff::Insert) {
1327 lastInsert += diff.text;
1328 } else { // Diff::Equal
1329 if (!(lastDelete.isEmpty() && lastInsert.isEmpty())) {
1330
1331 // common prefix
1332 const int prefixCount = commonPrefix(lastDelete, lastInsert);
1333 if (prefixCount) {
1334 const QString prefix = lastDelete.left(prefixCount);
1335 lastDelete = lastDelete.mid(prefixCount);
1336 lastInsert = lastInsert.mid(prefixCount);
1337
1338 if (!newDiffList.isEmpty()
1339 && newDiffList.last().command == Diff::Equal) {
1340 newDiffList.last().text += prefix;
1341 } else {
1342 newDiffList.append(Diff(Diff::Equal, prefix));
1343 }
1344 }
1345
1346 // common suffix
1347 const int suffixCount = commonSuffix(lastDelete, lastInsert);
1348 if (suffixCount) {
1349 const QString suffix = lastDelete.right(suffixCount);
1350 lastDelete = lastDelete.left(lastDelete.count() - suffixCount);
1351 lastInsert = lastInsert.left(lastInsert.count() - suffixCount);
1352
1353 diff.text.prepend(suffix);
1354 }
1355
1356 // append delete / insert / equal
1357 if (!lastDelete.isEmpty())
1358 newDiffList.append(Diff(Diff::Delete, lastDelete));
1359 if (!lastInsert.isEmpty())
1360 newDiffList.append(Diff(Diff::Insert, lastInsert));
1361 if (!diff.text.isEmpty())
1362 newDiffList.append(diff);
1363 lastDelete.clear();
1364 lastInsert.clear();
1365 } else { // join with last equal diff
1366 if (!newDiffList.isEmpty()
1367 && newDiffList.last().command == Diff::Equal) {
1368 newDiffList.last().text += diff.text;
1369 } else {
1370 if (!diff.text.isEmpty())
1371 newDiffList.append(diff);
1372 }
1373 }
1374 }
1375 }
1376
1377 QList<Diff> squashedDiffList = squashEqualities(newDiffList);
1378 if (squashedDiffList.count() != newDiffList.count())
1379 return merge(squashedDiffList);
1380
1381 return squashedDiffList;
1382 }
1383
1384 struct EqualityData
1385 {
1386 int equalityIndex;
1387 int textCount;
1388 int deletesBefore;
1389 int insertsBefore;
1390 int deletesAfter;
1391 int insertsAfter;
1392 };
1393
cleanupSemantics(const QList<Diff> & diffList)1394 QList<Diff> Differ::cleanupSemantics(const QList<Diff> &diffList)
1395 {
1396 int deletes = 0;
1397 int inserts = 0;
1398 // equality index, equality data
1399 QList<EqualityData> equalities;
1400 for (int i = 0; i <= diffList.count(); i++) {
1401 const Diff diff = i < diffList.count()
1402 ? diffList.at(i)
1403 : Diff(Diff::Equal); // dummy, ensure we process to the end
1404 // even when diffList doesn't end with equality
1405 if (diff.command == Diff::Equal) {
1406 if (!equalities.isEmpty()) {
1407 EqualityData &previousData = equalities.last();
1408 previousData.deletesAfter = deletes;
1409 previousData.insertsAfter = inserts;
1410 }
1411 if (i < diffList.count()) { // don't insert dummy
1412 EqualityData data;
1413 data.equalityIndex = i;
1414 data.textCount = diff.text.count();
1415 data.deletesBefore = deletes;
1416 data.insertsBefore = inserts;
1417 equalities.append(data);
1418
1419 deletes = 0;
1420 inserts = 0;
1421 }
1422 } else {
1423 if (diff.command == Diff::Delete)
1424 deletes += diff.text.count();
1425 else if (diff.command == Diff::Insert)
1426 inserts += diff.text.count();
1427 }
1428 }
1429
1430 QMap<int, bool> equalitiesToBeSplit;
1431 int i = 0;
1432 while (i < equalities.count()) {
1433 const EqualityData &data = equalities.at(i);
1434 if (data.textCount <= qMax(data.deletesBefore, data.insertsBefore)
1435 && data.textCount <= qMax(data.deletesAfter, data.insertsAfter)) {
1436 if (i > 0) {
1437 EqualityData &previousData = equalities[i - 1];
1438 previousData.deletesAfter += data.textCount + data.deletesAfter;
1439 previousData.insertsAfter += data.textCount + data.insertsAfter;
1440 }
1441 if (i < equalities.count() - 1) {
1442 EqualityData &nextData = equalities[i + 1];
1443 nextData.deletesBefore += data.textCount + data.deletesBefore;
1444 nextData.insertsBefore += data.textCount + data.insertsBefore;
1445 }
1446 equalitiesToBeSplit.insert(data.equalityIndex, true);
1447 equalities.removeAt(i);
1448 if (i > 0)
1449 i--; // reexamine previous equality
1450 } else {
1451 i++;
1452 }
1453 }
1454
1455 QList<Diff> newDiffList;
1456 for (int i = 0; i < diffList.count(); i++) {
1457 const Diff &diff = diffList.at(i);
1458 if (equalitiesToBeSplit.contains(i)) {
1459 newDiffList.append(Diff(Diff::Delete, diff.text));
1460 newDiffList.append(Diff(Diff::Insert, diff.text));
1461 } else {
1462 newDiffList.append(diff);
1463 }
1464 }
1465
1466 return cleanupOverlaps(merge(cleanupSemanticsLossless(merge(newDiffList))));
1467 }
1468
cleanupSemanticsLossless(const QList<Diff> & diffList)1469 QList<Diff> Differ::cleanupSemanticsLossless(const QList<Diff> &diffList)
1470 {
1471 if (diffList.count() < 3) // we need at least 3 items
1472 return diffList;
1473
1474 QList<Diff> newDiffList;
1475 Diff prevDiff = diffList.at(0);
1476 Diff thisDiff = diffList.at(1);
1477 Diff nextDiff = diffList.at(2);
1478 int i = 2;
1479 while (i < diffList.count()) {
1480 if (prevDiff.command == Diff::Equal
1481 && nextDiff.command == Diff::Equal) {
1482
1483 // Single edit surrounded by equalities
1484 QString equality1 = prevDiff.text;
1485 QString edit = thisDiff.text;
1486 QString equality2 = nextDiff.text;
1487
1488 // Shift the edit as far left as possible
1489 const int suffixCount = commonSuffix(equality1, edit);
1490 if (suffixCount) {
1491 const QString commonString = edit.mid(edit.count() - suffixCount);
1492 equality1 = equality1.left(equality1.count() - suffixCount);
1493 edit = commonString + edit.left(edit.count() - suffixCount);
1494 equality2 = commonString + equality2;
1495 }
1496
1497 // Step char by char right, looking for the best score
1498 QString bestEquality1 = equality1;
1499 QString bestEdit = edit;
1500 QString bestEquality2 = equality2;
1501 int bestScore = cleanupSemanticsScore(equality1, edit)
1502 + cleanupSemanticsScore(edit, equality2);
1503
1504 while (!edit.isEmpty() && !equality2.isEmpty()
1505 && edit.at(0) == equality2.at(0)) {
1506 equality1 += edit.at(0);
1507 edit = edit.mid(1) + equality2.at(0);
1508 equality2 = equality2.mid(1);
1509 const int score = cleanupSemanticsScore(equality1, edit)
1510 + cleanupSemanticsScore(edit, equality2);
1511 if (score >= bestScore) {
1512 bestEquality1 = equality1;
1513 bestEdit = edit;
1514 bestEquality2 = equality2;
1515 bestScore = score;
1516 }
1517 }
1518 prevDiff.text = bestEquality1;
1519 thisDiff.text = bestEdit;
1520 nextDiff.text = bestEquality2;
1521
1522 if (!bestEquality1.isEmpty())
1523 newDiffList.append(prevDiff); // append modified equality1
1524 if (bestEquality2.isEmpty()) {
1525 i++;
1526 if (i < diffList.count())
1527 nextDiff = diffList.at(i); // omit equality2
1528 }
1529 } else {
1530 newDiffList.append(prevDiff); // append prevDiff
1531 }
1532 prevDiff = thisDiff;
1533 thisDiff = nextDiff;
1534 i++;
1535 if (i < diffList.count())
1536 nextDiff = diffList.at(i);
1537 }
1538 newDiffList.append(prevDiff);
1539 if (i == diffList.count())
1540 newDiffList.append(thisDiff);
1541 return newDiffList;
1542 }
1543
1544 } // namespace Utils
1545