1 /*
2 Author: Daniele Fognini, Andreas Wuerl
3 Copyright (C) 2013-2014, Siemens AG
4
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License
7 version 2 as published by the Free Software Foundation.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License along
15 with this program; if not, write to the Free Software Foundation, Inc.,
16 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17 */
18
19 #include "diff.h"
20
21 #include "_squareVisitor.h"
22 #include <stdlib.h>
23
matchNTokens(const GArray * textTokens,size_t textStart,size_t textLength,const GArray * searchTokens,size_t searchStart,size_t searchLength,unsigned int numberOfWantedMatches)24 int matchNTokens(const GArray* textTokens, size_t textStart, size_t textLength,
25 const GArray* searchTokens, size_t searchStart, size_t searchLength,
26 unsigned int numberOfWantedMatches) {
27
28 if (
29 textStart < textLength &&
30 searchStart < searchLength &&
31 tokenEquals(
32 tokens_index(textTokens, textStart),
33 tokens_index(searchTokens, searchStart
34 )))
35 {
36 unsigned matched = 1;
37
38 size_t canMatch = MIN(textLength - textStart, searchLength - searchStart);
39 size_t shouldMatch = MIN(numberOfWantedMatches, canMatch);
40 for (size_t i = 1; i < shouldMatch; i++) {
41 if (tokenEquals(
42 tokens_index(textTokens, i + textStart),
43 tokens_index(searchTokens, i + searchStart)))
44 matched++;
45 else
46 break;
47 }
48 return (matched == shouldMatch);
49 }
50 return 0;
51 }
52
lookForDiff(const GArray * textTokens,const GArray * searchTokens,size_t iText,size_t iSearch,unsigned int maxAllowedDiff,unsigned int minAdjacentMatches,DiffMatchInfo * result)53 int lookForDiff(const GArray* textTokens, const GArray* searchTokens,
54 size_t iText, size_t iSearch,
55 unsigned int maxAllowedDiff, unsigned int minAdjacentMatches,
56 DiffMatchInfo* result) {
57 size_t searchLength = searchTokens->len;
58 size_t textLength = textTokens->len;
59
60 size_t searchStopAt = MIN(iSearch + maxAllowedDiff, searchLength);
61 size_t textStopAt = MIN(iText + maxAllowedDiff, textLength);
62
63 size_t textPos;
64 size_t searchPos;
65 for (unsigned int i = 0; i < SQUARE_VISITOR_LENGTH; i++) {
66 textPos = iText + squareVisitorX[i];
67 searchPos = iSearch + squareVisitorY[i];
68
69 if ((textPos < textStopAt) && (searchPos < searchStopAt))
70 if (matchNTokens(textTokens, textPos, textLength,
71 searchTokens, searchPos, searchLength,
72 minAdjacentMatches))
73 {
74 result->search.start = searchPos;
75 result->search.length = searchPos - iSearch;
76 result->text.start = textPos;
77 result->text.length = textPos - iText;
78 return 1;
79 }
80 }
81
82 return 0;
83 }
84
applyDiff(const DiffMatchInfo * diff,GArray * matchedInfo,size_t * additionsCounter,size_t * removedCounter,size_t * iText,size_t * iSearch)85 static int applyDiff(const DiffMatchInfo* diff,
86 GArray* matchedInfo,
87 size_t* additionsCounter, size_t* removedCounter,
88 size_t* iText, size_t* iSearch) {
89
90 DiffMatchInfo diffCopy = *diff;
91 diffCopy.text.start = *iText;
92 diffCopy.search.start = *iSearch;
93
94 if (diffCopy.search.length == 0)
95 diffCopy.diffType = DIFF_TYPE_ADDITION;
96 else if (diffCopy.text.length == 0)
97 diffCopy.diffType = DIFF_TYPE_REMOVAL;
98 else
99 diffCopy.diffType = DIFF_TYPE_REPLACE;
100
101 *iText = diff->text.start;
102 *iSearch = diff->search.start;
103
104 g_array_append_val(matchedInfo, diffCopy);
105
106 *additionsCounter += diffCopy.text.length;
107 *removedCounter += diffCopy.search.length;
108
109 return 1;
110 }
111
applyTailDiff(GArray * matchedInfo,size_t searchLength,size_t * removedCounter,size_t * additionsCounter,size_t * iText,size_t * iSearch)112 static void applyTailDiff(GArray* matchedInfo,
113 size_t searchLength,
114 size_t* removedCounter, size_t* additionsCounter,
115 size_t* iText, size_t* iSearch) {
116
117 DiffMatchInfo tailDiff = (DiffMatchInfo) {
118 .search = (DiffPoint) {
119 .start = (*iSearch),
120 .length = searchLength - (*iSearch)
121 },
122 .text = (DiffPoint) {
123 .start = (*iText),
124 .length = 0
125 },
126 .diffType = NULL
127 };
128
129 applyDiff(&tailDiff, matchedInfo, additionsCounter, removedCounter, iText, iSearch);
130 }
131
initSimpleMatch(DiffMatchInfo * simpleMatch,size_t iText,size_t iSearch)132 static void initSimpleMatch(DiffMatchInfo* simpleMatch, size_t iText, size_t iSearch) {
133 simpleMatch->text.start = iText;
134 simpleMatch->text.length = 0;
135 simpleMatch->search.start = iSearch;
136 simpleMatch->search.length = 0;
137 }
138
139 /**
140 @brief perform a diff match search between two tokenized texts
141
142 @param textTokens array containing the Tokens of the text in which we are searching
143 @param searchTokens array containing the Tokens of the reference text to be searched
144 @param textStartPosition position in the text where the search starts,
145 it will be updated to a value for a successive search
146 @param maxAllowedDiff maximum number of Tokens that can be avoided
147 @param minAdjacentMatches minimum number of adjacent matched Tokens that must be equal
148
149 @return pointer to the result, or NULL on negative match. To be freed with diffResult_free
150 ****************************************************/
findMatchAsDiffs(const GArray * textTokens,const GArray * searchTokens,size_t textStartPosition,size_t searchStartPosition,unsigned int maxAllowedDiff,unsigned int minAdjacentMatches)151 DiffResult* findMatchAsDiffs(const GArray* textTokens, const GArray* searchTokens,
152 size_t textStartPosition, size_t searchStartPosition,
153 unsigned int maxAllowedDiff, unsigned int minAdjacentMatches) {
154 size_t textLength = textTokens->len;
155 size_t searchLength = searchTokens->len;
156
157 if ((searchLength<=searchStartPosition) || (textLength<=textStartPosition)) {
158 return NULL;
159 }
160
161 DiffResult* result = malloc(sizeof(DiffResult));
162 result->matchedInfo = g_array_new(FALSE, FALSE, sizeof(DiffMatchInfo));
163 GArray* matchedInfo = result->matchedInfo;
164
165 size_t iText = textStartPosition;
166 size_t iSearch = 0;
167
168 size_t removedCounter = 0;
169 size_t matchedCounter = 0;
170 size_t additionsCounter = 0;
171
172 if (searchStartPosition > 0) {
173 DiffMatchInfo licenseHeadDiff = (DiffMatchInfo) {
174 .search = (DiffPoint) {
175 .start = 0,
176 .length = searchStartPosition
177 },
178 .text = (DiffPoint) {
179 .start = iText,
180 .length = 0
181 },
182 .diffType = NULL
183 };
184 applyDiff(&licenseHeadDiff, matchedInfo, &additionsCounter, &removedCounter, &iText, &iSearch);
185 iSearch = searchStartPosition;
186 }
187
188 // match first token
189 while (iText < textLength) {
190 Token* textToken = tokens_index(textTokens, iText);
191 Token* searchToken = tokens_index(searchTokens, iSearch);
192 if (tokenEquals(textToken, searchToken))
193 break;
194 iText++;
195 }
196
197 if (iText < textLength) {
198 DiffMatchInfo simpleMatch;
199 simpleMatch.diffType = DIFF_TYPE_MATCH;
200 initSimpleMatch(&simpleMatch, iText, iSearch);
201
202 while ((iText < textLength) && (iSearch < searchLength)) {
203 Token* textToken = tokens_index(textTokens, iText);
204 Token* searchToken = tokens_index(searchTokens, iSearch);
205
206 if (tokenEquals(textToken, searchToken)) {
207 simpleMatch.text.length++;
208 simpleMatch.search.length++;
209 matchedCounter++;
210 iSearch++;
211 iText++;
212 } else {
213 /* the previous tokens matched, here starts a difference */
214 g_array_append_val(matchedInfo, simpleMatch);
215 initSimpleMatch(&simpleMatch, iText, iSearch);
216
217 DiffMatchInfo diff;
218 if (lookForDiff(textTokens, searchTokens,
219 iText, iSearch,
220 maxAllowedDiff, minAdjacentMatches,
221 &diff)) {
222 applyDiff(&diff,
223 matchedInfo,
224 &additionsCounter, &removedCounter,
225 &iText, &iSearch);
226
227 simpleMatch.text.start = iText;
228 simpleMatch.search.start = iSearch;
229 } else {
230 break;
231 }
232 }
233 }
234 if (simpleMatch.text.length > 0) {
235 g_array_append_val(matchedInfo, simpleMatch);
236 }
237 }
238
239 if ((iSearch < searchLength) && (searchLength < maxAllowedDiff + iSearch)) {
240 applyTailDiff(matchedInfo, searchLength, &removedCounter, &additionsCounter, &iText, &iSearch);
241 }
242
243 if (matchedCounter + removedCounter != searchLength) {
244 g_array_free(matchedInfo, TRUE);
245 free(result);
246
247 return NULL;
248 } else {
249 #ifdef DEBUG_DIFF
250 printf("diff: (=%zu +%zu -%zu)/%zu\n", matchedCounter, additionsCounter, removedCounter, searchLength);
251 for (size_t i=0; i<matchedInfo->len; i++) {
252 DiffMatchInfo current = g_array_index(matchedInfo, DiffMatchInfo, i);
253 printf("info[%zu]: t[%zu+%zu] %s s[%zu+%zu]}\n",
254 i,
255 current.text.start,
256 current.text.length,
257 current.diffType,
258 current.search.start,
259 current.search.length
260 );
261 }
262 printf("\n");
263 #endif
264 result->removed = removedCounter;
265 result->added = additionsCounter;
266 result->matched = matchedCounter;
267 return result;
268 }
269 }
270
diffResult_free(DiffResult * diffResult)271 void diffResult_free(DiffResult* diffResult) {
272 g_array_free(diffResult->matchedInfo, TRUE);
273 free(diffResult);
274 }
275