1 /*
2 * PROJECT: ReactOS FC Command
3 * LICENSE: GPL-2.0-or-later (https://spdx.org/licenses/GPL-2.0-or-later)
4 * PURPOSE: Comparing text files
5 * COPYRIGHT: Copyright 2021 Katayama Hirofumi MZ (katayama.hirofumi.mz@gmail.com)
6 */
7
8 #include "fc.h"
9
10 #define IS_SPACE(ch) ((ch) == TEXT(' ') || (ch) == TEXT('\t'))
11
12 #ifdef UNICODE
13 #define NODE NODE_W
14 #define PrintLine PrintLineW
15 #define TextCompare TextCompareW
16 #else
17 #define NODE NODE_A
18 #define PrintLine PrintLineA
19 #define TextCompare TextCompareA
20 #endif
21
AllocLine(LPCTSTR pch,DWORD cch)22 static LPTSTR AllocLine(LPCTSTR pch, DWORD cch)
23 {
24 LPTSTR pszNew = malloc((cch + 1) * sizeof(TCHAR));
25 if (!pszNew)
26 return NULL;
27 memcpy(pszNew, pch, cch * sizeof(TCHAR));
28 pszNew[cch] = 0;
29 return pszNew;
30 }
31
AllocNode(LPTSTR psz,DWORD lineno)32 static NODE *AllocNode(LPTSTR psz, DWORD lineno)
33 {
34 NODE *node;
35 if (!psz)
36 return NULL;
37 node = calloc(1, sizeof(NODE));
38 if (!node)
39 {
40 free(psz);
41 return NULL;
42 }
43 node->pszLine = psz;
44 node->lineno = lineno;
45 return node;
46 }
47
DeleteNode(NODE * node)48 static __inline VOID DeleteNode(NODE *node)
49 {
50 if (node)
51 {
52 free(node->pszLine);
53 free(node->pszComp);
54 free(node);
55 }
56 }
57
DeleteList(struct list * list)58 static VOID DeleteList(struct list *list)
59 {
60 struct list *ptr;
61 NODE *node;
62 while ((ptr = list_head(list)) != NULL)
63 {
64 list_remove(ptr);
65 node = LIST_ENTRY(ptr, NODE, entry);
66 DeleteNode(node);
67 }
68 }
69
SkipSpace(LPCTSTR pch)70 static __inline LPCTSTR SkipSpace(LPCTSTR pch)
71 {
72 while (IS_SPACE(*pch))
73 ++pch;
74 return pch;
75 }
76
FindLastNonSpace(LPCTSTR pch)77 static __inline LPCTSTR FindLastNonSpace(LPCTSTR pch)
78 {
79 LPCTSTR pchLast = NULL;
80 while (*pch)
81 {
82 if (!IS_SPACE(*pch))
83 pchLast = pch;
84 ++pch;
85 }
86 return pchLast;
87 }
88
DeleteDuplicateSpaces(LPTSTR psz)89 static VOID DeleteDuplicateSpaces(LPTSTR psz)
90 {
91 LPTSTR pch0, pch1;
92 for (pch0 = pch1 = psz; *pch0; ++pch0)
93 {
94 *pch1++ = *pch0;
95 if (IS_SPACE(*pch0))
96 {
97 do
98 {
99 ++pch0;
100 } while (IS_SPACE(*pch0));
101 --pch0;
102 }
103 }
104 *pch1 = 0;
105 }
106
CompressSpace(LPCTSTR line)107 static LPTSTR CompressSpace(LPCTSTR line)
108 {
109 LPTSTR pszNew;
110 LPCTSTR pchLast;
111
112 line = SkipSpace(line);
113 pchLast = FindLastNonSpace(line);
114 if (pchLast == NULL)
115 return AllocLine(NULL, 0);
116
117 pszNew = AllocLine(line, (DWORD)(pchLast - line) + 1);
118 if (!pszNew)
119 return NULL;
120
121 DeleteDuplicateSpaces(pszNew);
122 return pszNew;
123 }
124
125 #define TAB_WIDTH 8
126
ExpandTabLength(LPCTSTR line)127 static INT ExpandTabLength(LPCTSTR line)
128 {
129 LPCTSTR pch;
130 INT cch = 0;
131 for (pch = line; *pch; ++pch)
132 {
133 if (*pch == TEXT('\t'))
134 cch += TAB_WIDTH - (cch % TAB_WIDTH);
135 else
136 ++cch;
137 }
138 return cch;
139 }
140
ExpandTab(LPCTSTR line)141 static LPTSTR ExpandTab(LPCTSTR line)
142 {
143 INT spaces, cch = ExpandTabLength(line), ich;
144 LPTSTR pszNew = malloc((cch + 1) * sizeof(TCHAR));
145 LPCTSTR pch;
146 if (!pszNew)
147 return NULL;
148 ich = 0;
149 for (pch = line; *pch; ++pch)
150 {
151 if (*pch == TEXT('\t'))
152 {
153 spaces = TAB_WIDTH - (ich % TAB_WIDTH);
154 while (spaces-- > 0)
155 {
156 pszNew[ich++] = TEXT(' ');
157 }
158 }
159 else
160 {
161 pszNew[ich++] = *pch;
162 }
163 }
164 pszNew[ich] = 0;
165 return pszNew;
166 }
167
168 #define HASH_EOF 0xFFFFFFFF
169 #define HASH_MASK 0x7FFFFFFF
170
GetHash(LPCTSTR psz,BOOL bIgnoreCase)171 static DWORD GetHash(LPCTSTR psz, BOOL bIgnoreCase)
172 {
173 DWORD ret = 0xDEADFACE;
174 while (*psz)
175 {
176 ret += (bIgnoreCase ? towupper(*psz) : *psz);
177 ret <<= 2;
178 ++psz;
179 }
180 return (ret & HASH_MASK);
181 }
182
AllocEOFNode(DWORD lineno)183 static NODE *AllocEOFNode(DWORD lineno)
184 {
185 NODE *node = AllocNode(AllocLine(NULL, 0), 0);
186 if (node == NULL)
187 return NULL;
188 node->pszComp = AllocLine(NULL, 0);
189 if (node->pszComp == NULL)
190 {
191 DeleteNode(node);
192 return NULL;
193 }
194 node->lineno = lineno;
195 node->hash = HASH_EOF;
196 return node;
197 }
198
IsEOFNode(NODE * node)199 static __inline BOOL IsEOFNode(NODE *node)
200 {
201 return !node || node->hash == HASH_EOF;
202 }
203
ConvertNode(const FILECOMPARE * pFC,NODE * node)204 static BOOL ConvertNode(const FILECOMPARE *pFC, NODE *node)
205 {
206 if (!(pFC->dwFlags & FLAG_T))
207 {
208 LPTSTR tmp = ExpandTab(node->pszLine);
209 if (!tmp)
210 return FALSE;
211 free(node->pszLine);
212 node->pszLine = tmp;
213 if (!(pFC->dwFlags & FLAG_W))
214 node->hash = GetHash(node->pszLine, !!(pFC->dwFlags & FLAG_C));
215 }
216 if (pFC->dwFlags & FLAG_W)
217 {
218 node->pszComp = CompressSpace(node->pszLine);
219 if (!node->pszComp)
220 return FALSE;
221 node->hash = GetHash(node->pszComp, !!(pFC->dwFlags & FLAG_C));
222 }
223 return TRUE;
224 }
225
CompareNode(const FILECOMPARE * pFC,const NODE * node0,const NODE * node1)226 static FCRET CompareNode(const FILECOMPARE *pFC, const NODE *node0, const NODE *node1)
227 {
228 DWORD dwCmpFlags;
229 LPTSTR psz0, psz1;
230 INT ret;
231 if (node0->hash != node1->hash)
232 return FCRET_DIFFERENT;
233
234 psz0 = (pFC->dwFlags & FLAG_W) ? node0->pszComp : node0->pszLine;
235 psz1 = (pFC->dwFlags & FLAG_W) ? node1->pszComp : node1->pszLine;
236 dwCmpFlags = ((pFC->dwFlags & FLAG_C) ? NORM_IGNORECASE : 0);
237 ret = CompareString(LOCALE_USER_DEFAULT, dwCmpFlags, psz0, -1, psz1, -1);
238 return (ret == CSTR_EQUAL) ? FCRET_IDENTICAL : FCRET_DIFFERENT;
239 }
240
FindNextLine(LPCTSTR pch,DWORD ich,DWORD cch,LPDWORD pich)241 static BOOL FindNextLine(LPCTSTR pch, DWORD ich, DWORD cch, LPDWORD pich)
242 {
243 while (ich < cch)
244 {
245 if (pch[ich] == TEXT('\n') || pch[ich] == TEXT('\0'))
246 {
247 *pich = ich;
248 return TRUE;
249 }
250 ++ich;
251 }
252 *pich = cch;
253 return FALSE;
254 }
255
256 static FCRET
ParseLines(const FILECOMPARE * pFC,HANDLE * phMapping,LARGE_INTEGER * pib,const LARGE_INTEGER * pcb,struct list * list)257 ParseLines(const FILECOMPARE *pFC, HANDLE *phMapping,
258 LARGE_INTEGER *pib, const LARGE_INTEGER *pcb, struct list *list)
259 {
260 DWORD lineno = 1, ich, cch, ichNext, cbView, cchNode;
261 LPTSTR psz, pszLine;
262 BOOL fLast, bCR;
263 NODE *node;
264
265 if (*phMapping == NULL)
266 return FCRET_NO_MORE_DATA;
267
268 if (pib->QuadPart >= pcb->QuadPart)
269 {
270 CloseHandle(*phMapping);
271 *phMapping = NULL;
272 return FCRET_NO_MORE_DATA;
273 }
274
275 cbView = (DWORD)min(pcb->QuadPart - pib->QuadPart, MAX_VIEW_SIZE);
276 psz = MapViewOfFile(*phMapping, FILE_MAP_READ, pib->HighPart, pib->LowPart, cbView);
277 if (!psz)
278 {
279 return OutOfMemory();
280 }
281
282 ich = 0;
283 cch = cbView / sizeof(TCHAR);
284 fLast = (pib->QuadPart + cbView >= pcb->QuadPart);
285 while (ich < cch &&
286 (FindNextLine(psz, ich, cch, &ichNext) ||
287 (ichNext == cch && (fLast || ich == 0))))
288 {
289 bCR = (ichNext > 0) && (psz[ichNext - 1] == TEXT('\r'));
290 cchNode = ichNext - ich - bCR;
291 pszLine = AllocLine(&psz[ich], cchNode);
292 node = AllocNode(pszLine, lineno++);
293 if (!node || !ConvertNode(pFC, node))
294 {
295 DeleteNode(node);
296 UnmapViewOfFile(psz);
297 return OutOfMemory();
298 }
299 list_add_tail(list, &node->entry);
300 ich = ichNext + 1;
301 }
302
303 UnmapViewOfFile(psz);
304
305 pib->QuadPart += ichNext * sizeof(WCHAR);
306
307 if (pib->QuadPart < pcb->QuadPart)
308 return FCRET_IDENTICAL;
309
310 // append EOF node
311 node = AllocEOFNode(lineno);
312 if (!node)
313 return OutOfMemory();
314 list_add_tail(list, &node->entry);
315
316 return FCRET_NO_MORE_DATA;
317 }
318
319 static VOID
ShowDiff(FILECOMPARE * pFC,INT i,struct list * begin,struct list * end)320 ShowDiff(FILECOMPARE *pFC, INT i, struct list *begin, struct list *end)
321 {
322 NODE* node;
323 struct list *list = &pFC->list[i];
324 struct list *first = NULL, *last = NULL;
325 PrintCaption(pFC->file[i]);
326 if (begin && end && list_prev(list, begin))
327 begin = list_prev(list, begin);
328 while (begin != end)
329 {
330 node = LIST_ENTRY(begin, NODE, entry);
331 if (IsEOFNode(node))
332 break;
333 if (!first)
334 first = begin;
335 last = begin;
336 if (!(pFC->dwFlags & FLAG_A))
337 PrintLine(pFC, node->lineno, node->pszLine);
338 begin = list_next(list, begin);
339 }
340 if ((pFC->dwFlags & FLAG_A) && first)
341 {
342 node = LIST_ENTRY(first, NODE, entry);
343 PrintLine(pFC, node->lineno, node->pszLine);
344 first = list_next(list, first);
345 if (first != last)
346 {
347 if (list_next(list, first) == last)
348 {
349 node = LIST_ENTRY(first, NODE, entry);
350 PrintLine(pFC, node->lineno, node->pszLine);
351 }
352 else
353 {
354 PrintDots();
355 }
356 }
357 node = LIST_ENTRY(last, NODE, entry);
358 PrintLine(pFC, node->lineno, node->pszLine);
359 }
360 }
361
362 static VOID
SkipIdentical(FILECOMPARE * pFC,struct list ** pptr0,struct list ** pptr1)363 SkipIdentical(FILECOMPARE *pFC, struct list **pptr0, struct list **pptr1)
364 {
365 struct list *ptr0 = *pptr0, *ptr1 = *pptr1;
366 while (ptr0 && ptr1)
367 {
368 NODE *node0 = LIST_ENTRY(ptr0, NODE, entry);
369 NODE *node1 = LIST_ENTRY(ptr1, NODE, entry);
370 if (CompareNode(pFC, node0, node1) != FCRET_IDENTICAL)
371 break;
372 ptr0 = list_next(&pFC->list[0], ptr0);
373 ptr1 = list_next(&pFC->list[1], ptr1);
374 }
375 *pptr0 = ptr0;
376 *pptr1 = ptr1;
377 }
378
379 static DWORD
SkipIdenticalN(FILECOMPARE * pFC,struct list ** pptr0,struct list ** pptr1,DWORD nnnn,DWORD lineno0,DWORD lineno1)380 SkipIdenticalN(FILECOMPARE *pFC, struct list **pptr0, struct list **pptr1,
381 DWORD nnnn, DWORD lineno0, DWORD lineno1)
382 {
383 struct list *ptr0 = *pptr0, *ptr1 = *pptr1;
384 DWORD count = 0;
385 while (ptr0 && ptr1)
386 {
387 NODE *node0 = LIST_ENTRY(ptr0, NODE, entry);
388 NODE *node1 = LIST_ENTRY(ptr1, NODE, entry);
389 if (node0->lineno >= lineno0)
390 break;
391 if (node1->lineno >= lineno1)
392 break;
393 if (CompareNode(pFC, node0, node1) != FCRET_IDENTICAL)
394 break;
395 ptr0 = list_next(&pFC->list[0], ptr0);
396 ptr1 = list_next(&pFC->list[1], ptr1);
397 ++count;
398 if (count >= nnnn)
399 break;
400 }
401 *pptr0 = ptr0;
402 *pptr1 = ptr1;
403 return count;
404 }
405
406 static FCRET
ScanDiff(FILECOMPARE * pFC,struct list ** pptr0,struct list ** pptr1,DWORD lineno0,DWORD lineno1)407 ScanDiff(FILECOMPARE *pFC, struct list **pptr0, struct list **pptr1,
408 DWORD lineno0, DWORD lineno1)
409 {
410 struct list *ptr0 = *pptr0, *ptr1 = *pptr1, *tmp0, *tmp1;
411 NODE *node0, *node1;
412 INT count;
413 while (ptr0 && ptr1)
414 {
415 node0 = LIST_ENTRY(ptr0, NODE, entry);
416 node1 = LIST_ENTRY(ptr1, NODE, entry);
417 if (node0->lineno >= lineno0)
418 return FCRET_DIFFERENT;
419 if (node1->lineno >= lineno1)
420 return FCRET_DIFFERENT;
421 tmp0 = ptr0;
422 tmp1 = ptr1;
423 count = SkipIdenticalN(pFC, &tmp0, &tmp1, pFC->nnnn, lineno0, lineno1);
424 if (count >= pFC->nnnn)
425 break;
426 if (count > 0)
427 {
428 ptr0 = tmp0;
429 ptr1 = tmp1;
430 }
431 else
432 {
433 ptr0 = list_next(&pFC->list[0], ptr0);
434 ptr1 = list_next(&pFC->list[1], ptr1);
435 }
436 }
437 *pptr0 = ptr0;
438 *pptr1 = ptr1;
439 return FCRET_IDENTICAL;
440 }
441
442 static FCRET
Resync(FILECOMPARE * pFC,struct list ** pptr0,struct list ** pptr1)443 Resync(FILECOMPARE *pFC, struct list **pptr0, struct list **pptr1)
444 {
445 FCRET ret;
446 struct list *ptr0, *ptr1, *save0 = NULL, *save1 = NULL;
447 NODE *node0, *node1;
448 struct list *list0 = &pFC->list[0], *list1 = &pFC->list[1];
449 DWORD lineno0, lineno1;
450 INT penalty, i0, i1, min_penalty = MAXLONG;
451
452 node0 = LIST_ENTRY(*pptr0, NODE, entry);
453 node1 = LIST_ENTRY(*pptr1, NODE, entry);
454 lineno0 = node0->lineno + pFC->n;
455 lineno1 = node1->lineno + pFC->n;
456
457 // ``If the files that you are comparing have more than pFC->n consecutive
458 // differing lines, FC cancels the comparison,,
459 // ``If the number of matching lines in the files is less than pFC->nnnn,
460 // FC displays the matching lines as differences,,
461 for (ptr1 = list_next(list1, *pptr1), i1 = 0; ptr1; ptr1 = list_next(list1, ptr1), ++i1)
462 {
463 node1 = LIST_ENTRY(ptr1, NODE, entry);
464 if (node1->lineno >= lineno1)
465 break;
466 for (ptr0 = list_next(list0, *pptr0), i0 = 0; ptr0; ptr0 = list_next(list0, ptr0), ++i0)
467 {
468 node0 = LIST_ENTRY(ptr0, NODE, entry);
469 if (node0->lineno >= lineno0)
470 break;
471 if (CompareNode(pFC, node0, node1) == FCRET_IDENTICAL)
472 {
473 penalty = min(i0, i1) + abs(i1 - i0);
474 if (min_penalty > penalty)
475 {
476 min_penalty = penalty;
477 save0 = ptr0;
478 save1 = ptr1;
479 }
480 }
481 }
482 }
483
484 if (save0 && save1)
485 {
486 *pptr0 = save0;
487 *pptr1 = save1;
488 ret = ScanDiff(pFC, &save0, &save1, lineno0, lineno1);
489 if (save0 && save1)
490 {
491 *pptr0 = save0;
492 *pptr1 = save1;
493 }
494 return ret;
495 }
496
497 for (ptr0 = *pptr0; ptr0; ptr0 = list_next(list0, ptr0))
498 {
499 node0 = LIST_ENTRY(ptr0, NODE, entry);
500 if (node0->lineno == lineno0)
501 break;
502 }
503 for (ptr1 = *pptr1; ptr1; ptr1 = list_next(list1, ptr1))
504 {
505 node1 = LIST_ENTRY(ptr1, NODE, entry);
506 if (node1->lineno == lineno1)
507 break;
508 }
509 *pptr0 = ptr0;
510 *pptr1 = ptr1;
511 return FCRET_DIFFERENT;
512 }
513
514 static FCRET
Finalize(FILECOMPARE * pFC,struct list * ptr0,struct list * ptr1,BOOL fDifferent)515 Finalize(FILECOMPARE* pFC, struct list *ptr0, struct list* ptr1, BOOL fDifferent)
516 {
517 if (!ptr0 && !ptr1)
518 {
519 if (fDifferent)
520 return Different(pFC->file[0], pFC->file[1]);
521 return NoDifference();
522 }
523 else
524 {
525 ShowDiff(pFC, 0, ptr0, NULL);
526 ShowDiff(pFC, 1, ptr1, NULL);
527 PrintEndOfDiff();
528 return FCRET_DIFFERENT;
529 }
530 }
531
532 // FIXME: "cmd_apitest fc" has some failures.
TextCompare(FILECOMPARE * pFC,HANDLE * phMapping0,const LARGE_INTEGER * pcb0,HANDLE * phMapping1,const LARGE_INTEGER * pcb1)533 FCRET TextCompare(FILECOMPARE *pFC, HANDLE *phMapping0, const LARGE_INTEGER *pcb0,
534 HANDLE *phMapping1, const LARGE_INTEGER *pcb1)
535 {
536 FCRET ret, ret0, ret1;
537 struct list *ptr0, *ptr1, *save0, *save1, *next0, *next1;
538 NODE* node0, * node1;
539 BOOL fDifferent = FALSE;
540 LARGE_INTEGER ib0 = { .QuadPart = 0 }, ib1 = { .QuadPart = 0 };
541 struct list *list0 = &pFC->list[0], *list1 = &pFC->list[1];
542 list_init(list0);
543 list_init(list1);
544
545 do
546 {
547 ret0 = ParseLines(pFC, phMapping0, &ib0, pcb0, list0);
548 if (ret0 == FCRET_INVALID)
549 {
550 ret = ret0;
551 goto cleanup;
552 }
553 ret1 = ParseLines(pFC, phMapping1, &ib1, pcb1, list1);
554 if (ret1 == FCRET_INVALID)
555 {
556 ret = ret1;
557 goto cleanup;
558 }
559
560 ptr0 = list_head(list0);
561 ptr1 = list_head(list1);
562 for (;;)
563 {
564 if (!ptr0 || !ptr1)
565 goto quit;
566
567 // skip identical (sync'ed)
568 SkipIdentical(pFC, &ptr0, &ptr1);
569 if (ptr0 || ptr1)
570 fDifferent = TRUE;
571 node0 = LIST_ENTRY(ptr0, NODE, entry);
572 node1 = LIST_ENTRY(ptr1, NODE, entry);
573 if (IsEOFNode(node0) || IsEOFNode(node1))
574 goto quit;
575
576 // try to resync
577 save0 = ptr0;
578 save1 = ptr1;
579 ret = Resync(pFC, &ptr0, &ptr1);
580 if (ret == FCRET_INVALID)
581 goto cleanup;
582 if (ret == FCRET_DIFFERENT)
583 {
584 // resync failed
585 ret = ResyncFailed();
586 // show the difference
587 ShowDiff(pFC, 0, save0, ptr0);
588 ShowDiff(pFC, 1, save1, ptr1);
589 PrintEndOfDiff();
590 goto cleanup;
591 }
592
593 // show the difference
594 fDifferent = TRUE;
595 next0 = ptr0 ? list_next(list0, ptr0) : ptr0;
596 next1 = ptr1 ? list_next(list1, ptr1) : ptr1;
597 ShowDiff(pFC, 0, save0, (next0 ? next0 : ptr0));
598 ShowDiff(pFC, 1, save1, (next1 ? next1 : ptr1));
599 PrintEndOfDiff();
600
601 // now resync'ed
602 }
603 } while (ret0 != FCRET_NO_MORE_DATA || ret1 != FCRET_NO_MORE_DATA);
604
605 quit:
606 ret = Finalize(pFC, ptr0, ptr1, fDifferent);
607 cleanup:
608 DeleteList(list0);
609 DeleteList(list1);
610 return ret;
611 }
612