xref: /reactos/base/applications/cmdutils/fc/text.h (revision 9393fc32)
1 /*
2  * PROJECT:     ReactOS FC Command
3  * LICENSE:     GPL-2.0-or-later (https://spdx.org/licenses/GPL-2.0-or-later)
4  * PURPOSE:     Comparing text files
5  * COPYRIGHT:   Copyright 2021 Katayama Hirofumi MZ (katayama.hirofumi.mz@gmail.com)
6  */
7 
8 #include "fc.h"
9 
10 #define IS_SPACE(ch) ((ch) == TEXT(' ') || (ch) == TEXT('\t'))
11 
12 #ifdef UNICODE
13     #define NODE NODE_W
14     #define PrintLine PrintLineW
15     #define TextCompare TextCompareW
16 #else
17     #define NODE NODE_A
18     #define PrintLine PrintLineA
19     #define TextCompare TextCompareA
20 #endif
21 
AllocLine(LPCTSTR pch,DWORD cch)22 static LPTSTR AllocLine(LPCTSTR pch, DWORD cch)
23 {
24     LPTSTR pszNew = malloc((cch + 1) * sizeof(TCHAR));
25     if (!pszNew)
26         return NULL;
27     memcpy(pszNew, pch, cch * sizeof(TCHAR));
28     pszNew[cch] = 0;
29     return pszNew;
30 }
31 
AllocNode(LPTSTR psz,DWORD lineno)32 static NODE *AllocNode(LPTSTR psz, DWORD lineno)
33 {
34     NODE *node;
35     if (!psz)
36         return NULL;
37     node = calloc(1, sizeof(NODE));
38     if (!node)
39     {
40         free(psz);
41         return NULL;
42     }
43     node->pszLine = psz;
44     node->lineno = lineno;
45     return node;
46 }
47 
DeleteNode(NODE * node)48 static __inline VOID DeleteNode(NODE *node)
49 {
50     if (node)
51     {
52         free(node->pszLine);
53         free(node->pszComp);
54         free(node);
55     }
56 }
57 
DeleteList(struct list * list)58 static VOID DeleteList(struct list *list)
59 {
60     struct list *ptr;
61     NODE *node;
62     while ((ptr = list_head(list)) != NULL)
63     {
64         list_remove(ptr);
65         node = LIST_ENTRY(ptr, NODE, entry);
66         DeleteNode(node);
67     }
68 }
69 
SkipSpace(LPCTSTR pch)70 static __inline LPCTSTR SkipSpace(LPCTSTR pch)
71 {
72     while (IS_SPACE(*pch))
73         ++pch;
74     return pch;
75 }
76 
FindLastNonSpace(LPCTSTR pch)77 static __inline LPCTSTR FindLastNonSpace(LPCTSTR pch)
78 {
79     LPCTSTR pchLast = NULL;
80     while (*pch)
81     {
82         if (!IS_SPACE(*pch))
83             pchLast = pch;
84         ++pch;
85     }
86     return pchLast;
87 }
88 
DeleteDuplicateSpaces(LPTSTR psz)89 static VOID DeleteDuplicateSpaces(LPTSTR psz)
90 {
91     LPTSTR pch0, pch1;
92     for (pch0 = pch1 = psz; *pch0; ++pch0)
93     {
94         *pch1++ = *pch0;
95         if (IS_SPACE(*pch0))
96         {
97             do
98             {
99                 ++pch0;
100             } while (IS_SPACE(*pch0));
101             --pch0;
102         }
103     }
104     *pch1 = 0;
105 }
106 
CompressSpace(LPCTSTR line)107 static LPTSTR CompressSpace(LPCTSTR line)
108 {
109     LPTSTR pszNew;
110     LPCTSTR pchLast;
111 
112     line = SkipSpace(line);
113     pchLast = FindLastNonSpace(line);
114     if (pchLast == NULL)
115         return AllocLine(NULL, 0);
116 
117     pszNew = AllocLine(line, (DWORD)(pchLast - line) + 1);
118     if (!pszNew)
119         return NULL;
120 
121     DeleteDuplicateSpaces(pszNew);
122     return pszNew;
123 }
124 
125 #define TAB_WIDTH 8
126 
ExpandTabLength(LPCTSTR line)127 static INT ExpandTabLength(LPCTSTR line)
128 {
129     LPCTSTR pch;
130     INT cch = 0;
131     for (pch = line; *pch; ++pch)
132     {
133         if (*pch == TEXT('\t'))
134             cch += TAB_WIDTH - (cch % TAB_WIDTH);
135         else
136             ++cch;
137     }
138     return cch;
139 }
140 
ExpandTab(LPCTSTR line)141 static LPTSTR ExpandTab(LPCTSTR line)
142 {
143     INT spaces, cch = ExpandTabLength(line), ich;
144     LPTSTR pszNew = malloc((cch + 1) * sizeof(TCHAR));
145     LPCTSTR pch;
146     if (!pszNew)
147         return NULL;
148     ich = 0;
149     for (pch = line; *pch; ++pch)
150     {
151         if (*pch == TEXT('\t'))
152         {
153             spaces = TAB_WIDTH - (ich % TAB_WIDTH);
154             while (spaces-- > 0)
155             {
156                 pszNew[ich++] = TEXT(' ');
157             }
158         }
159         else
160         {
161             pszNew[ich++] = *pch;
162         }
163     }
164     pszNew[ich] = 0;
165     return pszNew;
166 }
167 
168 #define HASH_EOF 0xFFFFFFFF
169 #define HASH_MASK 0x7FFFFFFF
170 
GetHash(LPCTSTR psz,BOOL bIgnoreCase)171 static DWORD GetHash(LPCTSTR psz, BOOL bIgnoreCase)
172 {
173     DWORD ret = 0xDEADFACE;
174     while (*psz)
175     {
176         ret += (bIgnoreCase ? towupper(*psz) : *psz);
177         ret <<= 2;
178         ++psz;
179     }
180     return (ret & HASH_MASK);
181 }
182 
AllocEOFNode(DWORD lineno)183 static NODE *AllocEOFNode(DWORD lineno)
184 {
185     NODE *node = AllocNode(AllocLine(NULL, 0), 0);
186     if (node == NULL)
187         return NULL;
188     node->pszComp = AllocLine(NULL, 0);
189     if (node->pszComp == NULL)
190     {
191         DeleteNode(node);
192         return NULL;
193     }
194     node->lineno = lineno;
195     node->hash = HASH_EOF;
196     return node;
197 }
198 
IsEOFNode(NODE * node)199 static __inline BOOL IsEOFNode(NODE *node)
200 {
201     return !node || node->hash == HASH_EOF;
202 }
203 
ConvertNode(const FILECOMPARE * pFC,NODE * node)204 static BOOL ConvertNode(const FILECOMPARE *pFC, NODE *node)
205 {
206     if (!(pFC->dwFlags & FLAG_T))
207     {
208         LPTSTR tmp = ExpandTab(node->pszLine);
209         if (!tmp)
210             return FALSE;
211         free(node->pszLine);
212         node->pszLine = tmp;
213         if (!(pFC->dwFlags & FLAG_W))
214             node->hash = GetHash(node->pszLine, !!(pFC->dwFlags & FLAG_C));
215     }
216     if (pFC->dwFlags & FLAG_W)
217     {
218         node->pszComp = CompressSpace(node->pszLine);
219         if (!node->pszComp)
220             return FALSE;
221         node->hash = GetHash(node->pszComp, !!(pFC->dwFlags & FLAG_C));
222     }
223     return TRUE;
224 }
225 
CompareNode(const FILECOMPARE * pFC,const NODE * node0,const NODE * node1)226 static FCRET CompareNode(const FILECOMPARE *pFC, const NODE *node0, const NODE *node1)
227 {
228     DWORD dwCmpFlags;
229     LPTSTR psz0, psz1;
230     INT ret;
231     if (node0->hash != node1->hash)
232         return FCRET_DIFFERENT;
233 
234     psz0 = (pFC->dwFlags & FLAG_W) ? node0->pszComp : node0->pszLine;
235     psz1 = (pFC->dwFlags & FLAG_W) ? node1->pszComp : node1->pszLine;
236     dwCmpFlags = ((pFC->dwFlags & FLAG_C) ? NORM_IGNORECASE : 0);
237     ret = CompareString(LOCALE_USER_DEFAULT, dwCmpFlags, psz0, -1, psz1, -1);
238     return (ret == CSTR_EQUAL) ? FCRET_IDENTICAL : FCRET_DIFFERENT;
239 }
240 
FindNextLine(LPCTSTR pch,DWORD ich,DWORD cch,LPDWORD pich)241 static BOOL FindNextLine(LPCTSTR pch, DWORD ich, DWORD cch, LPDWORD pich)
242 {
243     while (ich < cch)
244     {
245         if (pch[ich] == TEXT('\n') || pch[ich] == TEXT('\0'))
246         {
247             *pich = ich;
248             return TRUE;
249         }
250         ++ich;
251     }
252     *pich = cch;
253     return FALSE;
254 }
255 
256 static FCRET
ParseLines(const FILECOMPARE * pFC,HANDLE * phMapping,LARGE_INTEGER * pib,const LARGE_INTEGER * pcb,struct list * list)257 ParseLines(const FILECOMPARE *pFC, HANDLE *phMapping,
258            LARGE_INTEGER *pib, const LARGE_INTEGER *pcb, struct list *list)
259 {
260     DWORD lineno = 1, ich, cch, ichNext, cbView, cchNode;
261     LPTSTR psz, pszLine;
262     BOOL fLast, bCR;
263     NODE *node;
264 
265     if (*phMapping == NULL)
266         return FCRET_NO_MORE_DATA;
267 
268     if (pib->QuadPart >= pcb->QuadPart)
269     {
270         CloseHandle(*phMapping);
271         *phMapping = NULL;
272         return FCRET_NO_MORE_DATA;
273     }
274 
275     cbView = (DWORD)min(pcb->QuadPart - pib->QuadPart, MAX_VIEW_SIZE);
276     psz = MapViewOfFile(*phMapping, FILE_MAP_READ, pib->HighPart, pib->LowPart, cbView);
277     if (!psz)
278     {
279         return OutOfMemory();
280     }
281 
282     ich = 0;
283     cch = cbView / sizeof(TCHAR);
284     fLast = (pib->QuadPart + cbView >= pcb->QuadPart);
285     while (ich < cch &&
286            (FindNextLine(psz, ich, cch, &ichNext) ||
287             (ichNext == cch && (fLast || ich == 0))))
288     {
289         bCR = (ichNext > 0) && (psz[ichNext - 1] == TEXT('\r'));
290         cchNode = ichNext - ich - bCR;
291         pszLine = AllocLine(&psz[ich], cchNode);
292         node = AllocNode(pszLine, lineno++);
293         if (!node || !ConvertNode(pFC, node))
294         {
295             DeleteNode(node);
296             UnmapViewOfFile(psz);
297             return OutOfMemory();
298         }
299         list_add_tail(list, &node->entry);
300         ich = ichNext + 1;
301     }
302 
303     UnmapViewOfFile(psz);
304 
305     pib->QuadPart += ichNext * sizeof(WCHAR);
306 
307     if (pib->QuadPart < pcb->QuadPart)
308         return FCRET_IDENTICAL;
309 
310     // append EOF node
311     node = AllocEOFNode(lineno);
312     if (!node)
313         return OutOfMemory();
314     list_add_tail(list, &node->entry);
315 
316     return FCRET_NO_MORE_DATA;
317 }
318 
319 static VOID
ShowDiff(FILECOMPARE * pFC,INT i,struct list * begin,struct list * end)320 ShowDiff(FILECOMPARE *pFC, INT i, struct list *begin, struct list *end)
321 {
322     NODE* node;
323     struct list *list = &pFC->list[i];
324     struct list *first = NULL, *last = NULL;
325     PrintCaption(pFC->file[i]);
326     if (begin && end && list_prev(list, begin))
327         begin = list_prev(list, begin);
328     while (begin != end)
329     {
330         node = LIST_ENTRY(begin, NODE, entry);
331         if (IsEOFNode(node))
332             break;
333         if (!first)
334             first = begin;
335         last = begin;
336         if (!(pFC->dwFlags & FLAG_A))
337             PrintLine(pFC, node->lineno, node->pszLine);
338         begin = list_next(list, begin);
339     }
340     if ((pFC->dwFlags & FLAG_A) && first)
341     {
342         node = LIST_ENTRY(first, NODE, entry);
343         PrintLine(pFC, node->lineno, node->pszLine);
344         first = list_next(list, first);
345         if (first != last)
346         {
347             if (list_next(list, first) == last)
348             {
349                 node = LIST_ENTRY(first, NODE, entry);
350                 PrintLine(pFC, node->lineno, node->pszLine);
351             }
352             else
353             {
354                 PrintDots();
355             }
356         }
357         node = LIST_ENTRY(last, NODE, entry);
358         PrintLine(pFC, node->lineno, node->pszLine);
359     }
360 }
361 
362 static VOID
SkipIdentical(FILECOMPARE * pFC,struct list ** pptr0,struct list ** pptr1)363 SkipIdentical(FILECOMPARE *pFC, struct list **pptr0, struct list **pptr1)
364 {
365     struct list *ptr0 = *pptr0, *ptr1 = *pptr1;
366     while (ptr0 && ptr1)
367     {
368         NODE *node0 = LIST_ENTRY(ptr0, NODE, entry);
369         NODE *node1 = LIST_ENTRY(ptr1, NODE, entry);
370         if (CompareNode(pFC, node0, node1) != FCRET_IDENTICAL)
371             break;
372         ptr0 = list_next(&pFC->list[0], ptr0);
373         ptr1 = list_next(&pFC->list[1], ptr1);
374     }
375     *pptr0 = ptr0;
376     *pptr1 = ptr1;
377 }
378 
379 static DWORD
SkipIdenticalN(FILECOMPARE * pFC,struct list ** pptr0,struct list ** pptr1,DWORD nnnn,DWORD lineno0,DWORD lineno1)380 SkipIdenticalN(FILECOMPARE *pFC, struct list **pptr0, struct list **pptr1,
381                DWORD nnnn, DWORD lineno0, DWORD lineno1)
382 {
383     struct list *ptr0 = *pptr0, *ptr1 = *pptr1;
384     DWORD count = 0;
385     while (ptr0 && ptr1)
386     {
387         NODE *node0 = LIST_ENTRY(ptr0, NODE, entry);
388         NODE *node1 = LIST_ENTRY(ptr1, NODE, entry);
389         if (node0->lineno >= lineno0)
390             break;
391         if (node1->lineno >= lineno1)
392             break;
393         if (CompareNode(pFC, node0, node1) != FCRET_IDENTICAL)
394             break;
395         ptr0 = list_next(&pFC->list[0], ptr0);
396         ptr1 = list_next(&pFC->list[1], ptr1);
397         ++count;
398         if (count >= nnnn)
399             break;
400     }
401     *pptr0 = ptr0;
402     *pptr1 = ptr1;
403     return count;
404 }
405 
406 static FCRET
ScanDiff(FILECOMPARE * pFC,struct list ** pptr0,struct list ** pptr1,DWORD lineno0,DWORD lineno1)407 ScanDiff(FILECOMPARE *pFC, struct list **pptr0, struct list **pptr1,
408          DWORD lineno0, DWORD lineno1)
409 {
410     struct list *ptr0 = *pptr0, *ptr1 = *pptr1, *tmp0, *tmp1;
411     NODE *node0, *node1;
412     INT count;
413     while (ptr0 && ptr1)
414     {
415         node0 = LIST_ENTRY(ptr0, NODE, entry);
416         node1 = LIST_ENTRY(ptr1, NODE, entry);
417         if (node0->lineno >= lineno0)
418             return FCRET_DIFFERENT;
419         if (node1->lineno >= lineno1)
420             return FCRET_DIFFERENT;
421         tmp0 = ptr0;
422         tmp1 = ptr1;
423         count = SkipIdenticalN(pFC, &tmp0, &tmp1, pFC->nnnn, lineno0, lineno1);
424         if (count >= pFC->nnnn)
425             break;
426         if (count > 0)
427         {
428             ptr0 = tmp0;
429             ptr1 = tmp1;
430         }
431         else
432         {
433             ptr0 = list_next(&pFC->list[0], ptr0);
434             ptr1 = list_next(&pFC->list[1], ptr1);
435         }
436     }
437     *pptr0 = ptr0;
438     *pptr1 = ptr1;
439     return FCRET_IDENTICAL;
440 }
441 
442 static FCRET
Resync(FILECOMPARE * pFC,struct list ** pptr0,struct list ** pptr1)443 Resync(FILECOMPARE *pFC, struct list **pptr0, struct list **pptr1)
444 {
445     FCRET ret;
446     struct list *ptr0, *ptr1, *save0 = NULL, *save1 = NULL;
447     NODE *node0, *node1;
448     struct list *list0 = &pFC->list[0], *list1 = &pFC->list[1];
449     DWORD lineno0, lineno1;
450     INT penalty, i0, i1, min_penalty = MAXLONG;
451 
452     node0 = LIST_ENTRY(*pptr0, NODE, entry);
453     node1 = LIST_ENTRY(*pptr1, NODE, entry);
454     lineno0 = node0->lineno + pFC->n;
455     lineno1 = node1->lineno + pFC->n;
456 
457     // ``If the files that you are comparing have more than pFC->n consecutive
458     //   differing lines, FC cancels the comparison,,
459     // ``If the number of matching lines in the files is less than pFC->nnnn,
460     //   FC displays the matching lines as differences,,
461     for (ptr1 = list_next(list1, *pptr1), i1 = 0; ptr1; ptr1 = list_next(list1, ptr1), ++i1)
462     {
463         node1 = LIST_ENTRY(ptr1, NODE, entry);
464         if (node1->lineno >= lineno1)
465             break;
466         for (ptr0 = list_next(list0, *pptr0), i0 = 0; ptr0; ptr0 = list_next(list0, ptr0), ++i0)
467         {
468             node0 = LIST_ENTRY(ptr0, NODE, entry);
469             if (node0->lineno >= lineno0)
470                 break;
471             if (CompareNode(pFC, node0, node1) == FCRET_IDENTICAL)
472             {
473                 penalty = min(i0, i1) + abs(i1 - i0);
474                 if (min_penalty > penalty)
475                 {
476                     min_penalty = penalty;
477                     save0 = ptr0;
478                     save1 = ptr1;
479                 }
480             }
481         }
482     }
483 
484     if (save0 && save1)
485     {
486         *pptr0 = save0;
487         *pptr1 = save1;
488         ret = ScanDiff(pFC, &save0, &save1, lineno0, lineno1);
489         if (save0 && save1)
490         {
491             *pptr0 = save0;
492             *pptr1 = save1;
493         }
494         return ret;
495     }
496 
497     for (ptr0 = *pptr0; ptr0; ptr0 = list_next(list0, ptr0))
498     {
499         node0 = LIST_ENTRY(ptr0, NODE, entry);
500         if (node0->lineno == lineno0)
501             break;
502     }
503     for (ptr1 = *pptr1; ptr1; ptr1 = list_next(list1, ptr1))
504     {
505         node1 = LIST_ENTRY(ptr1, NODE, entry);
506         if (node1->lineno == lineno1)
507             break;
508     }
509     *pptr0 = ptr0;
510     *pptr1 = ptr1;
511     return FCRET_DIFFERENT;
512 }
513 
514 static FCRET
Finalize(FILECOMPARE * pFC,struct list * ptr0,struct list * ptr1,BOOL fDifferent)515 Finalize(FILECOMPARE* pFC, struct list *ptr0, struct list* ptr1, BOOL fDifferent)
516 {
517     if (!ptr0 && !ptr1)
518     {
519         if (fDifferent)
520             return Different(pFC->file[0], pFC->file[1]);
521         return NoDifference();
522     }
523     else
524     {
525         ShowDiff(pFC, 0, ptr0, NULL);
526         ShowDiff(pFC, 1, ptr1, NULL);
527         PrintEndOfDiff();
528         return FCRET_DIFFERENT;
529     }
530 }
531 
532 // FIXME: "cmd_apitest fc" has some failures.
TextCompare(FILECOMPARE * pFC,HANDLE * phMapping0,const LARGE_INTEGER * pcb0,HANDLE * phMapping1,const LARGE_INTEGER * pcb1)533 FCRET TextCompare(FILECOMPARE *pFC, HANDLE *phMapping0, const LARGE_INTEGER *pcb0,
534                                     HANDLE *phMapping1, const LARGE_INTEGER *pcb1)
535 {
536     FCRET ret, ret0, ret1;
537     struct list *ptr0, *ptr1, *save0, *save1, *next0, *next1;
538     NODE* node0, * node1;
539     BOOL fDifferent = FALSE;
540     LARGE_INTEGER ib0 = { .QuadPart = 0 }, ib1 = { .QuadPart = 0 };
541     struct list *list0 = &pFC->list[0], *list1 = &pFC->list[1];
542     list_init(list0);
543     list_init(list1);
544 
545     do
546     {
547         ret0 = ParseLines(pFC, phMapping0, &ib0, pcb0, list0);
548         if (ret0 == FCRET_INVALID)
549         {
550             ret = ret0;
551             goto cleanup;
552         }
553         ret1 = ParseLines(pFC, phMapping1, &ib1, pcb1, list1);
554         if (ret1 == FCRET_INVALID)
555         {
556             ret = ret1;
557             goto cleanup;
558         }
559 
560         ptr0 = list_head(list0);
561         ptr1 = list_head(list1);
562         for (;;)
563         {
564             if (!ptr0 || !ptr1)
565                 goto quit;
566 
567             // skip identical (sync'ed)
568             SkipIdentical(pFC, &ptr0, &ptr1);
569             if (ptr0 || ptr1)
570                 fDifferent = TRUE;
571             node0 = LIST_ENTRY(ptr0, NODE, entry);
572             node1 = LIST_ENTRY(ptr1, NODE, entry);
573             if (IsEOFNode(node0) || IsEOFNode(node1))
574                 goto quit;
575 
576             // try to resync
577             save0 = ptr0;
578             save1 = ptr1;
579             ret = Resync(pFC, &ptr0, &ptr1);
580             if (ret == FCRET_INVALID)
581                 goto cleanup;
582             if (ret == FCRET_DIFFERENT)
583             {
584                 // resync failed
585                 ret = ResyncFailed();
586                 // show the difference
587                 ShowDiff(pFC, 0, save0, ptr0);
588                 ShowDiff(pFC, 1, save1, ptr1);
589                 PrintEndOfDiff();
590                 goto cleanup;
591             }
592 
593             // show the difference
594             fDifferent = TRUE;
595             next0 = ptr0 ? list_next(list0, ptr0) : ptr0;
596             next1 = ptr1 ? list_next(list1, ptr1) : ptr1;
597             ShowDiff(pFC, 0, save0, (next0 ? next0 : ptr0));
598             ShowDiff(pFC, 1, save1, (next1 ? next1 : ptr1));
599             PrintEndOfDiff();
600 
601             // now resync'ed
602         }
603     } while (ret0 != FCRET_NO_MORE_DATA || ret1 != FCRET_NO_MORE_DATA);
604 
605 quit:
606     ret = Finalize(pFC, ptr0, ptr1, fDifferent);
607 cleanup:
608     DeleteList(list0);
609     DeleteList(list1);
610     return ret;
611 }
612