xref: /reactos/base/applications/cmdutils/fc/text.h (revision ea6e7740)
1 /*
2  * PROJECT:     ReactOS FC Command
3  * LICENSE:     GPL-2.0-or-later (https://spdx.org/licenses/GPL-2.0-or-later)
4  * PURPOSE:     Comparing text files
5  * COPYRIGHT:   Copyright 2021 Katayama Hirofumi MZ (katayama.hirofumi.mz@gmail.com)
6  */
7 #include "fc.h"
8 
9 #define IS_SPACE(ch) ((ch) == TEXT(' ') || (ch) == TEXT('\t'))
10 
11 #ifdef UNICODE
12     #define NODE NODE_W
13     #define PrintLine PrintLineW
14     #define TextCompare TextCompareW
15 #else
16     #define NODE NODE_A
17     #define PrintLine PrintLineA
18     #define TextCompare TextCompareA
19 #endif
20 
21 static LPTSTR AllocLine(LPCTSTR pch, DWORD cch)
22 {
23     LPTSTR pszNew = malloc((cch + 1) * sizeof(TCHAR));
24     if (!pszNew)
25         return NULL;
26     memcpy(pszNew, pch, cch * sizeof(TCHAR));
27     pszNew[cch] = 0;
28     return pszNew;
29 }
30 
31 static NODE *AllocNode(LPTSTR psz, DWORD lineno)
32 {
33     NODE *node;
34     if (!psz)
35         return NULL;
36     node = calloc(1, sizeof(NODE));
37     if (!node)
38     {
39         free(psz);
40         return NULL;
41     }
42     node->pszLine = psz;
43     node->lineno = lineno;
44     return node;
45 }
46 
47 static __inline VOID DeleteNode(NODE *node)
48 {
49     if (node)
50     {
51         free(node->pszLine);
52         free(node->pszComp);
53         free(node);
54     }
55 }
56 
57 static VOID DeleteList(struct list *list)
58 {
59     struct list *ptr;
60     NODE *node;
61     while ((ptr = list_head(list)) != NULL)
62     {
63         list_remove(ptr);
64         node = LIST_ENTRY(ptr, NODE, entry);
65         DeleteNode(node);
66     }
67 }
68 
69 static __inline LPCTSTR SkipSpace(LPCTSTR pch)
70 {
71     while (IS_SPACE(*pch))
72         ++pch;
73     return pch;
74 }
75 
76 static __inline LPCTSTR FindLastNonSpace(LPCTSTR pch)
77 {
78     LPCTSTR pchLast = NULL;
79     while (*pch)
80     {
81         if (!IS_SPACE(*pch))
82             pchLast = pch;
83         ++pch;
84     }
85     return pchLast;
86 }
87 
88 static VOID DeleteDuplicateSpaces(LPTSTR psz)
89 {
90     LPTSTR pch0, pch1;
91     for (pch0 = pch1 = psz; *pch0; ++pch0)
92     {
93         *pch1++ = *pch0;
94         if (IS_SPACE(*pch0))
95         {
96             do
97             {
98                 ++pch0;
99             } while (IS_SPACE(*pch0));
100             --pch0;
101         }
102     }
103     *pch1 = 0;
104 }
105 
106 static LPTSTR CompressSpace(LPCTSTR line)
107 {
108     LPTSTR pszNew;
109     LPCTSTR pchLast;
110 
111     line = SkipSpace(line);
112     pchLast = FindLastNonSpace(line);
113     if (pchLast == NULL)
114         return AllocLine(NULL, 0);
115 
116     pszNew = AllocLine(line, (DWORD)(pchLast - line) + 1);
117     if (!pszNew)
118         return NULL;
119 
120     DeleteDuplicateSpaces(pszNew);
121     return pszNew;
122 }
123 
124 #define TAB_WIDTH 8
125 
126 static INT ExpandTabLength(LPCTSTR line)
127 {
128     LPCTSTR pch;
129     INT cch = 0;
130     for (pch = line; *pch; ++pch)
131     {
132         if (*pch == TEXT('\t'))
133             cch += TAB_WIDTH - (cch % TAB_WIDTH);
134         else
135             ++cch;
136     }
137     return cch;
138 }
139 
140 static LPTSTR ExpandTab(LPCTSTR line)
141 {
142     INT spaces, cch = ExpandTabLength(line), ich;
143     LPTSTR pszNew = malloc((cch + 1) * sizeof(TCHAR));
144     LPCTSTR pch;
145     if (!pszNew)
146         return NULL;
147     ich = 0;
148     for (pch = line; *pch; ++pch)
149     {
150         if (*pch == TEXT('\t'))
151         {
152             spaces = TAB_WIDTH - (ich % TAB_WIDTH);
153             while (spaces-- > 0)
154             {
155                 pszNew[ich++] = TEXT(' ');
156             }
157         }
158         else
159         {
160             pszNew[ich++] = *pch;
161         }
162     }
163     pszNew[ich] = 0;
164     return pszNew;
165 }
166 
167 #define HASH_EOF 0xFFFFFFFF
168 #define HASH_MASK 0x7FFFFFFF
169 
170 static DWORD GetHash(LPCTSTR psz, BOOL bIgnoreCase)
171 {
172     DWORD ret = 0xDEADFACE;
173     while (*psz)
174     {
175         ret += (bIgnoreCase ? towupper(*psz) : *psz);
176         ret <<= 2;
177         ++psz;
178     }
179     return (ret & HASH_MASK);
180 }
181 
182 static NODE *AllocEOFNode(DWORD lineno)
183 {
184     NODE *node = AllocNode(AllocLine(NULL, 0), 0);
185     if (node == NULL)
186         return NULL;
187     node->pszComp = AllocLine(NULL, 0);
188     if (node->pszComp == NULL)
189     {
190         DeleteNode(node);
191         return NULL;
192     }
193     node->lineno = lineno;
194     node->hash = HASH_EOF;
195     return node;
196 }
197 
198 static __inline BOOL IsEOFNode(NODE *node)
199 {
200     return !node || node->hash == HASH_EOF;
201 }
202 
203 static BOOL ConvertNode(const FILECOMPARE *pFC, NODE *node)
204 {
205     if (!(pFC->dwFlags & FLAG_T))
206     {
207         LPTSTR tmp = ExpandTab(node->pszLine);
208         if (!tmp)
209             return FALSE;
210         free(node->pszLine);
211         node->pszLine = tmp;
212         if (!(pFC->dwFlags & FLAG_W))
213             node->hash = GetHash(node->pszLine, !!(pFC->dwFlags & FLAG_C));
214     }
215     if (pFC->dwFlags & FLAG_W)
216     {
217         node->pszComp = CompressSpace(node->pszLine);
218         if (!node->pszComp)
219             return FALSE;
220         node->hash = GetHash(node->pszComp, !!(pFC->dwFlags & FLAG_C));
221     }
222     return TRUE;
223 }
224 
225 static FCRET CompareNode(const FILECOMPARE *pFC, const NODE *node0, const NODE *node1)
226 {
227     DWORD dwCmpFlags;
228     LPTSTR psz0, psz1;
229     INT ret;
230     if (node0->hash != node1->hash)
231         return FCRET_DIFFERENT;
232 
233     psz0 = (pFC->dwFlags & FLAG_W) ? node0->pszComp : node0->pszLine;
234     psz1 = (pFC->dwFlags & FLAG_W) ? node1->pszComp : node1->pszLine;
235     dwCmpFlags = ((pFC->dwFlags & FLAG_C) ? NORM_IGNORECASE : 0);
236     ret = CompareString(LOCALE_USER_DEFAULT, dwCmpFlags, psz0, -1, psz1, -1);
237     return (ret == CSTR_EQUAL) ? FCRET_IDENTICAL : FCRET_DIFFERENT;
238 }
239 
240 static BOOL FindNextLine(LPCTSTR pch, DWORD ich, DWORD cch, LPDWORD pich)
241 {
242     while (ich < cch)
243     {
244         if (pch[ich] == TEXT('\n') || pch[ich] == TEXT('\0'))
245         {
246             *pich = ich;
247             return TRUE;
248         }
249         ++ich;
250     }
251     *pich = cch;
252     return FALSE;
253 }
254 
255 static FCRET
256 ParseLines(const FILECOMPARE *pFC, HANDLE *phMapping,
257            LARGE_INTEGER *pib, const LARGE_INTEGER *pcb, struct list *list)
258 {
259     DWORD lineno = 1, ich, cch, ichNext, cbView, cchNode;
260     LPTSTR psz, pszLine;
261     BOOL fLast, bCR;
262     NODE *node;
263 
264     if (*phMapping == NULL)
265         return FCRET_NO_MORE_DATA;
266 
267     if (pib->QuadPart >= pcb->QuadPart)
268     {
269         CloseHandle(*phMapping);
270         *phMapping = NULL;
271         return FCRET_NO_MORE_DATA;
272     }
273 
274     cbView = (DWORD)min(pcb->QuadPart - pib->QuadPart, MAX_VIEW_SIZE);
275     psz = MapViewOfFile(*phMapping, FILE_MAP_READ, pib->HighPart, pib->LowPart, cbView);
276     if (!psz)
277     {
278         return OutOfMemory();
279     }
280 
281     ich = 0;
282     cch = cbView / sizeof(TCHAR);
283     fLast = (pib->QuadPart + cbView >= pcb->QuadPart);
284     while (ich < cch &&
285            (FindNextLine(psz, ich, cch, &ichNext) ||
286             (ichNext == cch && (fLast || ich == 0))))
287     {
288         bCR = (ichNext > 0) && (psz[ichNext - 1] == TEXT('\r'));
289         cchNode = ichNext - ich - bCR;
290         pszLine = AllocLine(&psz[ich], cchNode);
291         node = AllocNode(pszLine, lineno++);
292         if (!node || !ConvertNode(pFC, node))
293         {
294             DeleteNode(node);
295             UnmapViewOfFile(psz);
296             return OutOfMemory();
297         }
298         list_add_tail(list, &node->entry);
299         ich = ichNext + 1;
300     }
301 
302     UnmapViewOfFile(psz);
303 
304     pib->QuadPart += ichNext * sizeof(WCHAR);
305 
306     if (pib->QuadPart < pcb->QuadPart)
307         return FCRET_IDENTICAL;
308 
309     // append EOF node
310     node = AllocEOFNode(lineno);
311     if (!node)
312         return OutOfMemory();
313     list_add_tail(list, &node->entry);
314 
315     return FCRET_NO_MORE_DATA;
316 }
317 
318 static VOID
319 ShowDiff(FILECOMPARE *pFC, INT i, struct list *begin, struct list *end)
320 {
321     NODE* node;
322     struct list *list = &pFC->list[i];
323     struct list *first = NULL, *last = NULL;
324     PrintCaption(pFC->file[i]);
325     if (begin && end && list_prev(list, begin))
326         begin = list_prev(list, begin);
327     while (begin != end)
328     {
329         node = LIST_ENTRY(begin, NODE, entry);
330         if (IsEOFNode(node))
331             break;
332         if (!first)
333             first = begin;
334         last = begin;
335         if (!(pFC->dwFlags & FLAG_A))
336             PrintLine(pFC, node->lineno, node->pszLine);
337         begin = list_next(list, begin);
338     }
339     if ((pFC->dwFlags & FLAG_A) && first)
340     {
341         node = LIST_ENTRY(first, NODE, entry);
342         PrintLine(pFC, node->lineno, node->pszLine);
343         first = list_next(list, first);
344         if (first != last)
345         {
346             if (list_next(list, first) == last)
347             {
348                 node = LIST_ENTRY(first, NODE, entry);
349                 PrintLine(pFC, node->lineno, node->pszLine);
350             }
351             else
352             {
353                 PrintDots();
354             }
355         }
356         node = LIST_ENTRY(last, NODE, entry);
357         PrintLine(pFC, node->lineno, node->pszLine);
358     }
359 }
360 
361 static VOID
362 SkipIdentical(FILECOMPARE *pFC, struct list **pptr0, struct list **pptr1)
363 {
364     struct list *ptr0 = *pptr0, *ptr1 = *pptr1;
365     while (ptr0 && ptr1)
366     {
367         NODE *node0 = LIST_ENTRY(ptr0, NODE, entry);
368         NODE *node1 = LIST_ENTRY(ptr1, NODE, entry);
369         if (CompareNode(pFC, node0, node1) != FCRET_IDENTICAL)
370             break;
371         ptr0 = list_next(&pFC->list[0], ptr0);
372         ptr1 = list_next(&pFC->list[1], ptr1);
373     }
374     *pptr0 = ptr0;
375     *pptr1 = ptr1;
376 }
377 
378 static DWORD
379 SkipIdenticalN(FILECOMPARE *pFC, struct list **pptr0, struct list **pptr1,
380                DWORD nnnn, DWORD lineno0, DWORD lineno1)
381 {
382     struct list *ptr0 = *pptr0, *ptr1 = *pptr1;
383     DWORD count = 0;
384     while (ptr0 && ptr1)
385     {
386         NODE *node0 = LIST_ENTRY(ptr0, NODE, entry);
387         NODE *node1 = LIST_ENTRY(ptr1, NODE, entry);
388         if (node0->lineno >= lineno0)
389             break;
390         if (node1->lineno >= lineno1)
391             break;
392         if (CompareNode(pFC, node0, node1) != FCRET_IDENTICAL)
393             break;
394         ptr0 = list_next(&pFC->list[0], ptr0);
395         ptr1 = list_next(&pFC->list[1], ptr1);
396         ++count;
397         if (count >= nnnn)
398             break;
399     }
400     *pptr0 = ptr0;
401     *pptr1 = ptr1;
402     return count;
403 }
404 
405 static FCRET
406 ScanDiff(FILECOMPARE *pFC, struct list **pptr0, struct list **pptr1,
407          DWORD lineno0, DWORD lineno1)
408 {
409     struct list *ptr0 = *pptr0, *ptr1 = *pptr1, *tmp0, *tmp1;
410     NODE *node0, *node1;
411     INT count;
412     while (ptr0 && ptr1)
413     {
414         node0 = LIST_ENTRY(ptr0, NODE, entry);
415         node1 = LIST_ENTRY(ptr1, NODE, entry);
416         if (node0->lineno >= lineno0)
417             return FCRET_DIFFERENT;
418         if (node1->lineno >= lineno1)
419             return FCRET_DIFFERENT;
420         tmp0 = ptr0;
421         tmp1 = ptr1;
422         count = SkipIdenticalN(pFC, &tmp0, &tmp1, pFC->nnnn, lineno0, lineno1);
423         if (count >= pFC->nnnn)
424             break;
425         if (count > 0)
426         {
427             ptr0 = tmp0;
428             ptr1 = tmp1;
429         }
430         else
431         {
432             ptr0 = list_next(&pFC->list[0], ptr0);
433             ptr1 = list_next(&pFC->list[1], ptr1);
434         }
435     }
436     *pptr0 = ptr0;
437     *pptr1 = ptr1;
438     return FCRET_IDENTICAL;
439 }
440 
441 static FCRET
442 Resync(FILECOMPARE *pFC, struct list **pptr0, struct list **pptr1)
443 {
444     FCRET ret;
445     struct list *ptr0, *ptr1, *save0 = NULL, *save1 = NULL;
446     NODE *node0, *node1;
447     struct list *list0 = &pFC->list[0], *list1 = &pFC->list[1];
448     DWORD lineno0, lineno1;
449     INT penalty, i0, i1, min_penalty = MAXLONG;
450 
451     node0 = LIST_ENTRY(*pptr0, NODE, entry);
452     node1 = LIST_ENTRY(*pptr1, NODE, entry);
453     lineno0 = node0->lineno + pFC->n;
454     lineno1 = node1->lineno + pFC->n;
455 
456     // ``If the files that you are comparing have more than pFC->n consecutive
457     //   differing lines, FC cancels the comparison,,
458     // ``If the number of matching lines in the files is less than pFC->nnnn,
459     //   FC displays the matching lines as differences,,
460     for (ptr1 = list_next(list1, *pptr1), i1 = 0; ptr1; ptr1 = list_next(list1, ptr1), ++i1)
461     {
462         node1 = LIST_ENTRY(ptr1, NODE, entry);
463         if (node1->lineno >= lineno1)
464             break;
465         for (ptr0 = list_next(list0, *pptr0), i0 = 0; ptr0; ptr0 = list_next(list0, ptr0), ++i0)
466         {
467             node0 = LIST_ENTRY(ptr0, NODE, entry);
468             if (node0->lineno >= lineno0)
469                 break;
470             if (CompareNode(pFC, node0, node1) == FCRET_IDENTICAL)
471             {
472                 penalty = min(i0, i1) + abs(i1 - i0);
473                 if (min_penalty > penalty)
474                 {
475                     min_penalty = penalty;
476                     save0 = ptr0;
477                     save1 = ptr1;
478                 }
479             }
480         }
481     }
482 
483     if (save0 && save1)
484     {
485         *pptr0 = save0;
486         *pptr1 = save1;
487         ret = ScanDiff(pFC, &save0, &save1, lineno0, lineno1);
488         if (save0 && save1)
489         {
490             *pptr0 = save0;
491             *pptr1 = save1;
492         }
493         return ret;
494     }
495 
496     for (ptr0 = *pptr0; ptr0; ptr0 = list_next(list0, ptr0))
497     {
498         node0 = LIST_ENTRY(ptr0, NODE, entry);
499         if (node0->lineno == lineno0)
500             break;
501     }
502     for (ptr1 = *pptr1; ptr1; ptr1 = list_next(list1, ptr1))
503     {
504         node1 = LIST_ENTRY(ptr1, NODE, entry);
505         if (node1->lineno == lineno1)
506             break;
507     }
508     *pptr0 = ptr0;
509     *pptr1 = ptr1;
510     return FCRET_DIFFERENT;
511 }
512 
513 static FCRET
514 Finalize(FILECOMPARE* pFC, struct list *ptr0, struct list* ptr1, BOOL fDifferent)
515 {
516     if (!ptr0 && !ptr1)
517     {
518         if (fDifferent)
519             return Different(pFC->file[0], pFC->file[1]);
520         return NoDifference();
521     }
522     else
523     {
524         ShowDiff(pFC, 0, ptr0, NULL);
525         ShowDiff(pFC, 1, ptr1, NULL);
526         PrintEndOfDiff();
527         return FCRET_DIFFERENT;
528     }
529 }
530 
531 // FIXME: "cmd_apitest fc" has some failures.
532 FCRET TextCompare(FILECOMPARE *pFC, HANDLE *phMapping0, const LARGE_INTEGER *pcb0,
533                                     HANDLE *phMapping1, const LARGE_INTEGER *pcb1)
534 {
535     FCRET ret, ret0, ret1;
536     struct list *ptr0, *ptr1, *save0, *save1, *next0, *next1;
537     NODE* node0, * node1;
538     BOOL fDifferent = FALSE;
539     LARGE_INTEGER ib0 = { .QuadPart = 0 }, ib1 = { .QuadPart = 0 };
540     struct list *list0 = &pFC->list[0], *list1 = &pFC->list[1];
541     list_init(list0);
542     list_init(list1);
543 
544     do
545     {
546         ret0 = ParseLines(pFC, phMapping0, &ib0, pcb0, list0);
547         if (ret0 == FCRET_INVALID)
548         {
549             ret = ret0;
550             goto cleanup;
551         }
552         ret1 = ParseLines(pFC, phMapping1, &ib1, pcb1, list1);
553         if (ret1 == FCRET_INVALID)
554         {
555             ret = ret1;
556             goto cleanup;
557         }
558 
559         ptr0 = list_head(list0);
560         ptr1 = list_head(list1);
561         for (;;)
562         {
563             if (!ptr0 || !ptr1)
564                 goto quit;
565 
566             // skip identical (sync'ed)
567             SkipIdentical(pFC, &ptr0, &ptr1);
568             if (ptr0 || ptr1)
569                 fDifferent = TRUE;
570             node0 = LIST_ENTRY(ptr0, NODE, entry);
571             node1 = LIST_ENTRY(ptr1, NODE, entry);
572             if (IsEOFNode(node0) || IsEOFNode(node1))
573                 goto quit;
574 
575             // try to resync
576             save0 = ptr0;
577             save1 = ptr1;
578             ret = Resync(pFC, &ptr0, &ptr1);
579             if (ret == FCRET_INVALID)
580                 goto cleanup;
581             if (ret == FCRET_DIFFERENT)
582             {
583                 // resync failed
584                 ret = ResyncFailed();
585                 // show the difference
586                 ShowDiff(pFC, 0, save0, ptr0);
587                 ShowDiff(pFC, 1, save1, ptr1);
588                 PrintEndOfDiff();
589                 goto cleanup;
590             }
591 
592             // show the difference
593             fDifferent = TRUE;
594             next0 = ptr0 ? list_next(list0, ptr0) : ptr0;
595             next1 = ptr1 ? list_next(list1, ptr1) : ptr1;
596             ShowDiff(pFC, 0, save0, (next0 ? next0 : ptr0));
597             ShowDiff(pFC, 1, save1, (next1 ? next1 : ptr1));
598             PrintEndOfDiff();
599 
600             // now resync'ed
601         }
602     } while (ret0 != FCRET_NO_MORE_DATA || ret1 != FCRET_NO_MORE_DATA);
603 
604 quit:
605     ret = Finalize(pFC, ptr0, ptr1, fDifferent);
606 cleanup:
607     DeleteList(list0);
608     DeleteList(list1);
609     return ret;
610 }
611