1 /* 2 * PROJECT: ReactOS FC Command 3 * LICENSE: GPL-2.0-or-later (https://spdx.org/licenses/GPL-2.0-or-later) 4 * PURPOSE: Comparing text files 5 * COPYRIGHT: Copyright 2021 Katayama Hirofumi MZ (katayama.hirofumi.mz@gmail.com) 6 */ 7 8 #include "fc.h" 9 10 #define IS_SPACE(ch) ((ch) == TEXT(' ') || (ch) == TEXT('\t')) 11 12 #ifdef UNICODE 13 #define NODE NODE_W 14 #define PrintLine PrintLineW 15 #define TextCompare TextCompareW 16 #else 17 #define NODE NODE_A 18 #define PrintLine PrintLineA 19 #define TextCompare TextCompareA 20 #endif 21 22 static LPTSTR AllocLine(LPCTSTR pch, DWORD cch) 23 { 24 LPTSTR pszNew = malloc((cch + 1) * sizeof(TCHAR)); 25 if (!pszNew) 26 return NULL; 27 memcpy(pszNew, pch, cch * sizeof(TCHAR)); 28 pszNew[cch] = 0; 29 return pszNew; 30 } 31 32 static NODE *AllocNode(LPTSTR psz, DWORD lineno) 33 { 34 NODE *node; 35 if (!psz) 36 return NULL; 37 node = calloc(1, sizeof(NODE)); 38 if (!node) 39 { 40 free(psz); 41 return NULL; 42 } 43 node->pszLine = psz; 44 node->lineno = lineno; 45 return node; 46 } 47 48 static __inline VOID DeleteNode(NODE *node) 49 { 50 if (node) 51 { 52 free(node->pszLine); 53 free(node->pszComp); 54 free(node); 55 } 56 } 57 58 static VOID DeleteList(struct list *list) 59 { 60 struct list *ptr; 61 NODE *node; 62 while ((ptr = list_head(list)) != NULL) 63 { 64 list_remove(ptr); 65 node = LIST_ENTRY(ptr, NODE, entry); 66 DeleteNode(node); 67 } 68 } 69 70 static __inline LPCTSTR SkipSpace(LPCTSTR pch) 71 { 72 while (IS_SPACE(*pch)) 73 ++pch; 74 return pch; 75 } 76 77 static __inline LPCTSTR FindLastNonSpace(LPCTSTR pch) 78 { 79 LPCTSTR pchLast = NULL; 80 while (*pch) 81 { 82 if (!IS_SPACE(*pch)) 83 pchLast = pch; 84 ++pch; 85 } 86 return pchLast; 87 } 88 89 static VOID DeleteDuplicateSpaces(LPTSTR psz) 90 { 91 LPTSTR pch0, pch1; 92 for (pch0 = pch1 = psz; *pch0; ++pch0) 93 { 94 *pch1++ = *pch0; 95 if (IS_SPACE(*pch0)) 96 { 97 do 98 { 99 ++pch0; 100 } while (IS_SPACE(*pch0)); 101 --pch0; 102 } 103 } 104 *pch1 = 0; 105 } 106 107 static LPTSTR CompressSpace(LPCTSTR line) 108 { 109 LPTSTR pszNew; 110 LPCTSTR pchLast; 111 112 line = SkipSpace(line); 113 pchLast = FindLastNonSpace(line); 114 if (pchLast == NULL) 115 return AllocLine(NULL, 0); 116 117 pszNew = AllocLine(line, (DWORD)(pchLast - line) + 1); 118 if (!pszNew) 119 return NULL; 120 121 DeleteDuplicateSpaces(pszNew); 122 return pszNew; 123 } 124 125 #define TAB_WIDTH 8 126 127 static INT ExpandTabLength(LPCTSTR line) 128 { 129 LPCTSTR pch; 130 INT cch = 0; 131 for (pch = line; *pch; ++pch) 132 { 133 if (*pch == TEXT('\t')) 134 cch += TAB_WIDTH - (cch % TAB_WIDTH); 135 else 136 ++cch; 137 } 138 return cch; 139 } 140 141 static LPTSTR ExpandTab(LPCTSTR line) 142 { 143 INT spaces, cch = ExpandTabLength(line), ich; 144 LPTSTR pszNew = malloc((cch + 1) * sizeof(TCHAR)); 145 LPCTSTR pch; 146 if (!pszNew) 147 return NULL; 148 ich = 0; 149 for (pch = line; *pch; ++pch) 150 { 151 if (*pch == TEXT('\t')) 152 { 153 spaces = TAB_WIDTH - (ich % TAB_WIDTH); 154 while (spaces-- > 0) 155 { 156 pszNew[ich++] = TEXT(' '); 157 } 158 } 159 else 160 { 161 pszNew[ich++] = *pch; 162 } 163 } 164 pszNew[ich] = 0; 165 return pszNew; 166 } 167 168 #define HASH_EOF 0xFFFFFFFF 169 #define HASH_MASK 0x7FFFFFFF 170 171 static DWORD GetHash(LPCTSTR psz, BOOL bIgnoreCase) 172 { 173 DWORD ret = 0xDEADFACE; 174 while (*psz) 175 { 176 ret += (bIgnoreCase ? towupper(*psz) : *psz); 177 ret <<= 2; 178 ++psz; 179 } 180 return (ret & HASH_MASK); 181 } 182 183 static NODE *AllocEOFNode(DWORD lineno) 184 { 185 NODE *node = AllocNode(AllocLine(NULL, 0), 0); 186 if (node == NULL) 187 return NULL; 188 node->pszComp = AllocLine(NULL, 0); 189 if (node->pszComp == NULL) 190 { 191 DeleteNode(node); 192 return NULL; 193 } 194 node->lineno = lineno; 195 node->hash = HASH_EOF; 196 return node; 197 } 198 199 static __inline BOOL IsEOFNode(NODE *node) 200 { 201 return !node || node->hash == HASH_EOF; 202 } 203 204 static BOOL ConvertNode(const FILECOMPARE *pFC, NODE *node) 205 { 206 if (!(pFC->dwFlags & FLAG_T)) 207 { 208 LPTSTR tmp = ExpandTab(node->pszLine); 209 if (!tmp) 210 return FALSE; 211 free(node->pszLine); 212 node->pszLine = tmp; 213 if (!(pFC->dwFlags & FLAG_W)) 214 node->hash = GetHash(node->pszLine, !!(pFC->dwFlags & FLAG_C)); 215 } 216 if (pFC->dwFlags & FLAG_W) 217 { 218 node->pszComp = CompressSpace(node->pszLine); 219 if (!node->pszComp) 220 return FALSE; 221 node->hash = GetHash(node->pszComp, !!(pFC->dwFlags & FLAG_C)); 222 } 223 return TRUE; 224 } 225 226 static FCRET CompareNode(const FILECOMPARE *pFC, const NODE *node0, const NODE *node1) 227 { 228 DWORD dwCmpFlags; 229 LPTSTR psz0, psz1; 230 INT ret; 231 if (node0->hash != node1->hash) 232 return FCRET_DIFFERENT; 233 234 psz0 = (pFC->dwFlags & FLAG_W) ? node0->pszComp : node0->pszLine; 235 psz1 = (pFC->dwFlags & FLAG_W) ? node1->pszComp : node1->pszLine; 236 dwCmpFlags = ((pFC->dwFlags & FLAG_C) ? NORM_IGNORECASE : 0); 237 ret = CompareString(LOCALE_USER_DEFAULT, dwCmpFlags, psz0, -1, psz1, -1); 238 return (ret == CSTR_EQUAL) ? FCRET_IDENTICAL : FCRET_DIFFERENT; 239 } 240 241 static BOOL FindNextLine(LPCTSTR pch, DWORD ich, DWORD cch, LPDWORD pich) 242 { 243 while (ich < cch) 244 { 245 if (pch[ich] == TEXT('\n') || pch[ich] == TEXT('\0')) 246 { 247 *pich = ich; 248 return TRUE; 249 } 250 ++ich; 251 } 252 *pich = cch; 253 return FALSE; 254 } 255 256 static FCRET 257 ParseLines(const FILECOMPARE *pFC, HANDLE *phMapping, 258 LARGE_INTEGER *pib, const LARGE_INTEGER *pcb, struct list *list) 259 { 260 DWORD lineno = 1, ich, cch, ichNext, cbView, cchNode; 261 LPTSTR psz, pszLine; 262 BOOL fLast, bCR; 263 NODE *node; 264 265 if (*phMapping == NULL) 266 return FCRET_NO_MORE_DATA; 267 268 if (pib->QuadPart >= pcb->QuadPart) 269 { 270 CloseHandle(*phMapping); 271 *phMapping = NULL; 272 return FCRET_NO_MORE_DATA; 273 } 274 275 cbView = (DWORD)min(pcb->QuadPart - pib->QuadPart, MAX_VIEW_SIZE); 276 psz = MapViewOfFile(*phMapping, FILE_MAP_READ, pib->HighPart, pib->LowPart, cbView); 277 if (!psz) 278 { 279 return OutOfMemory(); 280 } 281 282 ich = 0; 283 cch = cbView / sizeof(TCHAR); 284 fLast = (pib->QuadPart + cbView >= pcb->QuadPart); 285 while (ich < cch && 286 (FindNextLine(psz, ich, cch, &ichNext) || 287 (ichNext == cch && (fLast || ich == 0)))) 288 { 289 bCR = (ichNext > 0) && (psz[ichNext - 1] == TEXT('\r')); 290 cchNode = ichNext - ich - bCR; 291 pszLine = AllocLine(&psz[ich], cchNode); 292 node = AllocNode(pszLine, lineno++); 293 if (!node || !ConvertNode(pFC, node)) 294 { 295 DeleteNode(node); 296 UnmapViewOfFile(psz); 297 return OutOfMemory(); 298 } 299 list_add_tail(list, &node->entry); 300 ich = ichNext + 1; 301 } 302 303 UnmapViewOfFile(psz); 304 305 pib->QuadPart += ichNext * sizeof(WCHAR); 306 307 if (pib->QuadPart < pcb->QuadPart) 308 return FCRET_IDENTICAL; 309 310 // append EOF node 311 node = AllocEOFNode(lineno); 312 if (!node) 313 return OutOfMemory(); 314 list_add_tail(list, &node->entry); 315 316 return FCRET_NO_MORE_DATA; 317 } 318 319 static VOID 320 ShowDiff(FILECOMPARE *pFC, INT i, struct list *begin, struct list *end) 321 { 322 NODE* node; 323 struct list *list = &pFC->list[i]; 324 struct list *first = NULL, *last = NULL; 325 PrintCaption(pFC->file[i]); 326 if (begin && end && list_prev(list, begin)) 327 begin = list_prev(list, begin); 328 while (begin != end) 329 { 330 node = LIST_ENTRY(begin, NODE, entry); 331 if (IsEOFNode(node)) 332 break; 333 if (!first) 334 first = begin; 335 last = begin; 336 if (!(pFC->dwFlags & FLAG_A)) 337 PrintLine(pFC, node->lineno, node->pszLine); 338 begin = list_next(list, begin); 339 } 340 if ((pFC->dwFlags & FLAG_A) && first) 341 { 342 node = LIST_ENTRY(first, NODE, entry); 343 PrintLine(pFC, node->lineno, node->pszLine); 344 first = list_next(list, first); 345 if (first != last) 346 { 347 if (list_next(list, first) == last) 348 { 349 node = LIST_ENTRY(first, NODE, entry); 350 PrintLine(pFC, node->lineno, node->pszLine); 351 } 352 else 353 { 354 PrintDots(); 355 } 356 } 357 node = LIST_ENTRY(last, NODE, entry); 358 PrintLine(pFC, node->lineno, node->pszLine); 359 } 360 } 361 362 static VOID 363 SkipIdentical(FILECOMPARE *pFC, struct list **pptr0, struct list **pptr1) 364 { 365 struct list *ptr0 = *pptr0, *ptr1 = *pptr1; 366 while (ptr0 && ptr1) 367 { 368 NODE *node0 = LIST_ENTRY(ptr0, NODE, entry); 369 NODE *node1 = LIST_ENTRY(ptr1, NODE, entry); 370 if (CompareNode(pFC, node0, node1) != FCRET_IDENTICAL) 371 break; 372 ptr0 = list_next(&pFC->list[0], ptr0); 373 ptr1 = list_next(&pFC->list[1], ptr1); 374 } 375 *pptr0 = ptr0; 376 *pptr1 = ptr1; 377 } 378 379 static DWORD 380 SkipIdenticalN(FILECOMPARE *pFC, struct list **pptr0, struct list **pptr1, 381 DWORD nnnn, DWORD lineno0, DWORD lineno1) 382 { 383 struct list *ptr0 = *pptr0, *ptr1 = *pptr1; 384 DWORD count = 0; 385 while (ptr0 && ptr1) 386 { 387 NODE *node0 = LIST_ENTRY(ptr0, NODE, entry); 388 NODE *node1 = LIST_ENTRY(ptr1, NODE, entry); 389 if (node0->lineno >= lineno0) 390 break; 391 if (node1->lineno >= lineno1) 392 break; 393 if (CompareNode(pFC, node0, node1) != FCRET_IDENTICAL) 394 break; 395 ptr0 = list_next(&pFC->list[0], ptr0); 396 ptr1 = list_next(&pFC->list[1], ptr1); 397 ++count; 398 if (count >= nnnn) 399 break; 400 } 401 *pptr0 = ptr0; 402 *pptr1 = ptr1; 403 return count; 404 } 405 406 static FCRET 407 ScanDiff(FILECOMPARE *pFC, struct list **pptr0, struct list **pptr1, 408 DWORD lineno0, DWORD lineno1) 409 { 410 struct list *ptr0 = *pptr0, *ptr1 = *pptr1, *tmp0, *tmp1; 411 NODE *node0, *node1; 412 INT count; 413 while (ptr0 && ptr1) 414 { 415 node0 = LIST_ENTRY(ptr0, NODE, entry); 416 node1 = LIST_ENTRY(ptr1, NODE, entry); 417 if (node0->lineno >= lineno0) 418 return FCRET_DIFFERENT; 419 if (node1->lineno >= lineno1) 420 return FCRET_DIFFERENT; 421 tmp0 = ptr0; 422 tmp1 = ptr1; 423 count = SkipIdenticalN(pFC, &tmp0, &tmp1, pFC->nnnn, lineno0, lineno1); 424 if (count >= pFC->nnnn) 425 break; 426 if (count > 0) 427 { 428 ptr0 = tmp0; 429 ptr1 = tmp1; 430 } 431 else 432 { 433 ptr0 = list_next(&pFC->list[0], ptr0); 434 ptr1 = list_next(&pFC->list[1], ptr1); 435 } 436 } 437 *pptr0 = ptr0; 438 *pptr1 = ptr1; 439 return FCRET_IDENTICAL; 440 } 441 442 static FCRET 443 Resync(FILECOMPARE *pFC, struct list **pptr0, struct list **pptr1) 444 { 445 FCRET ret; 446 struct list *ptr0, *ptr1, *save0 = NULL, *save1 = NULL; 447 NODE *node0, *node1; 448 struct list *list0 = &pFC->list[0], *list1 = &pFC->list[1]; 449 DWORD lineno0, lineno1; 450 INT penalty, i0, i1, min_penalty = MAXLONG; 451 452 node0 = LIST_ENTRY(*pptr0, NODE, entry); 453 node1 = LIST_ENTRY(*pptr1, NODE, entry); 454 lineno0 = node0->lineno + pFC->n; 455 lineno1 = node1->lineno + pFC->n; 456 457 // ``If the files that you are comparing have more than pFC->n consecutive 458 // differing lines, FC cancels the comparison,, 459 // ``If the number of matching lines in the files is less than pFC->nnnn, 460 // FC displays the matching lines as differences,, 461 for (ptr1 = list_next(list1, *pptr1), i1 = 0; ptr1; ptr1 = list_next(list1, ptr1), ++i1) 462 { 463 node1 = LIST_ENTRY(ptr1, NODE, entry); 464 if (node1->lineno >= lineno1) 465 break; 466 for (ptr0 = list_next(list0, *pptr0), i0 = 0; ptr0; ptr0 = list_next(list0, ptr0), ++i0) 467 { 468 node0 = LIST_ENTRY(ptr0, NODE, entry); 469 if (node0->lineno >= lineno0) 470 break; 471 if (CompareNode(pFC, node0, node1) == FCRET_IDENTICAL) 472 { 473 penalty = min(i0, i1) + abs(i1 - i0); 474 if (min_penalty > penalty) 475 { 476 min_penalty = penalty; 477 save0 = ptr0; 478 save1 = ptr1; 479 } 480 } 481 } 482 } 483 484 if (save0 && save1) 485 { 486 *pptr0 = save0; 487 *pptr1 = save1; 488 ret = ScanDiff(pFC, &save0, &save1, lineno0, lineno1); 489 if (save0 && save1) 490 { 491 *pptr0 = save0; 492 *pptr1 = save1; 493 } 494 return ret; 495 } 496 497 for (ptr0 = *pptr0; ptr0; ptr0 = list_next(list0, ptr0)) 498 { 499 node0 = LIST_ENTRY(ptr0, NODE, entry); 500 if (node0->lineno == lineno0) 501 break; 502 } 503 for (ptr1 = *pptr1; ptr1; ptr1 = list_next(list1, ptr1)) 504 { 505 node1 = LIST_ENTRY(ptr1, NODE, entry); 506 if (node1->lineno == lineno1) 507 break; 508 } 509 *pptr0 = ptr0; 510 *pptr1 = ptr1; 511 return FCRET_DIFFERENT; 512 } 513 514 static FCRET 515 Finalize(FILECOMPARE* pFC, struct list *ptr0, struct list* ptr1, BOOL fDifferent) 516 { 517 if (!ptr0 && !ptr1) 518 { 519 if (fDifferent) 520 return Different(pFC->file[0], pFC->file[1]); 521 return NoDifference(); 522 } 523 else 524 { 525 ShowDiff(pFC, 0, ptr0, NULL); 526 ShowDiff(pFC, 1, ptr1, NULL); 527 PrintEndOfDiff(); 528 return FCRET_DIFFERENT; 529 } 530 } 531 532 // FIXME: "cmd_apitest fc" has some failures. 533 FCRET TextCompare(FILECOMPARE *pFC, HANDLE *phMapping0, const LARGE_INTEGER *pcb0, 534 HANDLE *phMapping1, const LARGE_INTEGER *pcb1) 535 { 536 FCRET ret, ret0, ret1; 537 struct list *ptr0, *ptr1, *save0, *save1, *next0, *next1; 538 NODE* node0, * node1; 539 BOOL fDifferent = FALSE; 540 LARGE_INTEGER ib0 = { .QuadPart = 0 }, ib1 = { .QuadPart = 0 }; 541 struct list *list0 = &pFC->list[0], *list1 = &pFC->list[1]; 542 list_init(list0); 543 list_init(list1); 544 545 do 546 { 547 ret0 = ParseLines(pFC, phMapping0, &ib0, pcb0, list0); 548 if (ret0 == FCRET_INVALID) 549 { 550 ret = ret0; 551 goto cleanup; 552 } 553 ret1 = ParseLines(pFC, phMapping1, &ib1, pcb1, list1); 554 if (ret1 == FCRET_INVALID) 555 { 556 ret = ret1; 557 goto cleanup; 558 } 559 560 ptr0 = list_head(list0); 561 ptr1 = list_head(list1); 562 for (;;) 563 { 564 if (!ptr0 || !ptr1) 565 goto quit; 566 567 // skip identical (sync'ed) 568 SkipIdentical(pFC, &ptr0, &ptr1); 569 if (ptr0 || ptr1) 570 fDifferent = TRUE; 571 node0 = LIST_ENTRY(ptr0, NODE, entry); 572 node1 = LIST_ENTRY(ptr1, NODE, entry); 573 if (IsEOFNode(node0) || IsEOFNode(node1)) 574 goto quit; 575 576 // try to resync 577 save0 = ptr0; 578 save1 = ptr1; 579 ret = Resync(pFC, &ptr0, &ptr1); 580 if (ret == FCRET_INVALID) 581 goto cleanup; 582 if (ret == FCRET_DIFFERENT) 583 { 584 // resync failed 585 ret = ResyncFailed(); 586 // show the difference 587 ShowDiff(pFC, 0, save0, ptr0); 588 ShowDiff(pFC, 1, save1, ptr1); 589 PrintEndOfDiff(); 590 goto cleanup; 591 } 592 593 // show the difference 594 fDifferent = TRUE; 595 next0 = ptr0 ? list_next(list0, ptr0) : ptr0; 596 next1 = ptr1 ? list_next(list1, ptr1) : ptr1; 597 ShowDiff(pFC, 0, save0, (next0 ? next0 : ptr0)); 598 ShowDiff(pFC, 1, save1, (next1 ? next1 : ptr1)); 599 PrintEndOfDiff(); 600 601 // now resync'ed 602 } 603 } while (ret0 != FCRET_NO_MORE_DATA || ret1 != FCRET_NO_MORE_DATA); 604 605 quit: 606 ret = Finalize(pFC, ptr0, ptr1, fDifferent); 607 cleanup: 608 DeleteList(list0); 609 DeleteList(list1); 610 return ret; 611 } 612