1 /* 2 * PROJECT: ReactOS FC Command 3 * LICENSE: GPL-2.0-or-later (https://spdx.org/licenses/GPL-2.0-or-later) 4 * PURPOSE: Comparing text files 5 * COPYRIGHT: Copyright 2021 Katayama Hirofumi MZ (katayama.hirofumi.mz@gmail.com) 6 */ 7 #include "fc.h" 8 9 #define IS_SPACE(ch) ((ch) == TEXT(' ') || (ch) == TEXT('\t')) 10 11 #ifdef UNICODE 12 #define NODE NODE_W 13 #define PrintLine PrintLineW 14 #define TextCompare TextCompareW 15 #else 16 #define NODE NODE_A 17 #define PrintLine PrintLineA 18 #define TextCompare TextCompareA 19 #endif 20 21 static LPTSTR AllocLine(LPCTSTR pch, DWORD cch) 22 { 23 LPTSTR pszNew = malloc((cch + 1) * sizeof(TCHAR)); 24 if (!pszNew) 25 return NULL; 26 memcpy(pszNew, pch, cch * sizeof(TCHAR)); 27 pszNew[cch] = 0; 28 return pszNew; 29 } 30 31 static NODE *AllocNode(LPTSTR psz, DWORD lineno) 32 { 33 NODE *node; 34 if (!psz) 35 return NULL; 36 node = calloc(1, sizeof(NODE)); 37 if (!node) 38 { 39 free(psz); 40 return NULL; 41 } 42 node->pszLine = psz; 43 node->lineno = lineno; 44 return node; 45 } 46 47 static __inline VOID DeleteNode(NODE *node) 48 { 49 if (node) 50 { 51 free(node->pszLine); 52 free(node->pszComp); 53 free(node); 54 } 55 } 56 57 static VOID DeleteList(struct list *list) 58 { 59 struct list *ptr; 60 NODE *node; 61 while ((ptr = list_head(list)) != NULL) 62 { 63 list_remove(ptr); 64 node = LIST_ENTRY(ptr, NODE, entry); 65 DeleteNode(node); 66 } 67 } 68 69 static __inline LPCTSTR SkipSpace(LPCTSTR pch) 70 { 71 while (IS_SPACE(*pch)) 72 ++pch; 73 return pch; 74 } 75 76 static __inline LPCTSTR FindLastNonSpace(LPCTSTR pch) 77 { 78 LPCTSTR pchLast = NULL; 79 while (*pch) 80 { 81 if (!IS_SPACE(*pch)) 82 pchLast = pch; 83 ++pch; 84 } 85 return pchLast; 86 } 87 88 static VOID DeleteDuplicateSpaces(LPTSTR psz) 89 { 90 LPTSTR pch0, pch1; 91 for (pch0 = pch1 = psz; *pch0; ++pch0) 92 { 93 *pch1++ = *pch0; 94 if (IS_SPACE(*pch0)) 95 { 96 do 97 { 98 ++pch0; 99 } while (IS_SPACE(*pch0)); 100 --pch0; 101 } 102 } 103 *pch1 = 0; 104 } 105 106 static LPTSTR CompressSpace(LPCTSTR line) 107 { 108 LPTSTR pszNew; 109 LPCTSTR pchLast; 110 111 line = SkipSpace(line); 112 pchLast = FindLastNonSpace(line); 113 if (pchLast == NULL) 114 return AllocLine(NULL, 0); 115 116 pszNew = AllocLine(line, (DWORD)(pchLast - line) + 1); 117 if (!pszNew) 118 return NULL; 119 120 DeleteDuplicateSpaces(pszNew); 121 return pszNew; 122 } 123 124 #define TAB_WIDTH 8 125 126 static INT ExpandTabLength(LPCTSTR line) 127 { 128 LPCTSTR pch; 129 INT cch = 0; 130 for (pch = line; *pch; ++pch) 131 { 132 if (*pch == TEXT('\t')) 133 cch += TAB_WIDTH - (cch % TAB_WIDTH); 134 else 135 ++cch; 136 } 137 return cch; 138 } 139 140 static LPTSTR ExpandTab(LPCTSTR line) 141 { 142 INT spaces, cch = ExpandTabLength(line), ich; 143 LPTSTR pszNew = malloc((cch + 1) * sizeof(TCHAR)); 144 LPCTSTR pch; 145 if (!pszNew) 146 return NULL; 147 ich = 0; 148 for (pch = line; *pch; ++pch) 149 { 150 if (*pch == TEXT('\t')) 151 { 152 spaces = TAB_WIDTH - (ich % TAB_WIDTH); 153 while (spaces-- > 0) 154 { 155 pszNew[ich++] = TEXT(' '); 156 } 157 } 158 else 159 { 160 pszNew[ich++] = *pch; 161 } 162 } 163 pszNew[ich] = 0; 164 return pszNew; 165 } 166 167 #define HASH_EOF 0xFFFFFFFF 168 #define HASH_MASK 0x7FFFFFFF 169 170 static DWORD GetHash(LPCTSTR psz, BOOL bIgnoreCase) 171 { 172 DWORD ret = 0xDEADFACE; 173 while (*psz) 174 { 175 ret += (bIgnoreCase ? towupper(*psz) : *psz); 176 ret <<= 2; 177 ++psz; 178 } 179 return (ret & HASH_MASK); 180 } 181 182 static NODE *AllocEOFNode(DWORD lineno) 183 { 184 NODE *node = AllocNode(AllocLine(NULL, 0), 0); 185 if (node == NULL) 186 return NULL; 187 node->pszComp = AllocLine(NULL, 0); 188 if (node->pszComp == NULL) 189 { 190 DeleteNode(node); 191 return NULL; 192 } 193 node->lineno = lineno; 194 node->hash = HASH_EOF; 195 return node; 196 } 197 198 static __inline BOOL IsEOFNode(NODE *node) 199 { 200 return !node || node->hash == HASH_EOF; 201 } 202 203 static BOOL ConvertNode(const FILECOMPARE *pFC, NODE *node) 204 { 205 if (!(pFC->dwFlags & FLAG_T)) 206 { 207 LPTSTR tmp = ExpandTab(node->pszLine); 208 if (!tmp) 209 return FALSE; 210 free(node->pszLine); 211 node->pszLine = tmp; 212 if (!(pFC->dwFlags & FLAG_W)) 213 node->hash = GetHash(node->pszLine, !!(pFC->dwFlags & FLAG_C)); 214 } 215 if (pFC->dwFlags & FLAG_W) 216 { 217 node->pszComp = CompressSpace(node->pszLine); 218 if (!node->pszComp) 219 return FALSE; 220 node->hash = GetHash(node->pszComp, !!(pFC->dwFlags & FLAG_C)); 221 } 222 return TRUE; 223 } 224 225 static FCRET CompareNode(const FILECOMPARE *pFC, const NODE *node0, const NODE *node1) 226 { 227 DWORD dwCmpFlags; 228 LPTSTR psz0, psz1; 229 INT ret; 230 if (node0->hash != node1->hash) 231 return FCRET_DIFFERENT; 232 233 psz0 = (pFC->dwFlags & FLAG_W) ? node0->pszComp : node0->pszLine; 234 psz1 = (pFC->dwFlags & FLAG_W) ? node1->pszComp : node1->pszLine; 235 dwCmpFlags = ((pFC->dwFlags & FLAG_C) ? NORM_IGNORECASE : 0); 236 ret = CompareString(LOCALE_USER_DEFAULT, dwCmpFlags, psz0, -1, psz1, -1); 237 return (ret == CSTR_EQUAL) ? FCRET_IDENTICAL : FCRET_DIFFERENT; 238 } 239 240 static BOOL FindNextLine(LPCTSTR pch, DWORD ich, DWORD cch, LPDWORD pich) 241 { 242 while (ich < cch) 243 { 244 if (pch[ich] == TEXT('\n') || pch[ich] == TEXT('\0')) 245 { 246 *pich = ich; 247 return TRUE; 248 } 249 ++ich; 250 } 251 *pich = cch; 252 return FALSE; 253 } 254 255 static FCRET 256 ParseLines(const FILECOMPARE *pFC, HANDLE *phMapping, 257 LARGE_INTEGER *pib, const LARGE_INTEGER *pcb, struct list *list) 258 { 259 DWORD lineno = 1, ich, cch, ichNext, cbView, cchNode; 260 LPTSTR psz, pszLine; 261 BOOL fLast, bCR; 262 NODE *node; 263 264 if (*phMapping == NULL) 265 return FCRET_NO_MORE_DATA; 266 267 if (pib->QuadPart >= pcb->QuadPart) 268 { 269 CloseHandle(*phMapping); 270 *phMapping = NULL; 271 return FCRET_NO_MORE_DATA; 272 } 273 274 cbView = (DWORD)min(pcb->QuadPart - pib->QuadPart, MAX_VIEW_SIZE); 275 psz = MapViewOfFile(*phMapping, FILE_MAP_READ, pib->HighPart, pib->LowPart, cbView); 276 if (!psz) 277 { 278 return OutOfMemory(); 279 } 280 281 ich = 0; 282 cch = cbView / sizeof(TCHAR); 283 fLast = (pib->QuadPart + cbView >= pcb->QuadPart); 284 while (ich < cch && 285 (FindNextLine(psz, ich, cch, &ichNext) || 286 (ichNext == cch && (fLast || ich == 0)))) 287 { 288 bCR = (ichNext > 0) && (psz[ichNext - 1] == TEXT('\r')); 289 cchNode = ichNext - ich - bCR; 290 pszLine = AllocLine(&psz[ich], cchNode); 291 node = AllocNode(pszLine, lineno++); 292 if (!node || !ConvertNode(pFC, node)) 293 { 294 DeleteNode(node); 295 UnmapViewOfFile(psz); 296 return OutOfMemory(); 297 } 298 list_add_tail(list, &node->entry); 299 ich = ichNext + 1; 300 } 301 302 UnmapViewOfFile(psz); 303 304 pib->QuadPart += ichNext * sizeof(WCHAR); 305 306 if (pib->QuadPart < pcb->QuadPart) 307 return FCRET_IDENTICAL; 308 309 // append EOF node 310 node = AllocEOFNode(lineno); 311 if (!node) 312 return OutOfMemory(); 313 list_add_tail(list, &node->entry); 314 315 return FCRET_NO_MORE_DATA; 316 } 317 318 static VOID 319 ShowDiff(FILECOMPARE *pFC, INT i, struct list *begin, struct list *end) 320 { 321 NODE* node; 322 struct list *list = &pFC->list[i]; 323 struct list *first = NULL, *last = NULL; 324 PrintCaption(pFC->file[i]); 325 if (begin && end && list_prev(list, begin)) 326 begin = list_prev(list, begin); 327 while (begin != end) 328 { 329 node = LIST_ENTRY(begin, NODE, entry); 330 if (IsEOFNode(node)) 331 break; 332 if (!first) 333 first = begin; 334 last = begin; 335 if (!(pFC->dwFlags & FLAG_A)) 336 PrintLine(pFC, node->lineno, node->pszLine); 337 begin = list_next(list, begin); 338 } 339 if ((pFC->dwFlags & FLAG_A) && first) 340 { 341 node = LIST_ENTRY(first, NODE, entry); 342 PrintLine(pFC, node->lineno, node->pszLine); 343 first = list_next(list, first); 344 if (first != last) 345 { 346 if (list_next(list, first) == last) 347 { 348 node = LIST_ENTRY(first, NODE, entry); 349 PrintLine(pFC, node->lineno, node->pszLine); 350 } 351 else 352 { 353 PrintDots(); 354 } 355 } 356 node = LIST_ENTRY(last, NODE, entry); 357 PrintLine(pFC, node->lineno, node->pszLine); 358 } 359 } 360 361 static VOID 362 SkipIdentical(FILECOMPARE *pFC, struct list **pptr0, struct list **pptr1) 363 { 364 struct list *ptr0 = *pptr0, *ptr1 = *pptr1; 365 while (ptr0 && ptr1) 366 { 367 NODE *node0 = LIST_ENTRY(ptr0, NODE, entry); 368 NODE *node1 = LIST_ENTRY(ptr1, NODE, entry); 369 if (CompareNode(pFC, node0, node1) != FCRET_IDENTICAL) 370 break; 371 ptr0 = list_next(&pFC->list[0], ptr0); 372 ptr1 = list_next(&pFC->list[1], ptr1); 373 } 374 *pptr0 = ptr0; 375 *pptr1 = ptr1; 376 } 377 378 static DWORD 379 SkipIdenticalN(FILECOMPARE *pFC, struct list **pptr0, struct list **pptr1, 380 DWORD nnnn, DWORD lineno0, DWORD lineno1) 381 { 382 struct list *ptr0 = *pptr0, *ptr1 = *pptr1; 383 DWORD count = 0; 384 while (ptr0 && ptr1) 385 { 386 NODE *node0 = LIST_ENTRY(ptr0, NODE, entry); 387 NODE *node1 = LIST_ENTRY(ptr1, NODE, entry); 388 if (node0->lineno >= lineno0) 389 break; 390 if (node1->lineno >= lineno1) 391 break; 392 if (CompareNode(pFC, node0, node1) != FCRET_IDENTICAL) 393 break; 394 ptr0 = list_next(&pFC->list[0], ptr0); 395 ptr1 = list_next(&pFC->list[1], ptr1); 396 ++count; 397 if (count >= nnnn) 398 break; 399 } 400 *pptr0 = ptr0; 401 *pptr1 = ptr1; 402 return count; 403 } 404 405 static FCRET 406 ScanDiff(FILECOMPARE *pFC, struct list **pptr0, struct list **pptr1, 407 DWORD lineno0, DWORD lineno1) 408 { 409 struct list *ptr0 = *pptr0, *ptr1 = *pptr1, *tmp0, *tmp1; 410 NODE *node0, *node1; 411 INT count; 412 while (ptr0 && ptr1) 413 { 414 node0 = LIST_ENTRY(ptr0, NODE, entry); 415 node1 = LIST_ENTRY(ptr1, NODE, entry); 416 if (node0->lineno >= lineno0) 417 return FCRET_DIFFERENT; 418 if (node1->lineno >= lineno1) 419 return FCRET_DIFFERENT; 420 tmp0 = ptr0; 421 tmp1 = ptr1; 422 count = SkipIdenticalN(pFC, &tmp0, &tmp1, pFC->nnnn, lineno0, lineno1); 423 if (count >= pFC->nnnn) 424 break; 425 if (count > 0) 426 { 427 ptr0 = tmp0; 428 ptr1 = tmp1; 429 } 430 else 431 { 432 ptr0 = list_next(&pFC->list[0], ptr0); 433 ptr1 = list_next(&pFC->list[1], ptr1); 434 } 435 } 436 *pptr0 = ptr0; 437 *pptr1 = ptr1; 438 return FCRET_IDENTICAL; 439 } 440 441 static FCRET 442 Resync(FILECOMPARE *pFC, struct list **pptr0, struct list **pptr1) 443 { 444 FCRET ret; 445 struct list *ptr0, *ptr1, *save0 = NULL, *save1 = NULL; 446 NODE *node0, *node1; 447 struct list *list0 = &pFC->list[0], *list1 = &pFC->list[1]; 448 DWORD lineno0, lineno1; 449 INT penalty, i0, i1, min_penalty = MAXLONG; 450 451 node0 = LIST_ENTRY(*pptr0, NODE, entry); 452 node1 = LIST_ENTRY(*pptr1, NODE, entry); 453 lineno0 = node0->lineno + pFC->n; 454 lineno1 = node1->lineno + pFC->n; 455 456 // ``If the files that you are comparing have more than pFC->n consecutive 457 // differing lines, FC cancels the comparison,, 458 // ``If the number of matching lines in the files is less than pFC->nnnn, 459 // FC displays the matching lines as differences,, 460 for (ptr1 = list_next(list1, *pptr1), i1 = 0; ptr1; ptr1 = list_next(list1, ptr1), ++i1) 461 { 462 node1 = LIST_ENTRY(ptr1, NODE, entry); 463 if (node1->lineno >= lineno1) 464 break; 465 for (ptr0 = list_next(list0, *pptr0), i0 = 0; ptr0; ptr0 = list_next(list0, ptr0), ++i0) 466 { 467 node0 = LIST_ENTRY(ptr0, NODE, entry); 468 if (node0->lineno >= lineno0) 469 break; 470 if (CompareNode(pFC, node0, node1) == FCRET_IDENTICAL) 471 { 472 penalty = min(i0, i1) + abs(i1 - i0); 473 if (min_penalty > penalty) 474 { 475 min_penalty = penalty; 476 save0 = ptr0; 477 save1 = ptr1; 478 } 479 } 480 } 481 } 482 483 if (save0 && save1) 484 { 485 *pptr0 = save0; 486 *pptr1 = save1; 487 ret = ScanDiff(pFC, &save0, &save1, lineno0, lineno1); 488 if (save0 && save1) 489 { 490 *pptr0 = save0; 491 *pptr1 = save1; 492 } 493 return ret; 494 } 495 496 for (ptr0 = *pptr0; ptr0; ptr0 = list_next(list0, ptr0)) 497 { 498 node0 = LIST_ENTRY(ptr0, NODE, entry); 499 if (node0->lineno == lineno0) 500 break; 501 } 502 for (ptr1 = *pptr1; ptr1; ptr1 = list_next(list1, ptr1)) 503 { 504 node1 = LIST_ENTRY(ptr1, NODE, entry); 505 if (node1->lineno == lineno1) 506 break; 507 } 508 *pptr0 = ptr0; 509 *pptr1 = ptr1; 510 return FCRET_DIFFERENT; 511 } 512 513 static FCRET 514 Finalize(FILECOMPARE* pFC, struct list *ptr0, struct list* ptr1, BOOL fDifferent) 515 { 516 if (!ptr0 && !ptr1) 517 { 518 if (fDifferent) 519 return Different(pFC->file[0], pFC->file[1]); 520 return NoDifference(); 521 } 522 else 523 { 524 ShowDiff(pFC, 0, ptr0, NULL); 525 ShowDiff(pFC, 1, ptr1, NULL); 526 PrintEndOfDiff(); 527 return FCRET_DIFFERENT; 528 } 529 } 530 531 // FIXME: "cmd_apitest fc" has some failures. 532 FCRET TextCompare(FILECOMPARE *pFC, HANDLE *phMapping0, const LARGE_INTEGER *pcb0, 533 HANDLE *phMapping1, const LARGE_INTEGER *pcb1) 534 { 535 FCRET ret, ret0, ret1; 536 struct list *ptr0, *ptr1, *save0, *save1, *next0, *next1; 537 NODE* node0, * node1; 538 BOOL fDifferent = FALSE; 539 LARGE_INTEGER ib0 = { .QuadPart = 0 }, ib1 = { .QuadPart = 0 }; 540 struct list *list0 = &pFC->list[0], *list1 = &pFC->list[1]; 541 list_init(list0); 542 list_init(list1); 543 544 do 545 { 546 ret0 = ParseLines(pFC, phMapping0, &ib0, pcb0, list0); 547 if (ret0 == FCRET_INVALID) 548 { 549 ret = ret0; 550 goto cleanup; 551 } 552 ret1 = ParseLines(pFC, phMapping1, &ib1, pcb1, list1); 553 if (ret1 == FCRET_INVALID) 554 { 555 ret = ret1; 556 goto cleanup; 557 } 558 559 ptr0 = list_head(list0); 560 ptr1 = list_head(list1); 561 for (;;) 562 { 563 if (!ptr0 || !ptr1) 564 goto quit; 565 566 // skip identical (sync'ed) 567 SkipIdentical(pFC, &ptr0, &ptr1); 568 if (ptr0 || ptr1) 569 fDifferent = TRUE; 570 node0 = LIST_ENTRY(ptr0, NODE, entry); 571 node1 = LIST_ENTRY(ptr1, NODE, entry); 572 if (IsEOFNode(node0) || IsEOFNode(node1)) 573 goto quit; 574 575 // try to resync 576 save0 = ptr0; 577 save1 = ptr1; 578 ret = Resync(pFC, &ptr0, &ptr1); 579 if (ret == FCRET_INVALID) 580 goto cleanup; 581 if (ret == FCRET_DIFFERENT) 582 { 583 // resync failed 584 ret = ResyncFailed(); 585 // show the difference 586 ShowDiff(pFC, 0, save0, ptr0); 587 ShowDiff(pFC, 1, save1, ptr1); 588 PrintEndOfDiff(); 589 goto cleanup; 590 } 591 592 // show the difference 593 fDifferent = TRUE; 594 next0 = ptr0 ? list_next(list0, ptr0) : ptr0; 595 next1 = ptr1 ? list_next(list1, ptr1) : ptr1; 596 ShowDiff(pFC, 0, save0, (next0 ? next0 : ptr0)); 597 ShowDiff(pFC, 1, save1, (next1 ? next1 : ptr1)); 598 PrintEndOfDiff(); 599 600 // now resync'ed 601 } 602 } while (ret0 != FCRET_NO_MORE_DATA || ret1 != FCRET_NO_MORE_DATA); 603 604 quit: 605 ret = Finalize(pFC, ptr0, ptr1, fDifferent); 606 cleanup: 607 DeleteList(list0); 608 DeleteList(list1); 609 return ret; 610 } 611