1 /* Support routines for GNU DIFF. 2 3 Copyright (C) 1988-1989, 1992-1995, 1998, 2001-2002, 2004, 2006, 2009-2011 4 Free Software Foundation, Inc. 5 6 This file is part of GNU DIFF. 7 8 This program is free software: you can redistribute it and/or modify 9 it under the terms of the GNU General Public License as published by 10 the Free Software Foundation, either version 3 of the License, or 11 (at your option) any later version. 12 13 This program is distributed in the hope that it will be useful, 14 but WITHOUT ANY WARRANTY; without even the implied warranty of 15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 GNU General Public License for more details. 17 18 You should have received a copy of the GNU General Public License 19 along with this program. If not, see <http://www.gnu.org/licenses/>. */ 20 21 #include "diff.h" 22 #include <dirname.h> 23 #include <error.h> 24 #include <sh-quote.h> 25 #include <xalloc.h> 26 27 char const pr_program[] = PR_PROGRAM; 28 29 /* Queue up one-line messages to be printed at the end, 30 when -l is specified. Each message is recorded with a `struct msg'. */ 31 32 struct msg 33 { 34 struct msg *next; 35 char args[1]; /* Format + 4 args, each '\0' terminated, concatenated. */ 36 }; 37 38 /* Head of the chain of queues messages. */ 39 40 static struct msg *msg_chain; 41 42 /* Tail of the chain of queues messages. */ 43 44 static struct msg **msg_chain_end = &msg_chain; 45 46 /* Use when a system call returns non-zero status. 47 NAME should normally be the file name. */ 48 49 void 50 perror_with_name (char const *name) 51 { 52 error (0, errno, "%s", name); 53 } 54 55 /* Use when a system call returns non-zero status and that is fatal. */ 56 57 void 58 pfatal_with_name (char const *name) 59 { 60 int e = errno; 61 print_message_queue (); 62 error (EXIT_TROUBLE, e, "%s", name); 63 abort (); 64 } 65 66 /* Print an error message containing MSGID, then exit. */ 67 68 void 69 fatal (char const *msgid) 70 { 71 print_message_queue (); 72 error (EXIT_TROUBLE, 0, "%s", _(msgid)); 73 abort (); 74 } 75 76 /* Like printf, except if -l in effect then save the message and print later. 77 This is used for things like "Only in ...". */ 78 79 void 80 message (char const *format_msgid, char const *arg1, char const *arg2) 81 { 82 message5 (format_msgid, arg1, arg2, 0, 0); 83 } 84 85 void 86 message5 (char const *format_msgid, char const *arg1, char const *arg2, 87 char const *arg3, char const *arg4) 88 { 89 if (paginate) 90 { 91 char *p; 92 char const *arg[5]; 93 int i; 94 size_t size[5]; 95 size_t total_size = offsetof (struct msg, args); 96 struct msg *new; 97 98 arg[0] = format_msgid; 99 arg[1] = arg1; 100 arg[2] = arg2; 101 arg[3] = arg3 ? arg3 : ""; 102 arg[4] = arg4 ? arg4 : ""; 103 104 for (i = 0; i < 5; i++) 105 total_size += size[i] = strlen (arg[i]) + 1; 106 107 new = xmalloc (total_size); 108 109 for (i = 0, p = new->args; i < 5; p += size[i++]) 110 memcpy (p, arg[i], size[i]); 111 112 *msg_chain_end = new; 113 new->next = 0; 114 msg_chain_end = &new->next; 115 } 116 else 117 { 118 if (sdiff_merge_assist) 119 putchar (' '); 120 printf (_(format_msgid), arg1, arg2, arg3, arg4); 121 } 122 } 123 124 /* Output all the messages that were saved up by calls to `message'. */ 125 126 void 127 print_message_queue (void) 128 { 129 char const *arg[5]; 130 int i; 131 struct msg *m = msg_chain; 132 133 while (m) 134 { 135 struct msg *next = m->next; 136 arg[0] = m->args; 137 for (i = 0; i < 4; i++) 138 arg[i + 1] = arg[i] + strlen (arg[i]) + 1; 139 printf (_(arg[0]), arg[1], arg[2], arg[3], arg[4]); 140 free (m); 141 m = next; 142 } 143 } 144 145 /* Call before outputting the results of comparing files NAME0 and NAME1 146 to set up OUTFILE, the stdio stream for the output to go to. 147 148 Usually, OUTFILE is just stdout. But when -l was specified 149 we fork off a `pr' and make OUTFILE a pipe to it. 150 `pr' then outputs to our stdout. */ 151 152 static char const *current_name0; 153 static char const *current_name1; 154 static bool currently_recursive; 155 156 void 157 setup_output (char const *name0, char const *name1, bool recursive) 158 { 159 current_name0 = name0; 160 current_name1 = name1; 161 currently_recursive = recursive; 162 outfile = 0; 163 } 164 165 #if HAVE_WORKING_FORK 166 static pid_t pr_pid; 167 #endif 168 169 void 170 begin_output (void) 171 { 172 char *name; 173 174 if (outfile != 0) 175 return; 176 177 /* Construct the header of this piece of diff. */ 178 name = xmalloc (strlen (current_name0) + strlen (current_name1) 179 + strlen (switch_string) + 7); 180 181 /* POSIX 1003.1-2001 specifies this format. But there are some bugs in 182 the standard: it says that we must print only the last component 183 of the pathnames, and it requires two spaces after "diff" if 184 there are no options. These requirements are silly and do not 185 match historical practice. */ 186 sprintf (name, "diff%s %s %s", switch_string, current_name0, current_name1); 187 188 if (paginate) 189 { 190 if (fflush (stdout) != 0) 191 pfatal_with_name (_("write failed")); 192 193 /* Make OUTFILE a pipe to a subsidiary `pr'. */ 194 { 195 #if HAVE_WORKING_FORK 196 int pipes[2]; 197 198 if (pipe (pipes) != 0) 199 pfatal_with_name ("pipe"); 200 201 pr_pid = fork (); 202 if (pr_pid < 0) 203 pfatal_with_name ("fork"); 204 205 if (pr_pid == 0) 206 { 207 close (pipes[1]); 208 if (pipes[0] != STDIN_FILENO) 209 { 210 if (dup2 (pipes[0], STDIN_FILENO) < 0) 211 pfatal_with_name ("dup2"); 212 close (pipes[0]); 213 } 214 215 execl (pr_program, pr_program, "-h", name, (char *) 0); 216 _exit (errno == ENOENT ? 127 : 126); 217 } 218 else 219 { 220 close (pipes[0]); 221 outfile = fdopen (pipes[1], "w"); 222 if (!outfile) 223 pfatal_with_name ("fdopen"); 224 } 225 #else 226 char *command = xmalloc (sizeof pr_program - 1 + 7 227 + shell_quote_length (name) + 1); 228 char *p; 229 sprintf (command, "%s -f -h ", pr_program); 230 p = command + sizeof pr_program - 1 + 7; 231 p = shell_quote_copy (p, name); 232 *p = 0; 233 errno = 0; 234 outfile = popen (command, "w"); 235 if (!outfile) 236 pfatal_with_name (command); 237 free (command); 238 #endif 239 } 240 } 241 else 242 { 243 244 /* If -l was not specified, output the diff straight to `stdout'. */ 245 246 outfile = stdout; 247 248 /* If handling multiple files (because scanning a directory), 249 print which files the following output is about. */ 250 if (currently_recursive) 251 printf ("%s\n", name); 252 } 253 254 free (name); 255 256 /* A special header is needed at the beginning of context output. */ 257 switch (output_style) 258 { 259 case OUTPUT_CONTEXT: 260 print_context_header (files, false); 261 break; 262 263 case OUTPUT_UNIFIED: 264 print_context_header (files, true); 265 break; 266 267 default: 268 break; 269 } 270 } 271 272 /* Call after the end of output of diffs for one file. 273 Close OUTFILE and get rid of the `pr' subfork. */ 274 275 void 276 finish_output (void) 277 { 278 if (outfile != 0 && outfile != stdout) 279 { 280 int status; 281 int wstatus; 282 int werrno = 0; 283 if (ferror (outfile)) 284 fatal ("write failed"); 285 #if ! HAVE_WORKING_FORK 286 wstatus = pclose (outfile); 287 if (wstatus == -1) 288 werrno = errno; 289 #else 290 if (fclose (outfile) != 0) 291 pfatal_with_name (_("write failed")); 292 if (waitpid (pr_pid, &wstatus, 0) < 0) 293 pfatal_with_name ("waitpid"); 294 #endif 295 status = (! werrno && WIFEXITED (wstatus) 296 ? WEXITSTATUS (wstatus) 297 : INT_MAX); 298 if (status) 299 error (EXIT_TROUBLE, werrno, 300 _(status == 126 301 ? "subsidiary program `%s' could not be invoked" 302 : status == 127 303 ? "subsidiary program `%s' not found" 304 : status == INT_MAX 305 ? "subsidiary program `%s' failed" 306 : "subsidiary program `%s' failed (exit status %d)"), 307 pr_program, status); 308 } 309 310 outfile = 0; 311 } 312 313 /* Compare two lines (typically one from each input file) 314 according to the command line options. 315 For efficiency, this is invoked only when the lines do not match exactly 316 but an option like -i might cause us to ignore the difference. 317 Return nonzero if the lines differ. */ 318 319 bool 320 lines_differ (char const *s1, char const *s2) 321 { 322 register char const *t1 = s1; 323 register char const *t2 = s2; 324 size_t column = 0; 325 326 while (1) 327 { 328 register unsigned char c1 = *t1++; 329 register unsigned char c2 = *t2++; 330 331 /* Test for exact char equality first, since it's a common case. */ 332 if (c1 != c2) 333 { 334 switch (ignore_white_space) 335 { 336 case IGNORE_ALL_SPACE: 337 /* For -w, just skip past any white space. */ 338 while (isspace (c1) && c1 != '\n') c1 = *t1++; 339 while (isspace (c2) && c2 != '\n') c2 = *t2++; 340 break; 341 342 case IGNORE_SPACE_CHANGE: 343 /* For -b, advance past any sequence of white space in 344 line 1 and consider it just one space, or nothing at 345 all if it is at the end of the line. */ 346 if (isspace (c1)) 347 { 348 while (c1 != '\n') 349 { 350 c1 = *t1++; 351 if (! isspace (c1)) 352 { 353 --t1; 354 c1 = ' '; 355 break; 356 } 357 } 358 } 359 360 /* Likewise for line 2. */ 361 if (isspace (c2)) 362 { 363 while (c2 != '\n') 364 { 365 c2 = *t2++; 366 if (! isspace (c2)) 367 { 368 --t2; 369 c2 = ' '; 370 break; 371 } 372 } 373 } 374 375 if (c1 != c2) 376 { 377 /* If we went too far when doing the simple test 378 for equality, go back to the first non-white-space 379 character in both sides and try again. */ 380 if (c2 == ' ' && c1 != '\n' 381 && s1 + 1 < t1 382 && isspace ((unsigned char) t1[-2])) 383 { 384 --t1; 385 continue; 386 } 387 if (c1 == ' ' && c2 != '\n' 388 && s2 + 1 < t2 389 && isspace ((unsigned char) t2[-2])) 390 { 391 --t2; 392 continue; 393 } 394 } 395 396 break; 397 398 case IGNORE_TRAILING_SPACE: 399 case IGNORE_TAB_EXPANSION_AND_TRAILING_SPACE: 400 if (isspace (c1) && isspace (c2)) 401 { 402 unsigned char c; 403 if (c1 != '\n') 404 { 405 char const *p = t1; 406 while ((c = *p) != '\n' && isspace (c)) 407 ++p; 408 if (c != '\n') 409 break; 410 } 411 if (c2 != '\n') 412 { 413 char const *p = t2; 414 while ((c = *p) != '\n' && isspace (c)) 415 ++p; 416 if (c != '\n') 417 break; 418 } 419 /* Both lines have nothing but whitespace left. */ 420 return false; 421 } 422 if (ignore_white_space == IGNORE_TRAILING_SPACE) 423 break; 424 /* Fall through. */ 425 case IGNORE_TAB_EXPANSION: 426 if ((c1 == ' ' && c2 == '\t') 427 || (c1 == '\t' && c2 == ' ')) 428 { 429 size_t column2 = column; 430 for (;; c1 = *t1++) 431 { 432 if (c1 == ' ') 433 column++; 434 else if (c1 == '\t') 435 column += tabsize - column % tabsize; 436 else 437 break; 438 } 439 for (;; c2 = *t2++) 440 { 441 if (c2 == ' ') 442 column2++; 443 else if (c2 == '\t') 444 column2 += tabsize - column2 % tabsize; 445 else 446 break; 447 } 448 if (column != column2) 449 return true; 450 } 451 break; 452 453 case IGNORE_NO_WHITE_SPACE: 454 break; 455 } 456 457 /* Lowercase all letters if -i is specified. */ 458 459 if (ignore_case) 460 { 461 c1 = tolower (c1); 462 c2 = tolower (c2); 463 } 464 465 if (c1 != c2) 466 break; 467 } 468 if (c1 == '\n') 469 return false; 470 471 column += c1 == '\t' ? tabsize - column % tabsize : 1; 472 } 473 474 return true; 475 } 476 477 /* Find the consecutive changes at the start of the script START. 478 Return the last link before the first gap. */ 479 480 struct change * 481 find_change (struct change *start) 482 { 483 return start; 484 } 485 486 struct change * 487 find_reverse_change (struct change *start) 488 { 489 return start; 490 } 491 492 /* Divide SCRIPT into pieces by calling HUNKFUN and 493 print each piece with PRINTFUN. 494 Both functions take one arg, an edit script. 495 496 HUNKFUN is called with the tail of the script 497 and returns the last link that belongs together with the start 498 of the tail. 499 500 PRINTFUN takes a subscript which belongs together (with a null 501 link at the end) and prints it. */ 502 503 void 504 print_script (struct change *script, 505 struct change * (*hunkfun) (struct change *), 506 void (*printfun) (struct change *)) 507 { 508 struct change *next = script; 509 510 while (next) 511 { 512 struct change *this, *end; 513 514 /* Find a set of changes that belong together. */ 515 this = next; 516 end = (*hunkfun) (next); 517 518 /* Disconnect them from the rest of the changes, 519 making them a hunk, and remember the rest for next iteration. */ 520 next = end->link; 521 end->link = 0; 522 #ifdef DEBUG 523 debug_script (this); 524 #endif 525 526 /* Print this hunk. */ 527 (*printfun) (this); 528 529 /* Reconnect the script so it will all be freed properly. */ 530 end->link = next; 531 } 532 } 533 534 /* Print the text of a single line LINE, 535 flagging it with the characters in LINE_FLAG (which say whether 536 the line is inserted, deleted, changed, etc.). LINE_FLAG must not 537 end in a blank, unless it is a single blank. */ 538 539 void 540 print_1_line (char const *line_flag, char const *const *line) 541 { 542 char const *base = line[0], *limit = line[1]; /* Help the compiler. */ 543 FILE *out = outfile; /* Help the compiler some more. */ 544 char const *flag_format = 0; 545 546 /* If -T was specified, use a Tab between the line-flag and the text. 547 Otherwise use a Space (as Unix diff does). 548 Print neither space nor tab if line-flags are empty. 549 But omit trailing blanks if requested. */ 550 551 if (line_flag && *line_flag) 552 { 553 char const *flag_format_1 = flag_format = initial_tab ? "%s\t" : "%s "; 554 char const *line_flag_1 = line_flag; 555 556 if (suppress_blank_empty && **line == '\n') 557 { 558 flag_format_1 = "%s"; 559 560 /* This hack to omit trailing blanks takes advantage of the 561 fact that the only way that LINE_FLAG can end in a blank 562 is when LINE_FLAG consists of a single blank. */ 563 line_flag_1 += *line_flag_1 == ' '; 564 } 565 566 fprintf (out, flag_format_1, line_flag_1); 567 } 568 569 output_1_line (base, limit, flag_format, line_flag); 570 571 if ((!line_flag || line_flag[0]) && limit[-1] != '\n') 572 fprintf (out, "\n\\ %s\n", _("No newline at end of file")); 573 } 574 575 /* Output a line from BASE up to LIMIT. 576 With -t, expand white space characters to spaces, and if FLAG_FORMAT 577 is nonzero, output it with argument LINE_FLAG after every 578 internal carriage return, so that tab stops continue to line up. */ 579 580 void 581 output_1_line (char const *base, char const *limit, char const *flag_format, 582 char const *line_flag) 583 { 584 if (!expand_tabs) 585 fwrite (base, sizeof (char), limit - base, outfile); 586 else 587 { 588 register FILE *out = outfile; 589 register unsigned char c; 590 register char const *t = base; 591 register size_t column = 0; 592 size_t tab_size = tabsize; 593 594 while (t < limit) 595 switch ((c = *t++)) 596 { 597 case '\t': 598 { 599 size_t spaces = tab_size - column % tab_size; 600 column += spaces; 601 do 602 putc (' ', out); 603 while (--spaces); 604 } 605 break; 606 607 case '\r': 608 putc (c, out); 609 if (flag_format && t < limit && *t != '\n') 610 fprintf (out, flag_format, line_flag); 611 column = 0; 612 break; 613 614 case '\b': 615 if (column == 0) 616 continue; 617 column--; 618 putc (c, out); 619 break; 620 621 default: 622 column += isprint (c) != 0; 623 putc (c, out); 624 break; 625 } 626 } 627 } 628 629 char const change_letter[] = { 0, 'd', 'a', 'c' }; 630 631 /* Translate an internal line number (an index into diff's table of lines) 632 into an actual line number in the input file. 633 The internal line number is I. FILE points to the data on the file. 634 635 Internal line numbers count from 0 starting after the prefix. 636 Actual line numbers count from 1 within the entire file. */ 637 638 lin 639 translate_line_number (struct file_data const *file, lin i) 640 { 641 return i + file->prefix_lines + 1; 642 } 643 644 /* Translate a line number range. This is always done for printing, 645 so for convenience translate to long int rather than lin, so that the 646 caller can use printf with "%ld" without casting. */ 647 648 void 649 translate_range (struct file_data const *file, 650 lin a, lin b, 651 long int *aptr, long int *bptr) 652 { 653 *aptr = translate_line_number (file, a - 1) + 1; 654 *bptr = translate_line_number (file, b + 1) - 1; 655 } 656 657 /* Print a pair of line numbers with SEPCHAR, translated for file FILE. 658 If the two numbers are identical, print just one number. 659 660 Args A and B are internal line numbers. 661 We print the translated (real) line numbers. */ 662 663 void 664 print_number_range (char sepchar, struct file_data *file, lin a, lin b) 665 { 666 long int trans_a, trans_b; 667 translate_range (file, a, b, &trans_a, &trans_b); 668 669 /* Note: we can have B < A in the case of a range of no lines. 670 In this case, we should print the line number before the range, 671 which is B. */ 672 if (trans_b > trans_a) 673 fprintf (outfile, "%ld%c%ld", trans_a, sepchar, trans_b); 674 else 675 fprintf (outfile, "%ld", trans_b); 676 } 677 678 /* Look at a hunk of edit script and report the range of lines in each file 679 that it applies to. HUNK is the start of the hunk, which is a chain 680 of `struct change'. The first and last line numbers of file 0 are stored in 681 *FIRST0 and *LAST0, and likewise for file 1 in *FIRST1 and *LAST1. 682 Note that these are internal line numbers that count from 0. 683 684 If no lines from file 0 are deleted, then FIRST0 is LAST0+1. 685 686 Return UNCHANGED if only ignorable lines are inserted or deleted, 687 OLD if lines of file 0 are deleted, 688 NEW if lines of file 1 are inserted, 689 and CHANGED if both kinds of changes are found. */ 690 691 enum changes 692 analyze_hunk (struct change *hunk, 693 lin *first0, lin *last0, 694 lin *first1, lin *last1) 695 { 696 struct change *next; 697 lin l0, l1; 698 lin show_from, show_to; 699 lin i; 700 bool trivial = ignore_blank_lines || ignore_regexp.fastmap; 701 size_t trivial_length = ignore_blank_lines - 1; 702 /* If 0, ignore zero-length lines; 703 if SIZE_MAX, do not ignore lines just because of their length. */ 704 705 bool skip_white_space = 706 ignore_blank_lines && IGNORE_TRAILING_SPACE <= ignore_white_space; 707 bool skip_leading_white_space = 708 skip_white_space && IGNORE_SPACE_CHANGE <= ignore_white_space; 709 710 char const * const *linbuf0 = files[0].linbuf; /* Help the compiler. */ 711 char const * const *linbuf1 = files[1].linbuf; 712 713 show_from = show_to = 0; 714 715 *first0 = hunk->line0; 716 *first1 = hunk->line1; 717 718 next = hunk; 719 do 720 { 721 l0 = next->line0 + next->deleted - 1; 722 l1 = next->line1 + next->inserted - 1; 723 show_from += next->deleted; 724 show_to += next->inserted; 725 726 for (i = next->line0; i <= l0 && trivial; i++) 727 { 728 char const *line = linbuf0[i]; 729 char const *newline = linbuf0[i + 1] - 1; 730 size_t len = newline - line; 731 char const *p = line; 732 if (skip_white_space) 733 for (; *p != '\n'; p++) 734 if (! isspace ((unsigned char) *p)) 735 { 736 if (! skip_leading_white_space) 737 p = line; 738 break; 739 } 740 if (newline - p != trivial_length 741 && (! ignore_regexp.fastmap 742 || re_search (&ignore_regexp, line, len, 0, len, 0) < 0)) 743 trivial = 0; 744 } 745 746 for (i = next->line1; i <= l1 && trivial; i++) 747 { 748 char const *line = linbuf1[i]; 749 char const *newline = linbuf1[i + 1] - 1; 750 size_t len = newline - line; 751 char const *p = line; 752 if (skip_white_space) 753 for (; *p != '\n'; p++) 754 if (! isspace ((unsigned char) *p)) 755 { 756 if (! skip_leading_white_space) 757 p = line; 758 break; 759 } 760 if (newline - p != trivial_length 761 && (! ignore_regexp.fastmap 762 || re_search (&ignore_regexp, line, len, 0, len, 0) < 0)) 763 trivial = 0; 764 } 765 } 766 while ((next = next->link) != 0); 767 768 *last0 = l0; 769 *last1 = l1; 770 771 /* If all inserted or deleted lines are ignorable, 772 tell the caller to ignore this hunk. */ 773 774 if (trivial) 775 return UNCHANGED; 776 777 return (show_from ? OLD : UNCHANGED) | (show_to ? NEW : UNCHANGED); 778 } 779 780 /* Concatenate three strings, returning a newly malloc'd string. */ 781 782 char * 783 concat (char const *s1, char const *s2, char const *s3) 784 { 785 char *new = xmalloc (strlen (s1) + strlen (s2) + strlen (s3) + 1); 786 sprintf (new, "%s%s%s", s1, s2, s3); 787 return new; 788 } 789 790 /* Yield a new block of SIZE bytes, initialized to zero. */ 791 792 void * 793 zalloc (size_t size) 794 { 795 void *p = xmalloc (size); 796 memset (p, 0, size); 797 return p; 798 } 799 800 void 801 debug_script (struct change *sp) 802 { 803 fflush (stdout); 804 805 for (; sp; sp = sp->link) 806 { 807 long int line0 = sp->line0; 808 long int line1 = sp->line1; 809 long int deleted = sp->deleted; 810 long int inserted = sp->inserted; 811 fprintf (stderr, "%3ld %3ld delete %ld insert %ld\n", 812 line0, line1, deleted, inserted); 813 } 814 815 fflush (stderr); 816 } 817