1 /* Find similar sequences in multiple files and report differences.
2 Copyright (C) 1992, 1997, 1998, 1999, 2011 Free Software Foundation, Inc.
3 Francois Pinard <pinard@iro.umontreal.ca>, 1997.
4
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation, either version 3 of the License, or
8 (at your option) any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with this program. If not, see <http://www.gnu.org/licenses/>.
17
18 */
19
20 #include "wdiff.h"
21
22 /* Define to 1 to compile in a debugging option. */
23 #define DEBUGGING 1
24
25 /* Define to the printed name of standard input. */
26 #define STDIN_PRINTED_NAME "<stdin>"
27
28 /* Define to 1 for requesting the capability of doing full comparisons, then
29 not necessarily relying on checksums (both methods are available). If 0
30 instead, always rely on checksums and pack them with item types into the
31 item structure: this yields faster code and disallows paranoid mode. */
32 #define SAFER_SLOWER 0
33
34 /* One may also, optionally, define a default PAGER_PROGRAM. This
35 might be done using the --with-default-pager=PAGER configure
36 switch. If PAGER_PROGRAM is undefined and neither the WDIFF_PAGER
37 nor the PAGER environment variable is set, none will be used. */
38
39 /* We do termcap init ourselves, so pass -X.
40 We might do coloring, so pass -R. */
41 #define LESS_DEFAULT_OPTS "-X -R"
42
43 /*-----------------------.
44 | Library declarations. |
45 `-----------------------*/
46
47 #if DEBUGGING
48 # include <assert.h>
49 #else
50 # define assert (Expr)
51 #endif
52
53 #include <ctype.h>
54 #include <string.h>
55
56 char *strstr ();
57
58 #if HAVE_TPUTS
59 # if HAVE_TERMCAP_H
60 # include <termcap.h>
61 # else
62 # if HAVE_TERMLIB_H
63 # include <termlib.h>
64 # else
65 # if HAVE_CURSES_H
66 # include <curses.h>
67 # else
68 # if HAVE_NCURSES_H
69 # include <ncurses.h>
70 # endif
71 # endif
72 # if HAVE_TERM_H
73 # include <term.h>
74 # endif
75 # endif
76 # endif
77 #endif
78
79 #include <fcntl.h>
80 #include <sys/stat.h>
81 #include <unistd.h>
82 #include <getopt.h>
83 #include <locale.h>
84 #include <sys/wait.h>
85
86 #include "regex.h"
87 #define CHAR_SET_SIZE 256
88
89 #define MASK(Length) (~(~0 << (Length)))
90
91 /*---------------------.
92 | Local declarations. |
93 `---------------------*/
94
95 /* Exit codes values. */
96 #define EXIT_DIFFERENCE 1 /* some differences found */
97 #define EXIT_ERROR 2 /* any other reason for exit */
98
99 /* Define pseudo short options for long options without short options. */
100 #define GTYPE_GROUP_FORMAT_OPTION 10
101 #define HORIZON_LINES_OPTION 11
102 #define LEFT_COLUMN_OPTION 12
103 #define LINE_FORMAT_OPTION 13
104 #define LTYPE_LINE_FORMAT_OPTION 14
105 #define SUPPRESS_COMMON_LINES_OPTION 15
106 #define TOLERANCE_OPTION 16
107
108 /* The name this program was run with. */
109 const char *program_name;
110
111 /* If nonzero, display usage information and exit. */
112 static int show_help = 0;
113
114 /* If nonzero, print the version on standard output and exit. */
115 static int show_version = 0;
116
117 /* Other variables. */
118
119 FILE *output_file; /* file or pipe to which we write output */
120 int exit_status = EXIT_SUCCESS; /* status at end of execution */
121
122 /* Options variables. */
123
124 #define OPTION_STRING \
125 "0123ABC::D:F:GHI:J:KL:NO:PQ:RS:TU::VWX:Y:Z:abc::dehijklmnopqrs:tu::vw:xyz"
126 /* BC: D:F: HI: L:N P S:TU: X: abc dehi l n pqrs:tu vw:xy */
127 /* The line above gives GNU diff options, for reference. */
128
129 /* Long options equivalences. */
130 static const struct option long_options[] = {
131 {"auto-pager", no_argument, NULL, 'A'},
132 {"avoid-wraps", no_argument, NULL, 'm'},
133 {"brief", no_argument, NULL, 'q'},
134 {"context", optional_argument, NULL, 'c'},
135 {"debugging", no_argument, NULL, '0'},
136 {"ed", no_argument, NULL, 'e'},
137 {"exclude-from", required_argument, NULL, 'X'},
138 {"exclude", required_argument, NULL, 'x'},
139 {"expand-tabs", no_argument, NULL, 't'},
140 {"GTYPE-group-format", required_argument, NULL, GTYPE_GROUP_FORMAT_OPTION},
141 {"help", no_argument, &show_help, 1},
142 {"horizon-lines", required_argument, NULL, HORIZON_LINES_OPTION},
143 {"ifdef", required_argument, NULL, 'D'},
144 {"ignore-all-space", no_argument, NULL, 'w'},
145 {"ignore-blank-lines", no_argument, NULL, 'B'},
146 {"ignore-case", no_argument, NULL, 'i'},
147 {"ignore-matching-lines", required_argument, NULL, 'I'},
148 {"ignore-space-change", no_argument, NULL, 'b'},
149 {"initial-tab", no_argument, NULL, 'T'},
150 {"item-regexp", required_argument, NULL, 'O'},
151 {"label", required_argument, NULL, 'L'},
152 {"left-column", no_argument, NULL, LEFT_COLUMN_OPTION},
153 {"less-mode", no_argument, NULL, 'k'},
154 {"line-format", required_argument, NULL, LINE_FORMAT_OPTION},
155 {"LTYPE-line-format", required_argument, NULL, LTYPE_LINE_FORMAT_OPTION},
156 {"minimal", no_argument, NULL, 'd'},
157 {"minimum-size", required_argument, NULL, 'J'},
158 {"new-file", no_argument, NULL, 'N'},
159 {"no-common", no_argument, NULL, '3'},
160 {"no-deleted", no_argument, NULL, '1'},
161 {"no-init-term", no_argument, NULL, 'K'}, /* backwards compatibility */
162 {"no-inserted", no_argument, NULL, '2'},
163 {"ignore-delimiters", no_argument, NULL, 'j'},
164 {"paginate", no_argument, NULL, 'l'},
165 {"printer", no_argument, NULL, 'o'},
166 {"rcs", no_argument, NULL, 'n'},
167 {"recursive", no_argument, NULL, 'r'},
168 {"relist-files", no_argument, NULL, 'G'},
169 {"report-identical-files", no_argument, NULL, 's'},
170 {"show-c-function", no_argument, NULL, 'p'},
171 {"show-function-line", required_argument, NULL, 'F'},
172 {"show-links", no_argument, NULL, 'R'},
173 {"side-by-side", no_argument, NULL, 'y'},
174 {"speed-large-files", no_argument, NULL, 'H'},
175 {"starting-file", required_argument, NULL, 'S'},
176 {"string", optional_argument, NULL, 'Z'},
177 {"suppress-common-lines", no_argument, NULL, SUPPRESS_COMMON_LINES_OPTION},
178 {"terminal", no_argument, NULL, 'z'},
179 {"text", no_argument, NULL, 'a'},
180 {"tolerance", required_argument, NULL, TOLERANCE_OPTION},
181 {"unidirectional-new-file", no_argument, NULL, 'P'},
182 {"unified", optional_argument, NULL, 'u'},
183 {"verbose", no_argument, NULL, 'v'},
184 {"version", no_argument, &show_version, 1},
185 {"width", required_argument, NULL, 'w'},
186 {"word-mode", required_argument, NULL, 'W'},
187 {0, 0, 0, 0},
188 };
189
190 /*--------------------.
191 | Available options. |
192 `--------------------*/
193
194 /* If calling the pager automatically. */
195 static int autopager = 0;
196
197 /* End/restart strings at end of lines. */
198 static int avoid_wraps = 0;
199
200 /* If nonzero, output quite verbose detail of operations. */
201 static int debugging = 0;
202
203 /* Initialize the termcap strings. */
204 static int find_termcap = -1; /* undecided yet */
205
206 /* Ignore all white space. */
207 static int ignore_all_space = 0;
208
209 /* Ignore changes whose lines are all blank. */
210 static int ignore_blank_lines = 0;
211
212 /* Consider upper-case and lower-case to be the same. */
213 static int ignore_case = 0;
214
215 /* If nonzero, segregate matchable only items out of normal items. */
216 static int ignore_delimiters = 0;
217
218 /* Ignore changes whose lines all match regular expression. */
219 static struct re_pattern_buffer **ignore_regexp_array = NULL;
220 static int ignore_regexps = 0;
221
222 /* Ignore changes in the amount of white space. */
223 static int ignore_space_change = 0;
224
225 /* Make tabs line up by prepending a tab. */
226 static int initial_tab = 0;
227
228 /* Try hard to find a smaller set of changes. Unimplemented. */
229 static int minimal = 0;
230
231 /* Minimum size of a cluster, not counting ignored items. */
232 static int minimum_size = -1; /* undecided yet */
233
234 /* If using printer overstrikes. */
235 static int overstrike = 0;
236
237 /* If output aimed to the "less" program (overstrike even whitespace). */
238 static int overstrike_for_less = 0;
239
240 /* Pass the output through `pr' to paginate it. */
241 static int paginate = 0;
242
243 /* List all input files with annotations. */
244 static int relist_files = 0;
245
246 /* Give file and line references in annotated listings. */
247 static int show_links = 0;
248
249 /* Assume large files and many scattered small changes. Unimplemented. */
250 static int speed_large_files = 0;
251
252 /* If nonzero, show progress of operations. */
253 static int verbose = 0;
254
255 /* Compare words, use REGEXP to define item. */
256 static int word_mode = 0;
257 static struct re_pattern_buffer *item_regexp = NULL;
258
259 /*---------------.
260 | diff options. |
261 `---------------*/
262
263 /* Treat all files as text. */
264 static int text = 0;
265
266 /* Output regular context diffs. */
267 static int context = 0;
268
269 /* Output unified context diffs or unidiffs. */
270 static int unified = 0;
271
272 /* Use LABEL instead of file name. */
273 static const char *label = NULL;
274
275 /* Show which C function each change is in. */
276 static int show_c_function = 0;
277
278 /* Show the most recent line matching RE. */
279 static const char *show_function_line = NULL;
280
281 /* Output only whether files differ. */
282 static int brief = 0;
283
284 /* Output an ed script. */
285 static int ed = 0;
286
287 /* Output an RCS format diff. */
288 static int rcs = 0;
289
290 /* Output in two columns. */
291 static int side_by_side = 0;
292
293 /* Output at most NUM characters per line. */
294 static int width = 130;
295
296 /* Output only the left column of common lines. */
297 static int left_column = 0;
298
299 /* Do not output common lines. */
300 static int suppress_common_lines = 0;
301
302 /* Output merged file to show `#ifdef NAME' diffs. */
303 static const char *ifdef = NULL;
304
305 /* GTYPE input groups with GFMT. */
306 static const char *gtype_group_format = NULL;
307
308 /* All input lines with LFMT. */
309 static const char *line_format = NULL;
310
311 /* LTYPE input lines with LFMT. */
312 static const char *ltype_line_format = NULL;
313
314 /* Expand tabs to spaces in output. */
315 static int expand_tabs = 0;
316
317 /* Recursively compare any subdirectories found. */
318 static int recursive = 0;
319
320 /* Treat absent files as empty. */
321 static int new_file = 0;
322
323 /* Treat absent first files as empty. */
324 static int unidirectional_new_file = 0;
325
326 /* Report when two files are the same. */
327 static int report_identical_files = 0;
328
329 /* Exclude files that match PAT. */
330 static const char *exclude = NULL;
331
332 /* Exclude files that match any pattern in FILE. */
333 static const char *exclude_from = NULL;
334
335 /* Start with FILE when comparing directories. */
336 static const char *starting_file = NULL;
337
338 /* Keep NUM lines of the common prefix and suffix. */
339 static int horizon_lines = 2;
340
341 /*----------------------.
342 | mdiff draft options. |
343 `----------------------*/
344
345 /* Maximum number of non-matching items for cluster members. */
346 static int tolerance = 0;
347
348 /* Regular expressions. */
349
350 /*-------------------------------------------------------------------.
351 | Compile the regex represented by STRING, diagnose and abort if any |
352 | error. Returns the compiled regex structure. |
353 `-------------------------------------------------------------------*/
354
355 static struct re_pattern_buffer *
alloc_and_compile_regex(const char * string)356 alloc_and_compile_regex (const char *string)
357 {
358 struct re_pattern_buffer *pattern; /* newly allocated structure */
359 const char *message; /* error message returned by regex.c */
360
361 pattern = (struct re_pattern_buffer *)
362 xmalloc (sizeof (struct re_pattern_buffer));
363 memset (pattern, 0, sizeof (struct re_pattern_buffer));
364
365 pattern->buffer = NULL;
366 pattern->allocated = 0;
367 #if 0
368 /* FIXME */
369 pattern->translate = ignore_case ? (char *) folded_chars : NULL;
370 #endif
371
372 pattern->fastmap = xmalloc ((size_t) CHAR_SET_SIZE);
373
374 message = re_compile_pattern (string, (int) strlen (string), pattern);
375 if (message)
376 error (EXIT_ERROR, 0, _("%s (for regexp `%s')"), message, string);
377
378 /* The fastmap should be compiled before `re_match'. This would not be
379 necessary if `re_search' was called first. */
380
381 re_compile_fastmap (pattern);
382
383 #if WITH_REGEX
384
385 /* Do not waste extra allocated space. */
386
387 if (pattern->allocated > pattern->used)
388 {
389 pattern->buffer = xrealloc (pattern->buffer, (size_t) pattern->used);
390 pattern->allocated = pattern->used;
391 }
392 #endif
393
394 return pattern;
395 }
396
397 /* Items. */
398
399 /* Each item has a type which is meaningful to the clustering process.
400 NORMAL items and DELIMS items fully participate in cluster
401 construction, yet only NORMAL items are worth participating into building
402 the minimum size count, merely DELIMS items do not. WHITE items, even if
403 they do exist in input files, are a little like if they were just not
404 there: they are often skipped over. SENTINEL items do not correspond to
405 any real input item, they are used to mark file boundaries, and may not
406 compare equally to any other item.
407
408 The names NORMAL, DELIMS, WHITE and SENTINEL are a bit generic and are
409 used mainly to trigger the imagination of the reader. Practice may be
410 quite different. For example, actual white items may well be classified
411 as DELIMS or NORMAL, items having only delimiters may be seen as NORMAL,
412 items making up a block comment may be considered as WHITE. Moreover,
413 SENTINEL items could be introduced at function boundaries, say, if we
414 wanted to limit cluster members to a single function, etc. */
415
416 enum type
417 {
418 NORMAL, /* meaningful, countable items */
419 DELIMS, /* items having only delimiters */
420 WHITE, /* white items */
421 SENTINEL /* file boundaries */
422 };
423
424 /* Comparison work is done on checksums of items, rather than input items
425 themselves. The following array maintains one entry for each item,
426 whether a real item or a sentinel. Checksums, which are meaningful only
427 for NORMAL or DELIMS items, are computed so they ignore parts of the item
428 which are not really meaningful for the purpose of comparisons.
429
430 If SAFER_SLOWER has been selected, checksums may be avoided and pointers
431 used instead. For pointers to occupy the full space they might need, the
432 type bits are kept apart but tightly packed. Otherwise, the TYPE bits
433 are part of the structured item, eating out some chekcsum space. */
434
435 #define BITS_PER_CHAR 8
436 #define BITS_PER_TYPE 2
437 #define BITS_PER_WORD (sizeof (unsigned) * BITS_PER_CHAR)
438 #define TYPES_PER_WORD (BITS_PER_WORD / BITS_PER_TYPE)
439
440 #if SAFER_SLOWER
441
442 # define ITEM union item
443
444 union item
445 {
446 unsigned checksum;
447 char *pointer;
448 };
449
450 static union item *item_array = NULL;
451 static unsigned *type_array = NULL;
452 static int items = 0;
453
454 static inline enum type
item_type(ITEM * item)455 item_type (ITEM * item)
456 {
457 int position = item - item_array;
458 int shift = position % TYPES_PER_WORD * BITS_PER_TYPE;
459 unsigned *pointer = type_array + position / TYPES_PER_WORD;
460
461 return (enum type) (MASK (BITS_PER_TYPE) & *pointer >> shift);
462 }
463
464 static inline void
set_item_type(ITEM * item,enum type type)465 set_item_type (ITEM * item, enum type type)
466 {
467 int position = item - item_array;
468 int shift = position % TYPES_PER_WORD * BITS_PER_TYPE;
469 unsigned *pointer = type_array + position / TYPES_PER_WORD;
470
471 *pointer &= ~MASK (BITS_PER_TYPE) << shift;
472 *pointer |= type << shift;
473 }
474
475 #else /* not SAFER_SLOWER */
476
477 # define ITEM struct item
478
479 struct item
480 {
481 enum type type:BITS_PER_TYPE;
482 unsigned checksum:BITS_PER_WORD - BITS_PER_TYPE;
483 };
484
485 static struct item *item_array = NULL;
486 static int items = 0;
487
488 static inline enum type
item_type(ITEM * item)489 item_type (ITEM * item)
490 {
491 return item->type;
492 }
493
494 static inline void
set_item_type(ITEM * item,enum type type)495 set_item_type (ITEM * item, enum type type)
496 {
497 item->type = type;
498 }
499
500 #endif /* not SAFER_SLOWER */
501
502 #if DEBUGGING
503
504 /*---------------------.
505 | Dump a given INPUT. |
506 `---------------------*/
507
508 static void
dump_item(ITEM * item)509 dump_item (ITEM * item)
510 {
511 static const char *item_type_string[4] = { "", "delim", "white", "SENTIN" };
512
513 fprintf (stderr, "(%ld)\t%s\t%08x\n", (long) (item - item_array),
514 item_type_string[item_type (item)], item->checksum);
515 }
516
517 /*------------------.
518 | Dump all items. |
519 `------------------*/
520
521 static void
dump_all_items(void)522 dump_all_items (void)
523 {
524 int counter;
525
526 fputs ("\f\n", stderr);
527 for (counter = 0; counter < items; counter++)
528 dump_item (item_array + counter);
529 }
530
531 #endif /* DEBUGGING */
532
533 /*-------------------------------------------------------------------.
534 | Move item pointer forward or backward, skipping over white items. |
535 `-------------------------------------------------------------------*/
536
537 #define FORWARD_ITEM(Pointer) \
538 do { Pointer++; } while (item_type (Pointer) == WHITE)
539
540 #define BACKWARD_ITEM(Pointer) \
541 do { Pointer--; } while (item_type (Pointer) == WHITE)
542
543 /*------------------------------------------------------------------.
544 | Sort helper. Compare two item indices. Lower indices go first. |
545 `------------------------------------------------------------------*/
546
547 static int
compare_for_positions(const void * void_first,const void * void_second)548 compare_for_positions (const void *void_first, const void *void_second)
549 {
550 #define value1 *((int *) void_first)
551 #define value2 *((int *) void_second)
552
553 return value1 - value2;
554
555 #undef value1
556 #undef value2
557 }
558
559 /*-------------------------------------------------------------------------.
560 | Sort helper. Compare two item indices. Items having identical |
561 | checksums should be brought together. Whenever two checksums are equal, |
562 | items are then ordered by the checksum of the following items, and this |
563 | way going forward until a difference is found. |
564 `-------------------------------------------------------------------------*/
565
566 static int
compare_for_checksum_runs(const void * void_first,const void * void_second)567 compare_for_checksum_runs (const void *void_first, const void *void_second)
568 {
569 #define value1 *((int *) void_first)
570 #define value2 *((int *) void_second)
571
572 ITEM *item1 = item_array + value1;
573 ITEM *item2 = item_array + value2;
574 int result;
575
576 while (1)
577 {
578 /* Order checksums. */
579
580 result = item1->checksum - item2->checksum;
581 if (result)
582 return result;
583
584 /* Seek forward for a difference. */
585
586 FORWARD_ITEM (item1);
587 FORWARD_ITEM (item2);
588
589 /* Sentinels are never equal (unless really the same). They go after
590 all checksums, and compare so to stabilise the sort. */
591
592 if (item_type (item1) == SENTINEL)
593 return item_type (item2) == SENTINEL ? value1 - value2 : 1;
594
595 if (item_type (item2) == SENTINEL)
596 return -1;
597 }
598
599 #undef value1
600 #undef value2
601 }
602
603 /*-----------------.
604 | Add a new item. |
605 `-----------------*/
606
607 static inline void
new_item(enum type type,int checksum)608 new_item (enum type type, int checksum)
609 {
610 ITEM *item;
611
612 if (items % (64 * TYPES_PER_WORD) == 0)
613 {
614 item_array = (ITEM *)
615 xrealloc (item_array, (items + 64 * TYPES_PER_WORD) * sizeof (ITEM));
616 #if SAFER_SLOWER
617 type_array = (unsigned *)
618 xrealloc (type_array,
619 (items / TYPES_PER_WORD + 64) * sizeof (unsigned));
620 #endif
621 }
622
623 item = item_array + items++;
624 set_item_type (item, type);
625 item->checksum = checksum;
626 }
627
628 /*---------------------.
629 | Add a new sentinel. |
630 `---------------------*/
631
632 static void
new_sentinel(void)633 new_sentinel (void)
634 {
635 new_item (SENTINEL, 0);
636 }
637
638 /*------------------------------------------------------------------------.
639 | Find how many identical normal items follow, given two start items. |
640 | Ensure matchable items correspond to each other, but skip over white |
641 | items. The starting items may not be sentinels nor white items. Fuzzy |
642 | items, if any, ought to be embedded and correspond to each other. |
643 `------------------------------------------------------------------------*/
644
645 static int
identical_size(int index1,int index2)646 identical_size (int index1, int index2)
647 {
648 ITEM *item1 = item_array + index1;
649 ITEM *item2 = item_array + index2;
650 int normal_count = 0;
651 int mismatch_count = 0;
652
653 while (1)
654 {
655 if (item1->checksum == item2->checksum
656 #if 0
657 /* Just assume that identical checksums imply identical types. */
658 && item_type (item1) == item_type (item2)
659 #endif
660 )
661 {
662 if (item_type (item1) == NORMAL)
663 normal_count++;
664 }
665 else if (normal_count > 0 && mismatch_count < tolerance)
666 {
667 if (item_type (item1) == NORMAL && item_type (item2) == NORMAL)
668 normal_count++;
669 mismatch_count++;
670 }
671 else
672 break;
673
674 FORWARD_ITEM (item1);
675 FORWARD_ITEM (item2);
676
677 if (item_type (item1) == SENTINEL || item_type (item2) == SENTINEL)
678 break;
679 }
680
681 return normal_count;
682 }
683
684 /*-----------------------------------------------------------------------.
685 | Count normal items from START to FINISH (excluded). However, if there |
686 | are MAXIMUM or more normal input items, return a negative number. |
687 `-----------------------------------------------------------------------*/
688
689 static int
item_distance(int start,int finish,int maximum)690 item_distance (int start, int finish, int maximum)
691 {
692 int counter = 0;
693 ITEM *item;
694
695 assert (start < finish);
696 assert (maximum > 0);
697
698 for (item = item_array + start; item < item_array + finish; item++)
699 if (item_type (item) == NORMAL)
700 {
701 counter++;
702 if (counter == maximum)
703 return -1;
704 }
705
706 return counter;
707 }
708
709 /* Files. */
710
711 /* All input files use a single continuous item counting scheme. To revert
712 item numbers back to actual file and item references, the following array
713 maintains the cumulated number items seen prior to the beginning of each
714 file, for each file given as argument to the program. To ease some
715 algorithms, a few sentinel items exist: one before the first file, one
716 between each file, and one after the last file. Each file counts all
717 sentinels preceding it. */
718
719 struct input
720 {
721 const char *file_name; /* name of input file */
722 struct stat stat_buffer; /* stat information for the file */
723 char nick_name[4]; /* short name of the file */
724 FILE *file; /* file being read */
725 char *memory_copy; /* buffer containing the file, or NULL */
726
727 /* Reading the file, one line or one character at a time. */
728 char *line; /* line from file */
729 char *cursor; /* cursor into line, or NULL at end of file */
730 char *limit; /* limit value for cursor */
731 size_t line_allocated; /* allocated length of line */
732
733 /* Rescanning of the file, one item at a time. */
734 int first_item; /* index of first item in this file */
735 int item_limit; /* one past last item index in this file */
736 int item; /* index of next item to consider */
737
738 /* Merging views. */
739 int *indirect_cursor; /* cursor into indirect_array */
740 int *indirect_limit; /* one past limit value of indirect_cursor */
741
742 /* Output control. */
743 short listing_allowed; /* if difference output allowed */
744 const char *user_start; /* user string before difference */
745 const char *user_stop; /* user string after difference */
746 const char *term_start; /* terminal string before difference */
747 const char *term_stop; /* terminal string after difference */
748 };
749
750 static struct input *input_array = NULL;
751 static int inputs = 0;
752
753 int common_listing_allowed; /* if common output allowed */
754
755 /* Convenience macros. */
756
757 #define left (input_array + 0)
758 #define right (input_array + 1)
759
760 #define INPUT_MEMBER(Input) \
761 (member_array + *(Input)->indirect_cursor)
762
763 #define INPUT_CLUSTER(Input) \
764 (cluster_array + member_array[*(Input)->indirect_cursor].cluster_number)
765
766 #if DEBUGGING
767
768 /*---------------------.
769 | Dump a given INPUT. |
770 `---------------------*/
771
772 static void
dump_input(struct input * input)773 dump_input (struct input *input)
774 {
775 fprintf (stderr, "%s1,%d\t%7d\t%s\n",
776 input->nick_name, input->item_limit - input->first_item,
777 input->first_item, input->file_name);
778 }
779
780 /*------------------.
781 | Dump all inputs. |
782 `------------------*/
783
784 static void
dump_all_inputs(void)785 dump_all_inputs (void)
786 {
787 int counter;
788
789 fputs ("\f\n", stderr);
790 for (counter = 0; counter < inputs; counter++)
791 dump_input (input_array + counter);
792 }
793
794 #endif /* DEBUGGING */
795
796 /*---------------------------------------------------------------------.
797 | Swallow the whole INPUT into a contiguous region of memory. In the |
798 | INPUT structure, file name and stat buffer are already initialised. |
799 `---------------------------------------------------------------------*/
800
801 /* Define to reallocation increment when swallowing input. */
802 #define SWALLOW_BUFFER_STEP 20000
803
804 static void
swallow_input(struct input * input)805 swallow_input (struct input *input)
806 {
807 int handle; /* file descriptor number */
808 size_t allocated_length; /* allocated length of memory buffer */
809 size_t length; /* total length read so far */
810 int read_length; /* number of character gotten on last read */
811
812 /* Standard input is already opened. In all other cases, open the file
813 from its name. */
814
815 if (strcmp (input->file_name, STDIN_PRINTED_NAME) == 0)
816 handle = fileno (stdin);
817 else if (handle = open (input->file_name, O_RDONLY), handle < 0)
818 error (EXIT_ERROR, errno, "%s", input->file_name);
819
820 /* If the file is a plain, regular file, allocate the memory buffer all at
821 once and swallow the file in one blow. In other cases, read the file
822 repeatedly in smaller chunks until we have it all, reallocating memory
823 once in a while, as we go. */
824
825 #if MSDOS
826
827 /* On MSDOS, we cannot predict in memory size from file size, because of
828 end of line conversions. */
829
830 #else
831
832 if (S_ISREG (input->stat_buffer.st_mode))
833 {
834 input->memory_copy = (char *)
835 xmalloc ((size_t) input->stat_buffer.st_size);
836
837 if (read
838 (handle, input->memory_copy,
839 (size_t) input->stat_buffer.st_size) != input->stat_buffer.st_size)
840 error (EXIT_ERROR, errno, "%s", input->file_name);
841 }
842 else
843 #endif
844
845 {
846 input->memory_copy = xmalloc ((size_t) SWALLOW_BUFFER_STEP);
847 allocated_length = SWALLOW_BUFFER_STEP;
848
849 length = 0;
850 while (read_length = read (handle, input->memory_copy + length,
851 allocated_length - length), read_length > 0)
852 {
853 length += read_length;
854 if (length == allocated_length)
855 {
856 allocated_length += SWALLOW_BUFFER_STEP;
857 input->memory_copy = (char *)
858 xrealloc (input->memory_copy, allocated_length);
859 }
860 }
861
862 if (read_length < 0)
863 error (EXIT_ERROR, errno, "%s", input->file_name);
864 }
865
866 /* Close the file, but only if it was not the standard input. */
867
868 if (handle != fileno (stdin))
869 close (handle);
870 }
871
872 /*-------------------------------------------.
873 | Register a file NAME to be later studied. |
874 `-------------------------------------------*/
875
876 static void
new_input(const char * name)877 new_input (const char *name)
878 {
879 static int stdin_swallowed = 0;
880 struct input *input;
881
882 /* Add a new input file descriptor, read in stdin right away. */
883
884 if (inputs % 16 == 0)
885 input_array = (struct input *)
886 xrealloc (input_array, (inputs + 16) * sizeof (struct input));
887
888 input = input_array + inputs++;
889 if (strcmp (name, "") == 0 || strcmp (name, "-") == 0)
890 if (stdin_swallowed)
891 error (EXIT_ERROR, 0, _("only one file may be standard input"));
892 else
893 {
894 input->file_name = STDIN_PRINTED_NAME;
895 if (fstat (fileno (stdin), &input->stat_buffer) != 0)
896 error (EXIT_ERROR, errno, "%s", input->file_name);
897 swallow_input (input);
898 stdin_swallowed = 1;
899 }
900 else
901 {
902 input->file_name = name;
903 if (stat (input->file_name, &input->stat_buffer) != 0)
904 error (EXIT_ERROR, errno, "%s", input->file_name);
905 if ((input->stat_buffer.st_mode & S_IFMT) == S_IFDIR)
906 error (EXIT_ERROR, 0, _("directories not supported"));
907 input->memory_copy = NULL;
908 }
909 }
910
911 /*-------------------------------.
912 | Prepare INPUT for re-reading. |
913 `-------------------------------*/
914
915 static void
open_input(struct input * input)916 open_input (struct input *input)
917 {
918 if (input->memory_copy)
919 input->limit = input->memory_copy;
920 else
921 {
922 if (input->file = fopen (input->file_name, "r"), !input->file)
923 error (EXIT_ERROR, errno, "%s", input->file_name);
924
925 input->line = NULL;
926 }
927 input->line_allocated = 0;
928 }
929
930 static void
close_input(struct input * input)931 close_input (struct input *input)
932 {
933 if (input->line_allocated > 0)
934 free (input->line);
935
936 if (!input->memory_copy)
937 fclose (input->file);
938 }
939
940 /*-------------------------------------------------------------------------.
941 | Prepare INPUT for next line, then available from INPUT->LINE through |
942 | INPUT->LIMIT (excluded). INPUT->CURSOR will point to first character of |
943 | the line, or will be NULL at end of file. |
944 `-------------------------------------------------------------------------*/
945
946 static void
input_line(struct input * input)947 input_line (struct input *input)
948 {
949 if (input->memory_copy)
950 {
951 const char *limit = input->memory_copy + input->stat_buffer.st_size;
952 char *cursor;
953
954 input->line = input->limit;
955
956 for (cursor = input->line; cursor < limit && *cursor != '\n'; cursor++)
957 ;
958
959 if (cursor < limit)
960 {
961 cursor++;
962 input->cursor = input->line;
963 }
964 else if (cursor == input->line)
965 input->cursor = NULL;
966 else
967 input->cursor = input->line;
968
969 input->limit = cursor;
970 }
971 else
972 {
973 int length
974 = getline (&input->line, &input->line_allocated, input->file);
975
976 if (length < 0)
977 input->cursor = NULL;
978 else
979 {
980 input->cursor = input->line;
981 input->limit = input->line + length;
982 }
983 }
984 }
985
986 /*-------------------------------------------------------------------------.
987 | Prepare INPUT for next character, then available through *INPUT->CURSOR, |
988 | unless INPUT->CURSOR is NULL for indicating end of file. |
989 `-------------------------------------------------------------------------*/
990
991 static void
input_character_helper(struct input * input)992 input_character_helper (struct input *input)
993 {
994 int counter;
995
996 /* Read in a new line, but skip ignorable ones. FIXME: This is just not
997 right. Ignorable lines may need copying! */
998
999 do
1000 {
1001 input_line (input);
1002
1003 /* Possibly check if the line should be ignored. */
1004
1005 for (counter = 0; counter < ignore_regexps; counter++)
1006 if (re_match (ignore_regexp_array[counter],
1007 input->line, input->limit - input->line, 0, NULL) > 0)
1008 break;
1009 }
1010 while (counter < ignore_regexps);
1011
1012 assert (!input->cursor || input->cursor < input->limit);
1013 }
1014
1015 static inline void
input_character(struct input * input)1016 input_character (struct input *input)
1017 {
1018 assert (input->cursor);
1019 input->cursor++;
1020 if (input->cursor == input->limit)
1021 input_character_helper (input);
1022 }
1023
1024 /*------------------------------------------------------------.
1025 | Construct descriptors for all items of a given INPUT file. |
1026 `------------------------------------------------------------*/
1027
1028 static void
study_input(struct input * input)1029 study_input (struct input *input)
1030 {
1031 #if SAVE_AND_SLOW
1032 # define ADJUST_CHECKSUM(Character) \
1033 checksum = (checksum << 5) + (checksum >> (BITS_PER_WORD - 5)) \
1034 + (Character & 0xFF)
1035 #else
1036 # define ADJUST_CHECKSUM(Character) \
1037 checksum = (checksum << 5) + \
1038 (checksum >> (BITS_PER_WORD - BITS_PER_TYPE - 5)) \
1039 + (Character & 0xFF)
1040 #endif
1041
1042 int item_count; /* number of items read */
1043 char *buffer = NULL; /* line buffer */
1044 int length = 0; /* actual line length in buffer */
1045
1046 /* Read the file and checksum all items. */
1047
1048 if (verbose)
1049 fprintf (stderr, _("Reading %s"), input->file_name);
1050
1051 open_input (input);
1052 input->first_item = items;
1053 item_count = 0;
1054
1055 /* Read all lines. */
1056
1057 while (input_line (input), input->cursor)
1058 {
1059 int counter;
1060
1061 /* Possibly check if the line should be ignored. */
1062
1063 for (counter = 0; counter < ignore_regexps; counter++)
1064 if (re_match (ignore_regexp_array[counter],
1065 input->line, input->limit - input->line, 0, NULL) > 0)
1066 break;
1067
1068 if (counter < ignore_regexps)
1069 {
1070 if (!word_mode)
1071 new_item (WHITE, 0);
1072 continue;
1073 }
1074
1075 /* The line is not being ignored. */
1076
1077 /* We prefer `cursor < buffer + length' over `*cursor' in the loop
1078 tests, so embedded NULs can be handled. */
1079
1080 if (word_mode)
1081 {
1082 char *cursor = input->line;
1083 unsigned checksum;
1084
1085 /* FIXME: Might be simplified out of the line loop. */
1086
1087 while (cursor < input->limit)
1088 {
1089 while (cursor < input->limit && isspace (*cursor))
1090 cursor++;
1091
1092 if (cursor < input->limit)
1093 {
1094 checksum = 0;
1095 while (cursor < input->limit && !isspace (*cursor))
1096 {
1097 ADJUST_CHECKSUM (*cursor);
1098 cursor++;
1099 }
1100 new_item (NORMAL, checksum);
1101 item_count++;
1102 }
1103 }
1104 }
1105 else
1106 {
1107 char *cursor;
1108 unsigned checksum = 0;
1109 int line_has_delims = 0;
1110 int line_has_alnums = 0;
1111
1112 for (cursor = input->line; cursor < input->limit; cursor++)
1113 if (isspace (*cursor))
1114 {
1115 if (!ignore_all_space)
1116 {
1117 if (ignore_space_change)
1118 {
1119 ADJUST_CHECKSUM (' ');
1120 while (cursor + 1 < buffer + length
1121 && isspace (cursor[1]))
1122 cursor++;
1123 }
1124 else
1125 ADJUST_CHECKSUM (*cursor);
1126 }
1127 }
1128 else if (isalpha (*cursor))
1129 {
1130 line_has_alnums = 1;
1131 if (ignore_case && islower (*cursor))
1132 {
1133 char character = toupper (*cursor);
1134
1135 ADJUST_CHECKSUM (character);
1136 }
1137 else
1138 ADJUST_CHECKSUM (*cursor);
1139 }
1140 else if (isdigit (*cursor))
1141 {
1142 line_has_alnums = 1;
1143 ADJUST_CHECKSUM (*cursor);
1144 }
1145 else
1146 {
1147 line_has_delims = 1;
1148 ADJUST_CHECKSUM (*cursor);
1149 }
1150
1151 /* Register the checksum. */
1152
1153 if (line_has_alnums)
1154 new_item (NORMAL, checksum);
1155 else if (line_has_delims)
1156 new_item (ignore_delimiters ? DELIMS : NORMAL, checksum);
1157 else if (ignore_blank_lines)
1158 new_item (WHITE, checksum);
1159 else
1160 new_item (ignore_delimiters ? DELIMS : NORMAL, checksum);
1161
1162 item_count++;
1163 }
1164 }
1165
1166 input->item_limit = items;
1167
1168 /* Cleanup. */
1169
1170 close_input (input);
1171
1172 if (verbose)
1173 fprintf (stderr, ngettext (", %d item\n", ", %d items\n", item_count),
1174 item_count);
1175
1176 #undef ADJUST_CHECKSUM
1177 }
1178
1179 /*------------------------.
1180 | Study all input files. |
1181 `------------------------*/
1182
1183 /* If all input files fit in MAXIMUM_TOTAL_BUFFER, they are all swallowed in
1184 memory at once. Otherwise, they are read from disk one line at a time.
1185 A fixed value is far than ideal: we should consider how much real memory
1186 space is available, and I do not know how to do this portably. */
1187 #define MAXIMUM_TOTAL_BUFFER 500000
1188
1189 static void
study_all_inputs(void)1190 study_all_inputs (void)
1191 {
1192 struct input *input;
1193 int total_size;
1194
1195 /* Compute nick names for all files. */
1196
1197 if (inputs == 2)
1198 {
1199 /* If only two input files, just mimick `diff' output. */
1200
1201 strcpy (input_array[0].nick_name, "-");
1202 strcpy (input_array[1].nick_name, "+");
1203 }
1204 else
1205 {
1206 int name_length = 1;
1207 int file_limit = 26;
1208
1209 /* Choose nick names as sequences of lower case letters. */
1210
1211 while (inputs > file_limit)
1212 {
1213 name_length++;
1214 file_limit *= 26;
1215 }
1216
1217 for (input = input_array; input < input_array + inputs; input++)
1218 {
1219 int value = input - input_array;
1220 char *cursor = input->nick_name + name_length;
1221
1222 *cursor-- = '\0';
1223 while (cursor >= input->nick_name)
1224 {
1225 *cursor-- = value % 26 + 'a';
1226 value /= 26;
1227 }
1228 }
1229 }
1230
1231 /* Swallow all files not already copied in memory, if room permits. */
1232
1233 total_size = 0;
1234 for (input = input_array; input < input_array + inputs; input++)
1235 total_size += input->stat_buffer.st_size;
1236
1237 if (total_size <= MAXIMUM_TOTAL_BUFFER)
1238 for (input = input_array; input < input_array + inputs; input++)
1239 if (!input->memory_copy)
1240 swallow_input (input);
1241
1242 /* Compute all checksums. */
1243
1244 new_sentinel ();
1245 for (input = input_array; input < input_array + inputs; input++)
1246 {
1247 study_input (input);
1248 new_sentinel ();
1249 }
1250
1251 if (verbose)
1252 {
1253 fprintf (stderr, _("Read summary:"));
1254 fprintf (stderr, ngettext (" %d file,", " %d files,", inputs), inputs);
1255 fprintf (stderr, ngettext (" %d item\n", " %d items\n",
1256 items - inputs - 1), items - inputs - 1);
1257 }
1258
1259 #if DEBUGGING
1260 if (debugging)
1261 {
1262 dump_all_items ();
1263 dump_all_inputs ();
1264 }
1265 #endif
1266 }
1267
1268 /* Item references. */
1269
1270 struct reference
1271 {
1272 struct input *input; /* input file for reference */
1273 int number; /* item number for reference */
1274 };
1275
1276 /*-------------------------------------------------------------------------.
1277 | Explicit a reference for internal item NUMBER. The first sentinel has |
1278 | line 0, other sentinels have one more than number of lines in the file. |
1279 `-------------------------------------------------------------------------*/
1280
1281 static struct reference
get_reference(int number)1282 get_reference (int number)
1283 {
1284 struct input *input;
1285 struct reference result;
1286
1287 assert (number < items);
1288
1289 /* FIXME: if we are studying many files, linear searches become a bit
1290 crude, binary searches would be better in such cases. */
1291
1292 for (input = input_array; number >= input->item_limit + 1; input++)
1293 ;
1294
1295 result.input = input;
1296 result.number = number - input->first_item + 1;
1297 return result;
1298 }
1299
1300 /*------------------------------------------------------------------------.
1301 | Produce a short string referencing an internal item NUMBER. The result |
1302 | is statically allocated into a ring of little buffers. |
1303 `------------------------------------------------------------------------*/
1304
1305 static char *
reference_string(int number)1306 reference_string (int number)
1307 {
1308 #define RING_LENGTH 4
1309
1310 struct reference reference;
1311 static char buffer[RING_LENGTH][20];
1312 static int next = RING_LENGTH - 1;
1313
1314 if (next == RING_LENGTH - 1)
1315 next = 0;
1316 else
1317 next++;
1318
1319 reference = get_reference (number);
1320
1321 if (number == reference.input->item_limit)
1322 sprintf (buffer[next], "%s.EOF", reference.input->nick_name);
1323 else
1324 sprintf (buffer[next], "%s%d",
1325 reference.input->nick_name, reference.number);
1326
1327 return buffer[next];
1328
1329 #undef RING_LENGTH
1330 }
1331
1332 #if DEBUGGING
1333
1334 /*-------------------------------------------------------------------------.
1335 | Dump a reference to a given item in compilation error format. Ensure at |
1336 | least one space, and in fact, enough to align diagnostics as we go. |
1337 `-------------------------------------------------------------------------*/
1338
1339 static void
dump_reference(int item)1340 dump_reference (int item)
1341 {
1342 struct reference reference;
1343 char buffer[20];
1344 static int last_width = 0;
1345 int width;
1346
1347 reference = get_reference (item);
1348 sprintf (buffer, "%d", reference.number);
1349
1350 fprintf (stderr, "%s:%s: ", reference.input->file_name, buffer);
1351 width = strlen (reference.input->file_name) + strlen (buffer);
1352 if (width < last_width)
1353 while (width++ < last_width)
1354 putc (' ', stderr);
1355 else
1356 last_width = width;
1357 }
1358
1359 #endif /* DEBUGGING */
1360
1361 /* Clusters and members. */
1362
1363 /* A cluster represents one set of items which is repeated at two or more
1364 places in the input files, while each repetition is called a cluster
1365 member. The member size is an item count which excludes white items. */
1366
1367 struct cluster
1368 {
1369 short first_member; /* index in member_array array */
1370 short item_count; /* size of each member */
1371 };
1372
1373 static struct cluster *cluster_array = NULL;
1374 int clusters = 0;
1375
1376 /* A cluster member starts at some item number and runs for a given number
1377 of items. Ignorable items may be embedded in a cluster member, so
1378 cluster members of a single cluster may have different lengths, but a
1379 cluster member never starts nor end with an white item. Sentinels
1380 are never part of any cluster. By construction, all cluster members of a
1381 same cluster are next to each other. */
1382
1383 struct member
1384 {
1385 short cluster_number; /* ordinal of cluster, counted from 0 */
1386 int first_item; /* number of first item for this member */
1387 };
1388
1389 static struct member *member_array = NULL;
1390 static int members = 0;
1391
1392 /* Convenience macros. */
1393
1394 #define MEMBER_LIMIT(Cluster) \
1395 (member_array + ((Cluster) + 1)->first_member)
1396
1397 #define MEMBERS(Cluster) \
1398 (((Cluster) + 1)->first_member - (Cluster)->first_member)
1399
1400 /*---------------------------------------------------------------------.
1401 | Find how many input items are part of a given cluster MEMBER. The |
1402 | returned count includes embedded items having less significance, but |
1403 | exclude last white items. |
1404 `---------------------------------------------------------------------*/
1405
1406 static int
real_member_size(struct member * member)1407 real_member_size (struct member *member)
1408 {
1409 int number = cluster_array[member->cluster_number].item_count;
1410 ITEM *item = item_array + member->first_item;
1411 ITEM *last_item;
1412
1413 /* Skip over NUMBER normal items. */
1414
1415 while (number > 0)
1416 switch (item_type (item))
1417 {
1418 case NORMAL:
1419 number--;
1420 /* Fall through. */
1421
1422 case DELIMS:
1423 case WHITE:
1424 item++;
1425 break;
1426
1427 case SENTINEL:
1428 /* Cannot happen. */
1429 abort ();
1430 }
1431
1432 /* Skip over supplementary matchable or white items, return the item
1433 count as for the last normal or matchable item. */
1434
1435 last_item = item;
1436 while (1)
1437 switch (item_type (item))
1438 {
1439 case NORMAL:
1440 case SENTINEL:
1441 return last_item - item_array - member->first_item;
1442
1443 case DELIMS:
1444 last_item = item++;
1445 break;
1446
1447 case WHITE:
1448 item++;
1449 break;
1450 }
1451 }
1452
1453 #if DEBUGGING
1454
1455 /*-----------------------.
1456 | Dump a given CLUSTER. |
1457 `-----------------------*/
1458
1459 static void
dump_cluster(struct cluster * cluster)1460 dump_cluster (struct cluster *cluster)
1461 {
1462 int counter;
1463 struct member *member;
1464
1465 fprintf (stderr, "{%ld},%d",
1466 (long) (cluster - cluster_array), cluster->item_count);
1467 for (counter = cluster->first_member;
1468 counter < (cluster + 1)->first_member; counter++)
1469 {
1470 member = member_array + counter;
1471 fprintf (stderr, "%c[%d] %s,%d",
1472 counter == cluster->first_member ? '\t' : ' ', counter,
1473 reference_string (member->first_item),
1474 real_member_size (member));
1475 }
1476 putc ('\n', stderr);
1477 }
1478
1479 /*--------------------.
1480 | Dump all clusters. |
1481 `--------------------*/
1482
1483 static void
dump_all_clusters(void)1484 dump_all_clusters (void)
1485 {
1486 int counter;
1487
1488 fputs ("\f\n", stderr);
1489 for (counter = 0; counter < clusters; counter++)
1490 dump_cluster (cluster_array + counter);
1491 }
1492
1493 /*----------------------.
1494 | Dump a given MEMBER. |
1495 `----------------------*/
1496
1497 static void
dump_member(struct member * member)1498 dump_member (struct member *member)
1499 {
1500 fprintf (stderr, " [%ld]\t{%d}\t%s,%d+\n",
1501 (long) (member - member_array), member->cluster_number,
1502 reference_string (member->first_item),
1503 cluster_array[member->cluster_number].item_count);
1504 }
1505
1506 /*-------------------.
1507 | Dump all members. |
1508 `-------------------*/
1509
1510 static void
dump_all_members(void)1511 dump_all_members (void)
1512 {
1513 int counter;
1514
1515 fputs ("\f\n", stderr);
1516 for (counter = 0; counter < members; counter++)
1517 dump_member (member_array + counter);
1518 }
1519
1520 #endif /* DEBUGGING */
1521
1522 /*----------------------------------------------------------------------.
1523 | Sort helper. Compare two member indices, using the original input |
1524 | order. When two members have the same start, put the longest first. |
1525 `----------------------------------------------------------------------*/
1526
1527 static int
compare_for_member_start(const void * void_first,const void * void_second)1528 compare_for_member_start (const void *void_first, const void *void_second)
1529 {
1530 #define value1 *((int *) void_first)
1531 #define value2 *((int *) void_second)
1532
1533 int result;
1534
1535 struct member *member1 = member_array + value1;
1536 struct member *member2 = member_array + value2;
1537
1538 result = member1->first_item - member2->first_item;
1539 if (result)
1540 return result;
1541
1542 return (cluster_array[member2->cluster_number].item_count
1543 - cluster_array[member1->cluster_number].item_count);
1544
1545 #undef value1
1546 #undef value2
1547 }
1548
1549 /*---------------------------------------------------------------------.
1550 | Add a new cluster, for which each member has COUNT non-white items. |
1551 `---------------------------------------------------------------------*/
1552
1553 static inline void
new_cluster(int count)1554 new_cluster (int count)
1555 {
1556 struct cluster *cluster;
1557
1558 if (clusters % 8 == 0)
1559 cluster_array = (struct cluster *)
1560 xrealloc (cluster_array, (clusters + 8) * sizeof (struct cluster));
1561
1562 cluster = cluster_array + clusters++;
1563 cluster->first_member = members;
1564 cluster->item_count = count;
1565 }
1566
1567 /*---------------------------.
1568 | Add a new cluster member. |
1569 `---------------------------*/
1570
1571 static inline void
new_member(int item)1572 new_member (int item)
1573 {
1574 struct member *member;
1575
1576 if (members % 8 == 0)
1577 member_array = (struct member *)
1578 xrealloc (member_array, (members + 8) * sizeof (struct member));
1579
1580 member = member_array + members++;
1581 member->cluster_number = clusters - 1;
1582 member->first_item = item;
1583 }
1584
1585 /*------------------------------------.
1586 | Output a cluster explanation line. |
1587 `------------------------------------*/
1588
1589 static void
explain_member(char prefix,struct member * quote)1590 explain_member (char prefix, struct member *quote)
1591 {
1592 struct cluster *cluster = cluster_array + quote->cluster_number;
1593 struct member *member;
1594 int counter;
1595 struct reference reference;
1596
1597 putc (prefix, output_file);
1598 for (member = member_array + cluster->first_member;
1599 member < MEMBER_LIMIT (cluster); member++)
1600 {
1601 reference = get_reference (member->first_item);
1602 fprintf (output_file, member == quote ? " %s%d,%d" : " (%s%d,%d)",
1603 reference.input->nick_name, reference.number,
1604 real_member_size (member));
1605 }
1606 putc ('\n', output_file);
1607 }
1608
1609 /*----------------------.
1610 | Search for clusters. |
1611 `----------------------*/
1612
1613 static void
prepare_clusters(void)1614 prepare_clusters (void)
1615 {
1616 /* The array of indirect items have similar contents next to each other. */
1617 int *indirect_item_array = xmalloc (items * sizeof (int));
1618 int indirect_items = 0; /* number of entries in indirect_item_array */
1619 int *cluster_set; /* index for first member in set of clusters */
1620 int *member_set; /* index for first member in current cluster */
1621
1622 /* For a given index, SIZE_ARRAY[index] describes the common item size
1623 between item CLUSTER_SET[index] and CLUSTER_SET[index + 1]. */
1624 int *size_array = NULL;
1625 int allocated_sizes = 0; /* allocated entries for size_array */
1626 int sizes; /* number of entries in size_array */
1627 int *size; /* cursor in size_array */
1628
1629 /* Here is a bag of starts indices for members of the same cluster (having
1630 all CLUSTER_SIZE worth items). These indices are values selected out
1631 of INDIRECT_ITEM_ARRAY, between FIRST and LAST (included). */
1632 int *sorter_array = NULL; /* for ensuring members are in nice order */
1633 int sorters; /* number of members in sorter_array */
1634 int *sorter; /* cursor into sorter_array */
1635 int *cursor; /* cursor into sorter_array */
1636
1637 ITEM *item; /* cursor in item_array */
1638
1639 int cluster_size; /* size of current cluster */
1640 int size_value; /* current size, or previous cluster size */
1641 int counter; /* all purpose counter */
1642 int checksum; /* possible common cheksum of previous items */
1643
1644 /* Sort indices. */
1645
1646 #if DEBUGGING
1647 if (debugging)
1648 fprintf (stderr, _("Sorting"));
1649 #endif
1650
1651 for (counter = 0; counter < items; counter++)
1652 if (item_type (item_array + counter) == NORMAL
1653 || item_type (item_array + counter) == DELIMS)
1654 indirect_item_array[indirect_items++] = counter;
1655 if (indirect_items < items)
1656 indirect_item_array = (int *)
1657 xrealloc (indirect_item_array, indirect_items * sizeof (int));
1658 qsort (indirect_item_array, indirect_items, sizeof (int),
1659 compare_for_checksum_runs);
1660
1661 /* Find all clusters. */
1662
1663 #if DEBUGGING
1664 if (debugging)
1665 fprintf (stderr, _(", clustering"));
1666 #endif
1667
1668 for (cluster_set = indirect_item_array;
1669 cluster_set + 1 < indirect_item_array + indirect_items;
1670 cluster_set += sizes + 1)
1671 {
1672 /* Find all members beginning with the same run of checksums. These
1673 will be later distributed among a few clusters of various sizes,
1674 and so, are common to the incoming set of clusters. */
1675
1676 sizes = 0;
1677 while (cluster_set + sizes + 1 < indirect_item_array + indirect_items)
1678 {
1679 size_value
1680 = identical_size (cluster_set[sizes], cluster_set[sizes + 1]);
1681 if (size_value < minimum_size)
1682 break;
1683
1684 if (sizes == allocated_sizes)
1685 {
1686 allocated_sizes += 32;
1687 size_array = (int *)
1688 xrealloc (size_array, allocated_sizes * sizeof (int));
1689 sorter_array = (int *)
1690 xrealloc (sorter_array, (allocated_sizes + 1) * sizeof (int));
1691 }
1692 size_array[sizes++] = size_value;
1693 }
1694
1695 /* This test is not necessary for the algorithm to work, but this
1696 fairly common case might be worth a bit of speedup. Maybe! */
1697
1698 if (sizes == 0)
1699 continue;
1700
1701 /* Output all clusters of the set from the tallest to the shortest.
1702 Here, size refers to number of items in members, and not to the
1703 number of members in a cluster. (In fact, it is very expectable
1704 that shortest clusters have more members.) */
1705
1706 cluster_size = 0;
1707 while (1)
1708 {
1709 /* Discover the maximum cluster size not yet processed. */
1710
1711 size_value = cluster_size;
1712 cluster_size = 0;
1713
1714 for (size = size_array; size < size_array + sizes; size++)
1715 if ((size_value == 0 || *size < size_value)
1716 && *size > cluster_size)
1717 cluster_size = *size;
1718
1719 /* If the cluster size did not decrease, we cannot do more. */
1720
1721 if (cluster_size == 0)
1722 break;
1723
1724 /* Consider all consecutive members having at least the cluster
1725 size in common: they form a cluster. But any gap in the
1726 sequence represents a change of cluster: same length, but
1727 different contents. */
1728
1729 size = size_array;
1730 while (1)
1731 {
1732 /* Find all members for one cluster. However, after having
1733 skipped over the gap, just break out if nothing remains. */
1734
1735 while (size < size_array + sizes && *size < cluster_size)
1736 size++;
1737 if (size == size_array + sizes)
1738 break;
1739
1740 member_set = cluster_set + (size - size_array);
1741 sorters = 1;
1742 while (size < size_array + sizes && *size >= cluster_size)
1743 {
1744 sorters++;
1745 size++;
1746 }
1747
1748 /* No cluster may be a proper subset of another. That is, if
1749 all putative members are preceded by identical items, then
1750 they are indeed part of a bigger cluster, which has already
1751 been or will later be caught. In such case, just avoid
1752 retaining them here. This also prevents quadratic
1753 behaviour which, I presume, would be heavy on computation.
1754
1755 This test may be defeated by tolerant matches. Maybe
1756 tolerant matches will just go away. Surely, tolerant
1757 matches later require subset member elimination. */
1758
1759 item = item_array + member_set[0];
1760 BACKWARD_ITEM (item);
1761 if (item_type (item) != SENTINEL)
1762 {
1763 checksum = item->checksum;
1764
1765 for (counter = 1; counter < sorters; counter++)
1766 {
1767 item = item_array + member_set[counter];
1768 BACKWARD_ITEM (item);
1769 if (item_type (item) == SENTINEL
1770 || item->checksum != checksum)
1771 break;
1772 }
1773 if (counter == sorters)
1774 continue;
1775 }
1776
1777 /* Skip any cluster member overlapping with the previous
1778 member of the same cluster. If doing so, chop the size of
1779 the first member just before the overlap point: this should
1780 later trigger a cluster having this reduced size. */
1781
1782 memcpy (sorter_array, member_set, sorters * sizeof (int));
1783 qsort (sorter_array, sorters, sizeof (int),
1784 compare_for_positions);
1785 cursor = sorter_array;
1786 for (counter = 1; counter < sorters; counter++)
1787 {
1788 size_value = item_distance (*cursor, sorter_array[counter],
1789 cluster_size);
1790 if (size_value < 0)
1791 *++cursor = sorter_array[counter];
1792 else
1793 {
1794 int counter2;
1795
1796 for (counter2 = 0; counter2 < sizes; counter2++)
1797 if (cluster_set[counter2] == *cursor
1798 || cluster_set[counter2] == sorter_array[counter])
1799 {
1800 assert (size_value < size_array[counter2]);
1801 size_array[counter2] = size_value;
1802 break;
1803 }
1804 assert (counter2 < sizes);
1805 }
1806 }
1807 sorters = cursor - sorter_array + 1;
1808
1809 /* Create the cluster only if at least two members remain. */
1810
1811 if (sorters >= 2)
1812 {
1813 new_cluster (cluster_size);
1814 for (counter = 0; counter < sorters; counter++)
1815 new_member (sorter_array[counter]);
1816 }
1817 }
1818 }
1819 }
1820
1821 /* Add a sentinel cluster at the end. */
1822
1823 new_cluster (0);
1824 clusters--;
1825
1826 /* Cleanup. */
1827
1828 free (indirect_item_array);
1829 free (size_array);
1830
1831 #if DEBUGGING
1832 if (debugging)
1833 {
1834 fprintf (stderr, _(", done\n"));
1835 dump_all_clusters ();
1836 }
1837 #endif
1838 }
1839
1840 /*------------------------------------------------------------.
1841 | Check if a MEMBER is active in some file other than INPUT. |
1842 `------------------------------------------------------------*/
1843
1844 static int
active_elsewhere(struct member * member,struct input * input)1845 active_elsewhere (struct member *member, struct input *input)
1846 {
1847 struct cluster *cluster = cluster_array + member->cluster_number;
1848
1849 /* FIXME: maybe it is sufficient to test first and last member only? */
1850
1851 for (member = member_array + cluster->first_member;
1852 member < MEMBER_LIMIT (cluster); member++)
1853 if (member->first_item < input->first_item
1854 || member->first_item + real_member_size (member) > input->item_limit)
1855 return 1;
1856
1857 return 0;
1858 }
1859
1860 /* Indirect members. */
1861
1862 /* The following array gives indices into the array of cluster members,
1863 member_array above, in such a way that the succession of indices points
1864 to members in the original textual order of input files. */
1865
1866 static int *indirect_array;
1867 static int indirects;
1868
1869 /*----------------------------------------.
1870 | Compute the array of indirect members. |
1871 `----------------------------------------*/
1872
1873 static void
prepare_indirects(void)1874 prepare_indirects (void)
1875 {
1876 int counter;
1877 struct member *member;
1878 struct input *input;
1879 int duplicated_items;
1880
1881 #if DEBUGGING
1882 if (debugging)
1883 fprintf (stderr, _("Sorting members"));
1884 #endif
1885
1886 indirect_array = xmalloc (members * sizeof (int));
1887 for (counter = 0; counter < members; counter++)
1888 indirect_array[counter] = counter;
1889 if (members > 1)
1890 qsort (indirect_array, members, sizeof (int), compare_for_member_start);
1891 indirects = members;
1892
1893 #if DEBUGGING
1894 if (debugging)
1895 {
1896 fprintf (stderr, _(", done\n"));
1897 fputs ("\f\n", stderr);
1898 for (counter = 0; counter < indirects; counter++)
1899 {
1900 member = member_array + indirect_array[counter];
1901 dump_reference (member->first_item);
1902 dump_member (member_array + indirect_array[counter]);
1903 if (counter < indirects - 1
1904 && (member_array[indirect_array[counter + 1]].first_item
1905 < member->first_item + real_member_size (member)))
1906 {
1907 dump_reference (member->first_item);
1908 fprintf (stderr, "Overlapping with next member\n");
1909 }
1910 }
1911 }
1912 #endif
1913
1914 if (verbose)
1915 {
1916 fprintf (stderr, _("Work summary:"));
1917 fprintf (stderr, ngettext (" %d cluster,", " %d clusters,", clusters),
1918 clusters);
1919 fprintf (stderr, ngettext (" %d member\n", " %d members\n", members),
1920 members);
1921 }
1922 }
1923
1924 /* Mergings. */
1925
1926 /* A merging is an indication for advancing in one given input file, by
1927 catching up all items (differences) until a cluster member, and then
1928 skipping over that member. Mergings are ordered so to maintain parallel
1929 advancement between files. A merging group is a fragment in the sequence
1930 of consecutive mergings, all mergings in a group have members from the
1931 same cluster. A merging group is then variable in length, a flag in the
1932 merging sequence signals the beginning of a new group.
1933
1934 However, some cluster members might have to be considered as differences
1935 preceding the more genuine member of a group, because they represent
1936 cross matches which are not easily represented in a parallel sequence.
1937 Another flag signals such members, which then escape the rule that all
1938 members of a group share the same cluster. */
1939
1940 struct merging
1941 {
1942 unsigned group_flag:1; /* beginning of a new merge group */
1943 unsigned cross_flag:1; /* member isolated by cross matches */
1944 unsigned input_number:14; /* input file designator */
1945 unsigned member_number:16; /* member designator */
1946 };
1947
1948 static struct merging *merging_array = NULL;
1949 static int mergings = 0;
1950
1951 #if DEBUGGING
1952
1953 /*--------------------.
1954 | Dump all mergings. |
1955 `--------------------*/
1956
1957 static void
dump_all_mergings(void)1958 dump_all_mergings (void)
1959 {
1960 struct merging *merging;
1961 struct reference reference;
1962
1963 fputs ("\f\n", stderr);
1964 for (merging = merging_array; merging < merging_array + mergings; merging++)
1965 {
1966 if (merging->group_flag)
1967 {
1968 if (merging > merging_array)
1969 putc ('\n', stderr);
1970 fprintf (stderr, "<%ld>", (long) (merging - merging_array));
1971 }
1972 reference
1973 = get_reference (member_array[merging->member_number].first_item);
1974 fprintf (stderr, "\t[%d]%c%s%d", merging->member_number,
1975 merging->cross_flag ? 'X' : ' ',
1976 reference.input->nick_name, reference.number);
1977 assert (reference.input == input_array + merging->input_number);
1978 }
1979 putc ('\n', stderr);
1980 }
1981
1982 #endif /* DEBUGGING */
1983
1984 /*------------------------------------------------------------------.
1985 | After having output PREFIX, explain the merging group starting at |
1986 | MERGING and extending until LIMIT. |
1987 `------------------------------------------------------------------*/
1988
1989 static void
explain_group(char prefix,struct merging * merging,struct merging * limit)1990 explain_group (char prefix, struct merging *merging, struct merging *limit)
1991 {
1992 struct cluster *cluster
1993 = cluster_array + member_array[merging->member_number].cluster_number;
1994 int counter;
1995 struct input *input;
1996 struct member *member;
1997 struct merging *cursor;
1998 struct reference reference;
1999
2000 putc (prefix, output_file);
2001 for (counter = cluster->first_member;
2002 counter < (cluster + 1)->first_member; counter++)
2003 {
2004 member = member_array + counter;
2005 reference = get_reference (member->first_item);
2006
2007 for (cursor = merging;
2008 cursor < limit && cursor->member_number != counter; cursor++)
2009 ;
2010
2011 fprintf (output_file, cursor < limit ? " %s%d,%d" : " (%s%d,%d)",
2012 reference.input->nick_name,
2013 reference.number, real_member_size (member));
2014 }
2015 putc ('\n', output_file);
2016 }
2017
2018 /*--------------------.
2019 | Add a new merging. |
2020 `--------------------*/
2021
2022 static void
new_merging(int group,int cross,struct input * input,struct member * member)2023 new_merging (int group, int cross, struct input *input, struct member *member)
2024 {
2025 struct merging *merging = merging_array + mergings++;
2026
2027 merging->group_flag = group;
2028 merging->cross_flag = cross;
2029 merging->input_number = input - input_array;
2030 merging->member_number = member - member_array;
2031 }
2032
2033 /*------------------------------.
2034 | Decide the merging sequence. |
2035 `------------------------------*/
2036
2037 void
prepare_mergings(void)2038 prepare_mergings (void)
2039 {
2040 int *cursor;
2041
2042 struct input *input0;
2043 struct input *input;
2044 struct member *member;
2045
2046 struct cluster *cluster; /* current cluster candidate */
2047 int cost; /* cost associate with current candidate */
2048 struct cluster *best_cluster; /* best cluster candidate for hunk */
2049 int best_cost; /* cost associated with best cluster */
2050 int group_flag; /* set if next merging starts group */
2051
2052 /* Remove member overlaps. */
2053
2054 if (members > 0)
2055 {
2056 int counter;
2057
2058 /* When members overlap, retain only the shortest ones (for now). */
2059
2060 indirects = 0;
2061 for (counter = 0; counter < members - 1; counter++)
2062 {
2063 member = member_array + indirect_array[counter];
2064 if (member->first_item + real_member_size (member)
2065 <= member_array[indirect_array[counter + 1]].first_item)
2066 indirect_array[indirects++] = indirect_array[counter];
2067 else
2068 /* Invalidate this member. */
2069 member->cluster_number = -1;
2070 }
2071 indirect_array[indirects++] = indirect_array[counter];
2072 }
2073
2074 /* Once indirects sorted and ready, this array is logically split into
2075 segments, one per input file, which segments might be later scanned in
2076 parallel a few times. All required state variables for resuming the
2077 scan are held into the last few fields of the input structure. */
2078
2079 cursor = indirect_array;
2080 for (input = input_array; input < input_array + inputs; input++)
2081 {
2082 input->item = input->first_item;
2083 input->indirect_cursor = cursor;
2084 while (cursor < indirect_array + indirects
2085 && member_array[*cursor].first_item < input->item_limit)
2086 cursor++;
2087 input->indirect_limit = cursor;
2088 }
2089
2090 /* Allocate an array for discovered mergings. */
2091
2092 merging_array = xmalloc (indirects * sizeof (struct merging));
2093 mergings = 0;
2094
2095 /* Repetitively consider the incoming cluster members for all inputs, and
2096 select at each iteration the cluster members causing least differences.
2097 This may indirectly trigger bigger differences into later iterations.
2098 Some thought should be given to global optimisation. FIXME. */
2099
2100 while (1)
2101 {
2102 #if DEBUGGING
2103 if (debugging)
2104 {
2105 fprintf (stderr, "\nCHOOSING NEXT CLUSTER\n");
2106
2107 for (input = input_array; input < input_array + inputs; input++)
2108 if (input->indirect_cursor < input->indirect_limit)
2109 {
2110 fprintf (stderr, "%s:\t", reference_string (input->item));
2111 dump_member (INPUT_MEMBER (input));
2112 }
2113 else
2114 fprintf (stderr, "%s:\tNone\n", reference_string (input->item));
2115
2116 putc ('\n', stderr);
2117 }
2118 #endif
2119
2120 best_cluster = NULL;
2121 for (input0 = input_array; input0 < input_array + inputs; input0++)
2122 if (input0->indirect_cursor < input0->indirect_limit)
2123 {
2124 cluster = INPUT_CLUSTER (input0);
2125 #if DEBUGGING
2126 if (debugging)
2127 {
2128 fprintf (stderr, "%s:\t", reference_string (input0->item));
2129 dump_cluster (cluster);
2130 }
2131 #endif
2132
2133 /* Skip this input if we have already evaluated the cluster. */
2134
2135 for (input = input_array; input < input0; input++)
2136 if (input->indirect_cursor < input->indirect_limit
2137 && INPUT_CLUSTER (input) == cluster)
2138 break;
2139 if (input < input0)
2140 continue;
2141
2142 /* Evaluate the cost of the cluster. This will be the total
2143 number of difference items needed in the listing for getting
2144 to the point of printing the cluster. For computing it,
2145 check all members of the evaluated cluster, retaining only
2146 one member per file, in fact, exactly the first which has not
2147 been listed yet. */
2148
2149 cost = 0;
2150 input = input_array;
2151 member = member_array + cluster->first_member;
2152 while (member < MEMBER_LIMIT (cluster))
2153 {
2154 /* Just ignore invalidated members. */
2155
2156 if (member->cluster_number < 0)
2157 {
2158 member++;
2159 continue;
2160 }
2161
2162 /* Find the proper file. */
2163
2164 while (member->first_item >= input->item_limit)
2165 input++;
2166
2167 /* Ignore a member that would have been already listed. */
2168
2169 if (member->first_item < input->item)
2170 {
2171 member++;
2172 continue;
2173 }
2174
2175 /* Accumulate cost for this member. */
2176
2177 #if DEBUGGING
2178 if (debugging)
2179 fprintf (stderr, " Cost += %d [%s..%s)\n",
2180 member->first_item - input->item,
2181 reference_string (input->item),
2182 reference_string (member->first_item));
2183 #endif
2184 cost += (member++)->first_item - input->item;
2185
2186 /* Skip other members that would appear later in same file. */
2187
2188 while (member < MEMBER_LIMIT (cluster)
2189 && member->first_item < input->item_limit)
2190 member++;
2191 }
2192
2193 /* Retain the best cluster so far. */
2194
2195 #if DEBUGGING
2196 if (debugging)
2197 {
2198 if (best_cluster)
2199 fprintf (stderr, " {%ld}=%d <-> {%ld}=%d\n",
2200 (long) (best_cluster - cluster_array), best_cost,
2201 (long) (cluster - cluster_array), cost);
2202 else
2203 fprintf (stderr, " {%ld}=%d\n",
2204 (long) (cluster - cluster_array), cost);
2205 }
2206 #endif
2207 if (!best_cluster || cost < best_cost)
2208 {
2209 best_cluster = cluster;
2210 best_cost = cost;
2211 }
2212 }
2213
2214 /* Get out if everything has been done. */
2215
2216 if (!best_cluster)
2217 break;
2218
2219 #if DEBUGGING
2220 if (debugging)
2221 {
2222 fprintf (stderr, "\nCHOICE\t");
2223 dump_cluster (best_cluster);
2224 putc ('\n', stderr);
2225 }
2226 #endif
2227
2228 /* Save found mergings, while moving all items pointers to after the
2229 members of the best cluster. */
2230
2231 input = input_array;
2232 group_flag = 1;
2233 member = member_array + best_cluster->first_member;
2234 while (member < MEMBER_LIMIT (best_cluster))
2235 {
2236 /* Just ignore invalidated members. */
2237
2238 if (member->cluster_number < 0)
2239 {
2240 member++;
2241 continue;
2242 }
2243
2244 /* Find the proper file. */
2245
2246 #if DEBUGGING
2247 if (debugging)
2248 {
2249 putc ('v', stderr);
2250 dump_member (member);
2251 }
2252 #endif
2253 while (member->first_item >= input->item_limit)
2254 input++;
2255
2256 /* Ignore a member that would have been already listed. */
2257
2258 if (member->first_item < input->item)
2259 {
2260 member++;
2261 continue;
2262 }
2263
2264 /* Skip all crossed members while adding them as crossed mergings. */
2265
2266 while (INPUT_MEMBER (input) != member)
2267 {
2268 new_merging (group_flag, 1, input, INPUT_MEMBER (input));
2269 group_flag = 0;
2270 input->indirect_cursor++;
2271 }
2272 assert (input->indirect_cursor < input->indirect_limit);
2273 assert (INPUT_MEMBER (input)->first_item < input->item_limit);
2274
2275 /* Skip the selected member while adding it as a direct merging. */
2276
2277 new_merging (group_flag, 0, input, member);
2278 group_flag = 0;
2279 input->item = member->first_item + real_member_size (member);
2280 input->indirect_cursor++;
2281
2282 /* Skip other members that would appear later in same file. */
2283
2284 while (member < MEMBER_LIMIT (best_cluster)
2285 && member->first_item < input->item_limit)
2286 member++;
2287 }
2288 }
2289
2290 #if DEBUGGING
2291 if (debugging)
2292 dump_all_mergings ();
2293 #endif
2294
2295 assert (mergings == indirects);
2296
2297 if (verbose)
2298 {
2299 fprintf (stderr, _("Work summary:"));
2300 fprintf (stderr, ngettext (" %d cluster,", " %d clusters,", clusters),
2301 clusters);
2302 fprintf (stderr, ngettext (" %d member,", " %d members,", members),
2303 members);
2304 fprintf (stderr, ngettext (" %d overlap\n", " %d overlaps\n",
2305 members - indirects), members - indirects);
2306 }
2307 }
2308
2309 /* Terminal and pager support. */
2310
2311 /* How to start underlining. */
2312 const char *termcap_start_underline = NULL;
2313
2314 /* How to stop underlining. */
2315 const char *termcap_stop_underline = NULL;
2316
2317 /* How to start bolding. */
2318 const char *termcap_start_bold = NULL;
2319
2320 /* How to stop bolding. */
2321 const char *termcap_stop_bold = NULL;
2322
2323 enum emphasis
2324 {
2325 STRAIGHT, /* no emphasis */
2326 UNDERLINED, /* underlined text */
2327 BOLD /* bold text */
2328 };
2329
2330 /* Current output emphasis. */
2331 enum emphasis current_emphasis = STRAIGHT;
2332
2333 /*-----------------------------.
2334 | Select strings for marking. |
2335 `-----------------------------*/
2336
2337 static void
initialize_strings(void)2338 initialize_strings (void)
2339 {
2340 struct input *input;
2341
2342 #if HAVE_TPUTS
2343 if (find_termcap)
2344 {
2345 const char *name; /* terminal capability name */
2346 char term_buffer[2048]; /* terminal description */
2347 static char *buffer; /* buffer for capabilities */
2348 char *filler; /* cursor into allocated strings */
2349 int success; /* tgetent results */
2350
2351 name = getenv ("TERM");
2352 if (name == NULL)
2353 error (EXIT_ERROR, 0,
2354 _("select a terminal through the TERM environment variable"));
2355 success = tgetent (term_buffer, name);
2356 if (success < 0)
2357 error (EXIT_ERROR, 0, _("could not access the termcap data base"));
2358 if (success == 0)
2359 error (EXIT_ERROR, 0, _("terminal type `%s' is not defined"), name);
2360 buffer = (char *) malloc (strlen (term_buffer));
2361 filler = buffer;
2362
2363 termcap_start_underline = tgetstr ("us", &filler);
2364 termcap_stop_underline = tgetstr ("ue", &filler);
2365 termcap_start_bold = tgetstr ("so", &filler);
2366 termcap_stop_bold = tgetstr ("se", &filler);
2367 }
2368 #endif /* HAVE_TPUTS */
2369
2370 /* Ensure some default strings. For all input files except last, prefer
2371 underline. For last input file, prefer bold. */
2372
2373 for (input = input_array; input < input_array + inputs - 1; input++)
2374 {
2375 input->term_start = termcap_start_underline;
2376 input->term_stop = termcap_stop_underline;
2377
2378 if (!overstrike)
2379 {
2380 if (!termcap_start_underline && !input->user_start)
2381 input->user_start = "[-";
2382 if (!termcap_stop_underline && !input->user_stop)
2383 input->user_stop = "-]";
2384 }
2385 }
2386
2387 input->term_start = termcap_start_bold;
2388 input->term_stop = termcap_stop_bold;
2389
2390 if (!overstrike)
2391 {
2392 if (!termcap_start_bold && !input->user_start)
2393 input->user_start = "{+";
2394 if (!termcap_stop_bold && !input->user_stop)
2395 input->user_stop = "+}";
2396 }
2397 }
2398
2399 #if HAVE_TPUTS
2400
2401 /*-----------------------------------------.
2402 | Write one character for tputs function. |
2403 `-----------------------------------------*/
2404
2405 static int
putc_for_tputs(int character)2406 putc_for_tputs (int character)
2407 {
2408 return putc (character, output_file);
2409 }
2410
2411 #endif /* HAVE_TPUTS */
2412
2413 /*-----------------------.
2414 | Set current EMPHASIS. |
2415 `-----------------------*/
2416
2417 static void
set_emphasis(enum emphasis emphasis)2418 set_emphasis (enum emphasis emphasis)
2419 {
2420 #if HAVE_TPUTS
2421 switch (current_emphasis)
2422 {
2423 case STRAIGHT:
2424 switch (emphasis)
2425 {
2426 case STRAIGHT:
2427 /* Done. */
2428 break;
2429
2430 case UNDERLINED:
2431 if (termcap_start_underline)
2432 tputs (termcap_start_underline, 0, putc_for_tputs);
2433 break;
2434
2435 case BOLD:
2436 if (termcap_start_bold)
2437 tputs (termcap_start_bold, 0, putc_for_tputs);
2438 break;
2439 }
2440 break;
2441
2442 case UNDERLINED:
2443 switch (emphasis)
2444 {
2445 case STRAIGHT:
2446 if (termcap_stop_underline)
2447 tputs (termcap_stop_underline, 0, putc_for_tputs);
2448 break;
2449
2450 case UNDERLINED:
2451 /* Done. */
2452 break;
2453
2454 case BOLD:
2455 if (termcap_stop_underline)
2456 tputs (termcap_stop_underline, 0, putc_for_tputs);
2457 if (termcap_start_bold)
2458 tputs (termcap_start_bold, 0, putc_for_tputs);
2459 break;
2460 }
2461 break;
2462
2463 case BOLD:
2464 switch (emphasis)
2465 {
2466 case STRAIGHT:
2467 if (termcap_stop_bold)
2468 tputs (termcap_stop_bold, 0, putc_for_tputs);
2469 break;
2470
2471 case UNDERLINED:
2472 if (termcap_stop_bold)
2473 tputs (termcap_stop_bold, 0, putc_for_tputs);
2474 if (termcap_start_underline)
2475 tputs (termcap_start_underline, 0, putc_for_tputs);
2476 break;
2477
2478 case BOLD:
2479 /* Done. */
2480 break;
2481 }
2482 break;
2483 }
2484 #endif /* HAVE_TPUTS */
2485
2486 current_emphasis = emphasis;
2487 }
2488
2489 /*---------------------------------------------------------.
2490 | Push a new EMPHASIS, or pop it out back to what it was. |
2491 `---------------------------------------------------------*/
2492
2493 #define EMPHASIS_STACK_LENGTH 1
2494
2495 static enum emphasis emphasis_array[EMPHASIS_STACK_LENGTH];
2496 static int emphasises = 0;
2497
2498 static void
push_emphasis(enum emphasis emphasis)2499 push_emphasis (enum emphasis emphasis)
2500 {
2501 assert (emphasises < EMPHASIS_STACK_LENGTH);
2502
2503 emphasis_array[emphasises++] = current_emphasis;
2504 set_emphasis (emphasis);
2505 }
2506
2507 static void
pop_emphasis(void)2508 pop_emphasis (void)
2509 {
2510 assert (emphasises > 0);
2511
2512 set_emphasis (emphasis_array[--emphasises]);
2513 }
2514
2515 /*--------------------------------------------------------------------------.
2516 | Output a STRING having LENGTH characters, subject to the current |
2517 | emphasis. If EXPAND is nonzero, expand TABs. For expansion to work |
2518 | properly, this routine should always be called while copying input data. |
2519 `--------------------------------------------------------------------------*/
2520
2521 static void
output_characters(const char * string,int length,int expand)2522 output_characters (const char *string, int length, int expand)
2523 {
2524 static unsigned column = 0;
2525
2526 const char *cursor;
2527 int counter;
2528
2529 if (overstrike || expand)
2530 for (cursor = string; cursor < string + length; cursor++)
2531 switch (*cursor)
2532 {
2533 /* Underlining or overstriking whitespace surely looks strange,
2534 but "less" does understand such things as emphasis requests. */
2535
2536 case '\n':
2537 case '\r':
2538 case '\v':
2539 case '\f':
2540 putc (*cursor, output_file);
2541 column = 0;
2542 break;
2543
2544 case '\b':
2545 putc ('\b', output_file);
2546 if (column > 0)
2547 column--;
2548 break;
2549
2550 case '\t':
2551 if (expand)
2552 do
2553 {
2554 if (overstrike)
2555 switch (current_emphasis)
2556 {
2557 case STRAIGHT:
2558 putc (' ', output_file);
2559 break;
2560
2561 case UNDERLINED:
2562 putc ('_', output_file);
2563 if (overstrike_for_less)
2564 {
2565 putc ('\b', output_file);
2566 putc (' ', output_file);
2567 }
2568 break;
2569
2570 case BOLD:
2571 putc (*cursor, output_file);
2572 if (overstrike_for_less)
2573 {
2574 putc ('\b', output_file);
2575 putc (' ', output_file);
2576 }
2577 break;
2578 }
2579 else
2580 putc (' ', output_file);
2581 column++;
2582 }
2583 while (column % 8 != 0);
2584 else
2585 putc ('\t', output_file);
2586 break;
2587
2588 case ' ':
2589 if (overstrike)
2590 switch (current_emphasis)
2591 {
2592 case STRAIGHT:
2593 putc (' ', output_file);
2594 break;
2595
2596 case UNDERLINED:
2597 putc ('_', output_file);
2598 if (overstrike_for_less)
2599 {
2600 putc ('\b', output_file);
2601 putc (' ', output_file);
2602 }
2603 break;
2604
2605 case BOLD:
2606 putc (' ', output_file);
2607 if (overstrike_for_less)
2608 {
2609 putc ('\b', output_file);
2610 putc (' ', output_file);
2611 }
2612 break;
2613 }
2614 else
2615 putc (' ', output_file);
2616 column++;
2617 break;
2618
2619 case '_':
2620 if (overstrike_for_less && current_emphasis == UNDERLINED)
2621 {
2622 /* Overstriking an underline would make it bold in "less". */
2623 putc ('_', output_file);
2624 column++;
2625 break;
2626 }
2627 /* Fall through. */
2628
2629 default:
2630 if (overstrike)
2631 switch (current_emphasis)
2632 {
2633 case STRAIGHT:
2634 putc (*cursor, output_file);
2635 break;
2636
2637 case UNDERLINED:
2638 putc ('_', output_file);
2639 putc ('\b', output_file);
2640 putc (*cursor, output_file);
2641 break;
2642
2643 case BOLD:
2644 putc (*cursor, output_file);
2645 putc ('\b', output_file);
2646 putc (*cursor, output_file);
2647 break;
2648 }
2649 else
2650 putc (*cursor, output_file);
2651 column++;
2652 break;
2653 }
2654 else
2655 fwrite (string, length, 1, output_file);
2656 }
2657
2658 /*------------------------------------------------------------------------.
2659 | While copying INPUT, select EMPHASIS, with preferred type for printer. |
2660 `------------------------------------------------------------------------*/
2661
2662 static void
start_of_emphasis(struct input * input,enum emphasis emphasis)2663 start_of_emphasis (struct input *input, enum emphasis emphasis)
2664 {
2665 assert (current_emphasis == STRAIGHT);
2666
2667 /* Avoid any emphasis if it would be useless. */
2668
2669 #if FIXME
2670 if (!common_listing_allowed
2671 && (!right->listing_allowed || !left->listing_allowed))
2672 return;
2673 #endif
2674
2675 #if HAVE_TPUTS
2676 if (input->term_start)
2677 tputs (input->term_start, 0, putc_for_tputs);
2678 #endif
2679
2680 if (input->user_start)
2681 fprintf (output_file, "%s", input->user_start);
2682
2683 current_emphasis = emphasis;
2684 }
2685
2686 /*-----------------------------------------------.
2687 | Indicate end of emphasis while copying INPUT. |
2688 `-----------------------------------------------*/
2689
2690 static void
end_of_emphasis(struct input * input)2691 end_of_emphasis (struct input *input)
2692 {
2693 assert (current_emphasis != STRAIGHT);
2694
2695 /* Avoid any emphasis if it would be useless. */
2696
2697 #if FIXME
2698 if (!common_listing_allowed
2699 && (!right->listing_allowed || !left->listing_allowed))
2700 return;
2701 #endif
2702
2703 if (input->user_stop)
2704 fprintf (output_file, "%s", input->user_stop);
2705
2706 #if HAVE_TPUTS
2707 if (input->term_stop)
2708 tputs (input->term_stop, 0, putc_for_tputs);
2709 #endif
2710
2711 current_emphasis = STRAIGHT;
2712 }
2713
2714 /*-------------------------------------------------------------------.
2715 | Launch the output pager if any. If INPUT is not NULL, do it for a |
2716 | specific input file. |
2717 `-------------------------------------------------------------------*/
2718
2719 static void
launch_output_program(struct input * input)2720 launch_output_program (struct input *input)
2721 {
2722 if (paginate)
2723 {
2724 if (input)
2725 output_file = writepipe ("pr", "-f", "-h", input->file_name, NULL);
2726 else
2727 output_file = writepipe ("pr", "-f", NULL);
2728 }
2729 else
2730 {
2731 char *program; /* name of the pager */
2732 char *basename; /* basename of the pager */
2733
2734 /* Check if a output program should be called, and which one. Avoid
2735 all paging if only statistics are needed. */
2736
2737 if (autopager && isatty (fileno (stdout))
2738 #if FIXME
2739 && (left->listing_allowed
2740 || right->listing_allowed || common_listing_allowed)
2741 #endif
2742 )
2743 {
2744 program = getenv ("WDIFF_PAGER");
2745 if (program == NULL)
2746 program = getenv ("PAGER");
2747 #ifdef PAGER_PROGRAM
2748 if (program == NULL)
2749 program = PAGER_PROGRAM;
2750 #endif
2751 }
2752 else
2753 program = NULL;
2754
2755 /* Use stdout as default output. */
2756
2757 #if DEBUGGING
2758 output_file = debugging ? stderr : stdout;
2759 #else
2760 output_file = stdout;
2761 #endif
2762
2763 /* If we should use a pager, launch it. */
2764
2765 if (program && *program)
2766 {
2767 char *lessenv;
2768
2769 lessenv = getenv ("LESS");
2770 if (lessenv == NULL)
2771 {
2772 setenv ("LESS", LESS_DEFAULT_OPTS, 0);
2773 }
2774 else
2775 {
2776 if (asprintf (&lessenv, "%s %s", LESS_DEFAULT_OPTS, lessenv) ==
2777 -1)
2778 {
2779 xalloc_die ();
2780 return;
2781 }
2782 else
2783 {
2784 setenv ("LESS", lessenv, 1);
2785 }
2786 }
2787
2788 output_file = writepipe (program, NULL);
2789 if (!output_file)
2790 error (EXIT_ERROR, errno, "%s", program);
2791 }
2792 }
2793 }
2794
2795 /*-----------------------------.
2796 | Complete the pager program. |
2797 `-----------------------------*/
2798
2799 static void
complete_output_program(void)2800 complete_output_program (void)
2801 {
2802 /* Let the user play at will inside the pager, until s/he exits, before
2803 proceeding any further. */
2804
2805 if (output_file && output_file != stdout)
2806 {
2807 fclose (output_file);
2808 wait (NULL);
2809 }
2810 }
2811
2812 /* Other listing services. */
2813
2814 enum margin_mode
2815 {
2816 EMPTY_MARGIN, /* nothing in left margin */
2817 LOCATION_IN_MARGIN, /* number of next item */
2818 FILLER_IN_MARGIN, /* bars for the length of nick name */
2819 FILE_IN_MARGIN /* nick name for file */
2820 };
2821
2822 /*-------------------------------------------.
2823 | For file INPUT, produce a kind of MARGIN. |
2824 `-------------------------------------------*/
2825
2826 static void
make_margin(struct input * input,enum margin_mode margin)2827 make_margin (struct input *input, enum margin_mode margin)
2828 {
2829 char buffer[15];
2830 char *cursor;
2831
2832 switch (margin)
2833 {
2834 case EMPTY_MARGIN:
2835 return;
2836
2837 case LOCATION_IN_MARGIN:
2838 sprintf (buffer, "%s%d", input->nick_name,
2839 input->item - input->first_item + 1);
2840 break;
2841
2842 case FILLER_IN_MARGIN:
2843 cursor = buffer + strlen (input->nick_name);
2844 *cursor-- = '\0';
2845 while (cursor > buffer)
2846 *cursor-- = '|';
2847 break;
2848
2849 case FILE_IN_MARGIN:
2850 strcpy (buffer, input->nick_name);
2851 break;
2852 }
2853
2854 if (word_mode)
2855 {
2856 push_emphasis (UNDERLINED);
2857 output_characters (buffer, strlen (buffer), 0);
2858 pop_emphasis ();
2859 putc (initial_tab ? '\t' : ' ', output_file);
2860 }
2861 else
2862 {
2863 push_emphasis (STRAIGHT);
2864 output_characters (buffer, strlen (buffer), 0);
2865 putc (initial_tab ? '\t' : ' ', output_file);
2866 pop_emphasis ();
2867 }
2868 }
2869
2870 /*-------------------------.
2871 | Skip a line from INPUT. |
2872 `-------------------------*/
2873
2874 static void
skip_line_item(struct input * input)2875 skip_line_item (struct input *input)
2876 {
2877 input_line (input);
2878 assert (input->cursor);
2879
2880 input->item++;
2881 }
2882
2883 /*-------------------------.
2884 | Copy a line from INPUT. |
2885 `-------------------------*/
2886
2887 static void
copy_line_item(struct input * input,enum margin_mode margin)2888 copy_line_item (struct input *input, enum margin_mode margin)
2889 {
2890 input_line (input);
2891 assert (input->cursor);
2892
2893 make_margin (input, margin);
2894 output_characters (input->line, input->limit - input->line, expand_tabs);
2895
2896 input->item++;
2897 }
2898
2899 /*---------------------------------.
2900 | Skip over white space on INPUT. |
2901 `---------------------------------*/
2902
2903 static void
skip_whitespace(struct input * input)2904 skip_whitespace (struct input *input)
2905 {
2906 assert (input->cursor);
2907 while (input->cursor && isspace (*input->cursor))
2908 input_character (input);
2909
2910 /* FIXME: Maybe worth following columns for TAB expansion, maybe not? */
2911 }
2912
2913 /*-----------------------------------------------------------------.
2914 | Copy white space from INPUT to FILE, ensuring a kind of MARGIN. |
2915 `-----------------------------------------------------------------*/
2916
2917 static void
copy_whitespace(struct input * input,enum margin_mode margin)2918 copy_whitespace (struct input *input, enum margin_mode margin)
2919 {
2920 char *string = input->cursor;
2921
2922 assert (input->cursor);
2923 while (input->cursor && isspace (*input->cursor))
2924 if (++input->cursor == input->limit)
2925 {
2926 if (input->cursor[-1] == '\n')
2927 {
2928 output_characters (string, input->cursor - string - 1,
2929 expand_tabs);
2930
2931 /* While changing lines, ensure we stop any special display
2932 prior to, and restore the special display after. */
2933
2934 if (avoid_wraps && input->user_stop)
2935 fprintf (output_file, "%s", input->user_stop);
2936 #if HAVE_TPUTS
2937 if (input->term_stop)
2938 tputs (input->term_stop, 0, putc_for_tputs);
2939 #endif
2940
2941 output_characters (input->cursor - 1, 1, expand_tabs);
2942 make_margin (input, margin);
2943
2944 #if HAVE_TPUTS
2945 if (input->term_start)
2946 tputs (input->term_start, 0, putc_for_tputs);
2947 #endif
2948 if (avoid_wraps && input->user_start)
2949 fprintf (output_file, "%s", input->user_start);
2950 }
2951 else
2952 output_characters (string, input->cursor - string, expand_tabs);
2953
2954 input_character_helper (input);
2955 string = input->cursor;
2956 }
2957
2958 if (string && input->cursor > string)
2959 output_characters (string, input->cursor - string, expand_tabs);
2960 }
2961
2962 /*-------------------------------------.
2963 | Skip over non-white space on INPUT. |
2964 `-------------------------------------*/
2965
2966 static void
skip_word_item(struct input * input)2967 skip_word_item (struct input *input)
2968 {
2969 assert (input->cursor && !isspace (*input->cursor));
2970 while (input->cursor && !isspace (*input->cursor))
2971 input_character (input);
2972
2973 /* FIXME: Maybe worth following columns for TAB expansion, maybe not? */
2974
2975 input->item++;
2976 }
2977
2978 /*------------------------------------------.
2979 | Copy non white space from INPUT to FILE. |
2980 `------------------------------------------*/
2981
2982 static void
copy_word_item(struct input * input)2983 copy_word_item (struct input *input)
2984 {
2985 char *string = input->cursor;
2986
2987 assert (input->cursor && !isspace (*input->cursor));
2988 while (input->cursor && !isspace (*input->cursor))
2989 input->cursor++;
2990
2991 output_characters (string, input->cursor - string, expand_tabs);
2992
2993 if (input->cursor == input->limit)
2994 input_character_helper (input);
2995
2996 input->item++;
2997 }
2998
2999 /*------------------------------------------.
3000 | Skip a difference from INPUT up to ITEM. |
3001 `------------------------------------------*/
3002
3003 static void
skip_until(struct input * input,int item)3004 skip_until (struct input *input, int item)
3005 {
3006 assert (input->item <= item);
3007 if (word_mode)
3008 while (input->item < item)
3009 {
3010 skip_whitespace (input);
3011 skip_word_item (input);
3012 }
3013 else
3014 while (input->item < item)
3015 skip_line_item (input);
3016 }
3017
3018 /*------------------------------------------------------------------.
3019 | Copy a difference from INPUT up to ITEM, using a kind of MARGIN. |
3020 `------------------------------------------------------------------*/
3021
3022 static void
copy_until(struct input * input,int item,enum margin_mode margin)3023 copy_until (struct input *input, int item, enum margin_mode margin)
3024 {
3025 assert (input->item <= item);
3026 if (word_mode)
3027 while (input->item < item)
3028 {
3029 copy_whitespace (input, margin);
3030 copy_word_item (input);
3031 }
3032 else
3033 while (input->item < item)
3034 copy_line_item (input, margin);
3035 }
3036
3037 /*------------------------------------.
3038 | From INPUT, skip a cluster MEMBER. |
3039 `------------------------------------*/
3040
3041 static void
skip_member_proper(struct input * input,struct member * member)3042 skip_member_proper (struct input *input, struct member *member)
3043 {
3044 assert (input->item == member->first_item);
3045 skip_until (input, member->first_item + real_member_size (member));
3046 }
3047
3048 /*------------------------------------.
3049 | From INPUT, copy a cluster MEMBER. |
3050 `------------------------------------*/
3051
3052 static void
copy_member_proper(struct input * input,struct member * member)3053 copy_member_proper (struct input *input, struct member *member)
3054 {
3055 assert (input->item == member->first_item);
3056 copy_until (input, member->first_item + real_member_size (member),
3057 FILLER_IN_MARGIN);
3058 }
3059
3060 /* Listing control. */
3061
3062 /*------------------------------------------.
3063 | Relist all input files with annotations. |
3064 `------------------------------------------*/
3065
3066 static void
relist_annotated_files(void)3067 relist_annotated_files (void)
3068 {
3069 /* The array of actives holds all members being concurrently listed. If
3070 there is no overlapping members, the array will never hold more than
3071 one member. The index in this array is also the column position of the
3072 vertical line bracing the member, in the annotated listing. For
3073 preserving verticality of lines after the termination of some members,
3074 NULL members in the array stand for embedded white columns. */
3075
3076 struct active
3077 {
3078 struct member *member; /* active member, or NULL */
3079 int remaining; /* count of remaining lines to list */
3080 };
3081 struct active *active_array = NULL;
3082 int actives = 0;
3083 int allocated_actives = 0;
3084
3085 int other_count = 0; /* how many actives in other files */
3086
3087 enum margin_mode margin_mode =
3088 show_links ? LOCATION_IN_MARGIN : EMPTY_MARGIN;
3089
3090 struct input *input = NULL;
3091 int *cursor;
3092 int counter;
3093 FILE *file;
3094 ITEM *item;
3095 struct active *active;
3096 struct member *member;
3097 struct cluster *cluster;
3098 int ordinal;
3099 struct reference reference;
3100 char buffer[20];
3101
3102 #if DEBUGGING
3103 if (debugging)
3104 fputs ("\f\n", stderr);
3105 #endif
3106
3107 /* Prepare terminal. */
3108
3109 if (!paginate)
3110 {
3111 launch_output_program (input);
3112 initialize_strings ();
3113 }
3114
3115 /* Prepare to scan all members. */
3116
3117 if (indirects > 0)
3118 {
3119 cursor = indirect_array;
3120 member = member_array + *cursor;
3121 }
3122 else
3123 cursor = NULL;
3124
3125 /* Process all files. */
3126
3127 for (input = input_array; input < input_array + inputs; input++)
3128 {
3129 /* Prepare terminal. */
3130
3131 if (paginate)
3132 {
3133 launch_output_program (input);
3134 if (input == input_array)
3135 initialize_strings ();
3136 }
3137 else
3138 {
3139 if (input > input_array)
3140 fputs ("\f\n", output_file);
3141 fprintf (output_file, "@@@ %s\n", input->file_name);
3142 }
3143
3144 /* Prepare for file. */
3145
3146 open_input (input);
3147 input->item = input->first_item;
3148
3149 /* Process all items. */
3150
3151 if (word_mode && input->item < input->item_limit)
3152 {
3153 make_margin (input, margin_mode);
3154 input_line (input);
3155 }
3156
3157 while (input->item < input->item_limit)
3158 {
3159 if (word_mode)
3160 copy_whitespace (input, margin_mode);
3161
3162 item = item_array + input->item;
3163
3164 /* See if we are starting any member. */
3165
3166 while (cursor && member->first_item == input->item)
3167 {
3168 cluster = cluster_array + member->cluster_number;
3169 ordinal = member - member_array - cluster->first_member - 1;
3170
3171 /* Obtain some active slot for the new member. */
3172
3173 if (actives)
3174 {
3175 /* Set ACTIVE to the rightmost not-in-use active, or after
3176 all actives if they are all in use. */
3177
3178 active = active_array + actives;
3179 for (counter = 0; counter < actives; counter++)
3180 if (!active_array[counter].member)
3181 active = active_array + counter;
3182
3183 /* Output vertical bars as needed for all prior actives. */
3184
3185 if (!word_mode)
3186 for (counter = 0;
3187 active_array + counter < active; counter++)
3188 if (active_array[counter].member)
3189 putc ('|', output_file);
3190 else
3191 putc (' ', output_file);
3192
3193 /* Ensure active will be allocated if necessary. */
3194
3195 if (active == active_array + actives)
3196 active = NULL;
3197 }
3198 else
3199 active = NULL;
3200
3201 /* Allocate and initialise the active. */
3202
3203 if (!active)
3204 {
3205 if (actives == allocated_actives)
3206 {
3207 allocated_actives += 8;
3208 active_array = (struct active *)
3209 xrealloc (active_array,
3210 allocated_actives *
3211 (sizeof (struct active)));
3212 }
3213 active = active_array + actives++;
3214 }
3215 active->member = member;
3216 active->remaining
3217 = cluster_array[member->cluster_number].item_count;
3218
3219 /* Print a pointer to the previous member of same cluster. */
3220
3221 if (ordinal >= 0)
3222 {
3223 reference = get_reference
3224 (member_array
3225 [cluster->first_member + ordinal].first_item);
3226
3227 if (word_mode)
3228 {
3229 if (show_links)
3230 {
3231 sprintf (buffer, "[%c%s%d",
3232 (char) (active - active_array + 'A'),
3233 reference.input->nick_name,
3234 reference.number);
3235 push_emphasis (UNDERLINED);
3236 output_characters (buffer, strlen (buffer), 0);
3237 pop_emphasis ();
3238 putc (' ', output_file);
3239 }
3240 }
3241 else
3242 {
3243 fprintf (output_file, ".-> [%d/%d] ",
3244 ordinal + 1, MEMBERS (cluster));
3245 sprintf (buffer, "%s%d",
3246 reference.input->nick_name, reference.number);
3247 push_emphasis (UNDERLINED);
3248 output_characters (buffer, strlen (buffer), 0);
3249 pop_emphasis ();
3250 if (reference.input != input)
3251 fprintf (output_file, " (%s)",
3252 reference.input->file_name);
3253 putc ('\n', output_file);
3254 }
3255 }
3256 else if (word_mode)
3257 {
3258 if (show_links)
3259 {
3260 sprintf (buffer, "[%c",
3261 (char) (active - active_array + 'A'));
3262 push_emphasis (UNDERLINED);
3263 output_characters (buffer, strlen (buffer), 0);
3264 pop_emphasis ();
3265 putc (' ', output_file);
3266 }
3267 }
3268 else
3269 fputs (".-\n", output_file);
3270
3271 /* Bold text appearing in other files. */
3272
3273 if (active_elsewhere (member, input))
3274 if (other_count++ == 0)
3275 set_emphasis (BOLD);
3276
3277 /* Advance cursor. */
3278
3279 if (++cursor < indirect_array + indirects)
3280 member = member_array + *cursor;
3281 else
3282 cursor = NULL;
3283 }
3284
3285 /* List the item itself. */
3286
3287 if (word_mode)
3288 copy_word_item (input);
3289 else
3290 {
3291 for (counter = 0; counter < actives; counter++)
3292 if (active_array[counter].member)
3293 putc ('|', output_file);
3294 else
3295 putc (' ', output_file);
3296
3297 if (counter < 7)
3298 {
3299 sprintf (buffer, "%s%d", input->nick_name,
3300 input->item - input->first_item + 1);
3301 for (counter += strlen (buffer); counter < 7; counter++)
3302 putc (' ', output_file);
3303 push_emphasis (UNDERLINED);
3304 output_characters (buffer + counter - 7,
3305 strlen (buffer + counter - 7), 0);
3306 pop_emphasis ();
3307 }
3308
3309 if (initial_tab)
3310 putc ('\t', output_file);
3311 else
3312 putc (' ', output_file);
3313
3314 copy_line_item (input, EMPTY_MARGIN);
3315 }
3316
3317 /* See if we are ending any member. */
3318
3319 if (!actives)
3320 continue;
3321
3322 active = active_array + actives;
3323 while (--active >= active_array)
3324 if (item_type (item) == NORMAL && --active->remaining == 0)
3325 {
3326 cluster = cluster_array + active->member->cluster_number;
3327 ordinal
3328 = active->member - member_array - cluster->first_member + 1;
3329
3330 /* Print a pointer to the next member of same cluster. */
3331
3332 if (!word_mode)
3333 {
3334 for (counter = 0;
3335 active_array + counter < active; counter++)
3336 if (active_array[counter].member)
3337 putc ('|', output_file);
3338 else
3339 putc (' ', output_file);
3340 }
3341
3342 if (ordinal < MEMBERS (cluster))
3343 {
3344 reference = get_reference
3345 (member_array
3346 [cluster->first_member + ordinal].first_item);
3347
3348 if (word_mode)
3349 {
3350 if (show_links)
3351 {
3352 putc (' ', output_file);
3353 sprintf (buffer, "%s%d%c]",
3354 reference.input->nick_name,
3355 reference.number,
3356 (char) (active - active_array + 'A'));
3357 push_emphasis (UNDERLINED);
3358 output_characters (buffer, strlen (buffer), 0);
3359 pop_emphasis ();
3360 }
3361 }
3362 else
3363 {
3364 fprintf (output_file, "`-> [%d/%d] ",
3365 ordinal + 1, MEMBERS (cluster));
3366 sprintf (buffer, "%s%d",
3367 reference.input->nick_name,
3368 reference.number);
3369 push_emphasis (UNDERLINED);
3370 output_characters (buffer, strlen (buffer), 0);
3371 pop_emphasis ();
3372 if (reference.input != input)
3373 fprintf (output_file, " (%s)",
3374 reference.input->file_name);
3375 putc ('\n', output_file);
3376 }
3377 }
3378 else if (word_mode)
3379 {
3380 if (show_links)
3381 {
3382 putc (' ', output_file);
3383 sprintf (buffer, "%c]",
3384 (char) (active - active_array + 'A'));
3385 push_emphasis (UNDERLINED);
3386 output_characters (buffer, strlen (buffer), 0);
3387 pop_emphasis ();
3388 }
3389 }
3390 else
3391 fputs ("`-\n", output_file);
3392
3393 /* Do not bold text not appearing in any other file. */
3394
3395 if (active_elsewhere (active->member, input))
3396 if (--other_count == 0)
3397 set_emphasis (STRAIGHT);
3398
3399 /* Remove as many active members as we can. */
3400
3401 active->member = NULL;
3402 if (active == active_array + actives - 1)
3403 while (actives > 0 && !active_array[actives - 1].member)
3404 actives--;
3405 }
3406 }
3407
3408 /* Finish file. */
3409
3410 assert (actives == 0);
3411 assert (other_count == 0);
3412
3413 close_input (input);
3414
3415 if (paginate)
3416 complete_output_program ();
3417 }
3418
3419 if (!paginate)
3420 complete_output_program ();
3421 }
3422
3423 /*--------------------------------------------------------------------.
3424 | Make a merged listing of input files. If UNIFIED, produce unified |
3425 | context diffs instead of plain context diffs. If CROSSED, identify |
3426 | crossed blocks. |
3427 `--------------------------------------------------------------------*/
3428
3429 static void
relist_merged_lines(int unified,int crossed)3430 relist_merged_lines (int unified, int crossed)
3431 {
3432 int counter;
3433 struct input *input;
3434 struct member *member;
3435 struct cluster *chosen_cluster;
3436 struct reference reference;
3437 struct merging *merging;
3438 struct merging *group_limit;
3439 struct merging *cursor;
3440 int listed;
3441
3442 /* Prepare terminal. */
3443
3444 launch_output_program (NULL);
3445 initialize_strings ();
3446
3447 /* FIXME: Merging requires all input files to be opened in parallel. This
3448 is not realistic when there are really many input files: in such cases,
3449 one has no other choice than to avoid merged listings and resort to the
3450 full relisting option instead. */
3451
3452 for (input = input_array; input < input_array + inputs; input++)
3453 {
3454 open_input (input);
3455 input->item = input->first_item;
3456 }
3457
3458 /* Process all merging groups, one at a time. */
3459
3460 #if DEBUGGING
3461 if (debugging)
3462 fputs ("\f\n", stderr);
3463 #endif
3464
3465 for (merging = merging_array;
3466 merging < merging_array + mergings; merging = group_limit)
3467 {
3468 /* Find the extent of the merging group, and count genuine mergings. */
3469
3470 counter = 0;
3471 if (!merging->cross_flag)
3472 counter++;
3473
3474 for (cursor = merging + 1;
3475 cursor < merging_array + mergings && !cursor->group_flag; cursor++)
3476 if (!cursor->cross_flag)
3477 counter++;
3478
3479 group_limit = cursor;
3480
3481 /* Prepare for a new hunk. Hmph! Not really yet... */
3482
3483 /* Output differences, which are items before members. Also consider
3484 any crossed member as a difference and output it on the same blow. */
3485
3486 for (cursor = merging; cursor < group_limit; cursor++)
3487 {
3488 input = input_array + cursor->input_number;
3489 member = member_array + cursor->member_number;
3490 copy_until (input, member->first_item, FILE_IN_MARGIN);
3491 if (cursor->cross_flag)
3492 {
3493 explain_group ('/', cursor, cursor + 1);
3494 copy_member_proper (input, member);
3495 explain_group ('\\', cursor, cursor + 1);
3496 }
3497 }
3498
3499 /* Describe members, which we are about to skip. */
3500
3501 explain_group ('/', merging, group_limit);
3502
3503 listed = 0;
3504 for (cursor = merging; cursor < group_limit; cursor++)
3505 if (!cursor->cross_flag)
3506 {
3507 input = input_array + cursor->input_number;
3508 member = member_array + cursor->member_number;
3509 if (listed)
3510 skip_member_proper (input, member);
3511 else
3512 {
3513 copy_member_proper (input, member);
3514 listed = 1;
3515 }
3516 }
3517
3518 explain_group ('\\', merging, group_limit);
3519 }
3520
3521 /* Copy remaining differences and clean up. */
3522
3523 for (input = input_array; input < input_array + inputs; input++)
3524 {
3525 copy_until (input, input->item_limit, FILE_IN_MARGIN);
3526 close_input (input);
3527 }
3528 }
3529
3530 /*-----------------------------------------------------.
3531 | Study diff output and use it to drive reformatting. |
3532 `-----------------------------------------------------*/
3533
3534 /* Define the separator lines when output is inhibited. */
3535 #define SEPARATOR_LINE \
3536 "======================================================================"
3537
3538 static void
relist_merged_words(void)3539 relist_merged_words (void)
3540 {
3541 int counter;
3542 struct input *input;
3543 struct member *member;
3544 struct cluster *chosen_cluster;
3545 struct reference reference;
3546 struct merging *merging;
3547 struct merging *group_limit;
3548 struct merging *cursor;
3549 int listed;
3550
3551 int count_total_left; /* count of total words in left file */
3552 int count_total_right; /* count of total words in right file */
3553 int count_isolated_left; /* count of deleted words in left file */
3554 int count_isolated_right; /* count of added words in right file */
3555 int count_changed_left; /* count of changed words in left file */
3556 int count_changed_right; /* count of changed words in right file */
3557
3558 /* Prepare terminal. */
3559
3560 launch_output_program (NULL);
3561 initialize_strings ();
3562
3563 /* Rewind input files. */
3564
3565 for (input = input_array; input < input_array + inputs; input++)
3566 {
3567 open_input (input);
3568 input->item = input->first_item;
3569 input_line (input);
3570 }
3571
3572 count_total_left = left->item_limit - left->first_item;
3573 count_total_right = right->item_limit - right->first_item;
3574 count_isolated_left = 0;
3575 count_isolated_right = 0;
3576 count_changed_left = 0;
3577 count_changed_right = 0;
3578
3579 /* Process all merging groups, one at a time. */
3580
3581 for (merging = merging_array;
3582 merging < merging_array + mergings; merging = group_limit)
3583 {
3584 /* Find the extent of the merging group, and count genuine mergings. */
3585
3586 counter = 0;
3587 if (!merging->cross_flag)
3588 counter++;
3589
3590 for (cursor = merging + 1;
3591 cursor < merging_array + mergings && !cursor->group_flag; cursor++)
3592 if (!cursor->cross_flag)
3593 counter++;
3594
3595 group_limit = cursor;
3596
3597 /* Output differences, which are items before members. Also consider
3598 any crossed member as a difference and output it on the same blow. */
3599
3600 for (cursor = merging; cursor < group_limit; cursor++)
3601 {
3602 input = input_array + cursor->input_number;
3603 member = member_array + cursor->member_number;
3604 if (input->listing_allowed)
3605 {
3606 if (input->item < member->first_item || cursor->cross_flag)
3607 {
3608 if (input == left)
3609 copy_whitespace (input, EMPTY_MARGIN);
3610 else
3611 skip_whitespace (input);
3612
3613 if (input == input_array + inputs - 1)
3614 start_of_emphasis (input, BOLD);
3615 else
3616 start_of_emphasis (input, UNDERLINED);
3617
3618 if (input->item < member->first_item)
3619 {
3620 copy_word_item (input);
3621 copy_until (input, member->first_item, FILE_IN_MARGIN);
3622 }
3623 if (cursor->cross_flag)
3624 copy_member_proper (input, member);
3625
3626 end_of_emphasis (input);
3627 }
3628 }
3629 else
3630 {
3631 skip_until (input, member->first_item);
3632 if (cursor->cross_flag)
3633 skip_member_proper (input, member);
3634 }
3635 }
3636
3637 /* Use separator lines to disambiguate the output. */
3638
3639 if (common_listing_allowed)
3640 {
3641 if (!left->listing_allowed && !right->listing_allowed)
3642 fprintf (output_file, "\n%s\n", SEPARATOR_LINE);
3643 }
3644 else
3645 fprintf (output_file, "\n%s\n", SEPARATOR_LINE);
3646
3647 /* Describe members, which we are about to skip. */
3648
3649 listed = 0;
3650 for (cursor = merging; cursor < group_limit; cursor++)
3651 if (!cursor->cross_flag)
3652 {
3653 input = input_array + cursor->input_number;
3654 member = member_array + cursor->member_number;
3655 if (listed)
3656 skip_member_proper (input, member);
3657 else
3658 {
3659 copy_member_proper (input, member);
3660 listed = 1;
3661 }
3662 }
3663 }
3664
3665 /* Copy remaining differences and clean up. Copy from left side if the
3666 user wanted to see only the common code and deleted words. */
3667
3668 if (!common_listing_allowed
3669 && (left->listing_allowed || right->listing_allowed))
3670 fprintf (output_file, "\n%s\n", SEPARATOR_LINE);
3671
3672 for (input = input_array; input < input_array + inputs; input++)
3673 {
3674 if (input->listing_allowed)
3675 {
3676 if (input->item < input->item_limit)
3677 {
3678 if (input == left)
3679 copy_whitespace (input, EMPTY_MARGIN);
3680 else
3681 skip_whitespace (input);
3682
3683 if (input == input_array + inputs - 1)
3684 start_of_emphasis (input, BOLD);
3685 else
3686 start_of_emphasis (input, UNDERLINED);
3687
3688 copy_word_item (input);
3689 copy_until (input, input->item_limit, FILE_IN_MARGIN);
3690
3691 end_of_emphasis (input);
3692 }
3693 copy_whitespace (input, EMPTY_MARGIN);
3694 }
3695 close_input (input);
3696 }
3697
3698 /* Print merging statistics. */
3699
3700 if (verbose)
3701 {
3702 int count_common_left; /* words unchanged in left file */
3703 int count_common_right; /* words unchanged in right file */
3704
3705 count_common_left
3706 = count_total_left - count_isolated_left - count_changed_left;
3707 count_common_right
3708 = count_total_right - count_isolated_right - count_changed_right;
3709
3710 printf (ngettext ("%s: %d word", "%s: %d words", count_total_left),
3711 left->file_name, count_total_left);
3712 if (count_total_left > 0)
3713 {
3714 printf (ngettext (" %d %.0f%% common", " %d %.0f%% common",
3715 count_common_left), count_common_left,
3716 count_common_left * 100. / count_total_left);
3717 printf (ngettext (" %d %.0f%% deleted", " %d %.0f%% deleted",
3718 count_isolated_left), count_isolated_left,
3719 count_isolated_left * 100. / count_total_left);
3720 printf (ngettext (" %d %.0f%% changed", " %d %.0f%% changed",
3721 count_changed_left), count_changed_left,
3722 count_changed_left * 100. / count_total_left);
3723 }
3724 printf ("\n");
3725
3726 printf (ngettext ("%s: %d word", "%s: %d words", count_total_right),
3727 right->file_name, count_total_right);
3728 if (count_total_right > 0)
3729 {
3730 printf (ngettext (" %d %.0f%% common", " %d %.0f%% common",
3731 count_common_right), count_common_right,
3732 count_common_right * 100. / count_total_right);
3733 printf (ngettext (" %d %.0f%% inserted", " %d %.0f%% inserted",
3734 count_isolated_right), count_isolated_right,
3735 count_isolated_right * 100. / count_total_right);
3736 printf (ngettext (" %d %.0f%% changed", " %d %.0f%% changed",
3737 count_changed_right), count_changed_right,
3738 count_changed_right * 100. / count_total_right);
3739 }
3740 printf ("\n");
3741 }
3742
3743 /* Set exit status. */
3744
3745 if (count_isolated_left || count_isolated_right
3746 || count_changed_left || count_changed_right)
3747 exit_status = EXIT_DIFFERENCE;
3748
3749 /* Reset the terminal. */
3750
3751 complete_output_program ();
3752 }
3753
3754 /* Main control. */
3755
3756 /*-----------------------------------------------.
3757 | Explain how to use the program, then get out. |
3758 `-----------------------------------------------*/
3759
3760 static void
usage(int status)3761 usage (int status)
3762 {
3763 if (status != EXIT_SUCCESS)
3764 fprintf (stderr, _("Try `%s --help' for more information.\n"),
3765 program_name);
3766 else
3767 {
3768 /* *INDENT-OFF* */
3769 fputs (_("\
3770 mdiff - Studies multiple files and searches for similar sequences, it then\n\
3771 produces possibly detailed lists of differences and similarities.\n"),
3772 stdout);
3773 fputs ("\n", stdout);
3774 printf (_("\
3775 Usage: %s [OPTION]... [FILE]...\n"),
3776 program_name);
3777
3778 fputs (_("\nOperation modes:\n"), stdout);
3779 fputs (_(" -h (ignored)\n"), stdout);
3780 fputs (_(" -v, --verbose report a few statistics on stderr\n"), stdout);
3781 fputs (_(" --help display this help then exit\n"), stdout);
3782 fputs (_(" --version display program version then exit\n"), stdout);
3783
3784 fputs (_("\nFormatting output:\n"), stdout);
3785 fputs (_(" -T, --initial-tab produce TAB instead of initial space\n"), stdout);
3786 fputs (_(" -l, --paginate paginate output through `pr'\n"), stdout);
3787 fputs (_(" -S, --string[=STRING] take note of another user STRING\n"), stdout);
3788 fputs (_(" -V, --show-links give file and line references in annotations\n"), stdout);
3789 fputs (_(" -t, --expand-tabs expand tabs to spaces in the output\n"), stdout);
3790
3791 #if DEBUGGING
3792 fputs (_("\nDebugging:\n"), stdout);
3793 fputs (_(" -0, --debugging output many details about what is going on\n"), stdout);
3794 #endif
3795
3796 fputs (_("\nWord mode options:\n"), stdout);
3797 fputs (_(" -1, --no-deleted inhibit output of deleted words\n"), stdout);
3798 fputs (_(" -2, --no-inserted inhibit output of inserted words\n"), stdout);
3799 fputs (_(" -3, --no-common inhibit output of common words\n"), stdout);
3800 fputs (_(" -A, --auto-pager automatically calls a pager\n"), stdout);
3801 fputs (_(" -k, --less-mode variation of printer mode for \"less\"\n"), stdout);
3802 fputs (_(" -m, --avoid-wraps do not extend fields through newlines\n"), stdout);
3803 fputs (_(" -o, --printer overstrike as for printers\n"), stdout);
3804 fputs (_(" -z, --terminal use termcap as for terminal displays\n"), stdout);
3805 fputs (_(" -O, --item-regexp=REGEXP compare items as defined by REGEXP\n"), stdout);
3806 fputs (_(" -W, --word-mode compare words instead of lines\n"), stdout);
3807
3808 /***
3809 fputs (_("\nComparing files:\n"), stdout);
3810 fputs (_("* -H, --speed-large-files go faster, for when numerous small changes\n"), stdout);
3811 fputs (_("* -a, --text report line differences (text file default)\n"), stdout);
3812 fputs (_("* -d, --minimal try harder for a smaller set of changes\n"), stdout);
3813 fputs (_("* -q, --brief only says if files differ (binary default)\n"), stdout);
3814 fputs (_("* --horizon-lines=LINES keep LINES lines in common prefixes/suffixes\n"), stdout);
3815 ***/
3816 /***
3817 fputs (_("\nComparing directories:\n"), stdout);
3818 fputs (_("* -N, --new-file consider missing files to be empty\n"), stdout);
3819 fputs (_("* -P, --unidirectional-new-file consider missing old files to be empty\n"), stdout);
3820 fputs (_("* -S, --starting-file=FILE resume directory comparison with FILE\n"), stdout);
3821 fputs (_("* -X, --exclude-from=FILE ignore files matching patterns from FILE\n"), stdout);
3822 fputs (_("* -r, --recursive recursively compare subdirectories\n"), stdout);
3823 fputs (_("* -s, --report-identical-files report when two files are the same\n"), stdout);
3824 fputs (_("* -x, --exclude=PATTERN ignore files (dirs) matching PATTERN\n"), stdout);
3825 ***/
3826 /***
3827 fputs (_("\nIgnoring text:\n"), stdout);
3828 fputs (_(" -B, --ignore-blank-lines ignore blank lines\n"), stdout);
3829 fputs (_("* -I, --ignore-matching-lines=REGEXP ignore lines matching REGEXP\n"), stdout);
3830 fputs (_(" -b, --ignore-space-change ignore amount of white space\n"), stdout);
3831 fputs (_(" -i, --ignore-case ignore case differences\n"), stdout);
3832 fputs (_(" -w, --ignore-all-space ignore white space\n"), stdout);
3833 fputs (_("\nClustering:\n"), stdout);
3834 fputs (_(" -G, --relist-files list all input files with annotations\n"), stdout);
3835 fputs (_(" -J, --minimum-size=ITEMS ignore clusters not having that many ITEMS\n"), stdout);
3836 fputs (_(" -j, --ignore-delimiters do not count items having only delimiters\n"), stdout);
3837 ***/
3838 /***
3839 fputs (_("\nDetailed output formats:\n"), stdout);
3840 fputs (_("* -D, --ifdef=NAME output `#ifdef NAME' format\n"), stdout);
3841 fputs (_("* --changed-group-format=FORMAT use FORMAT for changed lines\n"), stdout);
3842 fputs (_("* --new-group-format=FORMAT use FORMAT for inserted lines\n"), stdout);
3843 fputs (_("* --new-line-format=FORMAT use FORMAT for inserted line\n"), stdout);
3844 fputs (_("* --old-group-format=FORMAT use FORMAT for deleted lines\n"), stdout);
3845 fputs (_("* --old-line-format=FORMAT use FORMAT for deleted line\n"), stdout);
3846 fputs (_("* --unchanged-group-format=FORMAT use FORMAT for unchanged lines\n"), stdout);
3847 fputs (_("* --unchanged-line-format=FORMAT use FORMAT for unchanged line\n"), stdout);
3848 fputs (_("* --line-format=FORMAT --{old,new,unchanged}-line-format\n"), stdout);
3849 ***/
3850 /***
3851 fputs (_("\nScript-like formats:\n"), stdout);
3852 fputs (_(" (none of -CDUcefnuy) output normal diffs\n"), stdout);
3853 fputs (_("* -e, --ed output a valid `ed' script\n"), stdout);
3854 fputs (_("* -f, --forward-ed mix between -e and -n (not very useful)\n"), stdout);
3855 fputs (_("* -n, --rcs output RCS format (internally used by RCS)\n"), stdout);
3856 ***/
3857 /***
3858 fputs (_("\nContext and unified formats:\n"), stdout);
3859 fputs (_("* -F, --show-function-line=REGEXP show previous context matching REGEXP\n"), stdout);
3860 fputs (_("* -p, --show-c-function show which C function for each change\n"), stdout);
3861 ***/
3862 /***
3863 fputs (_("* -C, --context=LINES as -c, also select context size in lines\n"), stdout);
3864 fputs (_("* -L, --label=LABEL use from/to LABEL instead of file name (twice)\n"), stdout);
3865 fputs (_("* -U, --unified=LINES as -u, also select context size in lines\n"), stdout);
3866 fputs (_("* -c, --context output context diffs (default 3 context lines)\n"), stdout);
3867 fputs (_("* -u, --unified output unidiffs (default 3 context lines)\n"), stdout);
3868 fputs (_("* -LINES (obsolete: select context size in lines)\n"), stdout);
3869 ***/
3870 /***
3871 fputs (_("\nSide by side format:\n"), stdout);
3872 fputs (_("* -W, --width=COLUMNS use width of COLUMNS\n"), stdout);
3873 fputs (_("* -y, --side-by-side use side by side output format\n"), stdout);
3874 fputs (_("* --left-column print only left column line when common\n"), stdout);
3875 fputs (_("* --sdiff-merge-assist (internally used by `sdiff')\n"), stdout);
3876 fputs (_("* --suppress-common-lines do not print common lines\n"), stdout);
3877 ***/
3878 /***
3879 fputs (_("\n\
3880 FORMAT is made up of characters standing for themselves, except:\n\
3881 %%%% a single %%\n\
3882 %%c'C' quoted character C\n\
3883 %%c'\\O' character having value O, from 1 to 3 octal digits\n\
3884 %%(A=B?T:E) if A is B then T else E; A B number or VARIABLE; T E FORMAT\n\
3885 %%FN use SPECIF specification F to print VARIABLE value N\n\
3886 %%< [group] old, each line through --old-line-format\n\
3887 %%> [group] new, each line through --new-line-format\n\
3888 %%= [group] unchanged, each line through --unchanged-line-format\n\
3889 %%l [line] without its possible trailing newline\n\
3890 %%L [line] with its possible trailing newline\n"),
3891 stdout);
3892 ***/
3893 /***
3894 fputs (_("\n\
3895 SPECIF is [-][W[.D]]{doxX} as in C printf\n"),
3896 stdout);
3897 ***/
3898 /***
3899 fputs (_("\n\
3900 VARIABLE is {eflmn} for old group or {EFLMN} for new group\n\
3901 {eE} line number just before group\n\
3902 {fF} first line number of group\n\
3903 {lL} last line number of group\n\
3904 {mM} line number just after group\n\
3905 {nN} number of lines in the group\n"),
3906 stdout);
3907 ***/
3908 /***
3909 fputs (_("\nStandard diff options:\n"), stdout);
3910 fputs (_(" -i, --ignore-case consider upper- and lower-case to be the same\n"), stdout);
3911 fputs (_(" -w, --ignore-all-space ignore all white space\n"), stdout);
3912 fputs (_(" -b, --ignore-space-change ignore changes in the amount of white space\n"), stdout);
3913 fputs (_(" -B, --ignore-blank-lines ignore changes whose lines are all blank\n"), stdout);
3914 fputs (_(" -I, --ignore-matching-lines=RE ignore changes whose lines all match RE\n"), stdout);
3915 fputs (_(" -a, --text treat all files as text\n"), stdout);
3916 fputs (_("\
3917 -c, --context[=NUMBER] output regular context diffs,\n\
3918 changing to NUMBER lines of context\n"), stdout);
3919 fputs (_("\
3920 -u, --unified[=NUMBER] output unified context diffs or unidiffs,\n\
3921 with NUMBER lines of context\n"), stdout);
3922 fputs (_(" -C, --context=NUM output NUM lines of copied context\n"), stdout);
3923 fputs (_(" -U, --unified=NUM output NUM lines of unified context\n"), stdout);
3924 fputs (_(" -L, --label=LABEL use LABEL instead of file name\n"), stdout);
3925 fputs (_(" -p, --show-c-function show which C function each change is in\n"), stdout);
3926 fputs (_(" -F, --show-function-line=RE show the most recent line matching RE\n"), stdout);
3927 fputs (_(" -q, --brief output only whether files differ\n"), stdout);
3928 fputs (_(" -e, --ed output an ed script\n"), stdout);
3929 fputs (_(" -n, --rcs output an RCS format diff\n"), stdout);
3930 fputs (_(" -y, --side-by-side output in two columns\n"), stdout);
3931 fputs (_(" -w, --width=NUM output at most NUM (default 130) characters per line\n"), stdout);
3932 fputs (_(" --left-column output only the left column of common lines\n"), stdout);
3933 fputs (_(" --suppress-common-lines do not output common lines\n"), stdout);
3934 fputs (_(" -D, --ifdef=NAME output merged file to show `#ifdef NAME' diffs\n"), stdout);
3935 fputs (_(" --GTYPE-group-format=GFMT GTYPE input groups with GFMT\n"), stdout);
3936 fputs (_(" --line-format=LFMT all input lines with LFMT\n"), stdout);
3937 fputs (_(" --LTYPE-line-format=LFMT LTYPE input lines with LFMT\n"), stdout);
3938 fputs (_(" -l, --paginate pass the output through `pr' to paginate it\n"), stdout);
3939 fputs (_(" -t, --expand-tabs expand tabs to spaces in output\n"), stdout);
3940 fputs (_(" -T, --initial-tab make tabs line up by prepending a tab\n"), stdout);
3941 fputs (_(" -r, --recursive recursively compare any subdirectories found\n"), stdout);
3942 fputs (_(" -N, --new-file treat absent files as empty\n"), stdout);
3943 fputs (_(" -P, --unidirectional-new-file treat absent first files as empty\n"), stdout);
3944 fputs (_(" -s, --report-identical-files report when two files are the same\n"), stdout);
3945 fputs (_(" -x, --exclude=PAT exclude files that match PAT\n"), stdout);
3946 fputs (_(" -X, --exclude-from=FILE exclude files that match any pattern in FILE\n"), stdout);
3947 fputs (_(" -S, --starting-file=FILE start with FILE when comparing directories\n"), stdout);
3948 fputs (_(" --horizon-lines=NUM keep NUM lines of the common prefix and suffix\n"), stdout);
3949 fputs (_(" -d, --minimal try hard to find a smaller set of changes\n"), stdout);
3950 fputs (_(" -H, --speed-large-files assume large files and many scattered small changes\n"), stdout);
3951 ***/
3952 /***
3953 fputs (_("\
3954 \n\
3955 By default, context diffs have an horizon of two lines.\n"),
3956 stdout);
3957 ***/
3958 /***
3959 fputs (_("\
3960 \n\
3961 LTYPE is `old', `new', or `unchanged'. GTYPE is LTYPE or `changed'.\n\
3962 GFMT may contain:\n\
3963 %< lines from FILE1\n\
3964 %> lines from FILE2\n\
3965 %= lines common to FILE1 and FILE2\n\
3966 %[-][WIDTH][.[PREC]]{doxX}LETTER printf-style spec for LETTER\n\
3967 LETTERs are as follows for new group, lower case for old group:\n\
3968 F first line number\n\
3969 L last line number\n\
3970 N number of lines = L-F+1\n\
3971 E F-1\n\
3972 M L+1\n"),
3973 stdout);
3974 fputs (_("\
3975 LFMT may contain:\n\
3976 %L contents of line\n\
3977 %l contents of line, excluding any trailing newline\n\
3978 %[-][WIDTH][.[PREC]]{doxX}n printf-style spec for input line number\n\
3979 Either GFMT or LFMT may contain:\n\
3980 %% %\n\
3981 %c'C' the single character C\n\
3982 %c'\\OOO' the character with octal code OOO\n"),
3983 stdout);
3984 ***/
3985 /***
3986 fputs (_("\nOld mdiff options:\n"), stdout);
3987 fputs (_("* -f, --fuzz-items=ITEMS no more than ITEMS non matching in a cluster\n"), stdout);
3988 ***/
3989
3990 fputs ("\n", stdout);
3991 fputs (_("With no FILE, or when FILE is -, read standard input.\n"), stdout);
3992 fputs ("\n", stdout);
3993 fputs (_("Report bugs to <wdiff-bugs@gnu.org>.\n"), stdout);
3994 /* *INDENT-ON* */
3995 }
3996 exit (status);
3997 }
3998
3999 /*----------------------------------------------------------------------.
4000 | Main program. Decode ARGC arguments passed through the ARGV array of |
4001 | strings, then launch execution. |
4002 `----------------------------------------------------------------------*/
4003
4004 #define UNIMPLEMENTED(Option) \
4005 error (0, 0, _("ignoring option %s (not implemented)"), (Option))
4006
4007 int
main(int argc,char * const * argv)4008 main (int argc, char *const *argv)
4009 {
4010 int option_char; /* option character */
4011 struct input *input; /* cursor in input array */
4012
4013 int inhibit_left = 0;
4014 int inhibit_right = 0;
4015 int inhibit_common = 0;
4016 const char *delete_start = NULL;
4017 const char *delete_stop = NULL;
4018 const char *insert_start = NULL;
4019 const char *insert_stop = NULL;
4020
4021 program_name = argv[0];
4022 setlocale (LC_ALL, "");
4023 bindtextdomain (PACKAGE, LOCALEDIR);
4024 textdomain (PACKAGE);
4025
4026 /* Decode command options. */
4027
4028 while (option_char = getopt_long (argc, (char **) argv, OPTION_STRING,
4029 long_options, NULL), option_char != EOF)
4030 switch (option_char)
4031 {
4032 default:
4033 usage (EXIT_ERROR);
4034
4035 case '\0':
4036 break;
4037
4038 case '0':
4039 #if DEBUGGING
4040 debugging = 1;
4041 verbose = 1;
4042 #else
4043 UNIMPLEMENTED ("--debugging");
4044 #endif
4045 break;
4046
4047 case '1':
4048 inhibit_left = 1;
4049 break;
4050
4051 case '2':
4052 inhibit_right = 1;
4053 break;
4054
4055 case '3':
4056 inhibit_common = 1;
4057 break;
4058
4059 case 'A':
4060 autopager = 1;
4061 break;
4062
4063 case 'B':
4064 ignore_blank_lines = 1;
4065 break;
4066
4067 case 'D':
4068 ifdef = optarg;
4069 UNIMPLEMENTED ("--ifdef");
4070 break;
4071
4072 case 'F':
4073 show_function_line = optarg;
4074 UNIMPLEMENTED ("--show-function-line");
4075 break;
4076
4077 case 'G':
4078 relist_files = 1;
4079 break;
4080
4081 case 'H':
4082 speed_large_files = 1;
4083 UNIMPLEMENTED ("--speed-large-files");
4084 break;
4085
4086 case 'I':
4087 if (ignore_regexps % 8 == 0)
4088 ignore_regexp_array = (struct re_pattern_buffer **)
4089 xrealloc (ignore_regexp_array,
4090 ((ignore_regexps + 8)
4091 * sizeof (struct re_pattern_buffer *)));
4092 ignore_regexp_array[ignore_regexps++]
4093 = alloc_and_compile_regex (optarg);
4094 break;
4095
4096 case 'J':
4097 minimum_size = atoi (optarg);
4098 break;
4099
4100 case 'L':
4101 label = optarg;
4102 UNIMPLEMENTED ("--label");
4103 break;
4104
4105 case 'N':
4106 new_file = 1;
4107 UNIMPLEMENTED ("--new-file");
4108 break;
4109
4110 case 'O':
4111 UNIMPLEMENTED ("--compare-words");
4112 item_regexp = alloc_and_compile_regex (optarg);
4113 break;
4114
4115 case 'P':
4116 unidirectional_new_file = 1;
4117 UNIMPLEMENTED ("--unidirectional-new-file");
4118 break;
4119
4120 case 'Q':
4121 insert_start = optarg;
4122 break;
4123
4124 case 'R':
4125 insert_stop = optarg;
4126 break;
4127
4128 case 'S':
4129 starting_file = optarg;
4130 UNIMPLEMENTED ("--starting-file");
4131 break;
4132
4133 case 'T':
4134 initial_tab = 1;
4135 break;
4136
4137 case 'V':
4138 show_links = 1;
4139 break;
4140
4141 case 'W':
4142 word_mode = 1;
4143 break;
4144
4145 case 'X':
4146 exclude_from = optarg;
4147 UNIMPLEMENTED ("--exclude-from");
4148 break;
4149
4150 case 'Y':
4151 delete_start = optarg;
4152 break;
4153
4154 case 'Z':
4155 delete_stop = optarg;
4156 break;
4157
4158 case 'a':
4159 text = 1;
4160 UNIMPLEMENTED ("--text");
4161 break;
4162
4163 case 'b':
4164 ignore_space_change = 1;
4165 ignore_blank_lines = 1;
4166 break;
4167
4168 case 'C':
4169 case 'c':
4170 UNIMPLEMENTED ("--context");
4171 if (optarg)
4172 horizon_lines = atoi (optarg);
4173 context = 1;
4174 break;
4175
4176 case 'd':
4177 minimal = 1;
4178 UNIMPLEMENTED ("--minimal");
4179 break;
4180
4181 case 'e':
4182 ed = 1;
4183 UNIMPLEMENTED ("--ed");
4184 break;
4185
4186 #if FIXME
4187 case 'h':
4188 UNIMPLEMENTED ("-h");
4189 break;
4190 #endif
4191
4192 case 'i':
4193 ignore_case = 1;
4194 break;
4195
4196 case 'j':
4197 ignore_delimiters = 1;
4198 break;
4199
4200 case 'k':
4201 if (find_termcap < 0)
4202 find_termcap = 0;
4203 overstrike = 1;
4204 overstrike_for_less = 1;
4205 break;
4206
4207 case 'l':
4208 paginate = 1;
4209 break;
4210
4211 case 'm':
4212 avoid_wraps = 1;
4213 break;
4214
4215 case 'n':
4216 rcs = 1;
4217 UNIMPLEMENTED ("--rcs");
4218 break;
4219
4220 case 'o':
4221 overstrike = 1;
4222 break;
4223
4224 case 'p':
4225 show_c_function = 1;
4226 UNIMPLEMENTED ("--show-c-function");
4227 break;
4228
4229 case 'q':
4230 brief = 1;
4231 UNIMPLEMENTED ("--brief");
4232 break;
4233
4234 case 'r':
4235 recursive = 1;
4236 UNIMPLEMENTED ("--recursive");
4237 break;
4238
4239 #if FIXME
4240 case 's':
4241 report_identical_files = 1;
4242 UNIMPLEMENTED ("--report-identical-files");
4243 break;
4244 #endif
4245
4246 case TOLERANCE_OPTION: /* mdiff draft */
4247 UNIMPLEMENTED ("--tolerance");
4248 tolerance = atoi (optarg);
4249 break;
4250
4251 case 't':
4252 expand_tabs = 1;
4253 UNIMPLEMENTED ("--expand-tabs");
4254 break;
4255
4256 case 'U':
4257 case 'u':
4258 UNIMPLEMENTED ("--unified");
4259 if (optarg)
4260 horizon_lines = atoi (optarg);
4261 unified = 1;
4262 break;
4263
4264 case 'v':
4265 verbose = 1;
4266 break;
4267
4268 case 'w':
4269 ignore_all_space = 1;
4270 break;
4271
4272 #if FIXME
4273 case 'w':
4274 width = atoi (optarg);
4275 UNIMPLEMENTED ("--width");
4276 break;
4277 #endif
4278
4279 case 'x':
4280 exclude = optarg;
4281 UNIMPLEMENTED ("--exclude");
4282 break;
4283
4284 case 'y':
4285 side_by_side = 1;
4286 UNIMPLEMENTED ("--side-by-side");
4287 break;
4288
4289 case 'K':
4290 /* compatibility option, equal to -t now */
4291 /* fall through */
4292
4293 case 'z':
4294 #if HAVE_TPUTS
4295 if (find_termcap < 0)
4296 find_termcap = 1;
4297 break;
4298 #else
4299 error (EXIT_ERROR, 0, _("cannot use -z, termcap not available"));
4300 #endif
4301
4302 case GTYPE_GROUP_FORMAT_OPTION:
4303 gtype_group_format = optarg;
4304 UNIMPLEMENTED ("--gtype-group-format");
4305 break;
4306
4307 case HORIZON_LINES_OPTION:
4308 horizon_lines = atoi (optarg);
4309 UNIMPLEMENTED ("--horizon-lines");
4310 break;
4311
4312 case LEFT_COLUMN_OPTION:
4313 left_column = 1;
4314 UNIMPLEMENTED ("--left-column");
4315 break;
4316
4317 case LINE_FORMAT_OPTION:
4318 line_format = optarg;
4319 UNIMPLEMENTED ("--line-format");
4320 break;
4321
4322 case LTYPE_LINE_FORMAT_OPTION:
4323 ltype_line_format = optarg;
4324 UNIMPLEMENTED ("--ltype-line-format");
4325 break;
4326
4327 case SUPPRESS_COMMON_LINES_OPTION:
4328 suppress_common_lines = 1;
4329 UNIMPLEMENTED ("--suppress-common-lines");
4330 break;
4331 }
4332
4333 if (minimum_size < 0)
4334 minimum_size = relist_files ? (word_mode ? 10 : 5) : 1;
4335
4336 if (!relist_files && word_mode && argc - optind != 2)
4337 {
4338 error (0, 0, _("word merging for two files only (so far)"));
4339 usage (EXIT_ERROR);
4340 }
4341
4342 /* If find_termcap still undecided, make it true only if autopager is
4343 set while stdout is directed to a terminal. This decision might be
4344 reversed later, if the pager happens to be "less". */
4345
4346 if (find_termcap < 0)
4347 find_termcap = autopager && isatty (fileno (stdout));
4348
4349 /* Process trivial options. */
4350
4351 if (show_version)
4352 {
4353 printf ("mdiff (GNU %s) %s\n", PACKAGE, VERSION);
4354 fputs (_("\
4355 \n\
4356 Copyright (C) 1992, 1997, 1998, 1999, 2010 Free Software Foundation, Inc.\n"), stdout);
4357 fputs (_("\
4358 This is free software; see the source for copying conditions. There is NO\n\
4359 warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n"), stdout);
4360 fputs (_("\
4361 \n\
4362 Written by Franc,ois Pinard <pinard@iro.umontreal.ca>.\n"), stdout);
4363 exit (EXIT_SUCCESS);
4364 }
4365
4366 if (show_help)
4367 usage (EXIT_SUCCESS);
4368
4369 /* Register all input files. */
4370
4371 if (optind == argc)
4372 new_input ("-");
4373 else
4374 while (optind < argc)
4375 new_input (argv[optind++]);
4376
4377 /* Save some option values. */
4378
4379 if (inputs == 2)
4380 {
4381 left->listing_allowed = !inhibit_left;
4382 left->user_start = delete_start;
4383 left->user_stop = delete_stop;
4384
4385 right->listing_allowed = !inhibit_right;
4386 right->user_start = insert_start;
4387 right->user_stop = insert_stop;
4388
4389 common_listing_allowed = !inhibit_common;
4390 }
4391 else
4392 {
4393 if (inhibit_left || inhibit_right || inhibit_common
4394 || delete_start || delete_stop || insert_start || insert_stop)
4395 error (0, 0, _("options -123RSYZ meaningful only when two inputs"));
4396 }
4397
4398 /* Do all the crunching. */
4399
4400 study_all_inputs ();
4401 prepare_clusters ();
4402 prepare_indirects ();
4403 if (!relist_files)
4404 prepare_mergings ();
4405
4406 /* Output results. */
4407
4408 output_file = stdout;
4409
4410 if (relist_files)
4411 relist_annotated_files ();
4412 else if (context)
4413 relist_merged_lines (0, 1);
4414 else if (unified)
4415 relist_merged_lines (1, 1);
4416 else if (word_mode)
4417 relist_merged_words ();
4418
4419 /* Clean up. */
4420
4421 exit (exit_status);
4422 }
4423