1 /* Find similar sequences in multiple files and report differences.
2    Copyright (C) 1992, 1997, 1998, 1999, 2011 Free Software Foundation, Inc.
3    Francois Pinard <pinard@iro.umontreal.ca>, 1997.
4 
5    This program is free software: you can redistribute it and/or modify
6    it under the terms of the GNU General Public License as published by
7    the Free Software Foundation, either version 3 of the License, or
8    (at your option) any later version.
9 
10    This program is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13    GNU General Public License for more details.
14 
15    You should have received a copy of the GNU General Public License
16    along with this program.  If not, see <http://www.gnu.org/licenses/>.
17 
18 */
19 
20 #include "wdiff.h"
21 
22 /* Define to 1 to compile in a debugging option.  */
23 #define DEBUGGING 1
24 
25 /* Define to the printed name of standard input.  */
26 #define STDIN_PRINTED_NAME "<stdin>"
27 
28 /* Define to 1 for requesting the capability of doing full comparisons, then
29    not necessarily relying on checksums (both methods are available).  If 0
30    instead, always rely on checksums and pack them with item types into the
31    item structure: this yields faster code and disallows paranoid mode.  */
32 #define SAFER_SLOWER 0
33 
34 /* One may also, optionally, define a default PAGER_PROGRAM.  This
35    might be done using the --with-default-pager=PAGER configure
36    switch.  If PAGER_PROGRAM is undefined and neither the WDIFF_PAGER
37    nor the PAGER environment variable is set, none will be used.  */
38 
39 /* We do termcap init ourselves, so pass -X.
40    We might do coloring, so pass -R. */
41 #define LESS_DEFAULT_OPTS "-X -R"
42 
43 /*-----------------------.
44 | Library declarations.  |
45 `-----------------------*/
46 
47 #if DEBUGGING
48 # include <assert.h>
49 #else
50 # define assert (Expr)
51 #endif
52 
53 #include <ctype.h>
54 #include <string.h>
55 
56 char *strstr ();
57 
58 #if HAVE_TPUTS
59 # if HAVE_TERMCAP_H
60 #  include <termcap.h>
61 # else
62 #  if HAVE_TERMLIB_H
63 #   include <termlib.h>
64 #  else
65 #   if HAVE_CURSES_H
66 #    include <curses.h>
67 #   else
68 #    if HAVE_NCURSES_H
69 #     include <ncurses.h>
70 #    endif
71 #   endif
72 #   if HAVE_TERM_H
73 #    include <term.h>
74 #   endif
75 #  endif
76 # endif
77 #endif
78 
79 #include <fcntl.h>
80 #include <sys/stat.h>
81 #include <unistd.h>
82 #include <getopt.h>
83 #include <locale.h>
84 #include <sys/wait.h>
85 
86 #include "regex.h"
87 #define CHAR_SET_SIZE 256
88 
89 #define MASK(Length) (~(~0 << (Length)))
90 
91 /*---------------------.
92 | Local declarations.  |
93 `---------------------*/
94 
95 /* Exit codes values.  */
96 #define EXIT_DIFFERENCE 1	/* some differences found */
97 #define EXIT_ERROR 2		/* any other reason for exit */
98 
99 /* Define pseudo short options for long options without short options.  */
100 #define GTYPE_GROUP_FORMAT_OPTION	10
101 #define HORIZON_LINES_OPTION		11
102 #define LEFT_COLUMN_OPTION		12
103 #define LINE_FORMAT_OPTION		13
104 #define LTYPE_LINE_FORMAT_OPTION	14
105 #define SUPPRESS_COMMON_LINES_OPTION	15
106 #define TOLERANCE_OPTION		16
107 
108 /* The name this program was run with. */
109 const char *program_name;
110 
111 /* If nonzero, display usage information and exit.  */
112 static int show_help = 0;
113 
114 /* If nonzero, print the version on standard output and exit.  */
115 static int show_version = 0;
116 
117 /* Other variables.  */
118 
119 FILE *output_file;		/* file or pipe to which we write output */
120 int exit_status = EXIT_SUCCESS;	/* status at end of execution */
121 
122 /* Options variables.  */
123 
124 #define OPTION_STRING \
125   "0123ABC::D:F:GHI:J:KL:NO:PQ:RS:TU::VWX:Y:Z:abc::dehijklmnopqrs:tu::vw:xyz"
126 /*      BC: D:F: HI:   L:N  P   S:TU:   X:    abc  dehi  l n pqrs:tu  vw:xy  */
127 /* The line above gives GNU diff options, for reference.  */
128 
129 /* Long options equivalences.  */
130 static const struct option long_options[] = {
131   {"auto-pager", no_argument, NULL, 'A'},
132   {"avoid-wraps", no_argument, NULL, 'm'},
133   {"brief", no_argument, NULL, 'q'},
134   {"context", optional_argument, NULL, 'c'},
135   {"debugging", no_argument, NULL, '0'},
136   {"ed", no_argument, NULL, 'e'},
137   {"exclude-from", required_argument, NULL, 'X'},
138   {"exclude", required_argument, NULL, 'x'},
139   {"expand-tabs", no_argument, NULL, 't'},
140   {"GTYPE-group-format", required_argument, NULL, GTYPE_GROUP_FORMAT_OPTION},
141   {"help", no_argument, &show_help, 1},
142   {"horizon-lines", required_argument, NULL, HORIZON_LINES_OPTION},
143   {"ifdef", required_argument, NULL, 'D'},
144   {"ignore-all-space", no_argument, NULL, 'w'},
145   {"ignore-blank-lines", no_argument, NULL, 'B'},
146   {"ignore-case", no_argument, NULL, 'i'},
147   {"ignore-matching-lines", required_argument, NULL, 'I'},
148   {"ignore-space-change", no_argument, NULL, 'b'},
149   {"initial-tab", no_argument, NULL, 'T'},
150   {"item-regexp", required_argument, NULL, 'O'},
151   {"label", required_argument, NULL, 'L'},
152   {"left-column", no_argument, NULL, LEFT_COLUMN_OPTION},
153   {"less-mode", no_argument, NULL, 'k'},
154   {"line-format", required_argument, NULL, LINE_FORMAT_OPTION},
155   {"LTYPE-line-format", required_argument, NULL, LTYPE_LINE_FORMAT_OPTION},
156   {"minimal", no_argument, NULL, 'd'},
157   {"minimum-size", required_argument, NULL, 'J'},
158   {"new-file", no_argument, NULL, 'N'},
159   {"no-common", no_argument, NULL, '3'},
160   {"no-deleted", no_argument, NULL, '1'},
161   {"no-init-term", no_argument, NULL, 'K'},	/* backwards compatibility */
162   {"no-inserted", no_argument, NULL, '2'},
163   {"ignore-delimiters", no_argument, NULL, 'j'},
164   {"paginate", no_argument, NULL, 'l'},
165   {"printer", no_argument, NULL, 'o'},
166   {"rcs", no_argument, NULL, 'n'},
167   {"recursive", no_argument, NULL, 'r'},
168   {"relist-files", no_argument, NULL, 'G'},
169   {"report-identical-files", no_argument, NULL, 's'},
170   {"show-c-function", no_argument, NULL, 'p'},
171   {"show-function-line", required_argument, NULL, 'F'},
172   {"show-links", no_argument, NULL, 'R'},
173   {"side-by-side", no_argument, NULL, 'y'},
174   {"speed-large-files", no_argument, NULL, 'H'},
175   {"starting-file", required_argument, NULL, 'S'},
176   {"string", optional_argument, NULL, 'Z'},
177   {"suppress-common-lines", no_argument, NULL, SUPPRESS_COMMON_LINES_OPTION},
178   {"terminal", no_argument, NULL, 'z'},
179   {"text", no_argument, NULL, 'a'},
180   {"tolerance", required_argument, NULL, TOLERANCE_OPTION},
181   {"unidirectional-new-file", no_argument, NULL, 'P'},
182   {"unified", optional_argument, NULL, 'u'},
183   {"verbose", no_argument, NULL, 'v'},
184   {"version", no_argument, &show_version, 1},
185   {"width", required_argument, NULL, 'w'},
186   {"word-mode", required_argument, NULL, 'W'},
187   {0, 0, 0, 0},
188 };
189 
190 /*--------------------.
191 | Available options.  |
192 `--------------------*/
193 
194 /* If calling the pager automatically.  */
195 static int autopager = 0;
196 
197 /* End/restart strings at end of lines.  */
198 static int avoid_wraps = 0;
199 
200 /* If nonzero, output quite verbose detail of operations.  */
201 static int debugging = 0;
202 
203 /* Initialize the termcap strings.  */
204 static int find_termcap = -1;	/* undecided yet */
205 
206 /* Ignore all white space.  */
207 static int ignore_all_space = 0;
208 
209 /* Ignore changes whose lines are all blank.  */
210 static int ignore_blank_lines = 0;
211 
212 /* Consider upper-case and lower-case to be the same.  */
213 static int ignore_case = 0;
214 
215 /* If nonzero, segregate matchable only items out of normal items.  */
216 static int ignore_delimiters = 0;
217 
218 /* Ignore changes whose lines all match regular expression.  */
219 static struct re_pattern_buffer **ignore_regexp_array = NULL;
220 static int ignore_regexps = 0;
221 
222 /* Ignore changes in the amount of white space.  */
223 static int ignore_space_change = 0;
224 
225 /* Make tabs line up by prepending a tab.  */
226 static int initial_tab = 0;
227 
228 /* Try hard to find a smaller set of changes.  Unimplemented.  */
229 static int minimal = 0;
230 
231 /* Minimum size of a cluster, not counting ignored items.  */
232 static int minimum_size = -1;	/* undecided yet */
233 
234 /* If using printer overstrikes.  */
235 static int overstrike = 0;
236 
237 /* If output aimed to the "less" program (overstrike even whitespace).  */
238 static int overstrike_for_less = 0;
239 
240 /* Pass the output through `pr' to paginate it.  */
241 static int paginate = 0;
242 
243 /* List all input files with annotations.  */
244 static int relist_files = 0;
245 
246 /* Give file and line references in annotated listings.  */
247 static int show_links = 0;
248 
249 /* Assume large files and many scattered small changes.  Unimplemented.  */
250 static int speed_large_files = 0;
251 
252 /* If nonzero, show progress of operations.  */
253 static int verbose = 0;
254 
255 /* Compare words, use REGEXP to define item.  */
256 static int word_mode = 0;
257 static struct re_pattern_buffer *item_regexp = NULL;
258 
259 /*---------------.
260 | diff options.  |
261 `---------------*/
262 
263 /* Treat all files as text.  */
264 static int text = 0;
265 
266 /* Output regular context diffs.  */
267 static int context = 0;
268 
269 /* Output unified context diffs or unidiffs.  */
270 static int unified = 0;
271 
272 /* Use LABEL instead of file name.  */
273 static const char *label = NULL;
274 
275 /* Show which C function each change is in.  */
276 static int show_c_function = 0;
277 
278 /* Show the most recent line matching RE.  */
279 static const char *show_function_line = NULL;
280 
281 /* Output only whether files differ.  */
282 static int brief = 0;
283 
284 /* Output an ed script.  */
285 static int ed = 0;
286 
287 /* Output an RCS format diff.  */
288 static int rcs = 0;
289 
290 /* Output in two columns.  */
291 static int side_by_side = 0;
292 
293 /* Output at most NUM characters per line.  */
294 static int width = 130;
295 
296 /* Output only the left column of common lines.  */
297 static int left_column = 0;
298 
299 /* Do not output common lines.  */
300 static int suppress_common_lines = 0;
301 
302 /* Output merged file to show `#ifdef NAME' diffs.  */
303 static const char *ifdef = NULL;
304 
305 /* GTYPE input groups with GFMT.  */
306 static const char *gtype_group_format = NULL;
307 
308 /* All input lines with LFMT.  */
309 static const char *line_format = NULL;
310 
311 /* LTYPE input lines with LFMT.  */
312 static const char *ltype_line_format = NULL;
313 
314 /* Expand tabs to spaces in output.  */
315 static int expand_tabs = 0;
316 
317 /* Recursively compare any subdirectories found.  */
318 static int recursive = 0;
319 
320 /* Treat absent files as empty.  */
321 static int new_file = 0;
322 
323 /* Treat absent first files as empty.  */
324 static int unidirectional_new_file = 0;
325 
326 /* Report when two files are the same.  */
327 static int report_identical_files = 0;
328 
329 /* Exclude files that match PAT.  */
330 static const char *exclude = NULL;
331 
332 /* Exclude files that match any pattern in FILE.  */
333 static const char *exclude_from = NULL;
334 
335 /* Start with FILE when comparing directories.  */
336 static const char *starting_file = NULL;
337 
338 /* Keep NUM lines of the common prefix and suffix.  */
339 static int horizon_lines = 2;
340 
341 /*----------------------.
342 | mdiff draft options.  |
343 `----------------------*/
344 
345 /* Maximum number of non-matching items for cluster members.  */
346 static int tolerance = 0;
347 
348 /* Regular expressions.  */
349 
350 /*-------------------------------------------------------------------.
351 | Compile the regex represented by STRING, diagnose and abort if any |
352 | error.  Returns the compiled regex structure.			     |
353 `-------------------------------------------------------------------*/
354 
355 static struct re_pattern_buffer *
alloc_and_compile_regex(const char * string)356 alloc_and_compile_regex (const char *string)
357 {
358   struct re_pattern_buffer *pattern;	/* newly allocated structure */
359   const char *message;		/* error message returned by regex.c */
360 
361   pattern = (struct re_pattern_buffer *)
362     xmalloc (sizeof (struct re_pattern_buffer));
363   memset (pattern, 0, sizeof (struct re_pattern_buffer));
364 
365   pattern->buffer = NULL;
366   pattern->allocated = 0;
367 #if 0
368   /* FIXME */
369   pattern->translate = ignore_case ? (char *) folded_chars : NULL;
370 #endif
371 
372   pattern->fastmap = xmalloc ((size_t) CHAR_SET_SIZE);
373 
374   message = re_compile_pattern (string, (int) strlen (string), pattern);
375   if (message)
376     error (EXIT_ERROR, 0, _("%s (for regexp `%s')"), message, string);
377 
378   /* The fastmap should be compiled before `re_match'.  This would not be
379      necessary if `re_search' was called first.  */
380 
381   re_compile_fastmap (pattern);
382 
383 #if WITH_REGEX
384 
385   /* Do not waste extra allocated space.  */
386 
387   if (pattern->allocated > pattern->used)
388     {
389       pattern->buffer = xrealloc (pattern->buffer, (size_t) pattern->used);
390       pattern->allocated = pattern->used;
391     }
392 #endif
393 
394   return pattern;
395 }
396 
397 /* Items.  */
398 
399 /* Each item has a type which is meaningful to the clustering process.
400    NORMAL items and DELIMS items fully participate in cluster
401    construction, yet only NORMAL items are worth participating into building
402    the minimum size count, merely DELIMS items do not.  WHITE items, even if
403    they do exist in input files, are a little like if they were just not
404    there: they are often skipped over.  SENTINEL items do not correspond to
405    any real input item, they are used to mark file boundaries, and may not
406    compare equally to any other item.
407 
408    The names NORMAL, DELIMS, WHITE and SENTINEL are a bit generic and are
409    used mainly to trigger the imagination of the reader.  Practice may be
410    quite different.  For example, actual white items may well be classified
411    as DELIMS or NORMAL, items having only delimiters may be seen as NORMAL,
412    items making up a block comment may be considered as WHITE.  Moreover,
413    SENTINEL items could be introduced at function boundaries, say, if we
414    wanted to limit cluster members to a single function, etc.  */
415 
416 enum type
417 {
418   NORMAL,			/* meaningful, countable items */
419   DELIMS,			/* items having only delimiters */
420   WHITE,			/* white items */
421   SENTINEL			/* file boundaries */
422 };
423 
424 /* Comparison work is done on checksums of items, rather than input items
425    themselves.  The following array maintains one entry for each item,
426    whether a real item or a sentinel.  Checksums, which are meaningful only
427    for NORMAL or DELIMS items, are computed so they ignore parts of the item
428    which are not really meaningful for the purpose of comparisons.
429 
430    If SAFER_SLOWER has been selected, checksums may be avoided and pointers
431    used instead.  For pointers to occupy the full space they might need, the
432    type bits are kept apart but tightly packed.  Otherwise, the TYPE bits
433    are part of the structured item, eating out some chekcsum space.  */
434 
435 #define BITS_PER_CHAR		8
436 #define BITS_PER_TYPE		2
437 #define BITS_PER_WORD		(sizeof (unsigned) * BITS_PER_CHAR)
438 #define TYPES_PER_WORD		(BITS_PER_WORD / BITS_PER_TYPE)
439 
440 #if SAFER_SLOWER
441 
442 # define ITEM			union item
443 
444 union item
445 {
446   unsigned checksum;
447   char *pointer;
448 };
449 
450 static union item *item_array = NULL;
451 static unsigned *type_array = NULL;
452 static int items = 0;
453 
454 static inline enum type
item_type(ITEM * item)455 item_type (ITEM * item)
456 {
457   int position = item - item_array;
458   int shift = position % TYPES_PER_WORD * BITS_PER_TYPE;
459   unsigned *pointer = type_array + position / TYPES_PER_WORD;
460 
461   return (enum type) (MASK (BITS_PER_TYPE) & *pointer >> shift);
462 }
463 
464 static inline void
set_item_type(ITEM * item,enum type type)465 set_item_type (ITEM * item, enum type type)
466 {
467   int position = item - item_array;
468   int shift = position % TYPES_PER_WORD * BITS_PER_TYPE;
469   unsigned *pointer = type_array + position / TYPES_PER_WORD;
470 
471   *pointer &= ~MASK (BITS_PER_TYPE) << shift;
472   *pointer |= type << shift;
473 }
474 
475 #else /* not SAFER_SLOWER */
476 
477 # define ITEM			struct item
478 
479 struct item
480 {
481   enum type type:BITS_PER_TYPE;
482   unsigned checksum:BITS_PER_WORD - BITS_PER_TYPE;
483 };
484 
485 static struct item *item_array = NULL;
486 static int items = 0;
487 
488 static inline enum type
item_type(ITEM * item)489 item_type (ITEM * item)
490 {
491   return item->type;
492 }
493 
494 static inline void
set_item_type(ITEM * item,enum type type)495 set_item_type (ITEM * item, enum type type)
496 {
497   item->type = type;
498 }
499 
500 #endif /* not SAFER_SLOWER */
501 
502 #if DEBUGGING
503 
504 /*---------------------.
505 | Dump a given INPUT.  |
506 `---------------------*/
507 
508 static void
dump_item(ITEM * item)509 dump_item (ITEM * item)
510 {
511   static const char *item_type_string[4] = { "", "delim", "white", "SENTIN" };
512 
513   fprintf (stderr, "(%ld)\t%s\t%08x\n", (long) (item - item_array),
514 	   item_type_string[item_type (item)], item->checksum);
515 }
516 
517 /*------------------.
518 | Dump all items.  |
519 `------------------*/
520 
521 static void
dump_all_items(void)522 dump_all_items (void)
523 {
524   int counter;
525 
526   fputs ("\f\n", stderr);
527   for (counter = 0; counter < items; counter++)
528     dump_item (item_array + counter);
529 }
530 
531 #endif /* DEBUGGING */
532 
533 /*-------------------------------------------------------------------.
534 | Move item pointer forward or backward, skipping over white items.  |
535 `-------------------------------------------------------------------*/
536 
537 #define FORWARD_ITEM(Pointer) \
538   do { Pointer++; } while (item_type (Pointer) == WHITE)
539 
540 #define BACKWARD_ITEM(Pointer) \
541   do { Pointer--; } while (item_type (Pointer) == WHITE)
542 
543 /*------------------------------------------------------------------.
544 | Sort helper.  Compare two item indices.  Lower indices go first.  |
545 `------------------------------------------------------------------*/
546 
547 static int
compare_for_positions(const void * void_first,const void * void_second)548 compare_for_positions (const void *void_first, const void *void_second)
549 {
550 #define value1 *((int *) void_first)
551 #define value2 *((int *) void_second)
552 
553   return value1 - value2;
554 
555 #undef value1
556 #undef value2
557 }
558 
559 /*-------------------------------------------------------------------------.
560 | Sort helper.  Compare two item indices.  Items having identical	   |
561 | checksums should be brought together.  Whenever two checksums are equal, |
562 | items are then ordered by the checksum of the following items, and this  |
563 | way going forward until a difference is found.			   |
564 `-------------------------------------------------------------------------*/
565 
566 static int
compare_for_checksum_runs(const void * void_first,const void * void_second)567 compare_for_checksum_runs (const void *void_first, const void *void_second)
568 {
569 #define value1 *((int *) void_first)
570 #define value2 *((int *) void_second)
571 
572   ITEM *item1 = item_array + value1;
573   ITEM *item2 = item_array + value2;
574   int result;
575 
576   while (1)
577     {
578       /* Order checksums.  */
579 
580       result = item1->checksum - item2->checksum;
581       if (result)
582 	return result;
583 
584       /* Seek forward for a difference.  */
585 
586       FORWARD_ITEM (item1);
587       FORWARD_ITEM (item2);
588 
589       /* Sentinels are never equal (unless really the same).  They go after
590          all checksums, and compare so to stabilise the sort.  */
591 
592       if (item_type (item1) == SENTINEL)
593 	return item_type (item2) == SENTINEL ? value1 - value2 : 1;
594 
595       if (item_type (item2) == SENTINEL)
596 	return -1;
597     }
598 
599 #undef value1
600 #undef value2
601 }
602 
603 /*-----------------.
604 | Add a new item.  |
605 `-----------------*/
606 
607 static inline void
new_item(enum type type,int checksum)608 new_item (enum type type, int checksum)
609 {
610   ITEM *item;
611 
612   if (items % (64 * TYPES_PER_WORD) == 0)
613     {
614       item_array = (ITEM *)
615 	xrealloc (item_array, (items + 64 * TYPES_PER_WORD) * sizeof (ITEM));
616 #if SAFER_SLOWER
617       type_array = (unsigned *)
618 	xrealloc (type_array,
619 		  (items / TYPES_PER_WORD + 64) * sizeof (unsigned));
620 #endif
621     }
622 
623   item = item_array + items++;
624   set_item_type (item, type);
625   item->checksum = checksum;
626 }
627 
628 /*---------------------.
629 | Add a new sentinel.  |
630 `---------------------*/
631 
632 static void
new_sentinel(void)633 new_sentinel (void)
634 {
635   new_item (SENTINEL, 0);
636 }
637 
638 /*------------------------------------------------------------------------.
639 | Find how many identical normal items follow, given two start items.	  |
640 | Ensure matchable items correspond to each other, but skip over white	  |
641 | items.  The starting items may not be sentinels nor white items.  Fuzzy |
642 | items, if any, ought to be embedded and correspond to each other.	  |
643 `------------------------------------------------------------------------*/
644 
645 static int
identical_size(int index1,int index2)646 identical_size (int index1, int index2)
647 {
648   ITEM *item1 = item_array + index1;
649   ITEM *item2 = item_array + index2;
650   int normal_count = 0;
651   int mismatch_count = 0;
652 
653   while (1)
654     {
655       if (item1->checksum == item2->checksum
656 #if 0
657 	  /* Just assume that identical checksums imply identical types.  */
658 	  && item_type (item1) == item_type (item2)
659 #endif
660 	)
661 	{
662 	  if (item_type (item1) == NORMAL)
663 	    normal_count++;
664 	}
665       else if (normal_count > 0 && mismatch_count < tolerance)
666 	{
667 	  if (item_type (item1) == NORMAL && item_type (item2) == NORMAL)
668 	    normal_count++;
669 	  mismatch_count++;
670 	}
671       else
672 	break;
673 
674       FORWARD_ITEM (item1);
675       FORWARD_ITEM (item2);
676 
677       if (item_type (item1) == SENTINEL || item_type (item2) == SENTINEL)
678 	break;
679     }
680 
681   return normal_count;
682 }
683 
684 /*-----------------------------------------------------------------------.
685 | Count normal items from START to FINISH (excluded).  However, if there |
686 | are MAXIMUM or more normal input items, return a negative number.      |
687 `-----------------------------------------------------------------------*/
688 
689 static int
item_distance(int start,int finish,int maximum)690 item_distance (int start, int finish, int maximum)
691 {
692   int counter = 0;
693   ITEM *item;
694 
695   assert (start < finish);
696   assert (maximum > 0);
697 
698   for (item = item_array + start; item < item_array + finish; item++)
699     if (item_type (item) == NORMAL)
700       {
701 	counter++;
702 	if (counter == maximum)
703 	  return -1;
704       }
705 
706   return counter;
707 }
708 
709 /* Files.  */
710 
711 /* All input files use a single continuous item counting scheme.  To revert
712    item numbers back to actual file and item references, the following array
713    maintains the cumulated number items seen prior to the beginning of each
714    file, for each file given as argument to the program.  To ease some
715    algorithms, a few sentinel items exist: one before the first file, one
716    between each file, and one after the last file.  Each file counts all
717    sentinels preceding it.  */
718 
719 struct input
720 {
721   const char *file_name;	/* name of input file */
722   struct stat stat_buffer;	/* stat information for the file */
723   char nick_name[4];		/* short name of the file */
724   FILE *file;			/* file being read */
725   char *memory_copy;		/* buffer containing the file, or NULL */
726 
727   /* Reading the file, one line or one character at a time.  */
728   char *line;			/* line from file */
729   char *cursor;			/* cursor into line, or NULL at end of file */
730   char *limit;			/* limit value for cursor */
731   size_t line_allocated;	/* allocated length of line */
732 
733   /* Rescanning of the file, one item at a time.  */
734   int first_item;		/* index of first item in this file */
735   int item_limit;		/* one past last item index in this file */
736   int item;			/* index of next item to consider */
737 
738   /* Merging views.  */
739   int *indirect_cursor;		/* cursor into indirect_array */
740   int *indirect_limit;		/* one past limit value of indirect_cursor */
741 
742   /* Output control.  */
743   short listing_allowed;	/* if difference output allowed */
744   const char *user_start;	/* user string before difference */
745   const char *user_stop;	/* user string after difference */
746   const char *term_start;	/* terminal string before difference */
747   const char *term_stop;	/* terminal string after difference */
748 };
749 
750 static struct input *input_array = NULL;
751 static int inputs = 0;
752 
753 int common_listing_allowed;	/* if common output allowed */
754 
755 /* Convenience macros.  */
756 
757 #define left (input_array + 0)
758 #define right (input_array + 1)
759 
760 #define INPUT_MEMBER(Input) \
761   (member_array + *(Input)->indirect_cursor)
762 
763 #define INPUT_CLUSTER(Input) \
764   (cluster_array + member_array[*(Input)->indirect_cursor].cluster_number)
765 
766 #if DEBUGGING
767 
768 /*---------------------.
769 | Dump a given INPUT.  |
770 `---------------------*/
771 
772 static void
dump_input(struct input * input)773 dump_input (struct input *input)
774 {
775   fprintf (stderr, "%s1,%d\t%7d\t%s\n",
776 	   input->nick_name, input->item_limit - input->first_item,
777 	   input->first_item, input->file_name);
778 }
779 
780 /*------------------.
781 | Dump all inputs.  |
782 `------------------*/
783 
784 static void
dump_all_inputs(void)785 dump_all_inputs (void)
786 {
787   int counter;
788 
789   fputs ("\f\n", stderr);
790   for (counter = 0; counter < inputs; counter++)
791     dump_input (input_array + counter);
792 }
793 
794 #endif /* DEBUGGING */
795 
796 /*---------------------------------------------------------------------.
797 | Swallow the whole INPUT into a contiguous region of memory.  In the  |
798 | INPUT structure, file name and stat buffer are already initialised.  |
799 `---------------------------------------------------------------------*/
800 
801 /* Define to reallocation increment when swallowing input.  */
802 #define SWALLOW_BUFFER_STEP 20000
803 
804 static void
swallow_input(struct input * input)805 swallow_input (struct input *input)
806 {
807   int handle;			/* file descriptor number */
808   size_t allocated_length;	/* allocated length of memory buffer */
809   size_t length;		/* total length read so far */
810   int read_length;		/* number of character gotten on last read */
811 
812   /* Standard input is already opened.  In all other cases, open the file
813      from its name.  */
814 
815   if (strcmp (input->file_name, STDIN_PRINTED_NAME) == 0)
816     handle = fileno (stdin);
817   else if (handle = open (input->file_name, O_RDONLY), handle < 0)
818     error (EXIT_ERROR, errno, "%s", input->file_name);
819 
820   /* If the file is a plain, regular file, allocate the memory buffer all at
821      once and swallow the file in one blow.  In other cases, read the file
822      repeatedly in smaller chunks until we have it all, reallocating memory
823      once in a while, as we go.  */
824 
825 #if MSDOS
826 
827   /* On MSDOS, we cannot predict in memory size from file size, because of
828      end of line conversions.  */
829 
830 #else
831 
832   if (S_ISREG (input->stat_buffer.st_mode))
833     {
834       input->memory_copy = (char *)
835 	xmalloc ((size_t) input->stat_buffer.st_size);
836 
837       if (read
838 	  (handle, input->memory_copy,
839 	   (size_t) input->stat_buffer.st_size) != input->stat_buffer.st_size)
840 	error (EXIT_ERROR, errno, "%s", input->file_name);
841     }
842   else
843 #endif
844 
845     {
846       input->memory_copy = xmalloc ((size_t) SWALLOW_BUFFER_STEP);
847       allocated_length = SWALLOW_BUFFER_STEP;
848 
849       length = 0;
850       while (read_length = read (handle, input->memory_copy + length,
851 				 allocated_length - length), read_length > 0)
852 	{
853 	  length += read_length;
854 	  if (length == allocated_length)
855 	    {
856 	      allocated_length += SWALLOW_BUFFER_STEP;
857 	      input->memory_copy = (char *)
858 		xrealloc (input->memory_copy, allocated_length);
859 	    }
860 	}
861 
862       if (read_length < 0)
863 	error (EXIT_ERROR, errno, "%s", input->file_name);
864     }
865 
866   /* Close the file, but only if it was not the standard input.  */
867 
868   if (handle != fileno (stdin))
869     close (handle);
870 }
871 
872 /*-------------------------------------------.
873 | Register a file NAME to be later studied.  |
874 `-------------------------------------------*/
875 
876 static void
new_input(const char * name)877 new_input (const char *name)
878 {
879   static int stdin_swallowed = 0;
880   struct input *input;
881 
882   /* Add a new input file descriptor, read in stdin right away.  */
883 
884   if (inputs % 16 == 0)
885     input_array = (struct input *)
886       xrealloc (input_array, (inputs + 16) * sizeof (struct input));
887 
888   input = input_array + inputs++;
889   if (strcmp (name, "") == 0 || strcmp (name, "-") == 0)
890     if (stdin_swallowed)
891       error (EXIT_ERROR, 0, _("only one file may be standard input"));
892     else
893       {
894 	input->file_name = STDIN_PRINTED_NAME;
895 	if (fstat (fileno (stdin), &input->stat_buffer) != 0)
896 	  error (EXIT_ERROR, errno, "%s", input->file_name);
897 	swallow_input (input);
898 	stdin_swallowed = 1;
899       }
900   else
901     {
902       input->file_name = name;
903       if (stat (input->file_name, &input->stat_buffer) != 0)
904 	error (EXIT_ERROR, errno, "%s", input->file_name);
905       if ((input->stat_buffer.st_mode & S_IFMT) == S_IFDIR)
906 	error (EXIT_ERROR, 0, _("directories not supported"));
907       input->memory_copy = NULL;
908     }
909 }
910 
911 /*-------------------------------.
912 | Prepare INPUT for re-reading.  |
913 `-------------------------------*/
914 
915 static void
open_input(struct input * input)916 open_input (struct input *input)
917 {
918   if (input->memory_copy)
919     input->limit = input->memory_copy;
920   else
921     {
922       if (input->file = fopen (input->file_name, "r"), !input->file)
923 	error (EXIT_ERROR, errno, "%s", input->file_name);
924 
925       input->line = NULL;
926     }
927   input->line_allocated = 0;
928 }
929 
930 static void
close_input(struct input * input)931 close_input (struct input *input)
932 {
933   if (input->line_allocated > 0)
934     free (input->line);
935 
936   if (!input->memory_copy)
937     fclose (input->file);
938 }
939 
940 /*-------------------------------------------------------------------------.
941 | Prepare INPUT for next line, then available from INPUT->LINE through     |
942 | INPUT->LIMIT (excluded).  INPUT->CURSOR will point to first character of |
943 | the line, or will be NULL at end of file.                                |
944 `-------------------------------------------------------------------------*/
945 
946 static void
input_line(struct input * input)947 input_line (struct input *input)
948 {
949   if (input->memory_copy)
950     {
951       const char *limit = input->memory_copy + input->stat_buffer.st_size;
952       char *cursor;
953 
954       input->line = input->limit;
955 
956       for (cursor = input->line; cursor < limit && *cursor != '\n'; cursor++)
957 	;
958 
959       if (cursor < limit)
960 	{
961 	  cursor++;
962 	  input->cursor = input->line;
963 	}
964       else if (cursor == input->line)
965 	input->cursor = NULL;
966       else
967 	input->cursor = input->line;
968 
969       input->limit = cursor;
970     }
971   else
972     {
973       int length
974 	= getline (&input->line, &input->line_allocated, input->file);
975 
976       if (length < 0)
977 	input->cursor = NULL;
978       else
979 	{
980 	  input->cursor = input->line;
981 	  input->limit = input->line + length;
982 	}
983     }
984 }
985 
986 /*-------------------------------------------------------------------------.
987 | Prepare INPUT for next character, then available through *INPUT->CURSOR, |
988 | unless INPUT->CURSOR is NULL for indicating end of file.                 |
989 `-------------------------------------------------------------------------*/
990 
991 static void
input_character_helper(struct input * input)992 input_character_helper (struct input *input)
993 {
994   int counter;
995 
996   /* Read in a new line, but skip ignorable ones.  FIXME: This is just not
997      right.  Ignorable lines may need copying!  */
998 
999   do
1000     {
1001       input_line (input);
1002 
1003       /* Possibly check if the line should be ignored.  */
1004 
1005       for (counter = 0; counter < ignore_regexps; counter++)
1006 	if (re_match (ignore_regexp_array[counter],
1007 		      input->line, input->limit - input->line, 0, NULL) > 0)
1008 	  break;
1009     }
1010   while (counter < ignore_regexps);
1011 
1012   assert (!input->cursor || input->cursor < input->limit);
1013 }
1014 
1015 static inline void
input_character(struct input * input)1016 input_character (struct input *input)
1017 {
1018   assert (input->cursor);
1019   input->cursor++;
1020   if (input->cursor == input->limit)
1021     input_character_helper (input);
1022 }
1023 
1024 /*------------------------------------------------------------.
1025 | Construct descriptors for all items of a given INPUT file.  |
1026 `------------------------------------------------------------*/
1027 
1028 static void
study_input(struct input * input)1029 study_input (struct input *input)
1030 {
1031 #if SAVE_AND_SLOW
1032 # define ADJUST_CHECKSUM(Character)                                \
1033   checksum = (checksum << 5) + (checksum >> (BITS_PER_WORD - 5))   \
1034     + (Character & 0xFF)
1035 #else
1036 # define ADJUST_CHECKSUM(Character)                                \
1037   checksum = (checksum << 5) +                                     \
1038     (checksum >> (BITS_PER_WORD - BITS_PER_TYPE - 5))              \
1039     + (Character & 0xFF)
1040 #endif
1041 
1042   int item_count;		/* number of items read */
1043   char *buffer = NULL;		/* line buffer */
1044   int length = 0;		/* actual line length in buffer */
1045 
1046   /* Read the file and checksum all items.  */
1047 
1048   if (verbose)
1049     fprintf (stderr, _("Reading %s"), input->file_name);
1050 
1051   open_input (input);
1052   input->first_item = items;
1053   item_count = 0;
1054 
1055   /* Read all lines.  */
1056 
1057   while (input_line (input), input->cursor)
1058     {
1059       int counter;
1060 
1061       /* Possibly check if the line should be ignored.  */
1062 
1063       for (counter = 0; counter < ignore_regexps; counter++)
1064 	if (re_match (ignore_regexp_array[counter],
1065 		      input->line, input->limit - input->line, 0, NULL) > 0)
1066 	  break;
1067 
1068       if (counter < ignore_regexps)
1069 	{
1070 	  if (!word_mode)
1071 	    new_item (WHITE, 0);
1072 	  continue;
1073 	}
1074 
1075       /* The line is not being ignored.  */
1076 
1077       /* We prefer `cursor < buffer + length' over `*cursor' in the loop
1078          tests, so embedded NULs can be handled.  */
1079 
1080       if (word_mode)
1081 	{
1082 	  char *cursor = input->line;
1083 	  unsigned checksum;
1084 
1085 	  /* FIXME: Might be simplified out of the line loop.  */
1086 
1087 	  while (cursor < input->limit)
1088 	    {
1089 	      while (cursor < input->limit && isspace (*cursor))
1090 		cursor++;
1091 
1092 	      if (cursor < input->limit)
1093 		{
1094 		  checksum = 0;
1095 		  while (cursor < input->limit && !isspace (*cursor))
1096 		    {
1097 		      ADJUST_CHECKSUM (*cursor);
1098 		      cursor++;
1099 		    }
1100 		  new_item (NORMAL, checksum);
1101 		  item_count++;
1102 		}
1103 	    }
1104 	}
1105       else
1106 	{
1107 	  char *cursor;
1108 	  unsigned checksum = 0;
1109 	  int line_has_delims = 0;
1110 	  int line_has_alnums = 0;
1111 
1112 	  for (cursor = input->line; cursor < input->limit; cursor++)
1113 	    if (isspace (*cursor))
1114 	      {
1115 		if (!ignore_all_space)
1116 		  {
1117 		    if (ignore_space_change)
1118 		      {
1119 			ADJUST_CHECKSUM (' ');
1120 			while (cursor + 1 < buffer + length
1121 			       && isspace (cursor[1]))
1122 			  cursor++;
1123 		      }
1124 		    else
1125 		      ADJUST_CHECKSUM (*cursor);
1126 		  }
1127 	      }
1128 	    else if (isalpha (*cursor))
1129 	      {
1130 		line_has_alnums = 1;
1131 		if (ignore_case && islower (*cursor))
1132 		  {
1133 		    char character = toupper (*cursor);
1134 
1135 		    ADJUST_CHECKSUM (character);
1136 		  }
1137 		else
1138 		  ADJUST_CHECKSUM (*cursor);
1139 	      }
1140 	    else if (isdigit (*cursor))
1141 	      {
1142 		line_has_alnums = 1;
1143 		ADJUST_CHECKSUM (*cursor);
1144 	      }
1145 	    else
1146 	      {
1147 		line_has_delims = 1;
1148 		ADJUST_CHECKSUM (*cursor);
1149 	      }
1150 
1151 	  /* Register the checksum.  */
1152 
1153 	  if (line_has_alnums)
1154 	    new_item (NORMAL, checksum);
1155 	  else if (line_has_delims)
1156 	    new_item (ignore_delimiters ? DELIMS : NORMAL, checksum);
1157 	  else if (ignore_blank_lines)
1158 	    new_item (WHITE, checksum);
1159 	  else
1160 	    new_item (ignore_delimiters ? DELIMS : NORMAL, checksum);
1161 
1162 	  item_count++;
1163 	}
1164     }
1165 
1166   input->item_limit = items;
1167 
1168   /* Cleanup.  */
1169 
1170   close_input (input);
1171 
1172   if (verbose)
1173     fprintf (stderr, ngettext (", %d item\n", ", %d items\n", item_count),
1174 	     item_count);
1175 
1176 #undef ADJUST_CHECKSUM
1177 }
1178 
1179 /*------------------------.
1180 | Study all input files.  |
1181 `------------------------*/
1182 
1183 /* If all input files fit in MAXIMUM_TOTAL_BUFFER, they are all swallowed in
1184    memory at once.  Otherwise, they are read from disk one line at a time.
1185    A fixed value is far than ideal: we should consider how much real memory
1186    space is available, and I do not know how to do this portably.  */
1187 #define MAXIMUM_TOTAL_BUFFER 500000
1188 
1189 static void
study_all_inputs(void)1190 study_all_inputs (void)
1191 {
1192   struct input *input;
1193   int total_size;
1194 
1195   /* Compute nick names for all files.  */
1196 
1197   if (inputs == 2)
1198     {
1199       /* If only two input files, just mimick `diff' output.  */
1200 
1201       strcpy (input_array[0].nick_name, "-");
1202       strcpy (input_array[1].nick_name, "+");
1203     }
1204   else
1205     {
1206       int name_length = 1;
1207       int file_limit = 26;
1208 
1209       /* Choose nick names as sequences of lower case letters.  */
1210 
1211       while (inputs > file_limit)
1212 	{
1213 	  name_length++;
1214 	  file_limit *= 26;
1215 	}
1216 
1217       for (input = input_array; input < input_array + inputs; input++)
1218 	{
1219 	  int value = input - input_array;
1220 	  char *cursor = input->nick_name + name_length;
1221 
1222 	  *cursor-- = '\0';
1223 	  while (cursor >= input->nick_name)
1224 	    {
1225 	      *cursor-- = value % 26 + 'a';
1226 	      value /= 26;
1227 	    }
1228 	}
1229     }
1230 
1231   /* Swallow all files not already copied in memory, if room permits.  */
1232 
1233   total_size = 0;
1234   for (input = input_array; input < input_array + inputs; input++)
1235     total_size += input->stat_buffer.st_size;
1236 
1237   if (total_size <= MAXIMUM_TOTAL_BUFFER)
1238     for (input = input_array; input < input_array + inputs; input++)
1239       if (!input->memory_copy)
1240 	swallow_input (input);
1241 
1242   /* Compute all checksums.  */
1243 
1244   new_sentinel ();
1245   for (input = input_array; input < input_array + inputs; input++)
1246     {
1247       study_input (input);
1248       new_sentinel ();
1249     }
1250 
1251   if (verbose)
1252     {
1253       fprintf (stderr, _("Read summary:"));
1254       fprintf (stderr, ngettext (" %d file,", " %d files,", inputs), inputs);
1255       fprintf (stderr, ngettext (" %d item\n", " %d items\n",
1256 				 items - inputs - 1), items - inputs - 1);
1257     }
1258 
1259 #if DEBUGGING
1260   if (debugging)
1261     {
1262       dump_all_items ();
1263       dump_all_inputs ();
1264     }
1265 #endif
1266 }
1267 
1268 /* Item references.  */
1269 
1270 struct reference
1271 {
1272   struct input *input;		/* input file for reference */
1273   int number;			/* item number for reference */
1274 };
1275 
1276 /*-------------------------------------------------------------------------.
1277 | Explicit a reference for internal item NUMBER.  The first sentinel has   |
1278 | line 0, other sentinels have one more than number of lines in the file.  |
1279 `-------------------------------------------------------------------------*/
1280 
1281 static struct reference
get_reference(int number)1282 get_reference (int number)
1283 {
1284   struct input *input;
1285   struct reference result;
1286 
1287   assert (number < items);
1288 
1289   /* FIXME: if we are studying many files, linear searches become a bit
1290      crude, binary searches would be better in such cases.  */
1291 
1292   for (input = input_array; number >= input->item_limit + 1; input++)
1293     ;
1294 
1295   result.input = input;
1296   result.number = number - input->first_item + 1;
1297   return result;
1298 }
1299 
1300 /*------------------------------------------------------------------------.
1301 | Produce a short string referencing an internal item NUMBER.  The result |
1302 | is statically allocated into a ring of little buffers.                  |
1303 `------------------------------------------------------------------------*/
1304 
1305 static char *
reference_string(int number)1306 reference_string (int number)
1307 {
1308 #define RING_LENGTH 4
1309 
1310   struct reference reference;
1311   static char buffer[RING_LENGTH][20];
1312   static int next = RING_LENGTH - 1;
1313 
1314   if (next == RING_LENGTH - 1)
1315     next = 0;
1316   else
1317     next++;
1318 
1319   reference = get_reference (number);
1320 
1321   if (number == reference.input->item_limit)
1322     sprintf (buffer[next], "%s.EOF", reference.input->nick_name);
1323   else
1324     sprintf (buffer[next], "%s%d",
1325 	     reference.input->nick_name, reference.number);
1326 
1327   return buffer[next];
1328 
1329 #undef RING_LENGTH
1330 }
1331 
1332 #if DEBUGGING
1333 
1334 /*-------------------------------------------------------------------------.
1335 | Dump a reference to a given item in compilation error format.  Ensure at |
1336 | least one space, and in fact, enough to align diagnostics as we go.      |
1337 `-------------------------------------------------------------------------*/
1338 
1339 static void
dump_reference(int item)1340 dump_reference (int item)
1341 {
1342   struct reference reference;
1343   char buffer[20];
1344   static int last_width = 0;
1345   int width;
1346 
1347   reference = get_reference (item);
1348   sprintf (buffer, "%d", reference.number);
1349 
1350   fprintf (stderr, "%s:%s: ", reference.input->file_name, buffer);
1351   width = strlen (reference.input->file_name) + strlen (buffer);
1352   if (width < last_width)
1353     while (width++ < last_width)
1354       putc (' ', stderr);
1355   else
1356     last_width = width;
1357 }
1358 
1359 #endif /* DEBUGGING */
1360 
1361 /* Clusters and members.  */
1362 
1363 /* A cluster represents one set of items which is repeated at two or more
1364    places in the input files, while each repetition is called a cluster
1365    member.  The member size is an item count which excludes white items.  */
1366 
1367 struct cluster
1368 {
1369   short first_member;		/* index in member_array array */
1370   short item_count;		/* size of each member */
1371 };
1372 
1373 static struct cluster *cluster_array = NULL;
1374 int clusters = 0;
1375 
1376 /* A cluster member starts at some item number and runs for a given number
1377    of items.  Ignorable items may be embedded in a cluster member, so
1378    cluster members of a single cluster may have different lengths, but a
1379    cluster member never starts nor end with an white item.  Sentinels
1380    are never part of any cluster.  By construction, all cluster members of a
1381    same cluster are next to each other.  */
1382 
1383 struct member
1384 {
1385   short cluster_number;		/* ordinal of cluster, counted from 0 */
1386   int first_item;		/* number of first item for this member */
1387 };
1388 
1389 static struct member *member_array = NULL;
1390 static int members = 0;
1391 
1392 /* Convenience macros.  */
1393 
1394 #define MEMBER_LIMIT(Cluster) \
1395   (member_array + ((Cluster) + 1)->first_member)
1396 
1397 #define MEMBERS(Cluster) \
1398   (((Cluster) + 1)->first_member - (Cluster)->first_member)
1399 
1400 /*---------------------------------------------------------------------.
1401 | Find how many input items are part of a given cluster MEMBER.  The   |
1402 | returned count includes embedded items having less significance, but |
1403 | exclude last white items.                                            |
1404 `---------------------------------------------------------------------*/
1405 
1406 static int
real_member_size(struct member * member)1407 real_member_size (struct member *member)
1408 {
1409   int number = cluster_array[member->cluster_number].item_count;
1410   ITEM *item = item_array + member->first_item;
1411   ITEM *last_item;
1412 
1413   /* Skip over NUMBER normal items.  */
1414 
1415   while (number > 0)
1416     switch (item_type (item))
1417       {
1418       case NORMAL:
1419 	number--;
1420 	/* Fall through.  */
1421 
1422       case DELIMS:
1423       case WHITE:
1424 	item++;
1425 	break;
1426 
1427       case SENTINEL:
1428 	/* Cannot happen.  */
1429 	abort ();
1430       }
1431 
1432   /* Skip over supplementary matchable or white items, return the item
1433      count as for the last normal or matchable item.  */
1434 
1435   last_item = item;
1436   while (1)
1437     switch (item_type (item))
1438       {
1439       case NORMAL:
1440       case SENTINEL:
1441 	return last_item - item_array - member->first_item;
1442 
1443       case DELIMS:
1444 	last_item = item++;
1445 	break;
1446 
1447       case WHITE:
1448 	item++;
1449 	break;
1450       }
1451 }
1452 
1453 #if DEBUGGING
1454 
1455 /*-----------------------.
1456 | Dump a given CLUSTER.  |
1457 `-----------------------*/
1458 
1459 static void
dump_cluster(struct cluster * cluster)1460 dump_cluster (struct cluster *cluster)
1461 {
1462   int counter;
1463   struct member *member;
1464 
1465   fprintf (stderr, "{%ld},%d",
1466 	   (long) (cluster - cluster_array), cluster->item_count);
1467   for (counter = cluster->first_member;
1468        counter < (cluster + 1)->first_member; counter++)
1469     {
1470       member = member_array + counter;
1471       fprintf (stderr, "%c[%d] %s,%d",
1472 	       counter == cluster->first_member ? '\t' : ' ', counter,
1473 	       reference_string (member->first_item),
1474 	       real_member_size (member));
1475     }
1476   putc ('\n', stderr);
1477 }
1478 
1479 /*--------------------.
1480 | Dump all clusters.  |
1481 `--------------------*/
1482 
1483 static void
dump_all_clusters(void)1484 dump_all_clusters (void)
1485 {
1486   int counter;
1487 
1488   fputs ("\f\n", stderr);
1489   for (counter = 0; counter < clusters; counter++)
1490     dump_cluster (cluster_array + counter);
1491 }
1492 
1493 /*----------------------.
1494 | Dump a given MEMBER.  |
1495 `----------------------*/
1496 
1497 static void
dump_member(struct member * member)1498 dump_member (struct member *member)
1499 {
1500   fprintf (stderr, " [%ld]\t{%d}\t%s,%d+\n",
1501 	   (long) (member - member_array), member->cluster_number,
1502 	   reference_string (member->first_item),
1503 	   cluster_array[member->cluster_number].item_count);
1504 }
1505 
1506 /*-------------------.
1507 | Dump all members.  |
1508 `-------------------*/
1509 
1510 static void
dump_all_members(void)1511 dump_all_members (void)
1512 {
1513   int counter;
1514 
1515   fputs ("\f\n", stderr);
1516   for (counter = 0; counter < members; counter++)
1517     dump_member (member_array + counter);
1518 }
1519 
1520 #endif /* DEBUGGING */
1521 
1522 /*----------------------------------------------------------------------.
1523 | Sort helper.  Compare two member indices, using the original input    |
1524 | order.  When two members have the same start, put the longest first.  |
1525 `----------------------------------------------------------------------*/
1526 
1527 static int
compare_for_member_start(const void * void_first,const void * void_second)1528 compare_for_member_start (const void *void_first, const void *void_second)
1529 {
1530 #define value1 *((int *) void_first)
1531 #define value2 *((int *) void_second)
1532 
1533   int result;
1534 
1535   struct member *member1 = member_array + value1;
1536   struct member *member2 = member_array + value2;
1537 
1538   result = member1->first_item - member2->first_item;
1539   if (result)
1540     return result;
1541 
1542   return (cluster_array[member2->cluster_number].item_count
1543 	  - cluster_array[member1->cluster_number].item_count);
1544 
1545 #undef value1
1546 #undef value2
1547 }
1548 
1549 /*---------------------------------------------------------------------.
1550 | Add a new cluster, for which each member has COUNT non-white items.  |
1551 `---------------------------------------------------------------------*/
1552 
1553 static inline void
new_cluster(int count)1554 new_cluster (int count)
1555 {
1556   struct cluster *cluster;
1557 
1558   if (clusters % 8 == 0)
1559     cluster_array = (struct cluster *)
1560       xrealloc (cluster_array, (clusters + 8) * sizeof (struct cluster));
1561 
1562   cluster = cluster_array + clusters++;
1563   cluster->first_member = members;
1564   cluster->item_count = count;
1565 }
1566 
1567 /*---------------------------.
1568 | Add a new cluster member.  |
1569 `---------------------------*/
1570 
1571 static inline void
new_member(int item)1572 new_member (int item)
1573 {
1574   struct member *member;
1575 
1576   if (members % 8 == 0)
1577     member_array = (struct member *)
1578       xrealloc (member_array, (members + 8) * sizeof (struct member));
1579 
1580   member = member_array + members++;
1581   member->cluster_number = clusters - 1;
1582   member->first_item = item;
1583 }
1584 
1585 /*------------------------------------.
1586 | Output a cluster explanation line.  |
1587 `------------------------------------*/
1588 
1589 static void
explain_member(char prefix,struct member * quote)1590 explain_member (char prefix, struct member *quote)
1591 {
1592   struct cluster *cluster = cluster_array + quote->cluster_number;
1593   struct member *member;
1594   int counter;
1595   struct reference reference;
1596 
1597   putc (prefix, output_file);
1598   for (member = member_array + cluster->first_member;
1599        member < MEMBER_LIMIT (cluster); member++)
1600     {
1601       reference = get_reference (member->first_item);
1602       fprintf (output_file, member == quote ? " %s%d,%d" : " (%s%d,%d)",
1603 	       reference.input->nick_name, reference.number,
1604 	       real_member_size (member));
1605     }
1606   putc ('\n', output_file);
1607 }
1608 
1609 /*----------------------.
1610 | Search for clusters.  |
1611 `----------------------*/
1612 
1613 static void
prepare_clusters(void)1614 prepare_clusters (void)
1615 {
1616   /* The array of indirect items have similar contents next to each other.  */
1617   int *indirect_item_array = xmalloc (items * sizeof (int));
1618   int indirect_items = 0;	/* number of entries in indirect_item_array */
1619   int *cluster_set;		/* index for first member in set of clusters */
1620   int *member_set;		/* index for first member in current cluster */
1621 
1622   /* For a given index, SIZE_ARRAY[index] describes the common item size
1623      between item CLUSTER_SET[index] and CLUSTER_SET[index + 1].  */
1624   int *size_array = NULL;
1625   int allocated_sizes = 0;	/* allocated entries for size_array */
1626   int sizes;			/* number of entries in size_array */
1627   int *size;			/* cursor in size_array */
1628 
1629   /* Here is a bag of starts indices for members of the same cluster (having
1630      all CLUSTER_SIZE worth items).  These indices are values selected out
1631      of INDIRECT_ITEM_ARRAY, between FIRST and LAST (included).  */
1632   int *sorter_array = NULL;	/* for ensuring members are in nice order */
1633   int sorters;			/* number of members in sorter_array */
1634   int *sorter;			/* cursor into sorter_array */
1635   int *cursor;			/* cursor into sorter_array */
1636 
1637   ITEM *item;			/* cursor in item_array */
1638 
1639   int cluster_size;		/* size of current cluster */
1640   int size_value;		/* current size, or previous cluster size */
1641   int counter;			/* all purpose counter */
1642   int checksum;			/* possible common cheksum of previous items */
1643 
1644   /* Sort indices.  */
1645 
1646 #if DEBUGGING
1647   if (debugging)
1648     fprintf (stderr, _("Sorting"));
1649 #endif
1650 
1651   for (counter = 0; counter < items; counter++)
1652     if (item_type (item_array + counter) == NORMAL
1653 	|| item_type (item_array + counter) == DELIMS)
1654       indirect_item_array[indirect_items++] = counter;
1655   if (indirect_items < items)
1656     indirect_item_array = (int *)
1657       xrealloc (indirect_item_array, indirect_items * sizeof (int));
1658   qsort (indirect_item_array, indirect_items, sizeof (int),
1659 	 compare_for_checksum_runs);
1660 
1661   /* Find all clusters.  */
1662 
1663 #if DEBUGGING
1664   if (debugging)
1665     fprintf (stderr, _(", clustering"));
1666 #endif
1667 
1668   for (cluster_set = indirect_item_array;
1669        cluster_set + 1 < indirect_item_array + indirect_items;
1670        cluster_set += sizes + 1)
1671     {
1672       /* Find all members beginning with the same run of checksums.  These
1673          will be later distributed among a few clusters of various sizes,
1674          and so, are common to the incoming set of clusters.  */
1675 
1676       sizes = 0;
1677       while (cluster_set + sizes + 1 < indirect_item_array + indirect_items)
1678 	{
1679 	  size_value
1680 	    = identical_size (cluster_set[sizes], cluster_set[sizes + 1]);
1681 	  if (size_value < minimum_size)
1682 	    break;
1683 
1684 	  if (sizes == allocated_sizes)
1685 	    {
1686 	      allocated_sizes += 32;
1687 	      size_array = (int *)
1688 		xrealloc (size_array, allocated_sizes * sizeof (int));
1689 	      sorter_array = (int *)
1690 		xrealloc (sorter_array, (allocated_sizes + 1) * sizeof (int));
1691 	    }
1692 	  size_array[sizes++] = size_value;
1693 	}
1694 
1695       /* This test is not necessary for the algorithm to work, but this
1696          fairly common case might be worth a bit of speedup.  Maybe!  */
1697 
1698       if (sizes == 0)
1699 	continue;
1700 
1701       /* Output all clusters of the set from the tallest to the shortest.
1702          Here, size refers to number of items in members, and not to the
1703          number of members in a cluster.  (In fact, it is very expectable
1704          that shortest clusters have more members.)  */
1705 
1706       cluster_size = 0;
1707       while (1)
1708 	{
1709 	  /* Discover the maximum cluster size not yet processed.  */
1710 
1711 	  size_value = cluster_size;
1712 	  cluster_size = 0;
1713 
1714 	  for (size = size_array; size < size_array + sizes; size++)
1715 	    if ((size_value == 0 || *size < size_value)
1716 		&& *size > cluster_size)
1717 	      cluster_size = *size;
1718 
1719 	  /* If the cluster size did not decrease, we cannot do more.  */
1720 
1721 	  if (cluster_size == 0)
1722 	    break;
1723 
1724 	  /* Consider all consecutive members having at least the cluster
1725 	     size in common: they form a cluster.  But any gap in the
1726 	     sequence represents a change of cluster: same length, but
1727 	     different contents.  */
1728 
1729 	  size = size_array;
1730 	  while (1)
1731 	    {
1732 	      /* Find all members for one cluster.  However, after having
1733 	         skipped over the gap, just break out if nothing remains.  */
1734 
1735 	      while (size < size_array + sizes && *size < cluster_size)
1736 		size++;
1737 	      if (size == size_array + sizes)
1738 		break;
1739 
1740 	      member_set = cluster_set + (size - size_array);
1741 	      sorters = 1;
1742 	      while (size < size_array + sizes && *size >= cluster_size)
1743 		{
1744 		  sorters++;
1745 		  size++;
1746 		}
1747 
1748 	      /* No cluster may be a proper subset of another.  That is, if
1749 	         all putative members are preceded by identical items, then
1750 	         they are indeed part of a bigger cluster, which has already
1751 	         been or will later be caught.  In such case, just avoid
1752 	         retaining them here.  This also prevents quadratic
1753 	         behaviour which, I presume, would be heavy on computation.
1754 
1755 	         This test may be defeated by tolerant matches.  Maybe
1756 	         tolerant matches will just go away.  Surely, tolerant
1757 	         matches later require subset member elimination.  */
1758 
1759 	      item = item_array + member_set[0];
1760 	      BACKWARD_ITEM (item);
1761 	      if (item_type (item) != SENTINEL)
1762 		{
1763 		  checksum = item->checksum;
1764 
1765 		  for (counter = 1; counter < sorters; counter++)
1766 		    {
1767 		      item = item_array + member_set[counter];
1768 		      BACKWARD_ITEM (item);
1769 		      if (item_type (item) == SENTINEL
1770 			  || item->checksum != checksum)
1771 			break;
1772 		    }
1773 		  if (counter == sorters)
1774 		    continue;
1775 		}
1776 
1777 	      /* Skip any cluster member overlapping with the previous
1778 	         member of the same cluster.  If doing so, chop the size of
1779 	         the first member just before the overlap point: this should
1780 	         later trigger a cluster having this reduced size.  */
1781 
1782 	      memcpy (sorter_array, member_set, sorters * sizeof (int));
1783 	      qsort (sorter_array, sorters, sizeof (int),
1784 		     compare_for_positions);
1785 	      cursor = sorter_array;
1786 	      for (counter = 1; counter < sorters; counter++)
1787 		{
1788 		  size_value = item_distance (*cursor, sorter_array[counter],
1789 					      cluster_size);
1790 		  if (size_value < 0)
1791 		    *++cursor = sorter_array[counter];
1792 		  else
1793 		    {
1794 		      int counter2;
1795 
1796 		      for (counter2 = 0; counter2 < sizes; counter2++)
1797 			if (cluster_set[counter2] == *cursor
1798 			    || cluster_set[counter2] == sorter_array[counter])
1799 			  {
1800 			    assert (size_value < size_array[counter2]);
1801 			    size_array[counter2] = size_value;
1802 			    break;
1803 			  }
1804 		      assert (counter2 < sizes);
1805 		    }
1806 		}
1807 	      sorters = cursor - sorter_array + 1;
1808 
1809 	      /* Create the cluster only if at least two members remain.  */
1810 
1811 	      if (sorters >= 2)
1812 		{
1813 		  new_cluster (cluster_size);
1814 		  for (counter = 0; counter < sorters; counter++)
1815 		    new_member (sorter_array[counter]);
1816 		}
1817 	    }
1818 	}
1819     }
1820 
1821   /* Add a sentinel cluster at the end.  */
1822 
1823   new_cluster (0);
1824   clusters--;
1825 
1826   /* Cleanup.  */
1827 
1828   free (indirect_item_array);
1829   free (size_array);
1830 
1831 #if DEBUGGING
1832   if (debugging)
1833     {
1834       fprintf (stderr, _(", done\n"));
1835       dump_all_clusters ();
1836     }
1837 #endif
1838 }
1839 
1840 /*------------------------------------------------------------.
1841 | Check if a MEMBER is active in some file other than INPUT.  |
1842 `------------------------------------------------------------*/
1843 
1844 static int
active_elsewhere(struct member * member,struct input * input)1845 active_elsewhere (struct member *member, struct input *input)
1846 {
1847   struct cluster *cluster = cluster_array + member->cluster_number;
1848 
1849   /* FIXME: maybe it is sufficient to test first and last member only?  */
1850 
1851   for (member = member_array + cluster->first_member;
1852        member < MEMBER_LIMIT (cluster); member++)
1853     if (member->first_item < input->first_item
1854 	|| member->first_item + real_member_size (member) > input->item_limit)
1855       return 1;
1856 
1857   return 0;
1858 }
1859 
1860 /* Indirect members.  */
1861 
1862 /* The following array gives indices into the array of cluster members,
1863    member_array above, in such a way that the succession of indices points
1864    to members in the original textual order of input files.  */
1865 
1866 static int *indirect_array;
1867 static int indirects;
1868 
1869 /*----------------------------------------.
1870 | Compute the array of indirect members.  |
1871 `----------------------------------------*/
1872 
1873 static void
prepare_indirects(void)1874 prepare_indirects (void)
1875 {
1876   int counter;
1877   struct member *member;
1878   struct input *input;
1879   int duplicated_items;
1880 
1881 #if DEBUGGING
1882   if (debugging)
1883     fprintf (stderr, _("Sorting members"));
1884 #endif
1885 
1886   indirect_array = xmalloc (members * sizeof (int));
1887   for (counter = 0; counter < members; counter++)
1888     indirect_array[counter] = counter;
1889   if (members > 1)
1890     qsort (indirect_array, members, sizeof (int), compare_for_member_start);
1891   indirects = members;
1892 
1893 #if DEBUGGING
1894   if (debugging)
1895     {
1896       fprintf (stderr, _(", done\n"));
1897       fputs ("\f\n", stderr);
1898       for (counter = 0; counter < indirects; counter++)
1899 	{
1900 	  member = member_array + indirect_array[counter];
1901 	  dump_reference (member->first_item);
1902 	  dump_member (member_array + indirect_array[counter]);
1903 	  if (counter < indirects - 1
1904 	      && (member_array[indirect_array[counter + 1]].first_item
1905 		  < member->first_item + real_member_size (member)))
1906 	    {
1907 	      dump_reference (member->first_item);
1908 	      fprintf (stderr, "Overlapping with next member\n");
1909 	    }
1910 	}
1911     }
1912 #endif
1913 
1914   if (verbose)
1915     {
1916       fprintf (stderr, _("Work summary:"));
1917       fprintf (stderr, ngettext (" %d cluster,", " %d clusters,", clusters),
1918 	       clusters);
1919       fprintf (stderr, ngettext (" %d member\n", " %d members\n", members),
1920 	       members);
1921     }
1922 }
1923 
1924 /* Mergings.  */
1925 
1926 /* A merging is an indication for advancing in one given input file, by
1927    catching up all items (differences) until a cluster member, and then
1928    skipping over that member.  Mergings are ordered so to maintain parallel
1929    advancement between files.  A merging group is a fragment in the sequence
1930    of consecutive mergings, all mergings in a group have members from the
1931    same cluster.  A merging group is then variable in length, a flag in the
1932    merging sequence signals the beginning of a new group.
1933 
1934    However, some cluster members might have to be considered as differences
1935    preceding the more genuine member of a group, because they represent
1936    cross matches which are not easily represented in a parallel sequence.
1937    Another flag signals such members, which then escape the rule that all
1938    members of a group share the same cluster.  */
1939 
1940 struct merging
1941 {
1942   unsigned group_flag:1;	/* beginning of a new merge group */
1943   unsigned cross_flag:1;	/* member isolated by cross matches */
1944   unsigned input_number:14;	/* input file designator */
1945   unsigned member_number:16;	/* member designator */
1946 };
1947 
1948 static struct merging *merging_array = NULL;
1949 static int mergings = 0;
1950 
1951 #if DEBUGGING
1952 
1953 /*--------------------.
1954 | Dump all mergings.  |
1955 `--------------------*/
1956 
1957 static void
dump_all_mergings(void)1958 dump_all_mergings (void)
1959 {
1960   struct merging *merging;
1961   struct reference reference;
1962 
1963   fputs ("\f\n", stderr);
1964   for (merging = merging_array; merging < merging_array + mergings; merging++)
1965     {
1966       if (merging->group_flag)
1967 	{
1968 	  if (merging > merging_array)
1969 	    putc ('\n', stderr);
1970 	  fprintf (stderr, "<%ld>", (long) (merging - merging_array));
1971 	}
1972       reference
1973 	= get_reference (member_array[merging->member_number].first_item);
1974       fprintf (stderr, "\t[%d]%c%s%d", merging->member_number,
1975 	       merging->cross_flag ? 'X' : ' ',
1976 	       reference.input->nick_name, reference.number);
1977       assert (reference.input == input_array + merging->input_number);
1978     }
1979   putc ('\n', stderr);
1980 }
1981 
1982 #endif /* DEBUGGING */
1983 
1984 /*------------------------------------------------------------------.
1985 | After having output PREFIX, explain the merging group starting at |
1986 | MERGING and extending until LIMIT.                                |
1987 `------------------------------------------------------------------*/
1988 
1989 static void
explain_group(char prefix,struct merging * merging,struct merging * limit)1990 explain_group (char prefix, struct merging *merging, struct merging *limit)
1991 {
1992   struct cluster *cluster
1993     = cluster_array + member_array[merging->member_number].cluster_number;
1994   int counter;
1995   struct input *input;
1996   struct member *member;
1997   struct merging *cursor;
1998   struct reference reference;
1999 
2000   putc (prefix, output_file);
2001   for (counter = cluster->first_member;
2002        counter < (cluster + 1)->first_member; counter++)
2003     {
2004       member = member_array + counter;
2005       reference = get_reference (member->first_item);
2006 
2007       for (cursor = merging;
2008 	   cursor < limit && cursor->member_number != counter; cursor++)
2009 	;
2010 
2011       fprintf (output_file, cursor < limit ? " %s%d,%d" : " (%s%d,%d)",
2012 	       reference.input->nick_name,
2013 	       reference.number, real_member_size (member));
2014     }
2015   putc ('\n', output_file);
2016 }
2017 
2018 /*--------------------.
2019 | Add a new merging.  |
2020 `--------------------*/
2021 
2022 static void
new_merging(int group,int cross,struct input * input,struct member * member)2023 new_merging (int group, int cross, struct input *input, struct member *member)
2024 {
2025   struct merging *merging = merging_array + mergings++;
2026 
2027   merging->group_flag = group;
2028   merging->cross_flag = cross;
2029   merging->input_number = input - input_array;
2030   merging->member_number = member - member_array;
2031 }
2032 
2033 /*------------------------------.
2034 | Decide the merging sequence.  |
2035 `------------------------------*/
2036 
2037 void
prepare_mergings(void)2038 prepare_mergings (void)
2039 {
2040   int *cursor;
2041 
2042   struct input *input0;
2043   struct input *input;
2044   struct member *member;
2045 
2046   struct cluster *cluster;	/* current cluster candidate */
2047   int cost;			/* cost associate with current candidate */
2048   struct cluster *best_cluster;	/* best cluster candidate for hunk */
2049   int best_cost;		/* cost associated with best cluster */
2050   int group_flag;		/* set if next merging starts group */
2051 
2052   /* Remove member overlaps.  */
2053 
2054   if (members > 0)
2055     {
2056       int counter;
2057 
2058       /* When members overlap, retain only the shortest ones (for now).  */
2059 
2060       indirects = 0;
2061       for (counter = 0; counter < members - 1; counter++)
2062 	{
2063 	  member = member_array + indirect_array[counter];
2064 	  if (member->first_item + real_member_size (member)
2065 	      <= member_array[indirect_array[counter + 1]].first_item)
2066 	    indirect_array[indirects++] = indirect_array[counter];
2067 	  else
2068 	    /* Invalidate this member.  */
2069 	    member->cluster_number = -1;
2070 	}
2071       indirect_array[indirects++] = indirect_array[counter];
2072     }
2073 
2074   /* Once indirects sorted and ready, this array is logically split into
2075      segments, one per input file, which segments might be later scanned in
2076      parallel a few times.  All required state variables for resuming the
2077      scan are held into the last few fields of the input structure.  */
2078 
2079   cursor = indirect_array;
2080   for (input = input_array; input < input_array + inputs; input++)
2081     {
2082       input->item = input->first_item;
2083       input->indirect_cursor = cursor;
2084       while (cursor < indirect_array + indirects
2085 	     && member_array[*cursor].first_item < input->item_limit)
2086 	cursor++;
2087       input->indirect_limit = cursor;
2088     }
2089 
2090   /* Allocate an array for discovered mergings.  */
2091 
2092   merging_array = xmalloc (indirects * sizeof (struct merging));
2093   mergings = 0;
2094 
2095   /* Repetitively consider the incoming cluster members for all inputs, and
2096      select at each iteration the cluster members causing least differences.
2097      This may indirectly trigger bigger differences into later iterations.
2098      Some thought should be given to global optimisation.  FIXME.  */
2099 
2100   while (1)
2101     {
2102 #if DEBUGGING
2103       if (debugging)
2104 	{
2105 	  fprintf (stderr, "\nCHOOSING NEXT CLUSTER\n");
2106 
2107 	  for (input = input_array; input < input_array + inputs; input++)
2108 	    if (input->indirect_cursor < input->indirect_limit)
2109 	      {
2110 		fprintf (stderr, "%s:\t", reference_string (input->item));
2111 		dump_member (INPUT_MEMBER (input));
2112 	      }
2113 	    else
2114 	      fprintf (stderr, "%s:\tNone\n", reference_string (input->item));
2115 
2116 	  putc ('\n', stderr);
2117 	}
2118 #endif
2119 
2120       best_cluster = NULL;
2121       for (input0 = input_array; input0 < input_array + inputs; input0++)
2122 	if (input0->indirect_cursor < input0->indirect_limit)
2123 	  {
2124 	    cluster = INPUT_CLUSTER (input0);
2125 #if DEBUGGING
2126 	    if (debugging)
2127 	      {
2128 		fprintf (stderr, "%s:\t", reference_string (input0->item));
2129 		dump_cluster (cluster);
2130 	      }
2131 #endif
2132 
2133 	    /* Skip this input if we have already evaluated the cluster.  */
2134 
2135 	    for (input = input_array; input < input0; input++)
2136 	      if (input->indirect_cursor < input->indirect_limit
2137 		  && INPUT_CLUSTER (input) == cluster)
2138 		break;
2139 	    if (input < input0)
2140 	      continue;
2141 
2142 	    /* Evaluate the cost of the cluster.  This will be the total
2143 	       number of difference items needed in the listing for getting
2144 	       to the point of printing the cluster.  For computing it,
2145 	       check all members of the evaluated cluster, retaining only
2146 	       one member per file, in fact, exactly the first which has not
2147 	       been listed yet.  */
2148 
2149 	    cost = 0;
2150 	    input = input_array;
2151 	    member = member_array + cluster->first_member;
2152 	    while (member < MEMBER_LIMIT (cluster))
2153 	      {
2154 		/* Just ignore invalidated members.  */
2155 
2156 		if (member->cluster_number < 0)
2157 		  {
2158 		    member++;
2159 		    continue;
2160 		  }
2161 
2162 		/* Find the proper file.  */
2163 
2164 		while (member->first_item >= input->item_limit)
2165 		  input++;
2166 
2167 		/* Ignore a member that would have been already listed.  */
2168 
2169 		if (member->first_item < input->item)
2170 		  {
2171 		    member++;
2172 		    continue;
2173 		  }
2174 
2175 		/* Accumulate cost for this member.  */
2176 
2177 #if DEBUGGING
2178 		if (debugging)
2179 		  fprintf (stderr, "    Cost += %d  [%s..%s)\n",
2180 			   member->first_item - input->item,
2181 			   reference_string (input->item),
2182 			   reference_string (member->first_item));
2183 #endif
2184 		cost += (member++)->first_item - input->item;
2185 
2186 		/* Skip other members that would appear later in same file.  */
2187 
2188 		while (member < MEMBER_LIMIT (cluster)
2189 		       && member->first_item < input->item_limit)
2190 		  member++;
2191 	      }
2192 
2193 	    /* Retain the best cluster so far.  */
2194 
2195 #if DEBUGGING
2196 	    if (debugging)
2197 	      {
2198 		if (best_cluster)
2199 		  fprintf (stderr, "  {%ld}=%d <-> {%ld}=%d\n",
2200 			   (long) (best_cluster - cluster_array), best_cost,
2201 			   (long) (cluster - cluster_array), cost);
2202 		else
2203 		  fprintf (stderr, "  {%ld}=%d\n",
2204 			   (long) (cluster - cluster_array), cost);
2205 	      }
2206 #endif
2207 	    if (!best_cluster || cost < best_cost)
2208 	      {
2209 		best_cluster = cluster;
2210 		best_cost = cost;
2211 	      }
2212 	  }
2213 
2214       /* Get out if everything has been done.  */
2215 
2216       if (!best_cluster)
2217 	break;
2218 
2219 #if DEBUGGING
2220       if (debugging)
2221 	{
2222 	  fprintf (stderr, "\nCHOICE\t");
2223 	  dump_cluster (best_cluster);
2224 	  putc ('\n', stderr);
2225 	}
2226 #endif
2227 
2228       /* Save found mergings, while moving all items pointers to after the
2229          members of the best cluster.  */
2230 
2231       input = input_array;
2232       group_flag = 1;
2233       member = member_array + best_cluster->first_member;
2234       while (member < MEMBER_LIMIT (best_cluster))
2235 	{
2236 	  /* Just ignore invalidated members.  */
2237 
2238 	  if (member->cluster_number < 0)
2239 	    {
2240 	      member++;
2241 	      continue;
2242 	    }
2243 
2244 	  /* Find the proper file.  */
2245 
2246 #if DEBUGGING
2247 	  if (debugging)
2248 	    {
2249 	      putc ('v', stderr);
2250 	      dump_member (member);
2251 	    }
2252 #endif
2253 	  while (member->first_item >= input->item_limit)
2254 	    input++;
2255 
2256 	  /* Ignore a member that would have been already listed.  */
2257 
2258 	  if (member->first_item < input->item)
2259 	    {
2260 	      member++;
2261 	      continue;
2262 	    }
2263 
2264 	  /* Skip all crossed members while adding them as crossed mergings.  */
2265 
2266 	  while (INPUT_MEMBER (input) != member)
2267 	    {
2268 	      new_merging (group_flag, 1, input, INPUT_MEMBER (input));
2269 	      group_flag = 0;
2270 	      input->indirect_cursor++;
2271 	    }
2272 	  assert (input->indirect_cursor < input->indirect_limit);
2273 	  assert (INPUT_MEMBER (input)->first_item < input->item_limit);
2274 
2275 	  /* Skip the selected member while adding it as a direct merging.  */
2276 
2277 	  new_merging (group_flag, 0, input, member);
2278 	  group_flag = 0;
2279 	  input->item = member->first_item + real_member_size (member);
2280 	  input->indirect_cursor++;
2281 
2282 	  /* Skip other members that would appear later in same file.  */
2283 
2284 	  while (member < MEMBER_LIMIT (best_cluster)
2285 		 && member->first_item < input->item_limit)
2286 	    member++;
2287 	}
2288     }
2289 
2290 #if DEBUGGING
2291   if (debugging)
2292     dump_all_mergings ();
2293 #endif
2294 
2295   assert (mergings == indirects);
2296 
2297   if (verbose)
2298     {
2299       fprintf (stderr, _("Work summary:"));
2300       fprintf (stderr, ngettext (" %d cluster,", " %d clusters,", clusters),
2301 	       clusters);
2302       fprintf (stderr, ngettext (" %d member,", " %d members,", members),
2303 	       members);
2304       fprintf (stderr, ngettext (" %d overlap\n", " %d overlaps\n",
2305 				 members - indirects), members - indirects);
2306     }
2307 }
2308 
2309 /* Terminal and pager support.  */
2310 
2311 /* How to start underlining.  */
2312 const char *termcap_start_underline = NULL;
2313 
2314 /* How to stop underlining.  */
2315 const char *termcap_stop_underline = NULL;
2316 
2317 /* How to start bolding.  */
2318 const char *termcap_start_bold = NULL;
2319 
2320 /* How to stop bolding.  */
2321 const char *termcap_stop_bold = NULL;
2322 
2323 enum emphasis
2324 {
2325   STRAIGHT,			/* no emphasis */
2326   UNDERLINED,			/* underlined text */
2327   BOLD				/* bold text */
2328 };
2329 
2330 /* Current output emphasis.  */
2331 enum emphasis current_emphasis = STRAIGHT;
2332 
2333 /*-----------------------------.
2334 | Select strings for marking.  |
2335 `-----------------------------*/
2336 
2337 static void
initialize_strings(void)2338 initialize_strings (void)
2339 {
2340   struct input *input;
2341 
2342 #if HAVE_TPUTS
2343   if (find_termcap)
2344     {
2345       const char *name;		/* terminal capability name */
2346       char term_buffer[2048];	/* terminal description */
2347       static char *buffer;	/* buffer for capabilities */
2348       char *filler;		/* cursor into allocated strings */
2349       int success;		/* tgetent results */
2350 
2351       name = getenv ("TERM");
2352       if (name == NULL)
2353 	error (EXIT_ERROR, 0,
2354 	       _("select a terminal through the TERM environment variable"));
2355       success = tgetent (term_buffer, name);
2356       if (success < 0)
2357 	error (EXIT_ERROR, 0, _("could not access the termcap data base"));
2358       if (success == 0)
2359 	error (EXIT_ERROR, 0, _("terminal type `%s' is not defined"), name);
2360       buffer = (char *) malloc (strlen (term_buffer));
2361       filler = buffer;
2362 
2363       termcap_start_underline = tgetstr ("us", &filler);
2364       termcap_stop_underline = tgetstr ("ue", &filler);
2365       termcap_start_bold = tgetstr ("so", &filler);
2366       termcap_stop_bold = tgetstr ("se", &filler);
2367     }
2368 #endif /* HAVE_TPUTS */
2369 
2370   /* Ensure some default strings.  For all input files except last, prefer
2371      underline.  For last input file, prefer bold.  */
2372 
2373   for (input = input_array; input < input_array + inputs - 1; input++)
2374     {
2375       input->term_start = termcap_start_underline;
2376       input->term_stop = termcap_stop_underline;
2377 
2378       if (!overstrike)
2379 	{
2380 	  if (!termcap_start_underline && !input->user_start)
2381 	    input->user_start = "[-";
2382 	  if (!termcap_stop_underline && !input->user_stop)
2383 	    input->user_stop = "-]";
2384 	}
2385     }
2386 
2387   input->term_start = termcap_start_bold;
2388   input->term_stop = termcap_stop_bold;
2389 
2390   if (!overstrike)
2391     {
2392       if (!termcap_start_bold && !input->user_start)
2393 	input->user_start = "{+";
2394       if (!termcap_stop_bold && !input->user_stop)
2395 	input->user_stop = "+}";
2396     }
2397 }
2398 
2399 #if HAVE_TPUTS
2400 
2401 /*-----------------------------------------.
2402 | Write one character for tputs function.  |
2403 `-----------------------------------------*/
2404 
2405 static int
putc_for_tputs(int character)2406 putc_for_tputs (int character)
2407 {
2408   return putc (character, output_file);
2409 }
2410 
2411 #endif /* HAVE_TPUTS */
2412 
2413 /*-----------------------.
2414 | Set current EMPHASIS.  |
2415 `-----------------------*/
2416 
2417 static void
set_emphasis(enum emphasis emphasis)2418 set_emphasis (enum emphasis emphasis)
2419 {
2420 #if HAVE_TPUTS
2421   switch (current_emphasis)
2422     {
2423     case STRAIGHT:
2424       switch (emphasis)
2425 	{
2426 	case STRAIGHT:
2427 	  /* Done.  */
2428 	  break;
2429 
2430 	case UNDERLINED:
2431 	  if (termcap_start_underline)
2432 	    tputs (termcap_start_underline, 0, putc_for_tputs);
2433 	  break;
2434 
2435 	case BOLD:
2436 	  if (termcap_start_bold)
2437 	    tputs (termcap_start_bold, 0, putc_for_tputs);
2438 	  break;
2439 	}
2440       break;
2441 
2442     case UNDERLINED:
2443       switch (emphasis)
2444 	{
2445 	case STRAIGHT:
2446 	  if (termcap_stop_underline)
2447 	    tputs (termcap_stop_underline, 0, putc_for_tputs);
2448 	  break;
2449 
2450 	case UNDERLINED:
2451 	  /* Done.  */
2452 	  break;
2453 
2454 	case BOLD:
2455 	  if (termcap_stop_underline)
2456 	    tputs (termcap_stop_underline, 0, putc_for_tputs);
2457 	  if (termcap_start_bold)
2458 	    tputs (termcap_start_bold, 0, putc_for_tputs);
2459 	  break;
2460 	}
2461       break;
2462 
2463     case BOLD:
2464       switch (emphasis)
2465 	{
2466 	case STRAIGHT:
2467 	  if (termcap_stop_bold)
2468 	    tputs (termcap_stop_bold, 0, putc_for_tputs);
2469 	  break;
2470 
2471 	case UNDERLINED:
2472 	  if (termcap_stop_bold)
2473 	    tputs (termcap_stop_bold, 0, putc_for_tputs);
2474 	  if (termcap_start_underline)
2475 	    tputs (termcap_start_underline, 0, putc_for_tputs);
2476 	  break;
2477 
2478 	case BOLD:
2479 	  /* Done.  */
2480 	  break;
2481 	}
2482       break;
2483     }
2484 #endif /* HAVE_TPUTS */
2485 
2486   current_emphasis = emphasis;
2487 }
2488 
2489 /*---------------------------------------------------------.
2490 | Push a new EMPHASIS, or pop it out back to what it was.  |
2491 `---------------------------------------------------------*/
2492 
2493 #define EMPHASIS_STACK_LENGTH 1
2494 
2495 static enum emphasis emphasis_array[EMPHASIS_STACK_LENGTH];
2496 static int emphasises = 0;
2497 
2498 static void
push_emphasis(enum emphasis emphasis)2499 push_emphasis (enum emphasis emphasis)
2500 {
2501   assert (emphasises < EMPHASIS_STACK_LENGTH);
2502 
2503   emphasis_array[emphasises++] = current_emphasis;
2504   set_emphasis (emphasis);
2505 }
2506 
2507 static void
pop_emphasis(void)2508 pop_emphasis (void)
2509 {
2510   assert (emphasises > 0);
2511 
2512   set_emphasis (emphasis_array[--emphasises]);
2513 }
2514 
2515 /*--------------------------------------------------------------------------.
2516 | Output a STRING having LENGTH characters, subject to the current          |
2517 | emphasis.  If EXPAND is nonzero, expand TABs.  For expansion to work      |
2518 | properly, this routine should always be called while copying input data.  |
2519 `--------------------------------------------------------------------------*/
2520 
2521 static void
output_characters(const char * string,int length,int expand)2522 output_characters (const char *string, int length, int expand)
2523 {
2524   static unsigned column = 0;
2525 
2526   const char *cursor;
2527   int counter;
2528 
2529   if (overstrike || expand)
2530     for (cursor = string; cursor < string + length; cursor++)
2531       switch (*cursor)
2532 	{
2533 	  /* Underlining or overstriking whitespace surely looks strange,
2534 	     but "less" does understand such things as emphasis requests.  */
2535 
2536 	case '\n':
2537 	case '\r':
2538 	case '\v':
2539 	case '\f':
2540 	  putc (*cursor, output_file);
2541 	  column = 0;
2542 	  break;
2543 
2544 	case '\b':
2545 	  putc ('\b', output_file);
2546 	  if (column > 0)
2547 	    column--;
2548 	  break;
2549 
2550 	case '\t':
2551 	  if (expand)
2552 	    do
2553 	      {
2554 		if (overstrike)
2555 		  switch (current_emphasis)
2556 		    {
2557 		    case STRAIGHT:
2558 		      putc (' ', output_file);
2559 		      break;
2560 
2561 		    case UNDERLINED:
2562 		      putc ('_', output_file);
2563 		      if (overstrike_for_less)
2564 			{
2565 			  putc ('\b', output_file);
2566 			  putc (' ', output_file);
2567 			}
2568 		      break;
2569 
2570 		    case BOLD:
2571 		      putc (*cursor, output_file);
2572 		      if (overstrike_for_less)
2573 			{
2574 			  putc ('\b', output_file);
2575 			  putc (' ', output_file);
2576 			}
2577 		      break;
2578 		    }
2579 		else
2580 		  putc (' ', output_file);
2581 		column++;
2582 	      }
2583 	    while (column % 8 != 0);
2584 	  else
2585 	    putc ('\t', output_file);
2586 	  break;
2587 
2588 	case ' ':
2589 	  if (overstrike)
2590 	    switch (current_emphasis)
2591 	      {
2592 	      case STRAIGHT:
2593 		putc (' ', output_file);
2594 		break;
2595 
2596 	      case UNDERLINED:
2597 		putc ('_', output_file);
2598 		if (overstrike_for_less)
2599 		  {
2600 		    putc ('\b', output_file);
2601 		    putc (' ', output_file);
2602 		  }
2603 		break;
2604 
2605 	      case BOLD:
2606 		putc (' ', output_file);
2607 		if (overstrike_for_less)
2608 		  {
2609 		    putc ('\b', output_file);
2610 		    putc (' ', output_file);
2611 		  }
2612 		break;
2613 	      }
2614 	  else
2615 	    putc (' ', output_file);
2616 	  column++;
2617 	  break;
2618 
2619 	case '_':
2620 	  if (overstrike_for_less && current_emphasis == UNDERLINED)
2621 	    {
2622 	      /* Overstriking an underline would make it bold in "less".  */
2623 	      putc ('_', output_file);
2624 	      column++;
2625 	      break;
2626 	    }
2627 	  /* Fall through.  */
2628 
2629 	default:
2630 	  if (overstrike)
2631 	    switch (current_emphasis)
2632 	      {
2633 	      case STRAIGHT:
2634 		putc (*cursor, output_file);
2635 		break;
2636 
2637 	      case UNDERLINED:
2638 		putc ('_', output_file);
2639 		putc ('\b', output_file);
2640 		putc (*cursor, output_file);
2641 		break;
2642 
2643 	      case BOLD:
2644 		putc (*cursor, output_file);
2645 		putc ('\b', output_file);
2646 		putc (*cursor, output_file);
2647 		break;
2648 	      }
2649 	  else
2650 	    putc (*cursor, output_file);
2651 	  column++;
2652 	  break;
2653 	}
2654   else
2655     fwrite (string, length, 1, output_file);
2656 }
2657 
2658 /*------------------------------------------------------------------------.
2659 | While copying INPUT, select EMPHASIS, with preferred type for printer.  |
2660 `------------------------------------------------------------------------*/
2661 
2662 static void
start_of_emphasis(struct input * input,enum emphasis emphasis)2663 start_of_emphasis (struct input *input, enum emphasis emphasis)
2664 {
2665   assert (current_emphasis == STRAIGHT);
2666 
2667   /* Avoid any emphasis if it would be useless.  */
2668 
2669 #if FIXME
2670   if (!common_listing_allowed
2671       && (!right->listing_allowed || !left->listing_allowed))
2672     return;
2673 #endif
2674 
2675 #if HAVE_TPUTS
2676   if (input->term_start)
2677     tputs (input->term_start, 0, putc_for_tputs);
2678 #endif
2679 
2680   if (input->user_start)
2681     fprintf (output_file, "%s", input->user_start);
2682 
2683   current_emphasis = emphasis;
2684 }
2685 
2686 /*-----------------------------------------------.
2687 | Indicate end of emphasis while copying INPUT.  |
2688 `-----------------------------------------------*/
2689 
2690 static void
end_of_emphasis(struct input * input)2691 end_of_emphasis (struct input *input)
2692 {
2693   assert (current_emphasis != STRAIGHT);
2694 
2695   /* Avoid any emphasis if it would be useless.  */
2696 
2697 #if FIXME
2698   if (!common_listing_allowed
2699       && (!right->listing_allowed || !left->listing_allowed))
2700     return;
2701 #endif
2702 
2703   if (input->user_stop)
2704     fprintf (output_file, "%s", input->user_stop);
2705 
2706 #if HAVE_TPUTS
2707   if (input->term_stop)
2708     tputs (input->term_stop, 0, putc_for_tputs);
2709 #endif
2710 
2711   current_emphasis = STRAIGHT;
2712 }
2713 
2714 /*-------------------------------------------------------------------.
2715 | Launch the output pager if any.  If INPUT is not NULL, do it for a |
2716 | specific input file.                                               |
2717 `-------------------------------------------------------------------*/
2718 
2719 static void
launch_output_program(struct input * input)2720 launch_output_program (struct input *input)
2721 {
2722   if (paginate)
2723     {
2724       if (input)
2725 	output_file = writepipe ("pr", "-f", "-h", input->file_name, NULL);
2726       else
2727 	output_file = writepipe ("pr", "-f", NULL);
2728     }
2729   else
2730     {
2731       char *program;		/* name of the pager */
2732       char *basename;		/* basename of the pager */
2733 
2734       /* Check if a output program should be called, and which one.  Avoid
2735          all paging if only statistics are needed.  */
2736 
2737       if (autopager && isatty (fileno (stdout))
2738 #if FIXME
2739 	  && (left->listing_allowed
2740 	      || right->listing_allowed || common_listing_allowed)
2741 #endif
2742 	)
2743 	{
2744 	  program = getenv ("WDIFF_PAGER");
2745 	  if (program == NULL)
2746 	    program = getenv ("PAGER");
2747 #ifdef PAGER_PROGRAM
2748 	  if (program == NULL)
2749 	    program = PAGER_PROGRAM;
2750 #endif
2751 	}
2752       else
2753 	program = NULL;
2754 
2755       /* Use stdout as default output.  */
2756 
2757 #if DEBUGGING
2758       output_file = debugging ? stderr : stdout;
2759 #else
2760       output_file = stdout;
2761 #endif
2762 
2763       /* If we should use a pager, launch it.  */
2764 
2765       if (program && *program)
2766 	{
2767 	  char *lessenv;
2768 
2769 	  lessenv = getenv ("LESS");
2770 	  if (lessenv == NULL)
2771 	    {
2772 	      setenv ("LESS", LESS_DEFAULT_OPTS, 0);
2773 	    }
2774 	  else
2775 	    {
2776 	      if (asprintf (&lessenv, "%s %s", LESS_DEFAULT_OPTS, lessenv) ==
2777 		  -1)
2778 		{
2779 		  xalloc_die ();
2780 		  return;
2781 		}
2782 	      else
2783 		{
2784 		  setenv ("LESS", lessenv, 1);
2785 		}
2786 	    }
2787 
2788 	  output_file = writepipe (program, NULL);
2789 	  if (!output_file)
2790 	    error (EXIT_ERROR, errno, "%s", program);
2791 	}
2792     }
2793 }
2794 
2795 /*-----------------------------.
2796 | Complete the pager program.  |
2797 `-----------------------------*/
2798 
2799 static void
complete_output_program(void)2800 complete_output_program (void)
2801 {
2802   /* Let the user play at will inside the pager, until s/he exits, before
2803      proceeding any further.  */
2804 
2805   if (output_file && output_file != stdout)
2806     {
2807       fclose (output_file);
2808       wait (NULL);
2809     }
2810 }
2811 
2812 /* Other listing services.  */
2813 
2814 enum margin_mode
2815 {
2816   EMPTY_MARGIN,			/* nothing in left margin */
2817   LOCATION_IN_MARGIN,		/* number of next item */
2818   FILLER_IN_MARGIN,		/* bars for the length of nick name */
2819   FILE_IN_MARGIN		/* nick name for file */
2820 };
2821 
2822 /*-------------------------------------------.
2823 | For file INPUT, produce a kind of MARGIN.  |
2824 `-------------------------------------------*/
2825 
2826 static void
make_margin(struct input * input,enum margin_mode margin)2827 make_margin (struct input *input, enum margin_mode margin)
2828 {
2829   char buffer[15];
2830   char *cursor;
2831 
2832   switch (margin)
2833     {
2834     case EMPTY_MARGIN:
2835       return;
2836 
2837     case LOCATION_IN_MARGIN:
2838       sprintf (buffer, "%s%d", input->nick_name,
2839 	       input->item - input->first_item + 1);
2840       break;
2841 
2842     case FILLER_IN_MARGIN:
2843       cursor = buffer + strlen (input->nick_name);
2844       *cursor-- = '\0';
2845       while (cursor > buffer)
2846 	*cursor-- = '|';
2847       break;
2848 
2849     case FILE_IN_MARGIN:
2850       strcpy (buffer, input->nick_name);
2851       break;
2852     }
2853 
2854   if (word_mode)
2855     {
2856       push_emphasis (UNDERLINED);
2857       output_characters (buffer, strlen (buffer), 0);
2858       pop_emphasis ();
2859       putc (initial_tab ? '\t' : ' ', output_file);
2860     }
2861   else
2862     {
2863       push_emphasis (STRAIGHT);
2864       output_characters (buffer, strlen (buffer), 0);
2865       putc (initial_tab ? '\t' : ' ', output_file);
2866       pop_emphasis ();
2867     }
2868 }
2869 
2870 /*-------------------------.
2871 | Skip a line from INPUT.  |
2872 `-------------------------*/
2873 
2874 static void
skip_line_item(struct input * input)2875 skip_line_item (struct input *input)
2876 {
2877   input_line (input);
2878   assert (input->cursor);
2879 
2880   input->item++;
2881 }
2882 
2883 /*-------------------------.
2884 | Copy a line from INPUT.  |
2885 `-------------------------*/
2886 
2887 static void
copy_line_item(struct input * input,enum margin_mode margin)2888 copy_line_item (struct input *input, enum margin_mode margin)
2889 {
2890   input_line (input);
2891   assert (input->cursor);
2892 
2893   make_margin (input, margin);
2894   output_characters (input->line, input->limit - input->line, expand_tabs);
2895 
2896   input->item++;
2897 }
2898 
2899 /*---------------------------------.
2900 | Skip over white space on INPUT.  |
2901 `---------------------------------*/
2902 
2903 static void
skip_whitespace(struct input * input)2904 skip_whitespace (struct input *input)
2905 {
2906   assert (input->cursor);
2907   while (input->cursor && isspace (*input->cursor))
2908     input_character (input);
2909 
2910   /* FIXME: Maybe worth following columns for TAB expansion, maybe not?  */
2911 }
2912 
2913 /*-----------------------------------------------------------------.
2914 | Copy white space from INPUT to FILE, ensuring a kind of MARGIN.  |
2915 `-----------------------------------------------------------------*/
2916 
2917 static void
copy_whitespace(struct input * input,enum margin_mode margin)2918 copy_whitespace (struct input *input, enum margin_mode margin)
2919 {
2920   char *string = input->cursor;
2921 
2922   assert (input->cursor);
2923   while (input->cursor && isspace (*input->cursor))
2924     if (++input->cursor == input->limit)
2925       {
2926 	if (input->cursor[-1] == '\n')
2927 	  {
2928 	    output_characters (string, input->cursor - string - 1,
2929 			       expand_tabs);
2930 
2931 	    /* While changing lines, ensure we stop any special display
2932 	       prior to, and restore the special display after.  */
2933 
2934 	    if (avoid_wraps && input->user_stop)
2935 	      fprintf (output_file, "%s", input->user_stop);
2936 #if HAVE_TPUTS
2937 	    if (input->term_stop)
2938 	      tputs (input->term_stop, 0, putc_for_tputs);
2939 #endif
2940 
2941 	    output_characters (input->cursor - 1, 1, expand_tabs);
2942 	    make_margin (input, margin);
2943 
2944 #if HAVE_TPUTS
2945 	    if (input->term_start)
2946 	      tputs (input->term_start, 0, putc_for_tputs);
2947 #endif
2948 	    if (avoid_wraps && input->user_start)
2949 	      fprintf (output_file, "%s", input->user_start);
2950 	  }
2951 	else
2952 	  output_characters (string, input->cursor - string, expand_tabs);
2953 
2954 	input_character_helper (input);
2955 	string = input->cursor;
2956       }
2957 
2958   if (string && input->cursor > string)
2959     output_characters (string, input->cursor - string, expand_tabs);
2960 }
2961 
2962 /*-------------------------------------.
2963 | Skip over non-white space on INPUT.  |
2964 `-------------------------------------*/
2965 
2966 static void
skip_word_item(struct input * input)2967 skip_word_item (struct input *input)
2968 {
2969   assert (input->cursor && !isspace (*input->cursor));
2970   while (input->cursor && !isspace (*input->cursor))
2971     input_character (input);
2972 
2973   /* FIXME: Maybe worth following columns for TAB expansion, maybe not?  */
2974 
2975   input->item++;
2976 }
2977 
2978 /*------------------------------------------.
2979 | Copy non white space from INPUT to FILE.  |
2980 `------------------------------------------*/
2981 
2982 static void
copy_word_item(struct input * input)2983 copy_word_item (struct input *input)
2984 {
2985   char *string = input->cursor;
2986 
2987   assert (input->cursor && !isspace (*input->cursor));
2988   while (input->cursor && !isspace (*input->cursor))
2989     input->cursor++;
2990 
2991   output_characters (string, input->cursor - string, expand_tabs);
2992 
2993   if (input->cursor == input->limit)
2994     input_character_helper (input);
2995 
2996   input->item++;
2997 }
2998 
2999 /*------------------------------------------.
3000 | Skip a difference from INPUT up to ITEM.  |
3001 `------------------------------------------*/
3002 
3003 static void
skip_until(struct input * input,int item)3004 skip_until (struct input *input, int item)
3005 {
3006   assert (input->item <= item);
3007   if (word_mode)
3008     while (input->item < item)
3009       {
3010 	skip_whitespace (input);
3011 	skip_word_item (input);
3012       }
3013   else
3014     while (input->item < item)
3015       skip_line_item (input);
3016 }
3017 
3018 /*------------------------------------------------------------------.
3019 | Copy a difference from INPUT up to ITEM, using a kind of MARGIN.  |
3020 `------------------------------------------------------------------*/
3021 
3022 static void
copy_until(struct input * input,int item,enum margin_mode margin)3023 copy_until (struct input *input, int item, enum margin_mode margin)
3024 {
3025   assert (input->item <= item);
3026   if (word_mode)
3027     while (input->item < item)
3028       {
3029 	copy_whitespace (input, margin);
3030 	copy_word_item (input);
3031       }
3032   else
3033     while (input->item < item)
3034       copy_line_item (input, margin);
3035 }
3036 
3037 /*------------------------------------.
3038 | From INPUT, skip a cluster MEMBER.  |
3039 `------------------------------------*/
3040 
3041 static void
skip_member_proper(struct input * input,struct member * member)3042 skip_member_proper (struct input *input, struct member *member)
3043 {
3044   assert (input->item == member->first_item);
3045   skip_until (input, member->first_item + real_member_size (member));
3046 }
3047 
3048 /*------------------------------------.
3049 | From INPUT, copy a cluster MEMBER.  |
3050 `------------------------------------*/
3051 
3052 static void
copy_member_proper(struct input * input,struct member * member)3053 copy_member_proper (struct input *input, struct member *member)
3054 {
3055   assert (input->item == member->first_item);
3056   copy_until (input, member->first_item + real_member_size (member),
3057 	      FILLER_IN_MARGIN);
3058 }
3059 
3060 /* Listing control.  */
3061 
3062 /*------------------------------------------.
3063 | Relist all input files with annotations.  |
3064 `------------------------------------------*/
3065 
3066 static void
relist_annotated_files(void)3067 relist_annotated_files (void)
3068 {
3069   /* The array of actives holds all members being concurrently listed.  If
3070      there is no overlapping members, the array will never hold more than
3071      one member.  The index in this array is also the column position of the
3072      vertical line bracing the member, in the annotated listing.  For
3073      preserving verticality of lines after the termination of some members,
3074      NULL members in the array stand for embedded white columns.  */
3075 
3076   struct active
3077   {
3078     struct member *member;	/* active member, or NULL */
3079     int remaining;		/* count of remaining lines to list */
3080   };
3081   struct active *active_array = NULL;
3082   int actives = 0;
3083   int allocated_actives = 0;
3084 
3085   int other_count = 0;		/* how many actives in other files */
3086 
3087   enum margin_mode margin_mode =
3088     show_links ? LOCATION_IN_MARGIN : EMPTY_MARGIN;
3089 
3090   struct input *input = NULL;
3091   int *cursor;
3092   int counter;
3093   FILE *file;
3094   ITEM *item;
3095   struct active *active;
3096   struct member *member;
3097   struct cluster *cluster;
3098   int ordinal;
3099   struct reference reference;
3100   char buffer[20];
3101 
3102 #if DEBUGGING
3103   if (debugging)
3104     fputs ("\f\n", stderr);
3105 #endif
3106 
3107   /* Prepare terminal.  */
3108 
3109   if (!paginate)
3110     {
3111       launch_output_program (input);
3112       initialize_strings ();
3113     }
3114 
3115   /* Prepare to scan all members.  */
3116 
3117   if (indirects > 0)
3118     {
3119       cursor = indirect_array;
3120       member = member_array + *cursor;
3121     }
3122   else
3123     cursor = NULL;
3124 
3125   /* Process all files.  */
3126 
3127   for (input = input_array; input < input_array + inputs; input++)
3128     {
3129       /* Prepare terminal.  */
3130 
3131       if (paginate)
3132 	{
3133 	  launch_output_program (input);
3134 	  if (input == input_array)
3135 	    initialize_strings ();
3136 	}
3137       else
3138 	{
3139 	  if (input > input_array)
3140 	    fputs ("\f\n", output_file);
3141 	  fprintf (output_file, "@@@ %s\n", input->file_name);
3142 	}
3143 
3144       /* Prepare for file.  */
3145 
3146       open_input (input);
3147       input->item = input->first_item;
3148 
3149       /* Process all items.  */
3150 
3151       if (word_mode && input->item < input->item_limit)
3152 	{
3153 	  make_margin (input, margin_mode);
3154 	  input_line (input);
3155 	}
3156 
3157       while (input->item < input->item_limit)
3158 	{
3159 	  if (word_mode)
3160 	    copy_whitespace (input, margin_mode);
3161 
3162 	  item = item_array + input->item;
3163 
3164 	  /* See if we are starting any member.  */
3165 
3166 	  while (cursor && member->first_item == input->item)
3167 	    {
3168 	      cluster = cluster_array + member->cluster_number;
3169 	      ordinal = member - member_array - cluster->first_member - 1;
3170 
3171 	      /* Obtain some active slot for the new member.  */
3172 
3173 	      if (actives)
3174 		{
3175 		  /* Set ACTIVE to the rightmost not-in-use active, or after
3176 		     all actives if they are all in use.  */
3177 
3178 		  active = active_array + actives;
3179 		  for (counter = 0; counter < actives; counter++)
3180 		    if (!active_array[counter].member)
3181 		      active = active_array + counter;
3182 
3183 		  /* Output vertical bars as needed for all prior actives.  */
3184 
3185 		  if (!word_mode)
3186 		    for (counter = 0;
3187 			 active_array + counter < active; counter++)
3188 		      if (active_array[counter].member)
3189 			putc ('|', output_file);
3190 		      else
3191 			putc (' ', output_file);
3192 
3193 		  /* Ensure active will be allocated if necessary.  */
3194 
3195 		  if (active == active_array + actives)
3196 		    active = NULL;
3197 		}
3198 	      else
3199 		active = NULL;
3200 
3201 	      /* Allocate and initialise the active.  */
3202 
3203 	      if (!active)
3204 		{
3205 		  if (actives == allocated_actives)
3206 		    {
3207 		      allocated_actives += 8;
3208 		      active_array = (struct active *)
3209 			xrealloc (active_array,
3210 				  allocated_actives *
3211 				  (sizeof (struct active)));
3212 		    }
3213 		  active = active_array + actives++;
3214 		}
3215 	      active->member = member;
3216 	      active->remaining
3217 		= cluster_array[member->cluster_number].item_count;
3218 
3219 	      /* Print a pointer to the previous member of same cluster.  */
3220 
3221 	      if (ordinal >= 0)
3222 		{
3223 		  reference = get_reference
3224 		    (member_array
3225 		     [cluster->first_member + ordinal].first_item);
3226 
3227 		  if (word_mode)
3228 		    {
3229 		      if (show_links)
3230 			{
3231 			  sprintf (buffer, "[%c%s%d",
3232 				   (char) (active - active_array + 'A'),
3233 				   reference.input->nick_name,
3234 				   reference.number);
3235 			  push_emphasis (UNDERLINED);
3236 			  output_characters (buffer, strlen (buffer), 0);
3237 			  pop_emphasis ();
3238 			  putc (' ', output_file);
3239 			}
3240 		    }
3241 		  else
3242 		    {
3243 		      fprintf (output_file, ".-> [%d/%d] ",
3244 			       ordinal + 1, MEMBERS (cluster));
3245 		      sprintf (buffer, "%s%d",
3246 			       reference.input->nick_name, reference.number);
3247 		      push_emphasis (UNDERLINED);
3248 		      output_characters (buffer, strlen (buffer), 0);
3249 		      pop_emphasis ();
3250 		      if (reference.input != input)
3251 			fprintf (output_file, " (%s)",
3252 				 reference.input->file_name);
3253 		      putc ('\n', output_file);
3254 		    }
3255 		}
3256 	      else if (word_mode)
3257 		{
3258 		  if (show_links)
3259 		    {
3260 		      sprintf (buffer, "[%c",
3261 			       (char) (active - active_array + 'A'));
3262 		      push_emphasis (UNDERLINED);
3263 		      output_characters (buffer, strlen (buffer), 0);
3264 		      pop_emphasis ();
3265 		      putc (' ', output_file);
3266 		    }
3267 		}
3268 	      else
3269 		fputs (".-\n", output_file);
3270 
3271 	      /* Bold text appearing in other files.  */
3272 
3273 	      if (active_elsewhere (member, input))
3274 		if (other_count++ == 0)
3275 		  set_emphasis (BOLD);
3276 
3277 	      /* Advance cursor.  */
3278 
3279 	      if (++cursor < indirect_array + indirects)
3280 		member = member_array + *cursor;
3281 	      else
3282 		cursor = NULL;
3283 	    }
3284 
3285 	  /* List the item itself.  */
3286 
3287 	  if (word_mode)
3288 	    copy_word_item (input);
3289 	  else
3290 	    {
3291 	      for (counter = 0; counter < actives; counter++)
3292 		if (active_array[counter].member)
3293 		  putc ('|', output_file);
3294 		else
3295 		  putc (' ', output_file);
3296 
3297 	      if (counter < 7)
3298 		{
3299 		  sprintf (buffer, "%s%d", input->nick_name,
3300 			   input->item - input->first_item + 1);
3301 		  for (counter += strlen (buffer); counter < 7; counter++)
3302 		    putc (' ', output_file);
3303 		  push_emphasis (UNDERLINED);
3304 		  output_characters (buffer + counter - 7,
3305 				     strlen (buffer + counter - 7), 0);
3306 		  pop_emphasis ();
3307 		}
3308 
3309 	      if (initial_tab)
3310 		putc ('\t', output_file);
3311 	      else
3312 		putc (' ', output_file);
3313 
3314 	      copy_line_item (input, EMPTY_MARGIN);
3315 	    }
3316 
3317 	  /* See if we are ending any member.  */
3318 
3319 	  if (!actives)
3320 	    continue;
3321 
3322 	  active = active_array + actives;
3323 	  while (--active >= active_array)
3324 	    if (item_type (item) == NORMAL && --active->remaining == 0)
3325 	      {
3326 		cluster = cluster_array + active->member->cluster_number;
3327 		ordinal
3328 		  = active->member - member_array - cluster->first_member + 1;
3329 
3330 		/* Print a pointer to the next member of same cluster.  */
3331 
3332 		if (!word_mode)
3333 		  {
3334 		    for (counter = 0;
3335 			 active_array + counter < active; counter++)
3336 		      if (active_array[counter].member)
3337 			putc ('|', output_file);
3338 		      else
3339 			putc (' ', output_file);
3340 		  }
3341 
3342 		if (ordinal < MEMBERS (cluster))
3343 		  {
3344 		    reference = get_reference
3345 		      (member_array
3346 		       [cluster->first_member + ordinal].first_item);
3347 
3348 		    if (word_mode)
3349 		      {
3350 			if (show_links)
3351 			  {
3352 			    putc (' ', output_file);
3353 			    sprintf (buffer, "%s%d%c]",
3354 				     reference.input->nick_name,
3355 				     reference.number,
3356 				     (char) (active - active_array + 'A'));
3357 			    push_emphasis (UNDERLINED);
3358 			    output_characters (buffer, strlen (buffer), 0);
3359 			    pop_emphasis ();
3360 			  }
3361 		      }
3362 		    else
3363 		      {
3364 			fprintf (output_file, "`-> [%d/%d] ",
3365 				 ordinal + 1, MEMBERS (cluster));
3366 			sprintf (buffer, "%s%d",
3367 				 reference.input->nick_name,
3368 				 reference.number);
3369 			push_emphasis (UNDERLINED);
3370 			output_characters (buffer, strlen (buffer), 0);
3371 			pop_emphasis ();
3372 			if (reference.input != input)
3373 			  fprintf (output_file, " (%s)",
3374 				   reference.input->file_name);
3375 			putc ('\n', output_file);
3376 		      }
3377 		  }
3378 		else if (word_mode)
3379 		  {
3380 		    if (show_links)
3381 		      {
3382 			putc (' ', output_file);
3383 			sprintf (buffer, "%c]",
3384 				 (char) (active - active_array + 'A'));
3385 			push_emphasis (UNDERLINED);
3386 			output_characters (buffer, strlen (buffer), 0);
3387 			pop_emphasis ();
3388 		      }
3389 		  }
3390 		else
3391 		  fputs ("`-\n", output_file);
3392 
3393 		/* Do not bold text not appearing in any other file.  */
3394 
3395 		if (active_elsewhere (active->member, input))
3396 		  if (--other_count == 0)
3397 		    set_emphasis (STRAIGHT);
3398 
3399 		/* Remove as many active members as we can.  */
3400 
3401 		active->member = NULL;
3402 		if (active == active_array + actives - 1)
3403 		  while (actives > 0 && !active_array[actives - 1].member)
3404 		    actives--;
3405 	      }
3406 	}
3407 
3408       /* Finish file.  */
3409 
3410       assert (actives == 0);
3411       assert (other_count == 0);
3412 
3413       close_input (input);
3414 
3415       if (paginate)
3416 	complete_output_program ();
3417     }
3418 
3419   if (!paginate)
3420     complete_output_program ();
3421 }
3422 
3423 /*--------------------------------------------------------------------.
3424 | Make a merged listing of input files.  If UNIFIED, produce unified  |
3425 | context diffs instead of plain context diffs.  If CROSSED, identify |
3426 | crossed blocks.						      |
3427 `--------------------------------------------------------------------*/
3428 
3429 static void
relist_merged_lines(int unified,int crossed)3430 relist_merged_lines (int unified, int crossed)
3431 {
3432   int counter;
3433   struct input *input;
3434   struct member *member;
3435   struct cluster *chosen_cluster;
3436   struct reference reference;
3437   struct merging *merging;
3438   struct merging *group_limit;
3439   struct merging *cursor;
3440   int listed;
3441 
3442   /* Prepare terminal.  */
3443 
3444   launch_output_program (NULL);
3445   initialize_strings ();
3446 
3447   /* FIXME: Merging requires all input files to be opened in parallel.  This
3448      is not realistic when there are really many input files: in such cases,
3449      one has no other choice than to avoid merged listings and resort to the
3450      full relisting option instead.  */
3451 
3452   for (input = input_array; input < input_array + inputs; input++)
3453     {
3454       open_input (input);
3455       input->item = input->first_item;
3456     }
3457 
3458   /* Process all merging groups, one at a time.  */
3459 
3460 #if DEBUGGING
3461   if (debugging)
3462     fputs ("\f\n", stderr);
3463 #endif
3464 
3465   for (merging = merging_array;
3466        merging < merging_array + mergings; merging = group_limit)
3467     {
3468       /* Find the extent of the merging group, and count genuine mergings.  */
3469 
3470       counter = 0;
3471       if (!merging->cross_flag)
3472 	counter++;
3473 
3474       for (cursor = merging + 1;
3475 	   cursor < merging_array + mergings && !cursor->group_flag; cursor++)
3476 	if (!cursor->cross_flag)
3477 	  counter++;
3478 
3479       group_limit = cursor;
3480 
3481       /* Prepare for a new hunk.  Hmph!  Not really yet...  */
3482 
3483       /* Output differences, which are items before members.  Also consider
3484          any crossed member as a difference and output it on the same blow.  */
3485 
3486       for (cursor = merging; cursor < group_limit; cursor++)
3487 	{
3488 	  input = input_array + cursor->input_number;
3489 	  member = member_array + cursor->member_number;
3490 	  copy_until (input, member->first_item, FILE_IN_MARGIN);
3491 	  if (cursor->cross_flag)
3492 	    {
3493 	      explain_group ('/', cursor, cursor + 1);
3494 	      copy_member_proper (input, member);
3495 	      explain_group ('\\', cursor, cursor + 1);
3496 	    }
3497 	}
3498 
3499       /* Describe members, which we are about to skip.  */
3500 
3501       explain_group ('/', merging, group_limit);
3502 
3503       listed = 0;
3504       for (cursor = merging; cursor < group_limit; cursor++)
3505 	if (!cursor->cross_flag)
3506 	  {
3507 	    input = input_array + cursor->input_number;
3508 	    member = member_array + cursor->member_number;
3509 	    if (listed)
3510 	      skip_member_proper (input, member);
3511 	    else
3512 	      {
3513 		copy_member_proper (input, member);
3514 		listed = 1;
3515 	      }
3516 	  }
3517 
3518       explain_group ('\\', merging, group_limit);
3519     }
3520 
3521   /* Copy remaining differences and clean up.  */
3522 
3523   for (input = input_array; input < input_array + inputs; input++)
3524     {
3525       copy_until (input, input->item_limit, FILE_IN_MARGIN);
3526       close_input (input);
3527     }
3528 }
3529 
3530 /*-----------------------------------------------------.
3531 | Study diff output and use it to drive reformatting.  |
3532 `-----------------------------------------------------*/
3533 
3534 /* Define the separator lines when output is inhibited.  */
3535 #define SEPARATOR_LINE \
3536   "======================================================================"
3537 
3538 static void
relist_merged_words(void)3539 relist_merged_words (void)
3540 {
3541   int counter;
3542   struct input *input;
3543   struct member *member;
3544   struct cluster *chosen_cluster;
3545   struct reference reference;
3546   struct merging *merging;
3547   struct merging *group_limit;
3548   struct merging *cursor;
3549   int listed;
3550 
3551   int count_total_left;		/* count of total words in left file */
3552   int count_total_right;	/* count of total words in right file */
3553   int count_isolated_left;	/* count of deleted words in left file */
3554   int count_isolated_right;	/* count of added words in right file */
3555   int count_changed_left;	/* count of changed words in left file */
3556   int count_changed_right;	/* count of changed words in right file */
3557 
3558   /* Prepare terminal.  */
3559 
3560   launch_output_program (NULL);
3561   initialize_strings ();
3562 
3563   /* Rewind input files.  */
3564 
3565   for (input = input_array; input < input_array + inputs; input++)
3566     {
3567       open_input (input);
3568       input->item = input->first_item;
3569       input_line (input);
3570     }
3571 
3572   count_total_left = left->item_limit - left->first_item;
3573   count_total_right = right->item_limit - right->first_item;
3574   count_isolated_left = 0;
3575   count_isolated_right = 0;
3576   count_changed_left = 0;
3577   count_changed_right = 0;
3578 
3579   /* Process all merging groups, one at a time.  */
3580 
3581   for (merging = merging_array;
3582        merging < merging_array + mergings; merging = group_limit)
3583     {
3584       /* Find the extent of the merging group, and count genuine mergings.  */
3585 
3586       counter = 0;
3587       if (!merging->cross_flag)
3588 	counter++;
3589 
3590       for (cursor = merging + 1;
3591 	   cursor < merging_array + mergings && !cursor->group_flag; cursor++)
3592 	if (!cursor->cross_flag)
3593 	  counter++;
3594 
3595       group_limit = cursor;
3596 
3597       /* Output differences, which are items before members.  Also consider
3598          any crossed member as a difference and output it on the same blow.  */
3599 
3600       for (cursor = merging; cursor < group_limit; cursor++)
3601 	{
3602 	  input = input_array + cursor->input_number;
3603 	  member = member_array + cursor->member_number;
3604 	  if (input->listing_allowed)
3605 	    {
3606 	      if (input->item < member->first_item || cursor->cross_flag)
3607 		{
3608 		  if (input == left)
3609 		    copy_whitespace (input, EMPTY_MARGIN);
3610 		  else
3611 		    skip_whitespace (input);
3612 
3613 		  if (input == input_array + inputs - 1)
3614 		    start_of_emphasis (input, BOLD);
3615 		  else
3616 		    start_of_emphasis (input, UNDERLINED);
3617 
3618 		  if (input->item < member->first_item)
3619 		    {
3620 		      copy_word_item (input);
3621 		      copy_until (input, member->first_item, FILE_IN_MARGIN);
3622 		    }
3623 		  if (cursor->cross_flag)
3624 		    copy_member_proper (input, member);
3625 
3626 		  end_of_emphasis (input);
3627 		}
3628 	    }
3629 	  else
3630 	    {
3631 	      skip_until (input, member->first_item);
3632 	      if (cursor->cross_flag)
3633 		skip_member_proper (input, member);
3634 	    }
3635 	}
3636 
3637       /* Use separator lines to disambiguate the output.  */
3638 
3639       if (common_listing_allowed)
3640 	{
3641 	  if (!left->listing_allowed && !right->listing_allowed)
3642 	    fprintf (output_file, "\n%s\n", SEPARATOR_LINE);
3643 	}
3644       else
3645 	fprintf (output_file, "\n%s\n", SEPARATOR_LINE);
3646 
3647       /* Describe members, which we are about to skip.  */
3648 
3649       listed = 0;
3650       for (cursor = merging; cursor < group_limit; cursor++)
3651 	if (!cursor->cross_flag)
3652 	  {
3653 	    input = input_array + cursor->input_number;
3654 	    member = member_array + cursor->member_number;
3655 	    if (listed)
3656 	      skip_member_proper (input, member);
3657 	    else
3658 	      {
3659 		copy_member_proper (input, member);
3660 		listed = 1;
3661 	      }
3662 	  }
3663     }
3664 
3665   /* Copy remaining differences and clean up.  Copy from left side if the
3666      user wanted to see only the common code and deleted words.  */
3667 
3668   if (!common_listing_allowed
3669       && (left->listing_allowed || right->listing_allowed))
3670     fprintf (output_file, "\n%s\n", SEPARATOR_LINE);
3671 
3672   for (input = input_array; input < input_array + inputs; input++)
3673     {
3674       if (input->listing_allowed)
3675 	{
3676 	  if (input->item < input->item_limit)
3677 	    {
3678 	      if (input == left)
3679 		copy_whitespace (input, EMPTY_MARGIN);
3680 	      else
3681 		skip_whitespace (input);
3682 
3683 	      if (input == input_array + inputs - 1)
3684 		start_of_emphasis (input, BOLD);
3685 	      else
3686 		start_of_emphasis (input, UNDERLINED);
3687 
3688 	      copy_word_item (input);
3689 	      copy_until (input, input->item_limit, FILE_IN_MARGIN);
3690 
3691 	      end_of_emphasis (input);
3692 	    }
3693 	  copy_whitespace (input, EMPTY_MARGIN);
3694 	}
3695       close_input (input);
3696     }
3697 
3698   /* Print merging statistics.  */
3699 
3700   if (verbose)
3701     {
3702       int count_common_left;	/* words unchanged in left file */
3703       int count_common_right;	/* words unchanged in right file */
3704 
3705       count_common_left
3706 	= count_total_left - count_isolated_left - count_changed_left;
3707       count_common_right
3708 	= count_total_right - count_isolated_right - count_changed_right;
3709 
3710       printf (ngettext ("%s: %d word", "%s: %d words", count_total_left),
3711 	      left->file_name, count_total_left);
3712       if (count_total_left > 0)
3713 	{
3714 	  printf (ngettext ("  %d %.0f%% common", "  %d %.0f%% common",
3715 			    count_common_left), count_common_left,
3716 		  count_common_left * 100. / count_total_left);
3717 	  printf (ngettext ("  %d %.0f%% deleted", "  %d %.0f%% deleted",
3718 			    count_isolated_left), count_isolated_left,
3719 		  count_isolated_left * 100. / count_total_left);
3720 	  printf (ngettext ("  %d %.0f%% changed", "  %d %.0f%% changed",
3721 			    count_changed_left), count_changed_left,
3722 		  count_changed_left * 100. / count_total_left);
3723 	}
3724       printf ("\n");
3725 
3726       printf (ngettext ("%s: %d word", "%s: %d words", count_total_right),
3727 	      right->file_name, count_total_right);
3728       if (count_total_right > 0)
3729 	{
3730 	  printf (ngettext ("  %d %.0f%% common", "  %d %.0f%% common",
3731 			    count_common_right), count_common_right,
3732 		  count_common_right * 100. / count_total_right);
3733 	  printf (ngettext ("  %d %.0f%% inserted", "  %d %.0f%% inserted",
3734 			    count_isolated_right), count_isolated_right,
3735 		  count_isolated_right * 100. / count_total_right);
3736 	  printf (ngettext ("  %d %.0f%% changed", "  %d %.0f%% changed",
3737 			    count_changed_right), count_changed_right,
3738 		  count_changed_right * 100. / count_total_right);
3739 	}
3740       printf ("\n");
3741     }
3742 
3743   /* Set exit status.  */
3744 
3745   if (count_isolated_left || count_isolated_right
3746       || count_changed_left || count_changed_right)
3747     exit_status = EXIT_DIFFERENCE;
3748 
3749   /* Reset the terminal.  */
3750 
3751   complete_output_program ();
3752 }
3753 
3754 /* Main control.  */
3755 
3756 /*-----------------------------------------------.
3757 | Explain how to use the program, then get out.	 |
3758 `-----------------------------------------------*/
3759 
3760 static void
usage(int status)3761 usage (int status)
3762 {
3763   if (status != EXIT_SUCCESS)
3764     fprintf (stderr, _("Try `%s --help' for more information.\n"),
3765 	     program_name);
3766   else
3767     {
3768       /* *INDENT-OFF* */
3769       fputs (_("\
3770 mdiff - Studies multiple files and searches for similar sequences, it then\n\
3771 produces possibly detailed lists of differences and similarities.\n"),
3772 	     stdout);
3773       fputs ("\n", stdout);
3774       printf (_("\
3775 Usage: %s [OPTION]... [FILE]...\n"),
3776 	      program_name);
3777 
3778       fputs (_("\nOperation modes:\n"), stdout);
3779       fputs (_("  -h                     (ignored)\n"), stdout);
3780       fputs (_("  -v, --verbose          report a few statistics on stderr\n"), stdout);
3781       fputs (_("      --help             display this help then exit\n"), stdout);
3782       fputs (_("      --version          display program version then exit\n"), stdout);
3783 
3784       fputs (_("\nFormatting output:\n"), stdout);
3785       fputs (_("  -T, --initial-tab       produce TAB instead of initial space\n"), stdout);
3786       fputs (_("  -l, --paginate          paginate output through `pr'\n"), stdout);
3787       fputs (_("  -S, --string[=STRING]   take note of another user STRING\n"), stdout);
3788       fputs (_("  -V, --show-links        give file and line references in annotations\n"), stdout);
3789       fputs (_("  -t, --expand-tabs       expand tabs to spaces in the output\n"), stdout);
3790 
3791 #if DEBUGGING
3792       fputs (_("\nDebugging:\n"), stdout);
3793       fputs (_("  -0, --debugging   output many details about what is going on\n"), stdout);
3794 #endif
3795 
3796       fputs (_("\nWord mode options:\n"), stdout);
3797       fputs (_("  -1, --no-deleted           inhibit output of deleted words\n"), stdout);
3798       fputs (_("  -2, --no-inserted          inhibit output of inserted words\n"), stdout);
3799       fputs (_("  -3, --no-common            inhibit output of common words\n"), stdout);
3800       fputs (_("  -A, --auto-pager           automatically calls a pager\n"), stdout);
3801       fputs (_("  -k, --less-mode            variation of printer mode for \"less\"\n"), stdout);
3802       fputs (_("  -m, --avoid-wraps          do not extend fields through newlines\n"), stdout);
3803       fputs (_("  -o, --printer              overstrike as for printers\n"), stdout);
3804       fputs (_("  -z, --terminal             use termcap as for terminal displays\n"), stdout);
3805       fputs (_("  -O, --item-regexp=REGEXP   compare items as defined by REGEXP\n"), stdout);
3806       fputs (_("  -W, --word-mode            compare words instead of lines\n"), stdout);
3807 
3808       /***
3809       fputs (_("\nComparing files:\n"), stdout);
3810       fputs (_("*  -H, --speed-large-files     go faster, for when numerous small changes\n"), stdout);
3811       fputs (_("*  -a, --text                  report line differences (text file default)\n"), stdout);
3812       fputs (_("*  -d, --minimal               try harder for a smaller set of changes\n"), stdout);
3813       fputs (_("*  -q, --brief                 only says if files differ (binary default)\n"), stdout);
3814       fputs (_("*      --horizon-lines=LINES   keep LINES lines in common prefixes/suffixes\n"), stdout);
3815       ***/
3816       /***
3817       fputs (_("\nComparing directories:\n"), stdout);
3818       fputs (_("*  -N, --new-file                  consider missing files to be empty\n"), stdout);
3819       fputs (_("*  -P, --unidirectional-new-file   consider missing old files to be empty\n"), stdout);
3820       fputs (_("*  -S, --starting-file=FILE        resume directory comparison with FILE\n"), stdout);
3821       fputs (_("*  -X, --exclude-from=FILE         ignore files matching patterns from FILE\n"), stdout);
3822       fputs (_("*  -r, --recursive                 recursively compare subdirectories\n"), stdout);
3823       fputs (_("*  -s, --report-identical-files    report when two files are the same\n"), stdout);
3824       fputs (_("*  -x, --exclude=PATTERN           ignore files (dirs) matching PATTERN\n"), stdout);
3825       ***/
3826       /***
3827       fputs (_("\nIgnoring text:\n"), stdout);
3828       fputs (_("  -B, --ignore-blank-lines             ignore blank lines\n"), stdout);
3829       fputs (_("*  -I, --ignore-matching-lines=REGEXP   ignore lines matching REGEXP\n"), stdout);
3830       fputs (_("  -b, --ignore-space-change            ignore amount of white space\n"), stdout);
3831       fputs (_("  -i, --ignore-case                    ignore case differences\n"), stdout);
3832       fputs (_("  -w, --ignore-all-space               ignore white space\n"), stdout);
3833       fputs (_("\nClustering:\n"), stdout);
3834       fputs (_("  -G, --relist-files         list all input files with annotations\n"), stdout);
3835       fputs (_("  -J, --minimum-size=ITEMS   ignore clusters not having that many ITEMS\n"), stdout);
3836       fputs (_("  -j, --ignore-delimiters    do not count items having only delimiters\n"), stdout);
3837       ***/
3838       /***
3839       fputs (_("\nDetailed output formats:\n"), stdout);
3840       fputs (_("*  -D, --ifdef=NAME                      output `#ifdef NAME' format\n"), stdout);
3841       fputs (_("*      --changed-group-format=FORMAT     use FORMAT for changed lines\n"), stdout);
3842       fputs (_("*      --new-group-format=FORMAT         use FORMAT for inserted lines\n"), stdout);
3843       fputs (_("*      --new-line-format=FORMAT          use FORMAT for inserted line\n"), stdout);
3844       fputs (_("*      --old-group-format=FORMAT         use FORMAT for deleted lines\n"), stdout);
3845       fputs (_("*      --old-line-format=FORMAT          use FORMAT for deleted line\n"), stdout);
3846       fputs (_("*      --unchanged-group-format=FORMAT   use FORMAT for unchanged lines\n"), stdout);
3847       fputs (_("*      --unchanged-line-format=FORMAT    use FORMAT for unchanged line\n"), stdout);
3848       fputs (_("*      --line-format=FORMAT              --{old,new,unchanged}-line-format\n"), stdout);
3849       ***/
3850       /***
3851       fputs (_("\nScript-like formats:\n"), stdout);
3852       fputs (_("  (none of -CDUcefnuy)   output normal diffs\n"), stdout);
3853       fputs (_("*  -e, --ed               output a valid `ed' script\n"), stdout);
3854       fputs (_("*  -f, --forward-ed       mix between -e and -n (not very useful)\n"), stdout);
3855       fputs (_("*  -n, --rcs              output RCS format (internally used by RCS)\n"), stdout);
3856       ***/
3857       /***
3858       fputs (_("\nContext and unified formats:\n"), stdout);
3859       fputs (_("*  -F, --show-function-line=REGEXP   show previous context matching REGEXP\n"), stdout);
3860       fputs (_("*  -p, --show-c-function             show which C function for each change\n"), stdout);
3861       ***/
3862       /***
3863       fputs (_("*  -C, --context=LINES         as -c, also select context size in lines\n"), stdout);
3864       fputs (_("*  -L, --label=LABEL           use from/to LABEL instead of file name (twice)\n"), stdout);
3865       fputs (_("*  -U, --unified=LINES         as -u, also select context size in lines\n"), stdout);
3866       fputs (_("*  -c, --context               output context diffs (default 3 context lines)\n"), stdout);
3867       fputs (_("*  -u, --unified               output unidiffs (default 3 context lines)\n"), stdout);
3868       fputs (_("*  -LINES                      (obsolete: select context size in lines)\n"), stdout);
3869       ***/
3870       /***
3871       fputs (_("\nSide by side format:\n"), stdout);
3872       fputs (_("*  -W, --width=COLUMNS           use width of COLUMNS\n"), stdout);
3873       fputs (_("*  -y, --side-by-side            use side by side output format\n"), stdout);
3874       fputs (_("*      --left-column             print only left column line when common\n"), stdout);
3875       fputs (_("*      --sdiff-merge-assist      (internally used by `sdiff')\n"), stdout);
3876       fputs (_("*      --suppress-common-lines   do not print common lines\n"), stdout);
3877       ***/
3878       /***
3879       fputs (_("\n\
3880 FORMAT is made up of characters standing for themselves, except:\n\
3881   %%%%           a single %%\n\
3882   %%c'C'        quoted character C\n\
3883   %%c'\\O'       character having value O, from 1 to 3 octal digits\n\
3884   %%(A=B?T:E)   if A is B then T else E; A B number or VARIABLE; T E FORMAT\n\
3885   %%FN          use SPECIF specification F to print VARIABLE value N\n\
3886   %%<           [group] old, each line through --old-line-format\n\
3887   %%>           [group] new, each line through --new-line-format\n\
3888   %%=           [group] unchanged, each line through --unchanged-line-format\n\
3889   %%l           [line] without its possible trailing newline\n\
3890   %%L           [line] with its possible trailing newline\n"),
3891 	     stdout);
3892       ***/
3893       /***
3894       fputs (_("\n\
3895 SPECIF is [-][W[.D]]{doxX} as in C printf\n"),
3896 	     stdout);
3897       ***/
3898       /***
3899       fputs (_("\n\
3900 VARIABLE is {eflmn} for old group or {EFLMN} for new group\n\
3901   {eE}   line number just before group\n\
3902   {fF}   first line number of group\n\
3903   {lL}   last line number of group\n\
3904   {mM}   line number just after group\n\
3905   {nN}   number of lines in the group\n"),
3906 	     stdout);
3907       ***/
3908       /***
3909       fputs (_("\nStandard diff options:\n"), stdout);
3910       fputs (_("  -i, --ignore-case         consider upper- and lower-case to be the same\n"), stdout);
3911       fputs (_("  -w, --ignore-all-space    ignore all white space\n"), stdout);
3912       fputs (_("  -b, --ignore-space-change ignore changes in the amount of white space\n"), stdout);
3913       fputs (_("  -B, --ignore-blank-lines  ignore changes whose lines are all blank\n"), stdout);
3914       fputs (_("  -I, --ignore-matching-lines=RE ignore changes whose lines all match RE\n"), stdout);
3915       fputs (_("  -a, --text                treat all files as text\n"), stdout);
3916       fputs (_("\
3917   -c, --context[=NUMBER]    output regular context diffs,\n\
3918                             changing to NUMBER lines of context\n"), stdout);
3919       fputs (_("\
3920   -u, --unified[=NUMBER]    output unified context diffs or unidiffs,\n\
3921                             with NUMBER lines of context\n"), stdout);
3922       fputs (_("  -C, --context=NUM         output NUM lines of copied context\n"), stdout);
3923       fputs (_("  -U, --unified=NUM         output NUM lines of unified context\n"), stdout);
3924       fputs (_("  -L, --label=LABEL         use LABEL instead of file name\n"), stdout);
3925       fputs (_("  -p, --show-c-function     show which C function each change is in\n"), stdout);
3926       fputs (_("  -F, --show-function-line=RE show the most recent line matching RE\n"), stdout);
3927       fputs (_("  -q, --brief               output only whether files differ\n"), stdout);
3928       fputs (_("  -e, --ed                  output an ed script\n"), stdout);
3929       fputs (_("  -n, --rcs                 output an RCS format diff\n"), stdout);
3930       fputs (_("  -y, --side-by-side        output in two columns\n"), stdout);
3931       fputs (_("  -w, --width=NUM           output at most NUM (default 130) characters per line\n"), stdout);
3932       fputs (_("      --left-column         output only the left column of common lines\n"), stdout);
3933       fputs (_("      --suppress-common-lines do not output common lines\n"), stdout);
3934       fputs (_("  -D, --ifdef=NAME          output merged file to show `#ifdef NAME' diffs\n"), stdout);
3935       fputs (_("      --GTYPE-group-format=GFMT GTYPE input groups with GFMT\n"), stdout);
3936       fputs (_("      --line-format=LFMT    all input lines with LFMT\n"), stdout);
3937       fputs (_("      --LTYPE-line-format=LFMT LTYPE input lines with LFMT\n"), stdout);
3938       fputs (_("  -l, --paginate            pass the output through `pr' to paginate it\n"), stdout);
3939       fputs (_("  -t, --expand-tabs         expand tabs to spaces in output\n"), stdout);
3940       fputs (_("  -T, --initial-tab         make tabs line up by prepending a tab\n"), stdout);
3941       fputs (_("  -r, --recursive           recursively compare any subdirectories found\n"), stdout);
3942       fputs (_("  -N, --new-file            treat absent files as empty\n"), stdout);
3943       fputs (_("  -P, --unidirectional-new-file treat absent first files as empty\n"), stdout);
3944       fputs (_("  -s, --report-identical-files report when two files are the same\n"), stdout);
3945       fputs (_("  -x, --exclude=PAT         exclude files that match PAT\n"), stdout);
3946       fputs (_("  -X, --exclude-from=FILE   exclude files that match any pattern in FILE\n"), stdout);
3947       fputs (_("  -S, --starting-file=FILE  start with FILE when comparing directories\n"), stdout);
3948       fputs (_("      --horizon-lines=NUM   keep NUM lines of the common prefix and suffix\n"), stdout);
3949       fputs (_("  -d, --minimal             try hard to find a smaller set of changes\n"), stdout);
3950       fputs (_("  -H, --speed-large-files   assume large files and many scattered small changes\n"), stdout);
3951       ***/
3952       /***
3953       fputs (_("\
3954 \n\
3955 By default, context diffs have an horizon of two lines.\n"),
3956 	     stdout);
3957       ***/
3958       /***
3959       fputs (_("\
3960 \n\
3961 LTYPE is `old', `new', or `unchanged'.  GTYPE is LTYPE or `changed'.\n\
3962 GFMT may contain:\n\
3963   %<  lines from FILE1\n\
3964   %>  lines from FILE2\n\
3965   %=  lines common to FILE1 and FILE2\n\
3966   %[-][WIDTH][.[PREC]]{doxX}LETTER  printf-style spec for LETTER\n\
3967     LETTERs are as follows for new group, lower case for old group:\n\
3968       F  first line number\n\
3969       L  last line number\n\
3970       N  number of lines = L-F+1\n\
3971       E  F-1\n\
3972       M  L+1\n"),
3973 	     stdout);
3974       fputs (_("\
3975 LFMT may contain:\n\
3976   %L  contents of line\n\
3977   %l  contents of line, excluding any trailing newline\n\
3978   %[-][WIDTH][.[PREC]]{doxX}n  printf-style spec for input line number\n\
3979 Either GFMT or LFMT may contain:\n\
3980   %%  %\n\
3981   %c'C'  the single character C\n\
3982   %c'\\OOO'  the character with octal code OOO\n"),
3983 	     stdout);
3984       ***/
3985       /***
3986       fputs (_("\nOld mdiff options:\n"), stdout);
3987       fputs (_("*  -f, --fuzz-items=ITEMS     no more than ITEMS non matching in a cluster\n"), stdout);
3988       ***/
3989 
3990       fputs ("\n", stdout);
3991       fputs (_("With no FILE, or when FILE is -, read standard input.\n"), stdout);
3992       fputs ("\n", stdout);
3993       fputs (_("Report bugs to <wdiff-bugs@gnu.org>.\n"), stdout);
3994       /* *INDENT-ON* */
3995     }
3996   exit (status);
3997 }
3998 
3999 /*----------------------------------------------------------------------.
4000 | Main program.  Decode ARGC arguments passed through the ARGV array of |
4001 | strings, then launch execution.				        |
4002 `----------------------------------------------------------------------*/
4003 
4004 #define UNIMPLEMENTED(Option) \
4005   error (0, 0, _("ignoring option %s (not implemented)"), (Option))
4006 
4007 int
main(int argc,char * const * argv)4008 main (int argc, char *const *argv)
4009 {
4010   int option_char;		/* option character */
4011   struct input *input;		/* cursor in input array */
4012 
4013   int inhibit_left = 0;
4014   int inhibit_right = 0;
4015   int inhibit_common = 0;
4016   const char *delete_start = NULL;
4017   const char *delete_stop = NULL;
4018   const char *insert_start = NULL;
4019   const char *insert_stop = NULL;
4020 
4021   program_name = argv[0];
4022   setlocale (LC_ALL, "");
4023   bindtextdomain (PACKAGE, LOCALEDIR);
4024   textdomain (PACKAGE);
4025 
4026   /* Decode command options.  */
4027 
4028   while (option_char = getopt_long (argc, (char **) argv, OPTION_STRING,
4029 				    long_options, NULL), option_char != EOF)
4030     switch (option_char)
4031       {
4032       default:
4033 	usage (EXIT_ERROR);
4034 
4035       case '\0':
4036 	break;
4037 
4038       case '0':
4039 #if DEBUGGING
4040 	debugging = 1;
4041 	verbose = 1;
4042 #else
4043 	UNIMPLEMENTED ("--debugging");
4044 #endif
4045 	break;
4046 
4047       case '1':
4048 	inhibit_left = 1;
4049 	break;
4050 
4051       case '2':
4052 	inhibit_right = 1;
4053 	break;
4054 
4055       case '3':
4056 	inhibit_common = 1;
4057 	break;
4058 
4059       case 'A':
4060 	autopager = 1;
4061 	break;
4062 
4063       case 'B':
4064 	ignore_blank_lines = 1;
4065 	break;
4066 
4067       case 'D':
4068 	ifdef = optarg;
4069 	UNIMPLEMENTED ("--ifdef");
4070 	break;
4071 
4072       case 'F':
4073 	show_function_line = optarg;
4074 	UNIMPLEMENTED ("--show-function-line");
4075 	break;
4076 
4077       case 'G':
4078 	relist_files = 1;
4079 	break;
4080 
4081       case 'H':
4082 	speed_large_files = 1;
4083 	UNIMPLEMENTED ("--speed-large-files");
4084 	break;
4085 
4086       case 'I':
4087 	if (ignore_regexps % 8 == 0)
4088 	  ignore_regexp_array = (struct re_pattern_buffer **)
4089 	    xrealloc (ignore_regexp_array,
4090 		      ((ignore_regexps + 8)
4091 		       * sizeof (struct re_pattern_buffer *)));
4092 	ignore_regexp_array[ignore_regexps++]
4093 	  = alloc_and_compile_regex (optarg);
4094 	break;
4095 
4096       case 'J':
4097 	minimum_size = atoi (optarg);
4098 	break;
4099 
4100       case 'L':
4101 	label = optarg;
4102 	UNIMPLEMENTED ("--label");
4103 	break;
4104 
4105       case 'N':
4106 	new_file = 1;
4107 	UNIMPLEMENTED ("--new-file");
4108 	break;
4109 
4110       case 'O':
4111 	UNIMPLEMENTED ("--compare-words");
4112 	item_regexp = alloc_and_compile_regex (optarg);
4113 	break;
4114 
4115       case 'P':
4116 	unidirectional_new_file = 1;
4117 	UNIMPLEMENTED ("--unidirectional-new-file");
4118 	break;
4119 
4120       case 'Q':
4121 	insert_start = optarg;
4122 	break;
4123 
4124       case 'R':
4125 	insert_stop = optarg;
4126 	break;
4127 
4128       case 'S':
4129 	starting_file = optarg;
4130 	UNIMPLEMENTED ("--starting-file");
4131 	break;
4132 
4133       case 'T':
4134 	initial_tab = 1;
4135 	break;
4136 
4137       case 'V':
4138 	show_links = 1;
4139 	break;
4140 
4141       case 'W':
4142 	word_mode = 1;
4143 	break;
4144 
4145       case 'X':
4146 	exclude_from = optarg;
4147 	UNIMPLEMENTED ("--exclude-from");
4148 	break;
4149 
4150       case 'Y':
4151 	delete_start = optarg;
4152 	break;
4153 
4154       case 'Z':
4155 	delete_stop = optarg;
4156 	break;
4157 
4158       case 'a':
4159 	text = 1;
4160 	UNIMPLEMENTED ("--text");
4161 	break;
4162 
4163       case 'b':
4164 	ignore_space_change = 1;
4165 	ignore_blank_lines = 1;
4166 	break;
4167 
4168       case 'C':
4169       case 'c':
4170 	UNIMPLEMENTED ("--context");
4171 	if (optarg)
4172 	  horizon_lines = atoi (optarg);
4173 	context = 1;
4174 	break;
4175 
4176       case 'd':
4177 	minimal = 1;
4178 	UNIMPLEMENTED ("--minimal");
4179 	break;
4180 
4181       case 'e':
4182 	ed = 1;
4183 	UNIMPLEMENTED ("--ed");
4184 	break;
4185 
4186 #if FIXME
4187       case 'h':
4188 	UNIMPLEMENTED ("-h");
4189 	break;
4190 #endif
4191 
4192       case 'i':
4193 	ignore_case = 1;
4194 	break;
4195 
4196       case 'j':
4197 	ignore_delimiters = 1;
4198 	break;
4199 
4200       case 'k':
4201 	if (find_termcap < 0)
4202 	  find_termcap = 0;
4203 	overstrike = 1;
4204 	overstrike_for_less = 1;
4205 	break;
4206 
4207       case 'l':
4208 	paginate = 1;
4209 	break;
4210 
4211       case 'm':
4212 	avoid_wraps = 1;
4213 	break;
4214 
4215       case 'n':
4216 	rcs = 1;
4217 	UNIMPLEMENTED ("--rcs");
4218 	break;
4219 
4220       case 'o':
4221 	overstrike = 1;
4222 	break;
4223 
4224       case 'p':
4225 	show_c_function = 1;
4226 	UNIMPLEMENTED ("--show-c-function");
4227 	break;
4228 
4229       case 'q':
4230 	brief = 1;
4231 	UNIMPLEMENTED ("--brief");
4232 	break;
4233 
4234       case 'r':
4235 	recursive = 1;
4236 	UNIMPLEMENTED ("--recursive");
4237 	break;
4238 
4239 #if FIXME
4240       case 's':
4241 	report_identical_files = 1;
4242 	UNIMPLEMENTED ("--report-identical-files");
4243 	break;
4244 #endif
4245 
4246       case TOLERANCE_OPTION:	/* mdiff draft */
4247 	UNIMPLEMENTED ("--tolerance");
4248 	tolerance = atoi (optarg);
4249 	break;
4250 
4251       case 't':
4252 	expand_tabs = 1;
4253 	UNIMPLEMENTED ("--expand-tabs");
4254 	break;
4255 
4256       case 'U':
4257       case 'u':
4258 	UNIMPLEMENTED ("--unified");
4259 	if (optarg)
4260 	  horizon_lines = atoi (optarg);
4261 	unified = 1;
4262 	break;
4263 
4264       case 'v':
4265 	verbose = 1;
4266 	break;
4267 
4268       case 'w':
4269 	ignore_all_space = 1;
4270 	break;
4271 
4272 #if FIXME
4273       case 'w':
4274 	width = atoi (optarg);
4275 	UNIMPLEMENTED ("--width");
4276 	break;
4277 #endif
4278 
4279       case 'x':
4280 	exclude = optarg;
4281 	UNIMPLEMENTED ("--exclude");
4282 	break;
4283 
4284       case 'y':
4285 	side_by_side = 1;
4286 	UNIMPLEMENTED ("--side-by-side");
4287 	break;
4288 
4289       case 'K':
4290 	/* compatibility option, equal to -t now */
4291 	/* fall through */
4292 
4293       case 'z':
4294 #if HAVE_TPUTS
4295 	if (find_termcap < 0)
4296 	  find_termcap = 1;
4297 	break;
4298 #else
4299 	error (EXIT_ERROR, 0, _("cannot use -z, termcap not available"));
4300 #endif
4301 
4302       case GTYPE_GROUP_FORMAT_OPTION:
4303 	gtype_group_format = optarg;
4304 	UNIMPLEMENTED ("--gtype-group-format");
4305 	break;
4306 
4307       case HORIZON_LINES_OPTION:
4308 	horizon_lines = atoi (optarg);
4309 	UNIMPLEMENTED ("--horizon-lines");
4310 	break;
4311 
4312       case LEFT_COLUMN_OPTION:
4313 	left_column = 1;
4314 	UNIMPLEMENTED ("--left-column");
4315 	break;
4316 
4317       case LINE_FORMAT_OPTION:
4318 	line_format = optarg;
4319 	UNIMPLEMENTED ("--line-format");
4320 	break;
4321 
4322       case LTYPE_LINE_FORMAT_OPTION:
4323 	ltype_line_format = optarg;
4324 	UNIMPLEMENTED ("--ltype-line-format");
4325 	break;
4326 
4327       case SUPPRESS_COMMON_LINES_OPTION:
4328 	suppress_common_lines = 1;
4329 	UNIMPLEMENTED ("--suppress-common-lines");
4330 	break;
4331       }
4332 
4333   if (minimum_size < 0)
4334     minimum_size = relist_files ? (word_mode ? 10 : 5) : 1;
4335 
4336   if (!relist_files && word_mode && argc - optind != 2)
4337     {
4338       error (0, 0, _("word merging for two files only (so far)"));
4339       usage (EXIT_ERROR);
4340     }
4341 
4342   /* If find_termcap still undecided, make it true only if autopager is
4343      set while stdout is directed to a terminal.  This decision might be
4344      reversed later, if the pager happens to be "less".  */
4345 
4346   if (find_termcap < 0)
4347     find_termcap = autopager && isatty (fileno (stdout));
4348 
4349   /* Process trivial options.  */
4350 
4351   if (show_version)
4352     {
4353       printf ("mdiff (GNU %s) %s\n", PACKAGE, VERSION);
4354       fputs (_("\
4355 \n\
4356 Copyright (C) 1992, 1997, 1998, 1999, 2010 Free Software Foundation, Inc.\n"), stdout);
4357       fputs (_("\
4358 This is free software; see the source for copying conditions.  There is NO\n\
4359 warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n"), stdout);
4360       fputs (_("\
4361 \n\
4362 Written by Franc,ois Pinard <pinard@iro.umontreal.ca>.\n"), stdout);
4363       exit (EXIT_SUCCESS);
4364     }
4365 
4366   if (show_help)
4367     usage (EXIT_SUCCESS);
4368 
4369   /* Register all input files.  */
4370 
4371   if (optind == argc)
4372     new_input ("-");
4373   else
4374     while (optind < argc)
4375       new_input (argv[optind++]);
4376 
4377   /* Save some option values.  */
4378 
4379   if (inputs == 2)
4380     {
4381       left->listing_allowed = !inhibit_left;
4382       left->user_start = delete_start;
4383       left->user_stop = delete_stop;
4384 
4385       right->listing_allowed = !inhibit_right;
4386       right->user_start = insert_start;
4387       right->user_stop = insert_stop;
4388 
4389       common_listing_allowed = !inhibit_common;
4390     }
4391   else
4392     {
4393       if (inhibit_left || inhibit_right || inhibit_common
4394 	  || delete_start || delete_stop || insert_start || insert_stop)
4395 	error (0, 0, _("options -123RSYZ meaningful only when two inputs"));
4396     }
4397 
4398   /* Do all the crunching.  */
4399 
4400   study_all_inputs ();
4401   prepare_clusters ();
4402   prepare_indirects ();
4403   if (!relist_files)
4404     prepare_mergings ();
4405 
4406   /* Output results.  */
4407 
4408   output_file = stdout;
4409 
4410   if (relist_files)
4411     relist_annotated_files ();
4412   else if (context)
4413     relist_merged_lines (0, 1);
4414   else if (unified)
4415     relist_merged_lines (1, 1);
4416   else if (word_mode)
4417     relist_merged_words ();
4418 
4419   /* Clean up.  */
4420 
4421   exit (exit_status);
4422 }
4423