xref: /openbsd/gnu/usr.bin/texinfo/util/texindex.c (revision f06e4914)
1 /* texindex -- sort TeX index dribble output into an actual index.
2    $Id: texindex.c,v 1.7 2024/08/16 22:58:54 guenther Exp $
3 
4    Copyright (C) 1987, 1991, 1992, 1996, 1997, 1998, 1999, 2000, 2001,
5    2002, 2003, 2004 Free Software Foundation, Inc.
6 
7    This program is free software; you can redistribute it and/or modify
8    it under the terms of the GNU General Public License as published by
9    the Free Software Foundation; either version 2, or (at your option)
10    any later version.
11 
12    This program is distributed in the hope that it will be useful,
13    but WITHOUT ANY WARRANTY; without even the implied warranty of
14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15    GNU General Public License for more details.
16 
17    You should have received a copy of the GNU General Public License
18    along with this program; if not, write to the Free Software
19    Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307. */
20 
21 #include "system.h"
22 #include <getopt.h>
23 
24 static char *program_name = "texindex";
25 
26 #if defined (emacs)
27 #  include "../src/config.h"
28 /* Some s/os.h files redefine these. */
29 #  undef read
30 #  undef close
31 #  undef write
32 #  undef open
33 #endif
34 
35 #if !defined (HAVE_MEMSET)
36 #undef memset
37 #define memset(ptr, ignore, count) bzero (ptr, count)
38 #endif
39 
40 char *mktemp (char *);
41 
42 #if !defined (SEEK_SET)
43 #  define SEEK_SET 0
44 #  define SEEK_CUR 1
45 #  define SEEK_END 2
46 #endif /* !SEEK_SET */
47 
48 struct linebuffer;
49 
50 /* When sorting in core, this structure describes one line
51    and the position and length of its first keyfield.  */
52 struct lineinfo
53 {
54   char *text;           /* The actual text of the line. */
55   union {
56     char *text;         /* The start of the key (for textual comparison). */
57     long number;        /* The numeric value (for numeric comparison). */
58   } key;
59   long keylen;          /* Length of KEY field. */
60 };
61 
62 /* This structure describes a field to use as a sort key. */
63 struct keyfield
64 {
65   int startwords;       /* Number of words to skip. */
66   int startchars;       /* Number of additional chars to skip. */
67   int endwords;         /* Number of words to ignore at end. */
68   int endchars;         /* Ditto for characters of last word. */
69   char ignore_blanks;   /* Non-zero means ignore spaces and tabs. */
70   char fold_case;       /* Non-zero means case doesn't matter. */
71   char reverse;         /* Non-zero means compare in reverse order. */
72   char numeric;         /* Non-zeros means field is ASCII numeric. */
73   char positional;      /* Sort according to file position. */
74   char braced;          /* Count balanced-braced groupings as fields. */
75 };
76 
77 /* Vector of keyfields to use. */
78 struct keyfield keyfields[3];
79 
80 /* Number of keyfields stored in that vector.  */
81 int num_keyfields = 3;
82 
83 /* Vector of input file names, terminated with a null pointer. */
84 char **infiles;
85 
86 /* Vector of corresponding output file names, or NULL, meaning default it
87    (add an `s' to the end). */
88 char **outfiles;
89 
90 /* Length of `infiles'. */
91 int num_infiles;
92 
93 /* Pointer to the array of pointers to lines being sorted. */
94 char **linearray;
95 
96 /* The allocated length of `linearray'. */
97 long nlines;
98 
99 /* Directory to use for temporary files.  On Unix, it ends with a slash.  */
100 char *tempdir;
101 
102 /* Number of last temporary file.  */
103 int tempcount;
104 
105 /* Number of last temporary file already deleted.
106    Temporary files are deleted by `flush_tempfiles' in order of creation.  */
107 int last_deleted_tempcount;
108 
109 /* During in-core sort, this points to the base of the data block
110    which contains all the lines of data.  */
111 char *text_base;
112 
113 /* Initially 0; changed to 1 if we want initials in this index.  */
114 int need_initials;
115 
116 /* Remembers the first initial letter seen in this index, so we can
117    determine whether we need initials in the sorted form.  */
118 char first_initial;
119 
120 /* Additional command switches .*/
121 
122 /* Nonzero means do not delete tempfiles -- for debugging. */
123 int keep_tempfiles;
124 
125 /* Forward declarations of functions in this file. */
126 void decode_command (int argc, char **argv);
127 void sort_in_core (char *infile, int total, char *outfile);
128 void sort_offline (char *infile, off_t total, char *outfile);
129 char **parsefile (char *filename, char **nextline, char *data, long int size);
130 char *find_field (struct keyfield *keyfield, char *str, long int *lengthptr);
131 char *find_pos (char *str, int words, int chars, int ignore_blanks);
132 long find_value (char *start, long int length);
133 char *find_braced_pos (char *str, int words, int chars, int ignore_blanks);
134 char *find_braced_end (char *str);
135 void writelines (char **linearray, int nlines, FILE *ostream);
136 int compare_field (struct keyfield *keyfield, char *start1,
137                    long int length1, long int pos1, char *start2,
138                    long int length2, long int pos2);
139 int compare_full (const void *, const void *);
140 long readline (struct linebuffer *linebuffer, FILE *stream);
141 int merge_files (char **infiles, int nfiles, char *outfile);
142 int merge_direct (char **infiles, int nfiles, char *outfile);
143 void pfatal_with_name (const char *name);
144 void fatal (const char *format, const char *arg);
145 void error (const char *format, const char *arg);
146 char *concat (char *s1, char *s2);
147 void flush_tempfiles (int to_count);
148 
149 #define MAX_IN_CORE_SORT 500000
150 
151 int
main(int argc,char ** argv)152 main (int argc, char **argv)
153 {
154   int i;
155 
156   tempcount = 0;
157   last_deleted_tempcount = 0;
158 
159 #ifdef HAVE_SETLOCALE
160   /* Set locale via LC_ALL.  */
161   setlocale (LC_ALL, "");
162 #endif
163 
164   if (pledge ("stdio rpath wpath cpath tmppath", NULL) == -1)
165     pfatal_with_name ("pledge");
166 
167   /* Set the text message domain.  */
168   bindtextdomain (PACKAGE, LOCALEDIR);
169   textdomain (PACKAGE);
170 
171   /* In case we write to a redirected stdout that fails.  */
172   /* not ready atexit (close_stdout); */
173 
174   /* Describe the kind of sorting to do. */
175   /* The first keyfield uses the first braced field and folds case. */
176   keyfields[0].braced = 1;
177   keyfields[0].fold_case = 1;
178   keyfields[0].endwords = -1;
179   keyfields[0].endchars = -1;
180 
181   /* The second keyfield uses the second braced field, numerically. */
182   keyfields[1].braced = 1;
183   keyfields[1].numeric = 1;
184   keyfields[1].startwords = 1;
185   keyfields[1].endwords = -1;
186   keyfields[1].endchars = -1;
187 
188   /* The third keyfield (which is ignored while discarding duplicates)
189      compares the whole line. */
190   keyfields[2].endwords = -1;
191   keyfields[2].endchars = -1;
192 
193   decode_command (argc, argv);
194 
195   /* Process input files completely, one by one.  */
196 
197   for (i = 0; i < num_infiles; i++)
198     {
199       int desc;
200       off_t ptr;
201       char *outfile;
202       struct stat instat;
203 
204       desc = open (infiles[i], O_RDONLY, 0);
205       if (desc < 0)
206         pfatal_with_name (infiles[i]);
207 
208       if (stat (infiles[i], &instat))
209         pfatal_with_name (infiles[i]);
210       if (S_ISDIR (instat.st_mode))
211         {
212 #ifdef EISDIR
213           errno = EISDIR;
214 #endif
215           pfatal_with_name (infiles[i]);
216         }
217 
218       lseek (desc, (off_t) 0, SEEK_END);
219       ptr = (off_t) lseek (desc, (off_t) 0, SEEK_CUR);
220 
221       close (desc);
222 
223       outfile = outfiles[i];
224       if (!outfile)
225         outfile = concat (infiles[i], "s");
226 
227       need_initials = 0;
228       first_initial = '\0';
229 
230       if (ptr < MAX_IN_CORE_SORT)
231         /* Sort a small amount of data. */
232         sort_in_core (infiles[i], (int)ptr, outfile);
233       else
234         sort_offline (infiles[i], ptr, outfile);
235     }
236 
237   flush_tempfiles (tempcount);
238   xexit (0);
239   return 0; /* Avoid bogus warnings.  */
240 }
241 
242 typedef struct
243 {
244   char *long_name;
245   char *short_name;
246   int *variable_ref;
247   int variable_value;
248   char *arg_name;
249   char *doc_string;
250 } TEXINDEX_OPTION;
251 
252 TEXINDEX_OPTION texindex_options[] = {
253   { "--help", "-h", (int *)NULL, 0, (char *)NULL,
254       N_("display this help and exit") },
255   { "--keep", "-k", &keep_tempfiles, 1, (char *)NULL,
256       N_("keep temporary files around after processing") },
257   { "--no-keep", 0, &keep_tempfiles, 0, (char *)NULL,
258       N_("do not keep temporary files around after processing (default)") },
259   { "--output", "-o", (int *)NULL, 0, "FILE",
260       N_("send output to FILE") },
261   { "--version", (char *)NULL, (int *)NULL, 0, (char *)NULL,
262       N_("display version information and exit") },
263   { (char *)NULL, (char *)NULL, (int *)NULL, 0, (char *)NULL }
264 };
265 
266 void
usage(int result_value)267 usage (int result_value)
268 {
269   register int i;
270   FILE *f = result_value ? stderr : stdout;
271 
272   fprintf (f, _("Usage: %s [OPTION]... FILE...\n"), program_name);
273   fprintf (f, _("Generate a sorted index for each TeX output FILE.\n"));
274   /* Avoid trigraph nonsense.  */
275   fprintf (f,
276 _("Usually FILE... is specified as `foo.%c%c\' for a document `foo.texi'.\n"),
277            '?', '?'); /* avoid trigraph in cat-id-tbl.c */
278   fprintf (f, _("\nOptions:\n"));
279 
280   for (i = 0; texindex_options[i].long_name; i++)
281     {
282       putc (' ', f);
283 
284       if (texindex_options[i].short_name)
285         fprintf (f, "%s, ", texindex_options[i].short_name);
286 
287       fprintf (f, "%s %s",
288                texindex_options[i].long_name,
289                texindex_options[i].arg_name
290                ? texindex_options[i].arg_name : "");
291 
292       fprintf (f, "\t%s\n", _(texindex_options[i].doc_string));
293     }
294   fputs (_("\n\
295 Email bug reports to bug-texinfo@gnu.org,\n\
296 general questions and discussion to help-texinfo@gnu.org.\n\
297 Texinfo home page: http://www.gnu.org/software/texinfo/"), f);
298   fputs ("\n", f);
299 
300   xexit (result_value);
301 }
302 
303 /* Decode the command line arguments to set the parameter variables
304    and set up the vector of keyfields and the vector of input files. */
305 
306 void
decode_command(int argc,char ** argv)307 decode_command (int argc, char **argv)
308 {
309   int arg_index = 1;
310   char **ip;
311   char **op;
312 
313   /* Store default values into parameter variables. */
314 
315   tempdir = getenv ("TMPDIR");
316   if (tempdir == NULL)
317     tempdir = getenv ("TEMP");
318   if (tempdir == NULL)
319     tempdir = getenv ("TMP");
320   if (tempdir == NULL)
321     tempdir = DEFAULT_TMPDIR;
322   else
323     tempdir = concat (tempdir, "/");
324 
325   keep_tempfiles = 0;
326 
327   /* Allocate ARGC input files, which must be enough.  */
328 
329   infiles = (char **) xmalloc (argc * sizeof (char *));
330   outfiles = (char **) xmalloc (argc * sizeof (char *));
331   ip = infiles;
332   op = outfiles;
333 
334   while (arg_index < argc)
335     {
336       char *arg = argv[arg_index++];
337 
338       if (*arg == '-')
339         {
340           if (strcmp (arg, "--version") == 0)
341             {
342               printf ("texindex (GNU %s) %s\n", PACKAGE, VERSION);
343               puts ("");
344               puts ("Copyright (C) 2004 Free Software Foundation, Inc.");
345               printf (_("There is NO warranty.  You may redistribute this software\n\
346 under the terms of the GNU General Public License.\n\
347 For more information about these matters, see the files named COPYING.\n"));
348               xexit (0);
349             }
350           else if ((strcmp (arg, "--keep") == 0) ||
351                    (strcmp (arg, "-k") == 0))
352             {
353               keep_tempfiles = 1;
354             }
355           else if ((strcmp (arg, "--help") == 0) ||
356                    (strcmp (arg, "-h") == 0))
357             {
358               usage (0);
359             }
360           else if ((strcmp (arg, "--output") == 0) ||
361                    (strcmp (arg, "-o") == 0))
362             {
363               if (argv[arg_index] != (char *)NULL)
364                 {
365                   arg_index++;
366                   if (op > outfiles)
367                     *(op - 1) = argv[arg_index];
368                 }
369               else
370                 usage (1);
371             }
372           else
373             usage (1);
374         }
375       else
376         {
377           *ip++ = arg;
378           *op++ = (char *)NULL;
379         }
380     }
381 
382   /* Record number of keyfields and terminate list of filenames. */
383   num_infiles = ip - infiles;
384   *ip = (char *)NULL;
385   if (num_infiles == 0)
386     usage (1);
387 }
388 
389 /* Return a name for temporary file COUNT. */
390 
391 static char *
maketempname(int count)392 maketempname (int count)
393 {
394   static char *tempbase = NULL;
395   char tempsuffix[10];
396   char *name;
397   int fd;
398 
399   if (!tempbase)
400     {
401       int fd;
402       tempbase = concat (tempdir, "txidxXXXXXX");
403 
404       fd = mkstemp (tempbase);
405       if (fd == -1)
406         pfatal_with_name (tempbase);
407     }
408 
409   sprintf (tempsuffix, ".%d", count);
410   name =  concat (tempbase, tempsuffix);
411 
412   fd = open (name, O_CREAT|O_EXCL|O_WRONLY, 0666);
413   if (fd == -1)
414     return NULL;
415   else
416     {
417       close(fd);
418       return name;
419     }
420 }
421 
422 
423 /* Delete all temporary files up to TO_COUNT. */
424 
425 void
flush_tempfiles(int to_count)426 flush_tempfiles (int to_count)
427 {
428   if (keep_tempfiles)
429     return;
430   while (last_deleted_tempcount < to_count)
431     unlink (maketempname (++last_deleted_tempcount));
432 }
433 
434 
435 /* Compare LINE1 and LINE2 according to the specified set of keyfields. */
436 
437 int
compare_full(const void * p1,const void * p2)438 compare_full (const void *p1, const void *p2)
439 {
440   char **line1 = (char **) p1;
441   char **line2 = (char **) p2;
442   int i;
443 
444   /* Compare using the first keyfield;
445      if that does not distinguish the lines, try the second keyfield;
446      and so on. */
447 
448   for (i = 0; i < num_keyfields; i++)
449     {
450       long length1, length2;
451       char *start1 = find_field (&keyfields[i], *line1, &length1);
452       char *start2 = find_field (&keyfields[i], *line2, &length2);
453       int tem = compare_field (&keyfields[i], start1, length1,
454                                *line1 - text_base,
455                                start2, length2, *line2 - text_base);
456       if (tem)
457         {
458           if (keyfields[i].reverse)
459             return -tem;
460           return tem;
461         }
462     }
463 
464   return 0;                     /* Lines match exactly. */
465 }
466 
467 /* Compare LINE1 and LINE2, described by structures
468    in which the first keyfield is identified in advance.
469    For positional sorting, assumes that the order of the lines in core
470    reflects their nominal order.  */
471 int
compare_prepared(const void * p1,const void * p2)472 compare_prepared (const void *p1, const void *p2)
473 {
474   struct lineinfo *line1 = (struct lineinfo *) p1;
475   struct lineinfo *line2 = (struct lineinfo *) p2;
476   int i;
477   int tem;
478   char *text1, *text2;
479 
480   /* Compare using the first keyfield, which has been found for us already. */
481   if (keyfields->positional)
482     {
483       if (line1->text - text_base > line2->text - text_base)
484         tem = 1;
485       else
486         tem = -1;
487     }
488   else if (keyfields->numeric)
489     tem = line1->key.number - line2->key.number;
490   else
491     tem = compare_field (keyfields, line1->key.text, line1->keylen, 0,
492                          line2->key.text, line2->keylen, 0);
493   if (tem)
494     {
495       if (keyfields->reverse)
496         return -tem;
497       return tem;
498     }
499 
500   text1 = line1->text;
501   text2 = line2->text;
502 
503   /* Compare using the second keyfield;
504      if that does not distinguish the lines, try the third keyfield;
505      and so on. */
506 
507   for (i = 1; i < num_keyfields; i++)
508     {
509       long length1, length2;
510       char *start1 = find_field (&keyfields[i], text1, &length1);
511       char *start2 = find_field (&keyfields[i], text2, &length2);
512       int tem = compare_field (&keyfields[i], start1, length1,
513                                text1 - text_base,
514                                start2, length2, text2 - text_base);
515       if (tem)
516         {
517           if (keyfields[i].reverse)
518             return -tem;
519           return tem;
520         }
521     }
522 
523   return 0;                     /* Lines match exactly. */
524 }
525 
526 /* Like compare_full but more general.
527    You can pass any strings, and you can say how many keyfields to use.
528    POS1 and POS2 should indicate the nominal positional ordering of
529    the two lines in the input.  */
530 
531 int
compare_general(char * str1,char * str2,long int pos1,long int pos2,int use_keyfields)532 compare_general (char *str1, char *str2, long int pos1, long int pos2, int use_keyfields)
533 {
534   int i;
535 
536   /* Compare using the first keyfield;
537      if that does not distinguish the lines, try the second keyfield;
538      and so on. */
539 
540   for (i = 0; i < use_keyfields; i++)
541     {
542       long length1, length2;
543       char *start1 = find_field (&keyfields[i], str1, &length1);
544       char *start2 = find_field (&keyfields[i], str2, &length2);
545       int tem = compare_field (&keyfields[i], start1, length1, pos1,
546                                start2, length2, pos2);
547       if (tem)
548         {
549           if (keyfields[i].reverse)
550             return -tem;
551           return tem;
552         }
553     }
554 
555   return 0;                     /* Lines match exactly. */
556 }
557 
558 /* Find the start and length of a field in STR according to KEYFIELD.
559    A pointer to the starting character is returned, and the length
560    is stored into the int that LENGTHPTR points to.  */
561 
562 char *
find_field(struct keyfield * keyfield,char * str,long int * lengthptr)563 find_field (struct keyfield *keyfield, char *str, long int *lengthptr)
564 {
565   char *start;
566   char *end;
567   char *(*fun) (char *, int, int, int);
568 
569   if (keyfield->braced)
570     fun = find_braced_pos;
571   else
572     fun = find_pos;
573 
574   start = (*fun) (str, keyfield->startwords, keyfield->startchars,
575                   keyfield->ignore_blanks);
576   if (keyfield->endwords < 0)
577     {
578       if (keyfield->braced)
579         end = find_braced_end (start);
580       else
581         {
582           end = start;
583           while (*end && *end != '\n')
584             end++;
585         }
586     }
587   else
588     {
589       end = (*fun) (str, keyfield->endwords, keyfield->endchars, 0);
590       if (end - str < start - str)
591         end = start;
592     }
593   *lengthptr = end - start;
594   return start;
595 }
596 
597 /* Return a pointer to a specified place within STR,
598    skipping (from the beginning) WORDS words and then CHARS chars.
599    If IGNORE_BLANKS is nonzero, we skip all blanks
600    after finding the specified word.  */
601 
602 char *
find_pos(char * str,int words,int chars,int ignore_blanks)603 find_pos (char *str, int words, int chars, int ignore_blanks)
604 {
605   int i;
606   char *p = str;
607 
608   for (i = 0; i < words; i++)
609     {
610       char c;
611       /* Find next bunch of nonblanks and skip them. */
612       while ((c = *p) == ' ' || c == '\t')
613         p++;
614       while ((c = *p) && c != '\n' && !(c == ' ' || c == '\t'))
615         p++;
616       if (!*p || *p == '\n')
617         return p;
618     }
619 
620   while (*p == ' ' || *p == '\t')
621     p++;
622 
623   for (i = 0; i < chars; i++)
624     {
625       if (!*p || *p == '\n')
626         break;
627       p++;
628     }
629   return p;
630 }
631 
632 /* Like find_pos but assumes that each field is surrounded by braces
633    and that braces within fields are balanced. */
634 
635 char *
find_braced_pos(char * str,int words,int chars,int ignore_blanks)636 find_braced_pos (char *str, int words, int chars, int ignore_blanks)
637 {
638   int i;
639   int bracelevel;
640   char *p = str;
641   char c;
642 
643   for (i = 0; i < words; i++)
644     {
645       bracelevel = 1;
646       while ((c = *p++) != '{' && c != '\n' && c)
647         /* Do nothing. */ ;
648       if (c != '{')
649         return p - 1;
650       while (bracelevel)
651         {
652           c = *p++;
653           if (c == '{')
654             bracelevel++;
655           if (c == '}')
656             bracelevel--;
657           if (c == 0 || c == '\n')
658             return p - 1;
659         }
660     }
661 
662   while ((c = *p++) != '{' && c != '\n' && c)
663     /* Do nothing. */ ;
664 
665   if (c != '{')
666     return p - 1;
667 
668   if (ignore_blanks)
669     while ((c = *p) == ' ' || c == '\t')
670       p++;
671 
672   for (i = 0; i < chars; i++)
673     {
674       if (!*p || *p == '\n')
675         break;
676       p++;
677     }
678   return p;
679 }
680 
681 /* Find the end of the balanced-brace field which starts at STR.
682    The position returned is just before the closing brace. */
683 
684 char *
find_braced_end(char * str)685 find_braced_end (char *str)
686 {
687   int bracelevel;
688   char *p = str;
689   char c;
690 
691   bracelevel = 1;
692   while (bracelevel)
693     {
694       c = *p++;
695       if (c == '{')
696         bracelevel++;
697       if (c == '}')
698         bracelevel--;
699       if (c == 0 || c == '\n')
700         return p - 1;
701     }
702   return p - 1;
703 }
704 
705 long
find_value(char * start,long int length)706 find_value (char *start, long int length)
707 {
708   while (length != 0L)
709     {
710       if (isdigit (*start))
711         return atol (start);
712       length--;
713       start++;
714     }
715   return 0l;
716 }
717 
718 /* Vector used to translate characters for comparison.
719    This is how we make all alphanumerics follow all else,
720    and ignore case in the first sorting.  */
721 int char_order[256];
722 
723 void
init_char_order(void)724 init_char_order (void)
725 {
726   int i;
727   for (i = 1; i < 256; i++)
728     char_order[i] = i;
729 
730   for (i = '0'; i <= '9'; i++)
731     char_order[i] += 512;
732 
733   for (i = 'a'; i <= 'z'; i++)
734     {
735       char_order[i] = 512 + i;
736       char_order[i + 'A' - 'a'] = 512 + i;
737     }
738 }
739 
740 /* Compare two fields (each specified as a start pointer and a character count)
741    according to KEYFIELD.
742    The sign of the value reports the relation between the fields. */
743 
744 int
compare_field(struct keyfield * keyfield,char * start1,long int length1,long int pos1,char * start2,long int length2,long int pos2)745 compare_field (struct keyfield *keyfield, char *start1, long int length1,
746                long int pos1, char *start2, long int length2, long int pos2)
747 {
748   if (keyfields->positional)
749     {
750       if (pos1 > pos2)
751         return 1;
752       else
753         return -1;
754     }
755   if (keyfield->numeric)
756     {
757       long value = find_value (start1, length1) - find_value (start2, length2);
758       if (value > 0)
759         return 1;
760       if (value < 0)
761         return -1;
762       return 0;
763     }
764   else
765     {
766       char *p1 = start1;
767       char *p2 = start2;
768       char *e1 = start1 + length1;
769       char *e2 = start2 + length2;
770 
771       while (1)
772         {
773           int c1, c2;
774 
775           if (p1 == e1)
776             c1 = 0;
777           else
778             c1 = *p1++;
779           if (p2 == e2)
780             c2 = 0;
781           else
782             c2 = *p2++;
783 
784           if (char_order[c1] != char_order[c2])
785             return char_order[c1] - char_order[c2];
786           if (!c1)
787             break;
788         }
789 
790       /* Strings are equal except possibly for case.  */
791       p1 = start1;
792       p2 = start2;
793       while (1)
794         {
795           int c1, c2;
796 
797           if (p1 == e1)
798             c1 = 0;
799           else
800             c1 = *p1++;
801           if (p2 == e2)
802             c2 = 0;
803           else
804             c2 = *p2++;
805 
806           if (c1 != c2)
807             /* Reverse sign here so upper case comes out last.  */
808             return c2 - c1;
809           if (!c1)
810             break;
811         }
812 
813       return 0;
814     }
815 }
816 
817 /* A `struct linebuffer' is a structure which holds a line of text.
818    `readline' reads a line from a stream into a linebuffer
819    and works regardless of the length of the line.  */
820 
821 struct linebuffer
822 {
823   long size;
824   char *buffer;
825 };
826 
827 /* Initialize LINEBUFFER for use. */
828 
829 void
initbuffer(struct linebuffer * linebuffer)830 initbuffer (struct linebuffer *linebuffer)
831 {
832   linebuffer->size = 200;
833   linebuffer->buffer = (char *) xmalloc (200);
834 }
835 
836 /* Read a line of text from STREAM into LINEBUFFER.
837    Return the length of the line.  */
838 
839 long
readline(struct linebuffer * linebuffer,FILE * stream)840 readline (struct linebuffer *linebuffer, FILE *stream)
841 {
842   char *buffer = linebuffer->buffer;
843   char *p = linebuffer->buffer;
844   char *end = p + linebuffer->size;
845 
846   while (1)
847     {
848       int c = getc (stream);
849       if (p == end)
850         {
851           buffer = (char *) xrealloc (buffer, linebuffer->size *= 2);
852           p += buffer - linebuffer->buffer;
853           end += buffer - linebuffer->buffer;
854           linebuffer->buffer = buffer;
855         }
856       if (c < 0 || c == '\n')
857         {
858           *p = 0;
859           break;
860         }
861       *p++ = c;
862     }
863 
864   return p - buffer;
865 }
866 
867 /* Sort an input file too big to sort in core.  */
868 
869 void
sort_offline(char * infile,off_t total,char * outfile)870 sort_offline (char *infile, off_t total, char *outfile)
871 {
872   /* More than enough. */
873   int ntemps = 2 * (total + MAX_IN_CORE_SORT - 1) / MAX_IN_CORE_SORT;
874   char **tempfiles = (char **) xmalloc (ntemps * sizeof (char *));
875   FILE *istream = fopen (infile, "r");
876   int i;
877   struct linebuffer lb;
878   long linelength;
879   int failure = 0;
880 
881   initbuffer (&lb);
882 
883   /* Read in one line of input data.  */
884 
885   linelength = readline (&lb, istream);
886 
887   if (lb.buffer[0] != '\\' && lb.buffer[0] != '@')
888     {
889       error (_("%s: not a texinfo index file"), infile);
890       return;
891     }
892 
893   /* Split up the input into `ntemps' temporary files, or maybe fewer,
894      and put the new files' names into `tempfiles' */
895 
896   for (i = 0; i < ntemps; i++)
897     {
898       char *outname = maketempname (++tempcount);
899       FILE *ostream;
900       long tempsize = 0;
901 
902       if (!outname)
903         pfatal_with_name("temporary file");
904       ostream = fopen (outname, "w");
905       if (!outname || !ostream)
906         pfatal_with_name (outname);
907       tempfiles[i] = outname;
908 
909       /* Copy lines into this temp file as long as it does not make file
910          "too big" or until there are no more lines.  */
911 
912       while (tempsize + linelength + 1 <= MAX_IN_CORE_SORT)
913         {
914           tempsize += linelength + 1;
915           fputs (lb.buffer, ostream);
916           putc ('\n', ostream);
917 
918           /* Read another line of input data.  */
919 
920           linelength = readline (&lb, istream);
921           if (!linelength && feof (istream))
922             break;
923 
924           if (lb.buffer[0] != '\\' && lb.buffer[0] != '@')
925             {
926               error (_("%s: not a texinfo index file"), infile);
927               failure = 1;
928               goto fail;
929             }
930         }
931       fclose (ostream);
932       if (feof (istream))
933         break;
934     }
935 
936   free (lb.buffer);
937 
938 fail:
939   /* Record number of temp files we actually needed.  */
940 
941   ntemps = i;
942 
943   /* Sort each tempfile into another tempfile.
944     Delete the first set of tempfiles and put the names of the second
945     into `tempfiles'. */
946 
947   for (i = 0; i < ntemps; i++)
948     {
949       char *newtemp = maketempname (++tempcount);
950       sort_in_core (tempfiles[i], MAX_IN_CORE_SORT, newtemp);
951       if (!keep_tempfiles)
952         unlink (tempfiles[i]);
953       tempfiles[i] = newtemp;
954     }
955 
956   if (failure)
957     return;
958 
959   /* Merge the tempfiles together and indexify. */
960 
961   merge_files (tempfiles, ntemps, outfile);
962 }
963 
964 /* Sort INFILE, whose size is TOTAL,
965    assuming that is small enough to be done in-core,
966    then indexify it and send the output to OUTFILE (or to stdout).  */
967 
968 void
sort_in_core(char * infile,int total,char * outfile)969 sort_in_core (char *infile, int total, char *outfile)
970 {
971   char **nextline;
972   char *data = (char *) xmalloc (total + 1);
973   char *file_data;
974   long file_size;
975   int i;
976   FILE *ostream = stdout;
977   struct lineinfo *lineinfo;
978 
979   /* Read the contents of the file into the moby array `data'. */
980 
981   int desc = open (infile, O_RDONLY, 0);
982 
983   if (desc < 0)
984     fatal (_("failure reopening %s"), infile);
985   for (file_size = 0;;)
986     {
987       i = read (desc, data + file_size, total - file_size);
988       if (i <= 0)
989         break;
990       file_size += i;
991     }
992   file_data = data;
993   data[file_size] = 0;
994 
995   close (desc);
996 
997   if (file_size > 0 && data[0] != '\\' && data[0] != '@')
998     {
999       error (_("%s: not a texinfo index file"), infile);
1000       return;
1001     }
1002 
1003   init_char_order ();
1004 
1005   /* Sort routines want to know this address. */
1006 
1007   text_base = data;
1008 
1009   /* Create the array of pointers to lines, with a default size
1010      frequently enough.  */
1011 
1012   nlines = total / 50;
1013   if (!nlines)
1014     nlines = 2;
1015   linearray = (char **) xmalloc (nlines * sizeof (char *));
1016 
1017   /* `nextline' points to the next free slot in this array.
1018      `nlines' is the allocated size.  */
1019 
1020   nextline = linearray;
1021 
1022   /* Parse the input file's data, and make entries for the lines.  */
1023 
1024   nextline = parsefile (infile, nextline, file_data, file_size);
1025   if (nextline == 0)
1026     {
1027       error (_("%s: not a texinfo index file"), infile);
1028       return;
1029     }
1030 
1031   /* Sort the lines. */
1032 
1033   /* If we have enough space, find the first keyfield of each line in advance.
1034      Make a `struct lineinfo' for each line, which records the keyfield
1035      as well as the line, and sort them.  */
1036 
1037   lineinfo = malloc ((nextline - linearray) * sizeof (struct lineinfo));
1038 
1039   if (lineinfo)
1040     {
1041       struct lineinfo *lp;
1042       char **p;
1043 
1044       for (lp = lineinfo, p = linearray; p != nextline; lp++, p++)
1045         {
1046           lp->text = *p;
1047           lp->key.text = find_field (keyfields, *p, &lp->keylen);
1048           if (keyfields->numeric)
1049             lp->key.number = find_value (lp->key.text, lp->keylen);
1050         }
1051 
1052       qsort (lineinfo, nextline - linearray, sizeof (struct lineinfo),
1053              compare_prepared);
1054 
1055       for (lp = lineinfo, p = linearray; p != nextline; lp++, p++)
1056         *p = lp->text;
1057 
1058       free (lineinfo);
1059     }
1060   else
1061     qsort (linearray, nextline - linearray, sizeof (char *), compare_full);
1062 
1063   /* Open the output file. */
1064 
1065   if (outfile)
1066     {
1067       ostream = fopen (outfile, "w");
1068       if (!ostream)
1069         pfatal_with_name (outfile);
1070     }
1071 
1072   writelines (linearray, nextline - linearray, ostream);
1073   if (outfile)
1074     fclose (ostream);
1075 
1076   free (linearray);
1077   free (data);
1078 }
1079 
1080 /* Parse an input string in core into lines.
1081    DATA is the input string, and SIZE is its length.
1082    Data goes in LINEARRAY starting at NEXTLINE.
1083    The value returned is the first entry in LINEARRAY still unused.
1084    Value 0 means input file contents are invalid.  */
1085 
1086 char **
parsefile(char * filename,char ** nextline,char * data,long int size)1087 parsefile (char *filename, char **nextline, char *data, long int size)
1088 {
1089   char *p, *end;
1090   char **line = nextline;
1091 
1092   p = data;
1093   end = p + size;
1094   *end = 0;
1095 
1096   while (p != end)
1097     {
1098       if (p[0] != '\\' && p[0] != '@')
1099         return 0;
1100 
1101       *line = p;
1102 
1103       /* Find the first letter of the first field of this line.  If it
1104          is different from the first letter of the first field of the
1105          first line, we need initial headers in the output index.  */
1106       while (*p && *p != '{')
1107         p++;
1108       if (p == end)
1109         return 0;
1110       p++;
1111       if (first_initial)
1112         {
1113           if (first_initial != toupper (*p))
1114             need_initials = 1;
1115         }
1116       else
1117         first_initial = toupper (*p);
1118 
1119       while (*p && *p != '\n')
1120         p++;
1121       if (p != end)
1122         p++;
1123 
1124       line++;
1125       if (line == linearray + nlines)
1126         {
1127           char **old = linearray;
1128           linearray = xrealloc (linearray, sizeof (char *) * (nlines *= 4));
1129           line += linearray - old;
1130         }
1131     }
1132 
1133   return line;
1134 }
1135 
1136 /* Indexification is a filter applied to the sorted lines
1137    as they are being written to the output file.
1138    Multiple entries for the same name, with different page numbers,
1139    get combined into a single entry with multiple page numbers.
1140    The first braced field, which is used for sorting, is discarded.
1141    However, its first character is examined, folded to lower case,
1142    and if it is different from that in the previous line fed to us
1143    a \initial line is written with one argument, the new initial.
1144 
1145    If an entry has four braced fields, then the second and third
1146    constitute primary and secondary names.
1147    In this case, each change of primary name
1148    generates a \primary line which contains only the primary name,
1149    and in between these are \secondary lines which contain
1150    just a secondary name and page numbers. */
1151 
1152 /* The last primary name we wrote a \primary entry for.
1153    If only one level of indexing is being done, this is the last name seen. */
1154 char *lastprimary;
1155 /* Length of storage allocated for lastprimary. */
1156 int lastprimarylength;
1157 
1158 /* Similar, for the secondary name. */
1159 char *lastsecondary;
1160 int lastsecondarylength;
1161 
1162 /* Zero if we are not in the middle of writing an entry.
1163    One if we have written the beginning of an entry but have not
1164    yet written any page numbers into it.
1165    Greater than one if we have written the beginning of an entry
1166    plus at least one page number. */
1167 int pending;
1168 
1169 /* The initial (for sorting purposes) of the last primary entry written.
1170    When this changes, a \initial {c} line is written */
1171 
1172 char *lastinitial;
1173 
1174 int lastinitiallength;
1175 
1176 /* When we need a string of length 1 for the value of lastinitial,
1177    store it here.  */
1178 
1179 char lastinitial1[2];
1180 
1181 /* Initialize static storage for writing an index. */
1182 
1183 void
init_index(void)1184 init_index (void)
1185 {
1186   pending = 0;
1187   lastinitial = lastinitial1;
1188   lastinitial1[0] = 0;
1189   lastinitial1[1] = 0;
1190   lastinitiallength = 0;
1191   lastprimarylength = 100;
1192   lastprimary = (char *) xmalloc (lastprimarylength + 1);
1193   memset (lastprimary, '\0', lastprimarylength + 1);
1194   lastsecondarylength = 100;
1195   lastsecondary = (char *) xmalloc (lastsecondarylength + 1);
1196   memset (lastsecondary, '\0', lastsecondarylength + 1);
1197 }
1198 
1199 /* Indexify.  Merge entries for the same name,
1200    insert headers for each initial character, etc.  */
1201 
1202 void
indexify(char * line,FILE * ostream)1203 indexify (char *line, FILE *ostream)
1204 {
1205   char *primary, *secondary, *pagenumber;
1206   int primarylength, secondarylength = 0, pagelength;
1207   int nosecondary;
1208   int initiallength;
1209   char *initial;
1210   char initial1[2];
1211   register char *p;
1212 
1213   /* First, analyze the parts of the entry fed to us this time. */
1214 
1215   p = find_braced_pos (line, 0, 0, 0);
1216   if (*p == '{')
1217     {
1218       initial = p;
1219       /* Get length of inner pair of braces starting at `p',
1220          including that inner pair of braces.  */
1221       initiallength = find_braced_end (p + 1) + 1 - p;
1222     }
1223   else
1224     {
1225       initial = initial1;
1226       initial1[0] = toupper (*p);
1227       initial1[1] = 0;
1228       initiallength = 1;
1229     }
1230 
1231   pagenumber = find_braced_pos (line, 1, 0, 0);
1232   pagelength = find_braced_end (pagenumber) - pagenumber;
1233   if (pagelength == 0)
1234     fatal (_("No page number in %s"), line);
1235 
1236   primary = find_braced_pos (line, 2, 0, 0);
1237   primarylength = find_braced_end (primary) - primary;
1238 
1239   secondary = find_braced_pos (line, 3, 0, 0);
1240   nosecondary = !*secondary;
1241   if (!nosecondary)
1242     secondarylength = find_braced_end (secondary) - secondary;
1243 
1244   /* If the primary is different from before, make a new primary entry. */
1245   if (strncmp (primary, lastprimary, primarylength))
1246     {
1247       /* Close off current secondary entry first, if one is open. */
1248       if (pending)
1249         {
1250           fputs ("}\n", ostream);
1251           pending = 0;
1252         }
1253 
1254       /* If this primary has a different initial, include an entry for
1255          the initial. */
1256       if (need_initials &&
1257           (initiallength != lastinitiallength ||
1258            strncmp (initial, lastinitial, initiallength)))
1259         {
1260           fprintf (ostream, "\\initial {");
1261           fwrite (initial, 1, initiallength, ostream);
1262           fputs ("}\n", ostream);
1263           if (initial == initial1)
1264             {
1265               lastinitial = lastinitial1;
1266               *lastinitial1 = *initial1;
1267             }
1268           else
1269             {
1270               lastinitial = initial;
1271             }
1272           lastinitiallength = initiallength;
1273         }
1274 
1275       /* Make the entry for the primary.  */
1276       if (nosecondary)
1277         fputs ("\\entry {", ostream);
1278       else
1279         fputs ("\\primary {", ostream);
1280       fwrite (primary, primarylength, 1, ostream);
1281       if (nosecondary)
1282         {
1283           fputs ("}{", ostream);
1284           pending = 1;
1285         }
1286       else
1287         fputs ("}\n", ostream);
1288 
1289       /* Record name of most recent primary. */
1290       if (lastprimarylength < primarylength)
1291         {
1292           lastprimarylength = primarylength + 100;
1293           lastprimary = (char *) xrealloc (lastprimary,
1294                                            1 + lastprimarylength);
1295         }
1296       strncpy (lastprimary, primary, primarylength);
1297       lastprimary[primarylength] = 0;
1298 
1299       /* There is no current secondary within this primary, now. */
1300       lastsecondary[0] = 0;
1301     }
1302 
1303   /* Should not have an entry with no subtopic following one with a
1304      subtopic. */
1305 
1306   if (nosecondary && *lastsecondary)
1307     error (_("entry %s follows an entry with a secondary name"), line);
1308 
1309   /* Start a new secondary entry if necessary. */
1310   if (!nosecondary && strncmp (secondary, lastsecondary, secondarylength))
1311     {
1312       if (pending)
1313         {
1314           fputs ("}\n", ostream);
1315           pending = 0;
1316         }
1317 
1318       /* Write the entry for the secondary.  */
1319       fputs ("\\secondary {", ostream);
1320       fwrite (secondary, secondarylength, 1, ostream);
1321       fputs ("}{", ostream);
1322       pending = 1;
1323 
1324       /* Record name of most recent secondary. */
1325       if (lastsecondarylength < secondarylength)
1326         {
1327           lastsecondarylength = secondarylength + 100;
1328           lastsecondary = (char *) xrealloc (lastsecondary,
1329                                              1 + lastsecondarylength);
1330         }
1331       strncpy (lastsecondary, secondary, secondarylength);
1332       lastsecondary[secondarylength] = 0;
1333     }
1334 
1335   /* Here to add one more page number to the current entry. */
1336   if (pending++ != 1)
1337     fputs (", ", ostream);  /* Punctuate first, if this is not the first. */
1338   fwrite (pagenumber, pagelength, 1, ostream);
1339 }
1340 
1341 /* Close out any unfinished output entry. */
1342 
1343 void
finish_index(FILE * ostream)1344 finish_index (FILE *ostream)
1345 {
1346   if (pending)
1347     fputs ("}\n", ostream);
1348   free (lastprimary);
1349   free (lastsecondary);
1350 }
1351 
1352 /* Copy the lines in the sorted order.
1353    Each line is copied out of the input file it was found in. */
1354 
1355 void
writelines(char ** linearray,int nlines,FILE * ostream)1356 writelines (char **linearray, int nlines, FILE *ostream)
1357 {
1358   char **stop_line = linearray + nlines;
1359   char **next_line;
1360 
1361   init_index ();
1362 
1363   /* Output the text of the lines, and free the buffer space. */
1364 
1365   for (next_line = linearray; next_line != stop_line; next_line++)
1366     {
1367       /* If -u was specified, output the line only if distinct from
1368          previous one.  */
1369       if (next_line == linearray
1370       /* Compare previous line with this one, using only the
1371          explicitly specd keyfields. */
1372           || compare_general (*(next_line - 1), *next_line, 0L, 0L,
1373                               num_keyfields - 1))
1374         {
1375           char *p = *next_line;
1376           char c;
1377 
1378           while ((c = *p++) && c != '\n')
1379             /* Do nothing. */ ;
1380           *(p - 1) = 0;
1381           indexify (*next_line, ostream);
1382         }
1383     }
1384 
1385   finish_index (ostream);
1386 }
1387 
1388 /* Assume (and optionally verify) that each input file is sorted;
1389    merge them and output the result.
1390    Returns nonzero if any input file fails to be sorted.
1391 
1392    This is the high-level interface that can handle an unlimited
1393    number of files.  */
1394 
1395 #define MAX_DIRECT_MERGE 10
1396 
1397 int
merge_files(char ** infiles,int nfiles,char * outfile)1398 merge_files (char **infiles, int nfiles, char *outfile)
1399 {
1400   char **tempfiles;
1401   int ntemps;
1402   int i;
1403   int value = 0;
1404   int start_tempcount = tempcount;
1405 
1406   if (nfiles <= MAX_DIRECT_MERGE)
1407     return merge_direct (infiles, nfiles, outfile);
1408 
1409   /* Merge groups of MAX_DIRECT_MERGE input files at a time,
1410      making a temporary file to hold each group's result.  */
1411 
1412   ntemps = (nfiles + MAX_DIRECT_MERGE - 1) / MAX_DIRECT_MERGE;
1413   tempfiles = (char **) xmalloc (ntemps * sizeof (char *));
1414   for (i = 0; i < ntemps; i++)
1415     {
1416       int nf = MAX_DIRECT_MERGE;
1417       if (i + 1 == ntemps)
1418         nf = nfiles - i * MAX_DIRECT_MERGE;
1419       tempfiles[i] = maketempname (++tempcount);
1420       if (!tempfiles[i])
1421         pfatal_with_name("temp file");
1422       value |= merge_direct (&infiles[i * MAX_DIRECT_MERGE], nf, tempfiles[i]);
1423     }
1424 
1425   /* All temporary files that existed before are no longer needed
1426      since their contents have been merged into our new tempfiles.
1427      So delete them.  */
1428   flush_tempfiles (start_tempcount);
1429 
1430   /* Now merge the temporary files we created.  */
1431 
1432   merge_files (tempfiles, ntemps, outfile);
1433 
1434   free (tempfiles);
1435 
1436   return value;
1437 }
1438 
1439 /* Assume (and optionally verify) that each input file is sorted;
1440    merge them and output the result.
1441    Returns nonzero if any input file fails to be sorted.
1442 
1443    This version of merging will not work if the number of
1444    input files gets too high.  Higher level functions
1445    use it only with a bounded number of input files.  */
1446 
1447 int
merge_direct(char ** infiles,int nfiles,char * outfile)1448 merge_direct (char **infiles, int nfiles, char *outfile)
1449 {
1450   struct linebuffer *lb1, *lb2;
1451   struct linebuffer **thisline, **prevline;
1452   FILE **streams;
1453   int i;
1454   int nleft;
1455   int lossage = 0;
1456   int *file_lossage;
1457   struct linebuffer *prev_out = 0;
1458   FILE *ostream = stdout;
1459 
1460   if (outfile)
1461     {
1462       ostream = fopen (outfile, "w");
1463     }
1464   if (!ostream)
1465     pfatal_with_name (outfile);
1466 
1467   init_index ();
1468 
1469   if (nfiles == 0)
1470     {
1471       if (outfile)
1472         fclose (ostream);
1473       return 0;
1474     }
1475 
1476   /* For each file, make two line buffers.  Also, for each file, there
1477      is an element of `thisline' which points at any time to one of the
1478      file's two buffers, and an element of `prevline' which points to
1479      the other buffer.  `thisline' is supposed to point to the next
1480      available line from the file, while `prevline' holds the last file
1481      line used, which is remembered so that we can verify that the file
1482      is properly sorted. */
1483 
1484   /* lb1 and lb2 contain one buffer each per file. */
1485   lb1 = (struct linebuffer *) xmalloc (nfiles * sizeof (struct linebuffer));
1486   lb2 = (struct linebuffer *) xmalloc (nfiles * sizeof (struct linebuffer));
1487 
1488   /* thisline[i] points to the linebuffer holding the next available
1489      line in file i, or is zero if there are no lines left in that file.  */
1490   thisline = (struct linebuffer **)
1491     xmalloc (nfiles * sizeof (struct linebuffer *));
1492   /* prevline[i] points to the linebuffer holding the last used line
1493      from file i.  This is just for verifying that file i is properly
1494      sorted.  */
1495   prevline = (struct linebuffer **)
1496     xmalloc (nfiles * sizeof (struct linebuffer *));
1497   /* streams[i] holds the input stream for file i.  */
1498   streams = (FILE **) xmalloc (nfiles * sizeof (FILE *));
1499   /* file_lossage[i] is nonzero if we already know file i is not
1500      properly sorted.  */
1501   file_lossage = (int *) xmalloc (nfiles * sizeof (int));
1502 
1503   /* Allocate and initialize all that storage. */
1504 
1505   for (i = 0; i < nfiles; i++)
1506     {
1507       initbuffer (&lb1[i]);
1508       initbuffer (&lb2[i]);
1509       thisline[i] = &lb1[i];
1510       prevline[i] = &lb2[i];
1511       file_lossage[i] = 0;
1512       streams[i] = fopen (infiles[i], "r");
1513       if (!streams[i])
1514         pfatal_with_name (infiles[i]);
1515 
1516       readline (thisline[i], streams[i]);
1517     }
1518 
1519   /* Keep count of number of files not at eof. */
1520   nleft = nfiles;
1521 
1522   while (nleft)
1523     {
1524       struct linebuffer *best = 0;
1525       struct linebuffer *exch;
1526       int bestfile = -1;
1527       int i;
1528 
1529       /* Look at the next avail line of each file; choose the least one.  */
1530 
1531       for (i = 0; i < nfiles; i++)
1532         {
1533           if (thisline[i] &&
1534               (!best ||
1535                0 < compare_general (best->buffer, thisline[i]->buffer,
1536                                  (long) bestfile, (long) i, num_keyfields)))
1537             {
1538               best = thisline[i];
1539               bestfile = i;
1540             }
1541         }
1542 
1543       /* Output that line, unless it matches the previous one and we
1544          don't want duplicates. */
1545 
1546       if (!(prev_out &&
1547             !compare_general (prev_out->buffer,
1548                               best->buffer, 0L, 1L, num_keyfields - 1)))
1549         indexify (best->buffer, ostream);
1550       prev_out = best;
1551 
1552       /* Now make the line the previous of its file, and fetch a new
1553          line from that file.  */
1554 
1555       exch = prevline[bestfile];
1556       prevline[bestfile] = thisline[bestfile];
1557       thisline[bestfile] = exch;
1558 
1559       while (1)
1560         {
1561           /* If the file has no more, mark it empty. */
1562 
1563           if (feof (streams[bestfile]))
1564             {
1565               thisline[bestfile] = 0;
1566               /* Update the number of files still not empty. */
1567               nleft--;
1568               break;
1569             }
1570           readline (thisline[bestfile], streams[bestfile]);
1571           if (thisline[bestfile]->buffer[0] || !feof (streams[bestfile]))
1572             break;
1573         }
1574     }
1575 
1576   finish_index (ostream);
1577 
1578   /* Free all storage and close all input streams. */
1579 
1580   for (i = 0; i < nfiles; i++)
1581     {
1582       fclose (streams[i]);
1583       free (lb1[i].buffer);
1584       free (lb2[i].buffer);
1585     }
1586   free (file_lossage);
1587   free (lb1);
1588   free (lb2);
1589   free (thisline);
1590   free (prevline);
1591   free (streams);
1592 
1593   if (outfile)
1594     fclose (ostream);
1595 
1596   return lossage;
1597 }
1598 
1599 /* Print error message and exit.  */
1600 
1601 void
fatal(const char * format,const char * arg)1602 fatal (const char *format, const char *arg)
1603 {
1604   error (format, arg);
1605   xexit (1);
1606 }
1607 
1608 /* Print error message.  FORMAT is printf control string, ARG is arg for it. */
1609 void
error(const char * format,const char * arg)1610 error (const char *format, const char *arg)
1611 {
1612   printf ("%s: ", program_name);
1613   printf (format, arg);
1614   if (format[strlen (format) -1] != '\n')
1615     printf ("\n");
1616 }
1617 
1618 void
perror_with_name(const char * name)1619 perror_with_name (const char *name)
1620 {
1621   fprintf (stderr, "%s: ", program_name);
1622   perror (name);
1623 }
1624 
1625 void
pfatal_with_name(const char * name)1626 pfatal_with_name (const char *name)
1627 {
1628   perror_with_name (name);
1629   xexit (1);
1630 }
1631 
1632 
1633 /* Return a newly-allocated string concatenating S1 and S2.  */
1634 
1635 char *
concat(char * s1,char * s2)1636 concat (char *s1, char *s2)
1637 {
1638   int len1 = strlen (s1), len2 = strlen (s2);
1639   char *result = (char *) xmalloc (len1 + len2 + 1);
1640 
1641   strcpy (result, s1);
1642   strcpy (result + len1, s2);
1643   *(result + len1 + len2) = 0;
1644 
1645   return result;
1646 }
1647