1/* Prepare TeX index dribble output into an actual index.
2
3   Version 1.45-j1.00
4
5   Copyright (C) 1987, 1991, 1992 Free Software Foundation, Inc.
6
7   This program is free software; you can redistribute it and/or modify
8   it under the terms of the GNU General Public License as published by
9   the Free Software Foundation; either version 2, or (at your option)
10   any later version.
11
12   This program is distributed in the hope that it will be useful,
13   but WITHOUT ANY WARRANTY; without even the implied warranty of
14   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15   GNU General Public License for more details.
16
17   You should have received a copy of the GNU General Public License
18   along with this program; if not, write to the Free Software
19   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307. */
20
21
22#include <stdio.h>
23#include <ctype.h>
24#include <errno.h>
25#include "getopt.h"
26
27#define TEXINDEX_VERSION_STRING "GNU Texindex 2.0 for Texinfo release 3.4"
28
29#if defined (emacs)
30#  include "../src/config.h"
31/* Some s/os.h files redefine these. */
32#  undef read
33#  undef close
34#  undef write
35#  undef open
36#endif
37
38#if defined (HAVE_STRING_H)
39#  include <string.h>
40#endif /* HAVE_STRING_H */
41
42#if !defined (HAVE_STRCHR)
43char *strrchr ();
44#endif /* !HAVE_STRCHR */
45
46#if defined (STDC_HEADERS)
47#  include <stdlib.h>
48#else /* !STDC_HEADERS */
49char *getenv (), *malloc (), *realloc ();
50#endif /* !STDC_HEADERS */
51
52#if defined (HAVE_UNISTD_H)
53#  include <unistd.h>
54#else /* !HAVE_UNISTD_H */
55off_t lseek ();
56#endif /* !HAVE_UNISTD_H */
57
58#if !defined (HAVE_MEMSET)
59#undef memset
60#define memset(ptr, ignore, count) bzero (ptr, count)
61#endif
62
63
64char *mktemp ();
65
66#if defined (VMS)
67#  include <file.h>
68#  define TI_NO_ERROR ((1 << 28) | 1)
69#  define TI_FATAL_ERROR ((1 << 28) | 4)
70#  define unlink delete
71#else /* !VMS */
72#  if defined (HAVE_SYS_FCNTL_H)
73#    include <sys/types.h>
74#    include <sys/fcntl.h>
75#  endif /* HAVE_SYS_FCNTL_H */
76
77#  if defined (_AIX) || !defined (_POSIX_VERSION)
78#    include <sys/file.h>
79#  else /* !AIX && _POSIX_VERSION */
80#    if !defined (HAVE_SYS_FCNTL_H)
81#      include <fcntl.h>
82#    endif /* !HAVE_FCNTL_H */
83#  endif /* !_AIX && _POSIX_VERSION */
84#  define TI_NO_ERROR 0
85#  define TI_FATAL_ERROR 1
86#endif /* !VMS */
87
88#if !defined (SEEK_SET)
89#  define SEEK_SET 0
90#  define SEEK_CUR 1
91#  define SEEK_END 2
92#endif /* !SEEK_SET */
93
94#if !defined (errno)
95extern int errno;
96#endif
97char *strerror ();
98
99/* When sorting in core, this structure describes one line
100   and the position and length of its first keyfield.  */
101struct lineinfo
102{
103  char *text;		/* The actual text of the line. */
104  union {
105    char *text;		/* The start of the key (for textual comparison). */
106    long number;	/* The numeric value (for numeric comparison). */
107  } key;
108  long keylen;		/* Length of KEY field. */
109};
110
111/* This structure describes a field to use as a sort key. */
112struct keyfield
113{
114  int startwords;	/* Number of words to skip. */
115  int startchars;	/* Number of additional chars to skip. */
116  int endwords;		/* Number of words to ignore at end. */
117  int endchars;		/* Ditto for characters of last word. */
118  char ignore_blanks;	/* Non-zero means ignore spaces and tabs. */
119  char fold_case;	/* Non-zero means case doesn't matter. */
120  char reverse;		/* Non-zero means compare in reverse order. */
121  char numeric;		/* Non-zeros means field is ASCII numeric. */
122  char positional;	/* Sort according to file position. */
123  char braced;		/* Count balanced-braced groupings as fields. */
124};
125
126/* Vector of keyfields to use. */
127struct keyfield keyfields[3];
128
129/* Number of keyfields stored in that vector.  */
130int num_keyfields = 3;
131
132/* Vector of input file names, terminated with a null pointer. */
133char **infiles;
134
135/* Vector of corresponding output file names, or NULL, meaning default it
136   (add an `s' to the end). */
137char **outfiles;
138
139/* Length of `infiles'. */
140int num_infiles;
141
142/* Pointer to the array of pointers to lines being sorted. */
143char **linearray;
144
145/* The allocated length of `linearray'. */
146long nlines;
147
148/* Directory to use for temporary files.  On Unix, it ends with a slash.  */
149char *tempdir;
150
151/* Start of filename to use for temporary files.  */
152char *tempbase;
153
154/* Number of last temporary file.  */
155int tempcount;
156
157/* Number of last temporary file already deleted.
158   Temporary files are deleted by `flush_tempfiles' in order of creation.  */
159int last_deleted_tempcount;
160
161/* During in-core sort, this points to the base of the data block
162   which contains all the lines of data.  */
163char *text_base;
164
165/* Additional command switches .*/
166
167/* Nonzero means do not delete tempfiles -- for debugging. */
168int keep_tempfiles;
169
170/* The name this program was run with. */
171char *program_name;
172
173/* Forward declarations of functions in this file. */
174
175void decode_command ();
176void sort_in_core ();
177void sort_offline ();
178char **parsefile ();
179char *find_field ();
180char *find_pos ();
181long find_value ();
182char *find_braced_pos ();
183char *find_braced_end ();
184void writelines ();
185int compare_field ();
186int compare_full ();
187long readline ();
188int merge_files ();
189int merge_direct ();
190void pfatal_with_name ();
191void fatal ();
192void error ();
193void *xmalloc (), *xrealloc ();
194char *concat ();
195char *maketempname ();
196void flush_tempfiles ();
197char *tempcopy ();
198
199#define MAX_IN_CORE_SORT 500000
200
201void
202main (argc, argv)
203     int argc;
204     char **argv;
205{
206  int i;
207
208  tempcount = 0;
209  last_deleted_tempcount = 0;
210
211  program_name = strrchr (argv[0], '/');
212  if (program_name != (char *)NULL)
213    program_name++;
214  else
215    program_name = argv[0];
216
217  /* Describe the kind of sorting to do. */
218  /* The first keyfield uses the first braced field and folds case. */
219  keyfields[0].braced = 1;
220  keyfields[0].fold_case = 1;
221  keyfields[0].endwords = -1;
222  keyfields[0].endchars = -1;
223
224  /* The second keyfield uses the second braced field, numerically. */
225  keyfields[1].braced = 1;
226  keyfields[1].numeric = 1;
227  keyfields[1].startwords = 1;
228  keyfields[1].endwords = -1;
229  keyfields[1].endchars = -1;
230
231  /* The third keyfield (which is ignored while discarding duplicates)
232     compares the whole line. */
233  keyfields[2].endwords = -1;
234  keyfields[2].endchars = -1;
235
236  decode_command (argc, argv);
237
238  tempbase = mktemp (concat ("txiXXXXXX", "", ""));
239
240  /* Process input files completely, one by one.  */
241
242  for (i = 0; i < num_infiles; i++)
243    {
244      int desc;
245      long ptr;
246      char *outfile;
247
248      desc = open (infiles[i], O_RDONLY, 0);
249      if (desc < 0)
250	pfatal_with_name (infiles[i]);
251      lseek (desc, (off_t) 0, SEEK_END);
252      ptr = (long) lseek (desc, (off_t) 0, SEEK_CUR);
253
254      close (desc);
255
256      outfile = outfiles[i];
257      if (!outfile)
258	{
259	  outfile = concat (infiles[i], "s", "");
260	}
261
262      if (ptr < MAX_IN_CORE_SORT)
263	/* Sort a small amount of data. */
264	sort_in_core (infiles[i], ptr, outfile);
265      else
266	sort_offline (infiles[i], ptr, outfile);
267    }
268
269  flush_tempfiles (tempcount);
270  exit (TI_NO_ERROR);
271}
272
273typedef struct
274{
275  char *long_name;
276  char *short_name;
277  int *variable_ref;
278  int variable_value;
279  char *arg_name;
280  char *doc_string;
281} TEXINDEX_OPTION;
282
283TEXINDEX_OPTION texindex_options[] = {
284  { "--keep", "-k", &keep_tempfiles, 1, (char *)NULL,
285      "Keep temporary files around after processing" },
286  { "--no-keep", 0, &keep_tempfiles, 0, (char *)NULL,
287      "Do not keep temporary files around after processing (default)" },
288  { "--output", "-o", (int *)NULL, 0, "FILE",
289      "Send output to FILE" },
290  { "--version", (char *)NULL, (int *)NULL, 0, (char *)NULL,
291      "Show version information" },
292  { "--help", "-h", (int *)NULL, 0, (char *)NULL, "Produce this listing" },
293  { (char *)NULL, (char *)NULL, (int *)NULL, 0, (char *)NULL }
294};
295
296void
297usage (result_value)
298     int result_value;
299{
300  register int i;
301
302  fprintf (stderr, "Usage: %s [OPTIONS] FILE...\n", program_name);
303  fprintf (stderr, "  Generate a permuted index for the TeX files given.\n");
304  fprintf (stderr, "  Usually FILE... is `foo.??' for the source file `foo.tex'.\n");
305  fprintf (stderr, "  The OPTIONS are:\n");
306
307  for (i = 0; texindex_options[i].long_name; i++)
308    {
309      fprintf (stderr, "    %s %s",
310	       texindex_options[i].long_name,
311	       texindex_options[i].arg_name ?
312	       texindex_options[i].arg_name : "");
313
314      if (texindex_options[i].short_name)
315	fprintf (stderr, " \n    or %s %s",
316		 texindex_options[i].short_name,
317		 texindex_options[i].arg_name ?
318		 texindex_options[i].arg_name : "");
319      fprintf (stderr, "\t%s\n", texindex_options[i].doc_string);
320    }
321  exit (result_value);
322}
323
324/* Decode the command line arguments to set the parameter variables
325   and set up the vector of keyfields and the vector of input files. */
326
327void
328decode_command (argc, argv)
329     int argc;
330     char **argv;
331{
332  int arg_index = 1;
333  int optc;
334  char **ip;
335  char **op;
336
337  /* Store default values into parameter variables. */
338
339  tempdir = getenv ("TMPDIR");
340#ifdef VMS
341  if (tempdir == NULL)
342    tempdir = "sys$scratch:";
343#else
344  if (tempdir == NULL)
345    tempdir = "/tmp/";
346  else
347    tempdir = concat (tempdir, "/", "");
348#endif
349
350  keep_tempfiles = 0;
351
352  /* Allocate ARGC input files, which must be enough.  */
353
354  infiles = (char **) xmalloc (argc * sizeof (char *));
355  outfiles = (char **) xmalloc (argc * sizeof (char *));
356  ip = infiles;
357  op = outfiles;
358
359  while (arg_index < argc)
360    {
361      char *arg = argv[arg_index++];
362
363      if (*arg == '-')
364	{
365	  if (strcmp (arg, "--version") == 0)
366	    {
367	      fprintf (stderr, "%s\n", TEXINDEX_VERSION_STRING);
368	      exit (0);
369	    }
370	  else if ((strcmp (arg, "--keep") == 0) ||
371		   (strcmp (arg, "-k") == 0))
372	    {
373	      keep_tempfiles = 1;
374	    }
375	  else if ((strcmp (arg, "--help") == 0) ||
376		   (strcmp (arg, "-h") == 0))
377	    {
378	      usage (0);
379	    }
380	  else if ((strcmp (arg, "--output") == 0) ||
381		   (strcmp (arg, "-o") == 0))
382	    {
383	      if (argv[arg_index] != (char *)NULL)
384		{
385		  arg_index++;
386		  if (op > outfiles)
387		    *(op - 1) = argv[arg_index];
388		}
389	      else
390		usage (1);
391	    }
392	  else
393	    usage (1);
394	}
395      else
396	{
397	  *ip++ = arg;
398	  *op++ = (char *)NULL;
399	}
400    }
401
402  /* Record number of keyfields and terminate list of filenames. */
403  num_infiles = ip - infiles;
404  *ip = (char *)NULL;
405  if (num_infiles == 0)
406    usage (1);
407}
408
409/* Return a name for a temporary file. */
410
411char *
412maketempname (count)
413     int count;
414{
415  char tempsuffix[10];
416  sprintf (tempsuffix, "%d", count);
417  return concat (tempdir, tempbase, tempsuffix);
418}
419
420/* Delete all temporary files up to TO_COUNT. */
421
422void
423flush_tempfiles (to_count)
424     int to_count;
425{
426  if (keep_tempfiles)
427    return;
428  while (last_deleted_tempcount < to_count)
429    unlink (maketempname (++last_deleted_tempcount));
430}
431
432/* Copy the input file open on IDESC into a temporary file
433   and return the temporary file name. */
434
435#define BUFSIZE 1024
436
437char *
438tempcopy (idesc)
439     int idesc;
440{
441  char *outfile = maketempname (++tempcount);
442  int odesc;
443  char buffer[BUFSIZE];
444
445  odesc = open (outfile, O_WRONLY | O_CREAT, 0666);
446
447  if (odesc < 0)
448    pfatal_with_name (outfile);
449
450  while (1)
451    {
452      int nread = read (idesc, buffer, BUFSIZE);
453      write (odesc, buffer, nread);
454      if (!nread)
455	break;
456    }
457
458  close (odesc);
459
460  return outfile;
461}
462
463/* Compare LINE1 and LINE2 according to the specified set of keyfields. */
464
465int
466compare_full (line1, line2)
467     char **line1, **line2;
468{
469  int i;
470
471  /* Compare using the first keyfield;
472     if that does not distinguish the lines, try the second keyfield;
473     and so on. */
474
475  for (i = 0; i < num_keyfields; i++)
476    {
477      long length1, length2;
478      char *start1 = find_field (&keyfields[i], *line1, &length1);
479      char *start2 = find_field (&keyfields[i], *line2, &length2);
480      int tem = compare_field (&keyfields[i], start1, length1, *line1 - text_base,
481			       start2, length2, *line2 - text_base);
482      if (tem)
483	{
484	  if (keyfields[i].reverse)
485	    return -tem;
486	  return tem;
487	}
488    }
489
490  return 0;			/* Lines match exactly. */
491}
492
493/* Compare LINE1 and LINE2, described by structures
494   in which the first keyfield is identified in advance.
495   For positional sorting, assumes that the order of the lines in core
496   reflects their nominal order.  */
497
498int
499compare_prepared (line1, line2)
500     struct lineinfo *line1, *line2;
501{
502  int i;
503  int tem;
504  char *text1, *text2;
505
506  /* Compare using the first keyfield, which has been found for us already. */
507  if (keyfields->positional)
508    {
509      if (line1->text - text_base > line2->text - text_base)
510	tem = 1;
511      else
512	tem = -1;
513    }
514  else if (keyfields->numeric)
515    tem = line1->key.number - line2->key.number;
516  else
517    tem = compare_field (keyfields, line1->key.text, line1->keylen, 0,
518			 line2->key.text, line2->keylen, 0);
519  if (tem)
520    {
521      if (keyfields->reverse)
522	return -tem;
523      return tem;
524    }
525
526  text1 = line1->text;
527  text2 = line2->text;
528
529  /* Compare using the second keyfield;
530     if that does not distinguish the lines, try the third keyfield;
531     and so on. */
532
533  for (i = 1; i < num_keyfields; i++)
534    {
535      long length1, length2;
536      char *start1 = find_field (&keyfields[i], text1, &length1);
537      char *start2 = find_field (&keyfields[i], text2, &length2);
538      int tem = compare_field (&keyfields[i], start1, length1, text1 - text_base,
539			       start2, length2, text2 - text_base);
540      if (tem)
541	{
542	  if (keyfields[i].reverse)
543	    return -tem;
544	  return tem;
545	}
546    }
547
548  return 0;			/* Lines match exactly. */
549}
550
551/* Like compare_full but more general.
552   You can pass any strings, and you can say how many keyfields to use.
553   POS1 and POS2 should indicate the nominal positional ordering of
554   the two lines in the input.  */
555
556int
557compare_general (str1, str2, pos1, pos2, use_keyfields)
558     char *str1, *str2;
559     long pos1, pos2;
560     int use_keyfields;
561{
562  int i;
563
564  /* Compare using the first keyfield;
565     if that does not distinguish the lines, try the second keyfield;
566     and so on. */
567
568  for (i = 0; i < use_keyfields; i++)
569    {
570      long length1, length2;
571      char *start1 = find_field (&keyfields[i], str1, &length1);
572      char *start2 = find_field (&keyfields[i], str2, &length2);
573      int tem = compare_field (&keyfields[i], start1, length1, pos1,
574			       start2, length2, pos2);
575      if (tem)
576	{
577	  if (keyfields[i].reverse)
578	    return -tem;
579	  return tem;
580	}
581    }
582
583  return 0;			/* Lines match exactly. */
584}
585
586/* Find the start and length of a field in STR according to KEYFIELD.
587   A pointer to the starting character is returned, and the length
588   is stored into the int that LENGTHPTR points to.  */
589
590char *
591find_field (keyfield, str, lengthptr)
592     struct keyfield *keyfield;
593     char *str;
594     long *lengthptr;
595{
596  char *start;
597  char *end;
598  char *(*fun) ();
599
600  if (keyfield->braced)
601    fun = find_braced_pos;
602  else
603    fun = find_pos;
604
605  start = (*fun) (str, keyfield->startwords, keyfield->startchars,
606		  keyfield->ignore_blanks);
607  if (keyfield->endwords < 0)
608    {
609      if (keyfield->braced)
610	end = find_braced_end (start);
611      else
612	{
613	  end = start;
614	  while (*end && *end != '\n')
615	    end++;
616	}
617    }
618  else
619    {
620      end = (*fun) (str, keyfield->endwords, keyfield->endchars, 0);
621      if (end - str < start - str)
622	end = start;
623    }
624  *lengthptr = end - start;
625  return start;
626}
627
628/* Return a pointer to a specified place within STR,
629   skipping (from the beginning) WORDS words and then CHARS chars.
630   If IGNORE_BLANKS is nonzero, we skip all blanks
631   after finding the specified word.  */
632
633char *
634find_pos (str, words, chars, ignore_blanks)
635     char *str;
636     int words, chars;
637     int ignore_blanks;
638{
639  int i;
640  char *p = str;
641
642  for (i = 0; i < words; i++)
643    {
644      char c;
645      /* Find next bunch of nonblanks and skip them. */
646      while ((c = *p) == ' ' || c == '\t')
647	p++;
648      while ((c = *p) && c != '\n' && !(c == ' ' || c == '\t'))
649	p++;
650      if (!*p || *p == '\n')
651	return p;
652    }
653
654  while (*p == ' ' || *p == '\t')
655    p++;
656
657  for (i = 0; i < chars; i++)
658    {
659      if (!*p || *p == '\n')
660	break;
661      p++;
662    }
663  return p;
664}
665
666/* Like find_pos but assumes that each field is surrounded by braces
667   and that braces within fields are balanced. */
668
669char *
670find_braced_pos (str, words, chars, ignore_blanks)
671     char *str;
672     int words, chars;
673     int ignore_blanks;
674{
675  int i;
676  int bracelevel;
677  char *p = str;
678  char c;
679
680  for (i = 0; i < words; i++)
681    {
682      bracelevel = 1;
683      while ((c = *p++) != '{' && c != '\n' && c)
684	/* Do nothing. */ ;
685      if (c != '{')
686	return p - 1;
687      while (bracelevel)
688	{
689	  c = *p++;
690#ifdef	SJIS
691	  if ( iskanji(c)) {
692		  p++;
693		  continue;
694	  }
695#endif
696	  if (c == '{')
697	    bracelevel++;
698	  if (c == '}')
699	    bracelevel--;
700	  if (c == 0 || c == '\n')
701	    return p - 1;
702	}
703    }
704
705  while ((c = *p++) != '{' && c != '\n' && c)
706    /* Do nothing. */ ;
707
708  if (c != '{')
709    return p - 1;
710
711  if (ignore_blanks)
712    while ((c = *p) == ' ' || c == '\t')
713      p++;
714
715  for (i = 0; i < chars; i++)
716    {
717      if (!*p || *p == '\n')
718	break;
719      p++;
720    }
721  return p;
722}
723
724/* Find the end of the balanced-brace field which starts at STR.
725   The position returned is just before the closing brace. */
726
727char *
728find_braced_end (str)
729     char *str;
730{
731  int bracelevel;
732  char *p = str;
733  char c;
734
735  bracelevel = 1;
736  while (bracelevel)
737    {
738      c = *p++;
739#ifdef	SJIS
740	  if (iskanji(c)) {
741		  p++;
742		  continue;
743	  }
744#endif
745      if (c == '{')
746	bracelevel++;
747      if (c == '}')
748	bracelevel--;
749      if (c == 0 || c == '\n')
750	return p - 1;
751    }
752  return p - 1;
753}
754
755long
756find_value (start, length)
757     char *start;
758     long length;
759{
760  while (length != 0L)
761    {
762      if (isdigit (*start))
763	return atol (start);
764      length--;
765      start++;
766    }
767  return 0l;
768}
769
770/* Vector used to translate characters for comparison.
771   This is how we make all alphanumerics follow all else,
772   and ignore case in the first sorting.  */
773int char_order[0x10000];			/* japanese */
774
775#ifdef	EUC
776#define	KANJI
777#endif
778
779#ifdef	SJIS
780#define	KANJI
781#endif
782
783#ifdef		KANJI
784
785#ifndef		EUC
786#	ifndef		SJIS
787#		define	EUC
788#	endif
789#else
790#	ifdef	SJIS
791#		error
792#	endif
793#endif
794
795#ifdef		EUC
796#define		JIS_NUM_0		0xa3b0
797#define		JIS_NUM_9		(JIS_NUM_0 + '9'- '0')
798#define		JIS_ALPH_A		0xa3c1
799#define		JIS_ALPH_Z		(JIS_ALPH_A + 'Z'-'A')
800#define		JIS_ALPH_a		0xa3e1
801#define		JIS_HIRA_SMALL_A	0xa4a1
802#define		JIS_HIRA_NN			0xa4f3
803#define		JIS_KATA_SMALL_A	0xa5a1
804#define		JIS_KATA_SMALL_KE	0xa5f6
805#define		JIS_HIRA_U			0xa4a6
806#define		JIS_KATA_U			0xa5a6
807#define		JIS_KATA_VU			0xa5f4
808
809#define		JIS_KATA_UPPER		0xa5
810#define		JIS_HIRA_UPPER		0xa4
811
812#define		JIS_HIRA_A		0xa4a2
813#define		JIS_KATA_A		0xa5a2
814
815#define		JIS_DAKUTEN		0xa1ab
816#define		JIS_HANDAKUTEN	0xa1ac
817#define		JIS_CHOUON		0xa2ac
818
819int kana[] = {
820/*	          .        [      ]      ,        .       wo      a		*/
821	0xa1a1, 0xa1a3, 0xa1d6, 0xa1d7, 0xa1a2, 0xa1a6, 0xa5f2, 0xa5a1,
822/*    i       u       e       o       ya      yu      yo      tsu   */
823	0xa5a3, 0xa5a5, 0xa5a7, 0xa5a9, 0xa5e3, 0xa5e5, 0xa5e7, 0xa5c3,
824/*     -       a       i       u       e       o      ka      ki    */
825	0xa1bc, 0xa5a2, 0xa5a4, 0xa5a6, 0xa5a8, 0xa5aa, 0xa5ab, 0xa5ad,
826/*    ku      ke      ko      sa      si      su     se       so	*/
827	0xa5af,	0xa5b1, 0xa5b3, 0xa5b5, 0xa5b7, 0xa5b9, 0xa5bb, 0xa5bd,
828/*    ta      ti      tu      te      to      na     ni       nu	*/
829	0xa5bf, 0xa5c1, 0xa5c4, 0xa5c6, 0xa5c8, 0xa5ca, 0xa5cb, 0xa5cc,
830/*    ne      no      ha      hi      hu      he      ho      ma 	*/
831	0xa5cd, 0xa5ce, 0xa5cf, 0xa5d2, 0xa5d5, 0xa5d8, 0xa5db, 0xa5de,
832/*    mi      mu      me      mo      ya      yu      yo      ra	*/
833	0xa5df, 0xa5e0, 0xa5e1, 0xa5e2, 0xa5e4, 0xa5e6, 0xa5e8, 0xa5e9,
834/*	  ri      ru      re      ro      wa      nn      ""      maru  */
835	0xa5ea, 0xa5eb, 0xa5ec, 0xa5ed, 0xa5ef, 0xa5f3,	JIS_DAKUTEN,JIS_HANDAKUTEN
836};
837
838int daku[] = {0xa5ac, 0xa5ae, 0xa5b0, 0xa5b2, 0xa5b4, /* $B%,(J*/
839	 	      0xa5b6, 0xa5b8, 0xa5ba, 0xa5bc, 0xa5be, /* $B%6(J */
840			  0xa5c0, 0xa5c2, 0xa5c5, 0xa5c7, 0xa5c9, /* $B%@(J */
841			  0xa5d0, 0xa5d3, 0xa5d6, 0xa5d9, 0xa5dc}; /* $B%P(J */
842int	handaku[] = {0xa5d1, 0xa5d4, 0xa5d7, 0xa5da, 0xa5dd}; /* $B%Q(J */
843int dakuall[] = {0xa4ac, 0xa4ae, 0xa4b0, 0xa4b2, 0xa4b4, /* $B$,(J */
844	 	      0xa4b6, 0xa4b8, 0xa4ba, 0xa4bc, 0xa4be, /* $B$6(J */
845			  0xa4c0, 0xa4c2, 0xa4c5, 0xa4c7, 0xa4c9, /* $B$@(J */
846			  0xa4d0, 0xa4d3, 0xa4d6, 0xa4d9, 0xa4dc, /* $B$P(J */
847			  0xa5ac, 0xa5ae, 0xa5b0, 0xa5b2, 0xa5b4, /* $B%,(J */
848	 	      0xa5b6, 0xa5b8, 0xa5ba, 0xa5bc, 0xa5be, /* $B%6(J */
849			  0xa5c0, 0xa5c2, 0xa5c5, 0xa5c7, 0xa5c9, /* $B%@(J */
850			  0xa5d0, 0xa5d3, 0xa5d6, 0xa5d9, 0xa5dc}; /* $B%P(J */
851int	handakuall[] = {0xa4d1, 0xa4d4, 0xa4d7, 0xa4da, 0xa4dd, /* $B$Q(J */
852					0xa5d1, 0xa5d4, 0xa5d7, 0xa5da, 0xa5dd}; /* $B%Q(J */
853
854
855iskanji(c)							/* japanses extention */
856int	c;
857{
858	if ( !(c & 0x80 )) return 0;
859	if ( c == 0x8e ) return 0;
860	return 1;
861}
862
863is201kana(c)							/* japanses extention */
864int	c;
865{
866	if ( c == 0x8e ) return 1;
867	else return 0;
868}
869#else							/* SJIS */
870#define		JIS_NUM_0		0x824f
871#define		JIS_NUM_9		(JIS_NUM_0 + '9'- '0')
872#define		JIS_ALPH_A		0x8260
873#define		JIS_ALPH_Z		(JIS_ALPH_A + 'Z'-'A')
874#define		JIS_ALPH_a		0x8281
875#define		JIS_HIRA_SMALL_A	0x829f
876#define		JIS_HIRA_NN			0x82f1
877#define		JIS_KATA_SMALL_A	0x8340
878#define		JIS_KATA_SMALL_KE	0x8396
879#define		JIS_HIRA_U			0x82a4
880#define		JIS_KATA_U			0x83a5
881#define		JIS_KATA_VU			0x8394
882
883#define		JIS_KATA_UPPER		0x83
884#define		JIS_HIRA_UPPER		0x82
885
886#define		JIS_HIRA_A		0x82a0
887#define		JIS_KATA_A		0x8341
888
889#define		JIS_DAKUTEN		0x814a
890#define		JIS_HANDAKUTEN	0x814b
891#define		JIS_CHOUON		0x815b
892
893int kana[] = {
894/*	          .        [      ]      ,        .       wo      a		*/
895	0x813f, 0x8142, 0x8175, 0x8176, 0x8141, 0x8145, 0x8392, 0x8340,
896/*    i       u       e       o       ya      yu      yo      tsu   */
897	0x8342, 0x8344, 0x8346, 0x8348, 0x8383, 0x8385, 0x8387, 0x8362,
898/*     -       a       i       u       e       o      ka      ki    */
899	0x815b, 0x8341, 0x8343, 0x8345, 0x8347, 0x8349, 0x834a, 0x834c,
900/*    ku      ke      ko      sa      si      su     se       so	*/
901	0x834e,	0x8350, 0x8352, 0x8354, 0x8356, 0x8358, 0x835a, 0x835c,
902/*    ta      ti      tu      te      to      na     ni       nu	*/
903	0x835e, 0x8360, 0x8363, 0x8365, 0x8367, 0x8369, 0x836a, 0x836b,
904/*    ne      no      ha      hi      hu      he      ho      ma 	*/
905	0x836c, 0x836d, 0x836e, 0x8371, 0x8374, 0x8377, 0x837a, 0x837d,
906/*    mi      mu      me      mo      ya      yu      yo      ra	*/
907	0x837e, 0x8380, 0x8381, 0x8382, 0x8384, 0x8386, 0x8388, 0x8389,
908/*	  ri      ru      re      ro      wa      nn      ""      maru  */
909	0x838a, 0x838b, 0x838c, 0x838d, 0x838f, 0x8393,	JIS_DAKUTEN,JIS_HANDAKUTEN
910};
911
912int daku[] = {0x834b, 0x834d, 0x834f, 0x8351, 0x8353, /* $B%,(J*/
913	 	      0x8355, 0x8357, 0x8359, 0x835b, 0x835d, /* $B%6(J */
914			  0x835f, 0x8361, 0x8364, 0x8366, 0x8367, /* $B%@(J */
915			  0x836f, 0x8372, 0x8375, 0x8378, 0x837b}; /* $B%P(J */
916int	handaku[] = {0x8370, 0x8373, 0x8376, 0x8379, 0x837c}; /* $B%Q(J */
917int dakuall[] = {0x82aa, 0xa2ac, 0x82a3, 0x82b0, 0x82b2, /* $B$,(J */
918	 	      0x82b4, 0x82b6, 0x82b8, 0x82ba, 0x82bc, /* $B$6(J */
919			  0x82be, 0x82c0, 0x82c3, 0x82c5, 0x82c7, /* $B$@(J */
920			  0x82ce, 0x82d1, 0x82d4, 0x82d7, 0x82da, /* $B$P(J */
921
922			  0x834b, 0x834d, 0x834f, 0x8351, 0x8353, /* $B%,(J*/
923	 	      0x8355, 0x8357, 0x8359, 0x835b, 0x835d, /* $B%6(J */
924			  0x835f, 0x8361, 0x8364, 0x8366, 0x8367, /* $B%@(J */
925			  0x836f, 0x8372, 0x8375, 0x8378, 0x837b}; /* $B%P(J */
926int	handakuall[] = {0x82cf, 0x82d2, 0x82d5, 0x82d8, 0x82db, /* $B$Q(J */
927				    0x8370, 0x8373, 0x8376, 0x8379, 0x837c}; /* $B%Q(J */
928
929iskanji(c)							/* japanses extention */
930int	c;
931{
932	c &= 0xff;
933	if (( c >= 0x81 && c <= 0x9f ) ||
934		( c >= 0xe0 && c <= 0xea )) return 1;
935	return 0;
936}
937
938is201kana(c)							/* japanses extention */
939int	c;
940{
941	if ( c >= 0xa0 && c <= 0xdf ) return 1;
942	else return 0;
943}
944#endif
945
946
947kana_char_order()				/* japanese */
948{
949	unsigned	int		c,i,j,cc;
950
951	for ( c = 0x8000 ; c < 0x10000 ; c++ )
952		char_order[c] = c;
953
954	/* NUMERIC */
955	for ( c = JIS_NUM_0 , i = '0' + 512; c <= JIS_NUM_9 ; i++,c++)
956		char_order[c] = i;
957
958	/* ALPHABET */
959	for ( c = JIS_ALPH_A , i = 'a' + 512; c <= JIS_ALPH_Z ; i++,c++) {
960		char_order[c] = i;
961		char_order[c+JIS_ALPH_a - JIS_ALPH_A] = i;
962	}
963
964	/* KANA */
965	for ( c = JIS_HIRA_SMALL_A ; c <= JIS_HIRA_NN ; c++ ) {
966		char_order[c] = c+512;
967		cc = c+JIS_KATA_SMALL_A-JIS_HIRA_SMALL_A;
968#ifdef	SJIS
969		if ( cc > 0x837e ) cc ++;
970#endif
971		char_order[cc] = c+512;
972		if (isdaku(c) ) {
973			char_order[c] = c+512-1;
974			char_order[cc] = c+512-1;
975		} else if (ishandaku(c) ) {
976			char_order[c] = c+512-2;
977			char_order[cc] = c+512-2;
978		}
979	}
980	for ( c = JIS_KATA_VU ; c <= JIS_KATA_SMALL_KE ; c++ ) {
981		char_order[c] = c+512;
982		if (c == JIS_KATA_VU ) {
983			char_order[c] = JIS_KATA_U + 512;
984		}
985	}
986}
987
988
989#ifdef	EUC
990#define	KANA_BYTES	2
991#else
992#define	KANA_BYTES	2
993#endif
994
995static	convert_htoz(str,ret)
996unsigned	char	*str;
997int		*ret;
998{
999	int		c,c2;
1000
1001	int		num = KANA_BYTES;
1002
1003#ifdef	EUC
1004	str++;
1005#endif
1006	c = *str++;
1007	if (!is201kana(*str)) {
1008		if (c >= 0xa0 && c <= 0xdf)
1009			*ret = kana[c - 0xa0];
1010		else *ret = 0x8e00 + c;
1011		return num;
1012	}
1013#ifdef	EUC
1014	str++;
1015#endif
1016	num += KANA_BYTES;
1017
1018	if ( (c2 = *str++) == 0xde) {		/* $BByE@(J */
1019		if      (c >= 0xb6 && c <= 0xba)		/* line-ga */
1020			c = daku[c - 0xb6];
1021		else if (c >= 0xbb && c <= 0xbf)		/* line-za */
1022			c = daku[c - 0xbb+5];
1023		else if (c >= 0xc0 && c <= 0xc4)		/* line-da */
1024			c = daku[c - 0xc0+10];
1025		else if (c >= 0xca && c <= 0xce)		/* line-ba */
1026			c = daku[c - 0xca+15];
1027		else if ( c == 0xb3 )					/* vu */
1028			c = JIS_KATA_VU;
1029	} else if (c2 == 0xdf) {					/* $BH>ByE@(J */
1030		if (c >= 0xca && c <= 0xce)				/* line-pa */
1031			c = handaku[c - 0xca];
1032	} else if ( c2 == 0xb1 ) {					/* $BD92;(J */
1033		if (c >= 0xa0 && c <= 0xdd)
1034			*ret = kana[c - 0xa0];
1035		else *ret = 0x8e00 + c;
1036	}
1037	if ( c < 0x100 ) c += 0x8e00;
1038
1039	return num;
1040}
1041
1042isadddaku(c)
1043{
1044	int	i;
1045	for ( i = 0 ; i < sizeof(dakuall) / sizeof(dakuall[1]) ; i++ )
1046		if ( dakuall[i]-1 == c ) return 1;
1047	return 0;
1048}
1049
1050isaddhandaku(c)
1051{
1052	int	i;
1053	for ( i = 0 ; i < sizeof(handakuall) / sizeof(handakuall[1]) ; i++ )
1054		if ( handakuall[i]-2 == c ) return 1;
1055	return 0;
1056}
1057
1058isdaku(c)
1059{
1060	int	i;
1061	for ( i = 0 ; i < sizeof(dakuall) / sizeof(dakuall[1]) ; i++ )
1062		if ( dakuall[i] == c ) return 1;
1063	return 0;
1064}
1065
1066ishandaku(c)
1067{
1068	int	i;
1069	for ( i = 0 ; i < sizeof(handakuall) / sizeof(handakuall[1]) ; i++ )
1070		if ( handakuall[i] == c ) return 1;
1071	return 0;
1072}
1073
1074int	mbchar(str,ret)
1075unsigned	char	*str;
1076int		*ret;
1077{
1078	int		c = *str++;
1079	int		cc,ccc;
1080	if ( !iskanji(c) && !is201kana(c)) {
1081		*ret = c & 0xff;
1082		return 1;
1083	}
1084	if ( iskanji (c) ) {
1085		cc = ((c & 0xff) << 8) | ((*str++) & 0xff);
1086		ccc = (((*str++ ) & 0xff) << 8 ) | ((*str++) & 0xff);
1087
1088		if ( ccc == JIS_DAKUTEN ) {
1089			if ( isadddaku(cc)) cc++;
1090			else if ( cc == JIS_KATA_U || cc == JIS_HIRA_U )
1091				cc = JIS_KATA_VU;
1092			*ret = cc;
1093			return 4;
1094		} else if ( ccc == JIS_HANDAKUTEN ) {
1095			if ( isaddhandaku(cc)) cc+=2;
1096			*ret = cc;
1097			return 4;
1098		} else if ( ccc == JIS_CHOUON ) {
1099			*ret = cc;
1100			return 4;
1101		} else {
1102			*ret = cc;
1103			return 2;
1104		}
1105	} else if ( is201kana(c)) {
1106		/* EUC */
1107		return convert_htoz(str-1,ret);
1108	}
1109}
1110
1111xinitial(str,c)
1112unsigned	char	*str;
1113int		c;
1114{
1115	int		up;
1116	if (isdaku(c)) c--;
1117	else if (ishandaku(c)) c-=2;
1118	else if ( c == JIS_KATA_VU ) c = JIS_KATA_U;
1119
1120	if ((up= (c & 0xff00) >>8) == JIS_KATA_UPPER ) {		/* katakana */
1121		c += JIS_HIRA_SMALL_A - JIS_KATA_SMALL_A;
1122#ifdef	SJIS
1123		if ( c > 0x837e ) c ++;
1124#endif
1125	}
1126	*str++ = (c >> 8 ) & 0xfff;
1127	*str++ = c & 0xff;
1128	*str= 0;
1129}
1130
1131make_initial(src,dst)
1132unsigned char *src,*dst;
1133{
1134	int		len;
1135	int		c;
1136
1137	len = mbchar(src,&c);
1138	if ( len == 1 ) {
1139		dst[0] =c;
1140		dst[1] = 0;
1141	} else {
1142		xinitial(dst,c);
1143		len = 2;
1144	}
1145	return len;
1146}
1147#endif
1148
1149void
1150init_char_order ()
1151{
1152  int i;
1153  for (i = 1; i < 256; i++)
1154    char_order[i] = i;
1155
1156  for (i = '0'; i <= '9'; i++)
1157    char_order[i] += 512;
1158
1159  for (i = 'a'; i <= 'z'; i++)
1160    {
1161      char_order[i] = 512 + i;
1162      char_order[i + 'A' - 'a'] = 512 + i;
1163    }
1164#ifdef	KANJI
1165	kana_char_order();						/* japanese */
1166#endif
1167}
1168
1169/* Compare two fields (each specified as a start pointer and a character count)
1170   according to KEYFIELD.
1171   The sign of the value reports the relation between the fields. */
1172
1173int
1174compare_field (keyfield, start1, length1, pos1, start2, length2, pos2)
1175     struct keyfield *keyfield;
1176     char *start1;
1177     long length1;
1178     long pos1;
1179     char *start2;
1180     long length2;
1181     long pos2;
1182{
1183  if (keyfields->positional)
1184    {
1185      if (pos1 > pos2)
1186	return 1;
1187      else
1188	return -1;
1189    }
1190  if (keyfield->numeric)
1191    {
1192      long value = find_value (start1, length1) - find_value (start2, length2);
1193      if (value > 0)
1194	return 1;
1195      if (value < 0)
1196	return -1;
1197      return 0;
1198    }
1199  else
1200    {
1201      char *p1 = start1;
1202      char *p2 = start2;
1203      char *e1 = start1 + length1;
1204      char *e2 = start2 + length2;
1205
1206      while (1)
1207	{
1208	  int c1, c2;
1209
1210	  if (p1 == e1)
1211	    c1 = 0;
1212	  else
1213#ifdef	KANJI
1214		  p1 += mbchar(p1,&c1);	/* japanese */
1215#else
1216	    c1 = *p1++;
1217#endif
1218
1219	  if (p2 == e2)
1220	    c2 = 0;
1221	  else
1222#ifdef	KANJI
1223		  p2 += mbchar(p2,&c2);	/* japanese */
1224#else
1225	   c2 = *p2++;
1226#endif
1227
1228	  if (char_order[c1] != char_order[c2])
1229	    return char_order[c1] - char_order[c2];
1230	  if (!c1)
1231	    break;
1232	}
1233
1234      /* Strings are equal except possibly for case.  */
1235      p1 = start1;
1236      p2 = start2;
1237      while (1)
1238	{
1239	  int c1, c2;
1240
1241	  if (p1 == e1)
1242	    c1 = 0;
1243	  else
1244#ifdef	KANJI
1245		  p1 += mbchar(p1,&c1);	/* japanese */
1246#else
1247	    c1 = *p1++;
1248#endif
1249	  if (p2 == e2)
1250	    c2 = 0;
1251	  else
1252#ifdef	KANJI
1253		  p2 += mbchar(p2,&c2);	/* japanese */
1254#else
1255 	    c2 = *p2++;
1256#endif
1257#ifdef	KANJI
1258	  if ( iskanji(c1) || is201kana(c1)) {
1259		  if (c1 != c2)
1260			  /* Reverse sign here so upper case comes out last.  */
1261			  return c2 - c1;
1262	  } else {
1263		  if (c1 != c2)
1264			  return c1 - c2;
1265	  }
1266#else
1267
1268 	  if (c1 != c2)
1269 	    /* Reverse sign here so upper case comes out last.  */
1270 	    return c2 - c1;
1271#endif
1272	  if (!c1)
1273	    break;
1274	}
1275
1276      return 0;
1277    }
1278}
1279
1280/* A `struct linebuffer' is a structure which holds a line of text.
1281   `readline' reads a line from a stream into a linebuffer
1282   and works regardless of the length of the line.  */
1283
1284struct linebuffer
1285{
1286  long size;
1287  char *buffer;
1288};
1289
1290/* Initialize LINEBUFFER for use. */
1291
1292void
1293initbuffer (linebuffer)
1294     struct linebuffer *linebuffer;
1295{
1296  linebuffer->size = 200;
1297  linebuffer->buffer = (char *) xmalloc (200);
1298}
1299
1300/* Read a line of text from STREAM into LINEBUFFER.
1301   Return the length of the line.  */
1302
1303long
1304readline (linebuffer, stream)
1305     struct linebuffer *linebuffer;
1306     FILE *stream;
1307{
1308  char *buffer = linebuffer->buffer;
1309  char *p = linebuffer->buffer;
1310  char *end = p + linebuffer->size;
1311
1312  while (1)
1313    {
1314      int c = getc (stream);
1315      if (p == end)
1316	{
1317	  buffer = (char *) xrealloc (buffer, linebuffer->size *= 2);
1318	  p += buffer - linebuffer->buffer;
1319	  end += buffer - linebuffer->buffer;
1320	  linebuffer->buffer = buffer;
1321	}
1322      if (c < 0 || c == '\n')
1323	{
1324	  *p = 0;
1325	  break;
1326	}
1327      *p++ = c;
1328    }
1329
1330  return p - buffer;
1331}
1332
1333/* Sort an input file too big to sort in core.  */
1334
1335void
1336sort_offline (infile, nfiles, total, outfile)
1337     char *infile;
1338     int nfiles;
1339     long total;
1340     char *outfile;
1341{
1342  /* More than enough. */
1343  int ntemps = 2 * (total + MAX_IN_CORE_SORT - 1) / MAX_IN_CORE_SORT;
1344  char **tempfiles = (char **) xmalloc (ntemps * sizeof (char *));
1345  FILE *istream = fopen (infile, "r");
1346  int i;
1347  struct linebuffer lb;
1348  long linelength;
1349  int failure = 0;
1350
1351  initbuffer (&lb);
1352
1353  /* Read in one line of input data.  */
1354
1355  linelength = readline (&lb, istream);
1356
1357  if (lb.buffer[0] != '\\' && lb.buffer[0] != '@')
1358    {
1359      error ("%s: not a texinfo index file", infile);
1360      return;
1361    }
1362
1363  /* Split up the input into `ntemps' temporary files, or maybe fewer,
1364     and put the new files' names into `tempfiles' */
1365
1366  for (i = 0; i < ntemps; i++)
1367    {
1368      char *outname = maketempname (++tempcount);
1369      FILE *ostream = fopen (outname, "w");
1370      long tempsize = 0;
1371
1372      if (!ostream)
1373	pfatal_with_name (outname);
1374      tempfiles[i] = outname;
1375
1376      /* Copy lines into this temp file as long as it does not make file
1377	 "too big" or until there are no more lines.  */
1378
1379      while (tempsize + linelength + 1 <= MAX_IN_CORE_SORT)
1380	{
1381	  tempsize += linelength + 1;
1382	  fputs (lb.buffer, ostream);
1383	  putc ('\n', ostream);
1384
1385	  /* Read another line of input data.  */
1386
1387	  linelength = readline (&lb, istream);
1388	  if (!linelength && feof (istream))
1389	    break;
1390
1391	  if (lb.buffer[0] != '\\' && lb.buffer[0] != '@')
1392	    {
1393	      error ("%s: not a texinfo index file", infile);
1394	      failure = 1;
1395	      goto fail;
1396	    }
1397	}
1398      fclose (ostream);
1399      if (feof (istream))
1400	break;
1401    }
1402
1403  free (lb.buffer);
1404
1405fail:
1406  /* Record number of temp files we actually needed.  */
1407
1408  ntemps = i;
1409
1410  /* Sort each tempfile into another tempfile.
1411    Delete the first set of tempfiles and put the names of the second
1412    into `tempfiles'. */
1413
1414  for (i = 0; i < ntemps; i++)
1415    {
1416      char *newtemp = maketempname (++tempcount);
1417      sort_in_core (&tempfiles[i], MAX_IN_CORE_SORT, newtemp);
1418      if (!keep_tempfiles)
1419	unlink (tempfiles[i]);
1420      tempfiles[i] = newtemp;
1421    }
1422
1423  if (failure)
1424    return;
1425
1426  /* Merge the tempfiles together and indexify. */
1427
1428  merge_files (tempfiles, ntemps, outfile);
1429}
1430
1431/* Sort INFILE, whose size is TOTAL,
1432   assuming that is small enough to be done in-core,
1433   then indexify it and send the output to OUTFILE (or to stdout).  */
1434
1435void
1436sort_in_core (infile, total, outfile)
1437     char *infile;
1438     long total;
1439     char *outfile;
1440{
1441  char **nextline;
1442  char *data = (char *) xmalloc (total + 1);
1443  char *file_data;
1444  long file_size;
1445  int i;
1446  FILE *ostream = stdout;
1447  struct lineinfo *lineinfo;
1448
1449  /* Read the contents of the file into the moby array `data'. */
1450
1451  int desc = open (infile, O_RDONLY, 0);
1452
1453  if (desc < 0)
1454    fatal ("failure reopening %s", infile);
1455  for (file_size = 0;;)
1456    {
1457      i = read (desc, data + file_size, total - file_size);
1458      if (i <= 0)
1459	break;
1460      file_size += i;
1461    }
1462  file_data = data;
1463  data[file_size] = 0;
1464
1465  close (desc);
1466
1467  if (file_size > 0 && data[0] != '\\' && data[0] != '@')
1468    {
1469      error ("%s: not a texinfo index file", infile);
1470      return;
1471    }
1472
1473  init_char_order ();
1474
1475  /* Sort routines want to know this address. */
1476
1477  text_base = data;
1478
1479  /* Create the array of pointers to lines, with a default size
1480     frequently enough.  */
1481
1482  nlines = total / 50;
1483  if (!nlines)
1484    nlines = 2;
1485  linearray = (char **) xmalloc (nlines * sizeof (char *));
1486
1487  /* `nextline' points to the next free slot in this array.
1488     `nlines' is the allocated size.  */
1489
1490  nextline = linearray;
1491
1492  /* Parse the input file's data, and make entries for the lines.  */
1493
1494  nextline = parsefile (infile, nextline, file_data, file_size);
1495  if (nextline == 0)
1496    {
1497      error ("%s: not a texinfo index file", infile);
1498      return;
1499    }
1500
1501  /* Sort the lines. */
1502
1503  /* If we have enough space, find the first keyfield of each line in advance.
1504     Make a `struct lineinfo' for each line, which records the keyfield
1505     as well as the line, and sort them.  */
1506
1507  lineinfo = (struct lineinfo *) malloc ((nextline - linearray) * sizeof (struct lineinfo));
1508
1509  if (lineinfo)
1510    {
1511      struct lineinfo *lp;
1512      char **p;
1513
1514      for (lp = lineinfo, p = linearray; p != nextline; lp++, p++)
1515	{
1516	  lp->text = *p;
1517	  lp->key.text = find_field (keyfields, *p, &lp->keylen);
1518	  if (keyfields->numeric)
1519	    lp->key.number = find_value (lp->key.text, lp->keylen);
1520	}
1521
1522      qsort (lineinfo, nextline - linearray, sizeof (struct lineinfo), compare_prepared);
1523
1524      for (lp = lineinfo, p = linearray; p != nextline; lp++, p++)
1525	*p = lp->text;
1526
1527      free (lineinfo);
1528    }
1529  else
1530    qsort (linearray, nextline - linearray, sizeof (char *), compare_full);
1531
1532  /* Open the output file. */
1533
1534  if (outfile)
1535    {
1536      ostream = fopen (outfile, "w");
1537      if (!ostream)
1538	pfatal_with_name (outfile);
1539    }
1540
1541  writelines (linearray, nextline - linearray, ostream);
1542  if (outfile)
1543    fclose (ostream);
1544
1545  free (linearray);
1546  free (data);
1547}
1548
1549/* Parse an input string in core into lines.
1550   DATA is the input string, and SIZE is its length.
1551   Data goes in LINEARRAY starting at NEXTLINE.
1552   The value returned is the first entry in LINEARRAY still unused.
1553   Value 0 means input file contents are invalid.  */
1554
1555char **
1556parsefile (filename, nextline, data, size)
1557     char *filename;
1558     char **nextline;
1559     char *data;
1560     long size;
1561{
1562  char *p, *end;
1563  char **line = nextline;
1564
1565  p = data;
1566  end = p + size;
1567  *end = 0;
1568
1569  while (p != end)
1570    {
1571      if (p[0] != '\\' && p[0] != '@')
1572	return 0;
1573
1574      *line = p;
1575      while (*p && *p != '\n')
1576	p++;
1577      if (p != end)
1578	p++;
1579
1580      line++;
1581      if (line == linearray + nlines)
1582	{
1583	  char **old = linearray;
1584	  linearray = (char **) xrealloc (linearray, sizeof (char *) * (nlines *= 4));
1585	  line += linearray - old;
1586	}
1587    }
1588
1589  return line;
1590}
1591
1592/* Indexification is a filter applied to the sorted lines
1593   as they are being written to the output file.
1594   Multiple entries for the same name, with different page numbers,
1595   get combined into a single entry with multiple page numbers.
1596   The first braced field, which is used for sorting, is discarded.
1597   However, its first character is examined, folded to lower case,
1598   and if it is different from that in the previous line fed to us
1599   a \initial line is written with one argument, the new initial.
1600
1601   If an entry has four braced fields, then the second and third
1602   constitute primary and secondary names.
1603   In this case, each change of primary name
1604   generates a \primary line which contains only the primary name,
1605   and in between these are \secondary lines which contain
1606   just a secondary name and page numbers. */
1607
1608/* The last primary name we wrote a \primary entry for.
1609   If only one level of indexing is being done, this is the last name seen. */
1610char *lastprimary;
1611/* Length of storage allocated for lastprimary. */
1612int lastprimarylength;
1613
1614/* Similar, for the secondary name. */
1615char *lastsecondary;
1616int lastsecondarylength;
1617
1618/* Zero if we are not in the middle of writing an entry.
1619   One if we have written the beginning of an entry but have not
1620   yet written any page numbers into it.
1621   Greater than one if we have written the beginning of an entry
1622   plus at least one page number. */
1623int pending;
1624
1625/* The initial (for sorting purposes) of the last primary entry written.
1626   When this changes, a \initial {c} line is written */
1627
1628char *lastinitial;
1629
1630int lastinitiallength;
1631
1632/* When we need a string of length 1 for the value of lastinitial,
1633   store it here.  */
1634
1635char lastinitial1[3];				/* japanese */
1636
1637/* Initialize static storage for writing an index. */
1638
1639void
1640init_index ()
1641{
1642  pending = 0;
1643  lastinitial = lastinitial1;
1644  lastinitial1[0] = 0;
1645  lastinitial1[1] = 0;
1646  lastinitiallength = 0;
1647  lastprimarylength = 100;
1648  lastprimary = (char *) xmalloc (lastprimarylength + 1);
1649  memset (lastprimary, '\0', lastprimarylength + 1);
1650  lastsecondarylength = 100;
1651  lastsecondary = (char *) xmalloc (lastsecondarylength + 1);
1652  memset (lastsecondary, '\0', lastsecondarylength + 1);
1653}
1654
1655/* Indexify.  Merge entries for the same name,
1656   insert headers for each initial character, etc.  */
1657
1658void
1659indexify (line, ostream)
1660     char *line;
1661     FILE *ostream;
1662{
1663  char *primary, *secondary, *pagenumber;
1664  int primarylength, secondarylength = 0, pagelength;
1665  int nosecondary;
1666  int initiallength;
1667  char *initial;
1668  char initial1[3];							/* japanese */
1669  register char *p;
1670
1671  /* First, analyze the parts of the entry fed to us this time. */
1672
1673  p = find_braced_pos (line, 0, 0, 0);
1674  if (*p == '{')
1675    {
1676      initial = p;
1677      /* Get length of inner pair of braces starting at `p',
1678	 including that inner pair of braces.  */
1679      initiallength = find_braced_end (p + 1) + 1 - p;
1680    }
1681  else
1682    {
1683      initial = initial1;
1684#ifdef	KANJI
1685	  initiallength = make_initial(p,initial1);	/* japanese */
1686#else
1687      initial = initial1;
1688      initial1[0] = *p;
1689      initial1[1] = 0;
1690      initiallength = 1;
1691#endif
1692
1693      if (initial1[0] >= 'a' && initial1[0] <= 'z')
1694	initial1[0] -= 040;
1695    }
1696
1697  pagenumber = find_braced_pos (line, 1, 0, 0);
1698  pagelength = find_braced_end (pagenumber) - pagenumber;
1699  if (pagelength == 0)
1700    abort ();
1701
1702  primary = find_braced_pos (line, 2, 0, 0);
1703  primarylength = find_braced_end (primary) - primary;
1704
1705  secondary = find_braced_pos (line, 3, 0, 0);
1706  nosecondary = !*secondary;
1707  if (!nosecondary)
1708    secondarylength = find_braced_end (secondary) - secondary;
1709
1710  /* If the primary is different from before, make a new primary entry. */
1711  if (strncmp (primary, lastprimary, primarylength))
1712    {
1713      /* Close off current secondary entry first, if one is open. */
1714      if (pending)
1715	{
1716	  fputs ("}\n", ostream);
1717	  pending = 0;
1718	}
1719
1720      /* If this primary has a different initial, include an entry for
1721	 the initial. */
1722      if (initiallength != lastinitiallength ||
1723	  strncmp (initial, lastinitial, initiallength))
1724	{
1725	  fprintf (ostream, "\\initial {");
1726	  fwrite (initial, 1, initiallength, ostream);
1727	  fprintf (ostream, "}\n", initial);
1728	  if (initial == initial1)
1729	    {
1730	      lastinitial = lastinitial1;
1731	      *lastinitial1 = *initial1;
1732	      lastinitial1[1] = initial1[1];
1733	      lastinitial1[2] = initial1[2];
1734	    }
1735	  else
1736	    {
1737	      lastinitial = initial;
1738	    }
1739	  lastinitiallength = initiallength;
1740	}
1741
1742      /* Make the entry for the primary.  */
1743      if (nosecondary)
1744	fputs ("\\entry {", ostream);
1745      else
1746	fputs ("\\primary {", ostream);
1747      fwrite (primary, primarylength, 1, ostream);
1748      if (nosecondary)
1749	{
1750	  fputs ("}{", ostream);
1751	  pending = 1;
1752	}
1753      else
1754	fputs ("}\n", ostream);
1755
1756      /* Record name of most recent primary. */
1757      if (lastprimarylength < primarylength)
1758	{
1759	  lastprimarylength = primarylength + 100;
1760	  lastprimary = (char *) xrealloc (lastprimary,
1761					   1 + lastprimarylength);
1762	}
1763      strncpy (lastprimary, primary, primarylength);
1764      lastprimary[primarylength] = 0;
1765
1766      /* There is no current secondary within this primary, now. */
1767      lastsecondary[0] = 0;
1768    }
1769
1770  /* Should not have an entry with no subtopic following one with a subtopic. */
1771
1772  if (nosecondary && *lastsecondary)
1773    error ("entry %s follows an entry with a secondary name", line);
1774
1775  /* Start a new secondary entry if necessary. */
1776  if (!nosecondary && strncmp (secondary, lastsecondary, secondarylength))
1777    {
1778      if (pending)
1779	{
1780	  fputs ("}\n", ostream);
1781	  pending = 0;
1782	}
1783
1784      /* Write the entry for the secondary.  */
1785      fputs ("\\secondary {", ostream);
1786      fwrite (secondary, secondarylength, 1, ostream);
1787      fputs ("}{", ostream);
1788      pending = 1;
1789
1790      /* Record name of most recent secondary. */
1791      if (lastsecondarylength < secondarylength)
1792	{
1793	  lastsecondarylength = secondarylength + 100;
1794	  lastsecondary = (char *) xrealloc (lastsecondary,
1795					     1 + lastsecondarylength);
1796	}
1797      strncpy (lastsecondary, secondary, secondarylength);
1798      lastsecondary[secondarylength] = 0;
1799    }
1800
1801  /* Here to add one more page number to the current entry. */
1802  if (pending++ != 1)
1803    fputs (", ", ostream);	/* Punctuate first, if this is not the first. */
1804  fwrite (pagenumber, pagelength, 1, ostream);
1805}
1806
1807/* Close out any unfinished output entry. */
1808
1809void
1810finish_index (ostream)
1811     FILE *ostream;
1812{
1813  if (pending)
1814    fputs ("}\n", ostream);
1815  free (lastprimary);
1816  free (lastsecondary);
1817}
1818
1819/* Copy the lines in the sorted order.
1820   Each line is copied out of the input file it was found in. */
1821
1822void
1823writelines (linearray, nlines, ostream)
1824     char **linearray;
1825     int nlines;
1826     FILE *ostream;
1827{
1828  char **stop_line = linearray + nlines;
1829  char **next_line;
1830
1831  init_index ();
1832
1833  /* Output the text of the lines, and free the buffer space. */
1834
1835  for (next_line = linearray; next_line != stop_line; next_line++)
1836    {
1837      /* If -u was specified, output the line only if distinct from previous one.  */
1838      if (next_line == linearray
1839      /* Compare previous line with this one, using only the
1840         explicitly specd keyfields. */
1841	  || compare_general (*(next_line - 1), *next_line, 0L, 0L, num_keyfields - 1))
1842	{
1843	  char *p = *next_line;
1844	  char c;
1845
1846	  while ((c = *p++) && c != '\n')
1847	    /* Do nothing. */ ;
1848	  *(p - 1) = 0;
1849	  indexify (*next_line, ostream);
1850	}
1851    }
1852
1853  finish_index (ostream);
1854}
1855
1856/* Assume (and optionally verify) that each input file is sorted;
1857   merge them and output the result.
1858   Returns nonzero if any input file fails to be sorted.
1859
1860   This is the high-level interface that can handle an unlimited
1861   number of files.  */
1862
1863#define MAX_DIRECT_MERGE 10
1864
1865int
1866merge_files (infiles, nfiles, outfile)
1867     char **infiles;
1868     int nfiles;
1869     char *outfile;
1870{
1871  char **tempfiles;
1872  int ntemps;
1873  int i;
1874  int value = 0;
1875  int start_tempcount = tempcount;
1876
1877  if (nfiles <= MAX_DIRECT_MERGE)
1878    return merge_direct (infiles, nfiles, outfile);
1879
1880  /* Merge groups of MAX_DIRECT_MERGE input files at a time,
1881     making a temporary file to hold each group's result.  */
1882
1883  ntemps = (nfiles + MAX_DIRECT_MERGE - 1) / MAX_DIRECT_MERGE;
1884  tempfiles = (char **) xmalloc (ntemps * sizeof (char *));
1885  for (i = 0; i < ntemps; i++)
1886    {
1887      int nf = MAX_DIRECT_MERGE;
1888      if (i + 1 == ntemps)
1889	nf = nfiles - i * MAX_DIRECT_MERGE;
1890      tempfiles[i] = maketempname (++tempcount);
1891      value |= merge_direct (&infiles[i * MAX_DIRECT_MERGE], nf, tempfiles[i]);
1892    }
1893
1894  /* All temporary files that existed before are no longer needed
1895     since their contents have been merged into our new tempfiles.
1896     So delete them.  */
1897  flush_tempfiles (start_tempcount);
1898
1899  /* Now merge the temporary files we created.  */
1900
1901  merge_files (tempfiles, ntemps, outfile);
1902
1903  free (tempfiles);
1904
1905  return value;
1906}
1907
1908/* Assume (and optionally verify) that each input file is sorted;
1909   merge them and output the result.
1910   Returns nonzero if any input file fails to be sorted.
1911
1912   This version of merging will not work if the number of
1913   input files gets too high.  Higher level functions
1914   use it only with a bounded number of input files.  */
1915
1916int
1917merge_direct (infiles, nfiles, outfile)
1918     char **infiles;
1919     int nfiles;
1920     char *outfile;
1921{
1922  struct linebuffer *lb1, *lb2;
1923  struct linebuffer **thisline, **prevline;
1924  FILE **streams;
1925  int i;
1926  int nleft;
1927  int lossage = 0;
1928  int *file_lossage;
1929  struct linebuffer *prev_out = 0;
1930  FILE *ostream = stdout;
1931
1932  if (outfile)
1933    {
1934      ostream = fopen (outfile, "w");
1935    }
1936  if (!ostream)
1937    pfatal_with_name (outfile);
1938
1939  init_index ();
1940
1941  if (nfiles == 0)
1942    {
1943      if (outfile)
1944	fclose (ostream);
1945      return 0;
1946    }
1947
1948  /* For each file, make two line buffers.
1949     Also, for each file, there is an element of `thisline'
1950     which points at any time to one of the file's two buffers,
1951     and an element of `prevline' which points to the other buffer.
1952     `thisline' is supposed to point to the next available line from the file,
1953     while `prevline' holds the last file line used,
1954     which is remembered so that we can verify that the file is properly sorted. */
1955
1956  /* lb1 and lb2 contain one buffer each per file. */
1957  lb1 = (struct linebuffer *) xmalloc (nfiles * sizeof (struct linebuffer));
1958  lb2 = (struct linebuffer *) xmalloc (nfiles * sizeof (struct linebuffer));
1959
1960  /* thisline[i] points to the linebuffer holding the next available line in file i,
1961     or is zero if there are no lines left in that file.  */
1962  thisline = (struct linebuffer **)
1963    xmalloc (nfiles * sizeof (struct linebuffer *));
1964  /* prevline[i] points to the linebuffer holding the last used line
1965     from file i.  This is just for verifying that file i is properly
1966     sorted.  */
1967  prevline = (struct linebuffer **)
1968    xmalloc (nfiles * sizeof (struct linebuffer *));
1969  /* streams[i] holds the input stream for file i.  */
1970  streams = (FILE **) xmalloc (nfiles * sizeof (FILE *));
1971  /* file_lossage[i] is nonzero if we already know file i is not
1972     properly sorted.  */
1973  file_lossage = (int *) xmalloc (nfiles * sizeof (int));
1974
1975  /* Allocate and initialize all that storage. */
1976
1977  for (i = 0; i < nfiles; i++)
1978    {
1979      initbuffer (&lb1[i]);
1980      initbuffer (&lb2[i]);
1981      thisline[i] = &lb1[i];
1982      prevline[i] = &lb2[i];
1983      file_lossage[i] = 0;
1984      streams[i] = fopen (infiles[i], "r");
1985      if (!streams[i])
1986	pfatal_with_name (infiles[i]);
1987
1988      readline (thisline[i], streams[i]);
1989    }
1990
1991  /* Keep count of number of files not at eof. */
1992  nleft = nfiles;
1993
1994  while (nleft)
1995    {
1996      struct linebuffer *best = 0;
1997      struct linebuffer *exch;
1998      int bestfile = -1;
1999      int i;
2000
2001      /* Look at the next avail line of each file; choose the least one.  */
2002
2003      for (i = 0; i < nfiles; i++)
2004	{
2005	  if (thisline[i] &&
2006	      (!best ||
2007	       0 < compare_general (best->buffer, thisline[i]->buffer,
2008				 (long) bestfile, (long) i, num_keyfields)))
2009	    {
2010	      best = thisline[i];
2011	      bestfile = i;
2012	    }
2013	}
2014
2015      /* Output that line, unless it matches the previous one and we
2016	 don't want duplicates. */
2017
2018      if (!(prev_out &&
2019	    !compare_general (prev_out->buffer,
2020			      best->buffer, 0L, 1L, num_keyfields - 1)))
2021	indexify (best->buffer, ostream);
2022      prev_out = best;
2023
2024      /* Now make the line the previous of its file, and fetch a new
2025	 line from that file.  */
2026
2027      exch = prevline[bestfile];
2028      prevline[bestfile] = thisline[bestfile];
2029      thisline[bestfile] = exch;
2030
2031      while (1)
2032	{
2033	  /* If the file has no more, mark it empty. */
2034
2035	  if (feof (streams[bestfile]))
2036	    {
2037	      thisline[bestfile] = 0;
2038	      /* Update the number of files still not empty. */
2039	      nleft--;
2040	      break;
2041	    }
2042	  readline (thisline[bestfile], streams[bestfile]);
2043	  if (thisline[bestfile]->buffer[0] || !feof (streams[bestfile]))
2044	    break;
2045	}
2046    }
2047
2048  finish_index (ostream);
2049
2050  /* Free all storage and close all input streams. */
2051
2052  for (i = 0; i < nfiles; i++)
2053    {
2054      fclose (streams[i]);
2055      free (lb1[i].buffer);
2056      free (lb2[i].buffer);
2057    }
2058  free (file_lossage);
2059  free (lb1);
2060  free (lb2);
2061  free (thisline);
2062  free (prevline);
2063  free (streams);
2064
2065  if (outfile)
2066    fclose (ostream);
2067
2068  return lossage;
2069}
2070
2071/* Print error message and exit.  */
2072
2073void
2074fatal (format, arg)
2075     char *format, *arg;
2076{
2077  error (format, arg);
2078  exit (TI_FATAL_ERROR);
2079}
2080
2081/* Print error message.  FORMAT is printf control string, ARG is arg for it. */
2082void
2083error (format, arg)
2084     char *format, *arg;
2085{
2086  printf ("%s: ", program_name);
2087  printf (format, arg);
2088  if (format[strlen (format) -1] != '\n')
2089    printf ("\n");
2090}
2091
2092void
2093perror_with_name (name)
2094     char *name;
2095{
2096  char *s;
2097
2098  s = strerror (errno);
2099  printf ("%s: ", program_name);
2100  printf ("%s; for file `%s'.\n", s, name);
2101}
2102
2103void
2104pfatal_with_name (name)
2105     char *name;
2106{
2107  char *s;
2108
2109  s = strerror (errno);
2110  printf ("%s: ", program_name);
2111  printf ("%s; for file `%s'.\n", s, name);
2112  exit (TI_FATAL_ERROR);
2113}
2114
2115/* Return a newly-allocated string whose contents concatenate those of
2116   S1, S2, S3.  */
2117
2118char *
2119concat (s1, s2, s3)
2120     char *s1, *s2, *s3;
2121{
2122  int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
2123  char *result = (char *) xmalloc (len1 + len2 + len3 + 1);
2124
2125  strcpy (result, s1);
2126  strcpy (result + len1, s2);
2127  strcpy (result + len1 + len2, s3);
2128  *(result + len1 + len2 + len3) = 0;
2129
2130  return result;
2131}
2132
2133#if !defined (HAVE_STRERROR)
2134extern char *sys_errlist[];
2135extern int sys_nerr;
2136
2137char *
2138strerror (num)
2139     int num;
2140{
2141  if (num >= sys_nerr)
2142    return ("");
2143  else
2144    return (sys_errlist[num]);
2145}
2146#endif /* !HAVE_STRERROR */
2147
2148#if !defined (HAVE_STRCHR)
2149char *
2150strrchr (string, character)
2151     char *string;
2152     int character;
2153{
2154  register int i;
2155
2156  for (i = strlen (string) - 1; i > -1; i--)
2157    if (string[i] == character)
2158      return (string + i);
2159
2160  return ((char *)NULL);
2161}
2162#endif /* HAVE_STRCHR */
2163
2164/* Just like malloc, but kills the program in case of fatal error. */
2165void *
2166xmalloc (nbytes)
2167     int nbytes;
2168{
2169  void *temp = (void *) malloc (nbytes);
2170
2171  if (nbytes && temp == (void *)NULL)
2172    memory_error ("xmalloc", nbytes);
2173
2174  return (temp);
2175}
2176
2177/* Like realloc (), but barfs if there isn't enough memory. */
2178void *
2179xrealloc (pointer, nbytes)
2180     void *pointer;
2181     int nbytes;
2182{
2183  void *temp;
2184
2185  if (!pointer)
2186    temp = (void *)xmalloc (nbytes);
2187  else
2188    temp = (void *)realloc (pointer, nbytes);
2189
2190  if (nbytes && !temp)
2191    memory_error ("xrealloc", nbytes);
2192
2193  return (temp);
2194}
2195
2196memory_error (callers_name, bytes_wanted)
2197     char *callers_name;
2198     int bytes_wanted;
2199{
2200  char printable_string[80];
2201
2202  sprintf (printable_string,
2203	   "Virtual memory exhausted in %s ()!  Needed %d bytes.",
2204	   callers_name, bytes_wanted);
2205
2206  error (printable_string);
2207  abort ();
2208}
2209
2210