1 /* Prepare TeX index dribble output into an actual index.
2 
3    Version 1.45
4 
5    Copyright (C) 1987, 1991, 1992 Free Software Foundation, Inc.
6 
7    This program is free software; you can redistribute it and/or modify
8    it under the terms of the GNU General Public License as published by
9    the Free Software Foundation; either version 2, or (at your option)
10    any later version.
11 
12    This program is distributed in the hope that it will be useful,
13    but WITHOUT ANY WARRANTY; without even the implied warranty of
14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15    GNU General Public License for more details.
16 
17    You should have received a copy of the GNU General Public License
18    along with this program; if not, write to the Free Software
19    Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307. */
20 
21 /*
22    95.10.18 modified by Nobuyosi KAMEI <CXK03514@niftyserve.or.jp>
23 	���ܸ�EUC�������Ǥ���褦�ˤ�����
24    95.10.19 modified by K. Handa <handa@etl.go.jp>
25 	\initial{...} �ν��Ϥ�Ĵ����
26 */
27 
28 
29 #include <stdio.h>
30 #include <ctype.h>
31 #include <errno.h>
32 #include    <assert.h>
33 #include "getopt.h"
34 
35 #define TEXINDEX_VERSION_STRING "GNU Texindex 2.0 for Texinfo release 3.4"
36 #define TEXINDEX_MODIFICATION_STRING "(modified for Japanese EUC encoding)"
37 
38 #if defined (emacs)
39 #  include "../src/config.h"
40 /* Some s/os.h files redefine these. */
41 #  undef read
42 #  undef close
43 #  undef write
44 #  undef open
45 #endif
46 
47 #if defined (HAVE_STRING_H)
48 #  include <string.h>
49 #endif /* HAVE_STRING_H */
50 
51 #if !defined (HAVE_STRCHR)
52 char *strrchr ();
53 #endif /* !HAVE_STRCHR */
54 
55 #if defined (STDC_HEADERS)
56 #  include <stdlib.h>
57 #else /* !STDC_HEADERS */
58 char *getenv (), *malloc (), *realloc ();
59 #endif /* !STDC_HEADERS */
60 
61 #if defined (HAVE_UNISTD_H)
62 #  include <unistd.h>
63 #else /* !HAVE_UNISTD_H */
64 off_t lseek ();
65 #endif /* !HAVE_UNISTD_H */
66 
67 #if !defined (HAVE_MEMSET)
68 #undef memset
69 #define memset(ptr, ignore, count) bzero (ptr, count)
70 #endif
71 
72 
73 char *mktemp ();
74 
75 #if defined (VMS)
76 #  include <file.h>
77 #  define TI_NO_ERROR ((1 << 28) | 1)
78 #  define TI_FATAL_ERROR ((1 << 28) | 4)
79 #  define unlink delete
80 #else /* !VMS */
81 #  if defined (HAVE_SYS_FCNTL_H)
82 #    include <sys/types.h>
83 #    include <sys/fcntl.h>
84 #  endif /* HAVE_SYS_FCNTL_H */
85 
86 #  if defined (_AIX) || !defined (_POSIX_VERSION)
87 #    include <sys/file.h>
88 #  else /* !AIX && _POSIX_VERSION */
89 #    if !defined (HAVE_SYS_FCNTL_H)
90 #      include <fcntl.h>
91 #    endif /* !HAVE_FCNTL_H */
92 #  endif /* !_AIX && _POSIX_VERSION */
93 #  define TI_NO_ERROR 0
94 #  define TI_FATAL_ERROR 1
95 #endif /* !VMS */
96 
97 #if !defined (SEEK_SET)
98 #  define SEEK_SET 0
99 #  define SEEK_CUR 1
100 #  define SEEK_END 2
101 #endif /* !SEEK_SET */
102 
103 #if !defined (errno)
104 extern int errno;
105 #endif
106 char *strerror ();
107 
108 /* When sorting in core, this structure describes one line
109    and the position and length of its first keyfield.  */
110 struct lineinfo
111 {
112   u_short *text;		/* The actual text of the line. */
113   union {
114     u_short *text;	/* The start of the key (for textual comparison). */
115     long number;	/* The numeric value (for numeric comparison). */
116   } key;
117   long keylen;		/* Length of KEY field. */
118 };
119 
120 /* This structure describes a field to use as a sort key. */
121 struct keyfield
122 {
123   int startwords;	/* Number of words to skip. */
124   int startchars;	/* Number of additional chars to skip. */
125   int endwords;		/* Number of words to ignore at end. */
126   int endchars;		/* Ditto for characters of last word. */
127   char ignore_blanks;	/* Non-zero means ignore spaces and tabs. */
128   char fold_case;	/* Non-zero means case doesn't matter. */
129   char reverse;		/* Non-zero means compare in reverse order. */
130   char numeric;		/* Non-zeros means field is ASCII numeric. */
131   char positional;	/* Sort according to file position. */
132   char braced;		/* Count balanced-braced groupings as fields. */
133 };
134 
135 /* Vector of keyfields to use. */
136 struct keyfield keyfields[3];
137 
138 /* Number of keyfields stored in that vector.  */
139 int num_keyfields = 3;
140 
141 /* Vector of input file names, terminated with a null pointer. */
142 char **infiles;
143 
144 /* Vector of corresponding output file names, or NULL, meaning default it
145    (add an `s' to the end). */
146 char **outfiles;
147 
148 /* Length of `infiles'. */
149 int num_infiles;
150 
151 /* Pointer to the array of pointers to lines being sorted. */
152 u_short **linearray;
153 
154 /* The allocated length of `linearray'. */
155 long nlines;
156 
157 /* Directory to use for temporary files.  On Unix, it ends with a slash.  */
158 char *tempdir;
159 
160 /* Start of filename to use for temporary files.  */
161 char *tempbase;
162 
163 /* Number of last temporary file.  */
164 int tempcount;
165 
166 /* Number of last temporary file already deleted.
167    Temporary files are deleted by `flush_tempfiles' in order of creation.  */
168 int last_deleted_tempcount;
169 
170 /* During in-core sort, this points to the base of the data block
171    which contains all the lines of data.  */
172 u_short *text_base;
173 
174 /* Additional command switches .*/
175 
176 /* Nonzero means do not delete tempfiles -- for debugging. */
177 int keep_tempfiles;
178 
179 /* The name this program was run with. */
180 char *program_name;
181 
182 /* ��̾ʸ�����첻�ơ��֥� */
183 int Vowel_Table[0x60];
184 
185 /* Forward declarations of functions in this file. */
186 
187 void decode_command ();
188 void sort_in_core ();
189 void sort_offline ();
190 u_short **parsefile ();
191 u_short *find_field ();
192 u_short *find_pos ();
193 long find_value ();
194 u_short *find_braced_pos (u_short *str, int words, int chars, int ignore_blanks);
195 u_short *find_braced_end ();
196 void writelines (u_short **linearray, int nlines, FILE *ostream);
197 int compare_field ();
198 int compare_full ();
199 long readline ();
200 int merge_files ();
201 int merge_direct ();
202 void pfatal_with_name ();
203 void fatal ();
204 void error ();
205 void *xmalloc (), *xrealloc ();
206 char *concat ();
207 char *maketempname ();
208 void flush_tempfiles ();
209 char *tempcopy ();
210 static	void	memory_error();
211 static int wstrncmp(const u_short *s1, const u_short *s2, size_t n);
212 static	int wfwrite(u_short *ptr, size_t  size,  size_t  nmemb,  FILE*stream);
213 static int wtoc(char *cptr, u_short const *ptr, int len);
214 static int  Vowel_of(int c);
215 static void init_Vowel_Table(void);
216 
217 #define	EUC_BYTE(c) (0x00A1 <= c && c <= 0x00FE)
218 #define G1_CHAR(c) (c & 0xFF00)
219 
220 #define MAX_IN_CORE_SORT 500000
221 
222 void
main(argc,argv)223 main (argc, argv)
224      int argc;
225      char **argv;
226 {
227   int i;
228 
229   tempcount = 0;
230   last_deleted_tempcount = 0;
231 
232   program_name = strrchr (argv[0], '/');
233   if (program_name != (char *)NULL)
234     program_name++;
235   else
236     program_name = argv[0];
237 
238   /* Describe the kind of sorting to do. */
239   /* The first keyfield uses the first braced field and folds case. */
240   keyfields[0].braced = 1;
241   keyfields[0].fold_case = 1;
242   keyfields[0].endwords = -1;
243   keyfields[0].endchars = -1;
244 
245   /* The second keyfield uses the second braced field, numerically. */
246   keyfields[1].braced = 1;
247   keyfields[1].numeric = 1;
248   keyfields[1].startwords = 1;
249   keyfields[1].endwords = -1;
250   keyfields[1].endchars = -1;
251 
252   /* The third keyfield (which is ignored while discarding duplicates)
253      compares the whole line. */
254   keyfields[2].endwords = -1;
255   keyfields[2].endchars = -1;
256 
257   decode_command (argc, argv);
258 
259   tempbase = mktemp (concat ("txiXXXXXX", "", ""));
260 
261   /* Process input files completely, one by one.  */
262 
263   for (i = 0; i < num_infiles; i++)
264     {
265       int desc;
266       long ptr;
267       char *outfile;
268 
269       desc = open (infiles[i], O_RDONLY, 0);
270       if (desc < 0)
271 	pfatal_with_name (infiles[i]);
272       lseek (desc, (off_t) 0, SEEK_END);
273       ptr = (long) lseek (desc, (off_t) 0, SEEK_CUR);
274 
275       close (desc);
276 
277       outfile = outfiles[i];
278       if (!outfile)
279 	{
280 	  outfile = concat (infiles[i], "s", "");
281 	}
282 
283       if (ptr < MAX_IN_CORE_SORT)
284 	/* Sort a small amount of data. */
285 	sort_in_core (infiles[i], ptr, outfile);
286       else
287 	sort_offline (infiles[i], ptr, outfile);
288     }
289 
290   flush_tempfiles (tempcount);
291   exit (TI_NO_ERROR);
292 }
293 
294 typedef struct
295 {
296   char *long_name;
297   char *short_name;
298   int *variable_ref;
299   int variable_value;
300   char *arg_name;
301   char *doc_string;
302 } TEXINDEX_OPTION;
303 
304 TEXINDEX_OPTION texindex_options[] = {
305   { "--keep", "-k", &keep_tempfiles, 1, (char *)NULL,
306       "Keep temporary files around after processing" },
307   { "--no-keep", 0, &keep_tempfiles, 0, (char *)NULL,
308       "Do not keep temporary files around after processing (default)" },
309   { "--output", "-o", (int *)NULL, 0, "FILE",
310       "Send output to FILE" },
311   { "--version", (char *)NULL, (int *)NULL, 0, (char *)NULL,
312       "Show version information" },
313   { "--help", "-h", (int *)NULL, 0, (char *)NULL, "Produce this listing" },
314   { (char *)NULL, (char *)NULL, (int *)NULL, 0, (char *)NULL }
315 };
316 
317 void
usage(result_value)318 usage (result_value)
319      int result_value;
320 {
321   register int i;
322 
323   fprintf (stderr, "Usage: %s [OPTIONS] FILE...\n", program_name);
324   fprintf (stderr, "  Generate a permuted index for the TeX files given.\n");
325   fprintf (stderr, "  Usually FILE... is `foo.??' for the source file `foo.tex'.\n");
326   fprintf (stderr, "  The OPTIONS are:\n");
327 
328   for (i = 0; texindex_options[i].long_name; i++)
329     {
330       fprintf (stderr, "    %s %s",
331 	       texindex_options[i].long_name,
332 	       texindex_options[i].arg_name ?
333 	       texindex_options[i].arg_name : "");
334 
335       if (texindex_options[i].short_name)
336 	fprintf (stderr, " \n    or %s %s",
337 		 texindex_options[i].short_name,
338 		 texindex_options[i].arg_name ?
339 		 texindex_options[i].arg_name : "");
340       fprintf (stderr, "\t%s\n", texindex_options[i].doc_string);
341     }
342   exit (result_value);
343 }
344 
345 /* Decode the command line arguments to set the parameter variables
346    and set up the vector of keyfields and the vector of input files. */
347 
348 void
decode_command(argc,argv)349 decode_command (argc, argv)
350      int argc;
351      char **argv;
352 {
353   int arg_index = 1;
354   char **ip;
355   char **op;
356 
357   /* Store default values into parameter variables. */
358 
359   tempdir = getenv ("TMPDIR");
360 #ifdef VMS
361   if (tempdir == NULL)
362     tempdir = "sys$scratch:";
363 #else
364   if (tempdir == NULL)
365     tempdir = "/tmp/";
366   else
367     tempdir = concat (tempdir, "/", "");
368 #endif
369 
370   keep_tempfiles = 0;
371 
372   /* Allocate ARGC input files, which must be enough.  */
373 
374   infiles = (char **) xmalloc (argc * sizeof (char *));
375   outfiles = (char **) xmalloc (argc * sizeof (char *));
376   ip = infiles;
377   op = outfiles;
378 
379   while (arg_index < argc)
380     {
381       char *arg = argv[arg_index++];
382 
383       if (*arg == '-')
384 	{
385 	  if (strcmp (arg, "--version") == 0)
386 	    {
387 	      fprintf (stderr, "%s", TEXINDEX_VERSION_STRING);
388 #ifdef TEXINDEX_MODIFICATION_STRING
389 	      fprintf (stderr, " %s", TEXINDEX_MODIFICATION_STRING);
390 #endif
391 	      fprintf (stderr, "\n");
392 	      exit (0);
393 	    }
394 	  else if ((strcmp (arg, "--keep") == 0) ||
395 		   (strcmp (arg, "-k") == 0))
396 	    {
397 	      keep_tempfiles = 1;
398 	    }
399 	  else if ((strcmp (arg, "--help") == 0) ||
400 		   (strcmp (arg, "-h") == 0))
401 	    {
402 	      usage (0);
403 	    }
404 	  else if ((strcmp (arg, "--output") == 0) ||
405 		   (strcmp (arg, "-o") == 0))
406 	    {
407 	      if (argv[arg_index] != (char *)NULL)
408 		{
409 		  arg_index++;
410 		  if (op > outfiles)
411 		    *(op - 1) = argv[arg_index];
412 		}
413 	      else
414 		usage (1);
415 	    }
416 	  else
417 	    usage (1);
418 	}
419       else
420 	{
421 	  *ip++ = arg;
422 	  *op++ = (char *)NULL;
423 	}
424     }
425 
426   /* Record number of keyfields and terminate list of filenames. */
427   num_infiles = ip - infiles;
428   *ip = (char *)NULL;
429   if (num_infiles == 0)
430     usage (1);
431 }
432 
433 /* Return a name for a temporary file. */
434 
435 char *
maketempname(count)436 maketempname (count)
437      int count;
438 {
439   char tempsuffix[10];
440   sprintf (tempsuffix, "%d", count);
441   return concat (tempdir, tempbase, tempsuffix);
442 }
443 
444 /* Delete all temporary files up to TO_COUNT. */
445 
446 void
flush_tempfiles(to_count)447 flush_tempfiles (to_count)
448      int to_count;
449 {
450   if (keep_tempfiles)
451     return;
452   while (last_deleted_tempcount < to_count)
453     unlink (maketempname (++last_deleted_tempcount));
454 }
455 
456 /* Copy the input file open on IDESC into a temporary file
457    and return the temporary file name. */
458 
459 #define BUFSIZE 1024
460 
461 char *
tempcopy(idesc)462 tempcopy (idesc)
463      int idesc;
464 {
465   char *outfile = maketempname (++tempcount);
466   int odesc;
467   char buffer[BUFSIZE];
468 
469   odesc = open (outfile, O_WRONLY | O_CREAT, 0666);
470 
471   if (odesc < 0)
472     pfatal_with_name (outfile);
473 
474   while (1)
475     {
476       int nread = read (idesc, buffer, BUFSIZE);
477       write (odesc, buffer, nread);
478       if (!nread)
479 	break;
480     }
481 
482   close (odesc);
483 
484   return outfile;
485 }
486 
487 /* Compare LINE1 and LINE2 according to the specified set of keyfields. */
488 
489 int
compare_full(line1,line2)490 compare_full (line1, line2)
491      u_short **line1, **line2;
492 {
493   int i;
494 
495   /* Compare using the first keyfield;
496      if that does not distinguish the lines, try the second keyfield;
497      and so on. */
498 
499   for (i = 0; i < num_keyfields; i++)
500     {
501       long length1, length2;
502       u_short *start1 = find_field (&keyfields[i], *line1, &length1);
503       u_short *start2 = find_field (&keyfields[i], *line2, &length2);
504       int tem = compare_field (&keyfields[i], start1, length1, *line1 - text_base,
505 			       start2, length2, *line2 - text_base);
506       if (tem)
507 	{
508 	  if (keyfields[i].reverse)
509 	    return -tem;
510 	  return tem;
511 	}
512     }
513 
514   return 0;			/* Lines match exactly. */
515 }
516 
517 /* Compare LINE1 and LINE2, described by structures
518    in which the first keyfield is identified in advance.
519    For positional sorting, assumes that the order of the lines in core
520    reflects their nominal order.  */
521 
522 int
compare_prepared(line1,line2)523 compare_prepared (line1, line2)
524      struct lineinfo *line1, *line2;
525 {
526   int i;
527   int tem;
528   u_short *text1, *text2;
529 
530   /* Compare using the first keyfield, which has been found for us already. */
531   if (keyfields->positional)
532     {
533       if (line1->text - text_base > line2->text - text_base)
534 	tem = 1;
535       else
536 	tem = -1;
537     }
538   else if (keyfields->numeric)
539     tem = line1->key.number - line2->key.number;
540   else
541     tem = compare_field (keyfields, line1->key.text, line1->keylen, 0,
542 			 line2->key.text, line2->keylen, 0);
543   if (tem)
544     {
545       if (keyfields->reverse)
546 	return -tem;
547       return tem;
548     }
549 
550   text1 = line1->text;
551   text2 = line2->text;
552 
553   /* Compare using the second keyfield;
554      if that does not distinguish the lines, try the third keyfield;
555      and so on. */
556 
557   for (i = 1; i < num_keyfields; i++)
558     {
559       long length1, length2;
560       u_short *start1 = find_field (&keyfields[i], text1, &length1);
561       u_short *start2 = find_field (&keyfields[i], text2, &length2);
562       int tem = compare_field (&keyfields[i], start1, length1, text1 - text_base,
563 			       start2, length2, text2 - text_base);
564       if (tem)
565 	{
566 	  if (keyfields[i].reverse)
567 	    return -tem;
568 	  return tem;
569 	}
570     }
571 
572   return 0;			/* Lines match exactly. */
573 }
574 
575 /* Like compare_full but more general.
576    You can pass any strings, and you can say how many keyfields to use.
577    POS1 and POS2 should indicate the nominal positional ordering of
578    the two lines in the input.  */
579 
580 int
compare_general(str1,str2,pos1,pos2,use_keyfields)581 compare_general (str1, str2, pos1, pos2, use_keyfields)
582      u_short *str1, *str2;
583      long pos1, pos2;
584      int use_keyfields;
585 {
586   int i;
587 
588   /* Compare using the first keyfield;
589      if that does not distinguish the lines, try the second keyfield;
590      and so on. */
591 
592   for (i = 0; i < use_keyfields; i++)
593     {
594       long length1, length2;
595       u_short *start1 = find_field (&keyfields[i], str1, &length1);
596       u_short *start2 = find_field (&keyfields[i], str2, &length2);
597       int tem = compare_field (&keyfields[i], start1, length1, pos1,
598 			       start2, length2, pos2);
599       if (tem)
600 	{
601 	  if (keyfields[i].reverse)
602 	    return -tem;
603 	  return tem;
604 	}
605     }
606 
607   return 0;			/* Lines match exactly. */
608 }
609 
610 /* Find the start and length of a field in STR according to KEYFIELD.
611    A pointer to the starting character is returned, and the length
612    is stored into the int that LENGTHPTR points to.  */
613 
614 u_short *
find_field(keyfield,str,lengthptr)615 find_field (keyfield, str, lengthptr)
616      struct keyfield *keyfield;
617      u_short *str;
618      long *lengthptr;
619 {
620   u_short *start;
621   u_short *end;
622   u_short *(*fun) ();
623 
624   if (keyfield->braced)
625     fun = find_braced_pos;
626   else
627     fun = find_pos;
628 
629   start = (*fun) (str, keyfield->startwords, keyfield->startchars,
630 		  keyfield->ignore_blanks);
631   if (keyfield->endwords < 0)
632     {
633       if (keyfield->braced)
634 	end = find_braced_end (start);
635       else
636 	{
637 	  end = start;
638 	  while (*end && *end != '\n')
639 	    end++;
640 	}
641     }
642   else
643     {
644       end = (*fun) (str, keyfield->endwords, keyfield->endchars, 0);
645       if (end - str < start - str)
646 	end = start;
647     }
648   *lengthptr = end - start;
649   return start;
650 }
651 
652 /* Return a pointer to a specified place within STR,
653    skipping (from the beginning) WORDS words and then CHARS chars.
654    If IGNORE_BLANKS is nonzero, we skip all blanks
655    after finding the specified word.  */
656 
657 u_short *
find_pos(str,words,chars,ignore_blanks)658 find_pos (str, words, chars, ignore_blanks)
659      u_short *str;
660      int words, chars;
661      int ignore_blanks;
662 {
663   int i;
664   u_short *p = str;
665 
666   for (i = 0; i < words; i++)
667     {
668       u_short c;
669       /* Find next bunch of nonblanks and skip them. */
670       while ((c = *p) == ' ' || c == '\t')
671 	p++;
672       while ((c = *p) && c != '\n' && !(c == ' ' || c == '\t'))
673 	p++;
674       if (!*p || *p == '\n')
675 	return p;
676     }
677 
678   while (*p == ' ' || *p == '\t')
679     p++;
680 
681   for (i = 0; i < chars; i++)
682     {
683       if (!*p || *p == '\n')
684 	break;
685       p++;
686     }
687   return p;
688 }
689 
690 /* Like find_pos but assumes that each field is surrounded by braces
691    and that braces within fields are balanced. */
692 
693 u_short *
find_braced_pos(str,words,chars,ignore_blanks)694 find_braced_pos (str, words, chars, ignore_blanks)
695      u_short *str;
696      int words, chars;
697      int ignore_blanks;
698 {
699   int i;
700   int bracelevel;
701   u_short *p = str;
702   u_short c;
703 
704   for (i = 0; i < words; i++)
705     {
706       bracelevel = 1;
707       while ((c = *p++) != '{' && c != '\n' && c)
708 	/* Do nothing. */ ;
709       if (c != '{')
710 	return p - 1;
711       while (bracelevel)
712 	{
713 	  c = *p++;
714 	  if (c == '{')
715 	    bracelevel++;
716 	  if (c == '}')
717 	    bracelevel--;
718 	  if (c == 0 || c == '\n')
719 	    return p - 1;
720 	}
721     }
722 
723   while ((c = *p++) != '{' && c != '\n' && c)
724     /* Do nothing. */ ;
725 
726   if (c != '{')
727     return p - 1;
728 
729   if (ignore_blanks)
730     while ((c = *p) == ' ' || c == '\t')
731       p++;
732 
733   for (i = 0; i < chars; i++)
734     {
735       if (!*p || *p == '\n')
736 	break;
737       p++;
738     }
739   return p;
740 }
741 
742 /* Find the end of the balanced-brace field which starts at STR.
743    The position returned is just before the closing brace. */
744 
745 u_short *
find_braced_end(str)746 find_braced_end (str)
747      u_short *str;
748 {
749   int bracelevel;
750   u_short *p = str;
751   u_short c;
752 
753   bracelevel = 1;
754   while (bracelevel)
755     {
756       c = *p++;
757       if (c == '{')
758 	bracelevel++;
759       if (c == '}')
760 	bracelevel--;
761       if (c == 0 || c == '\n')
762 	return p - 1;
763     }
764   return p - 1;
765 }
766 
767 long
find_value(start,length)768 find_value (start, length)
769     u_short *start;
770     long length;
771     {
772     long	val;
773 
774     while (length != 0L)
775 	{
776 	if (isdigit (*start))
777 	    break;
778 	length--;
779 	start++;
780 	}
781 
782     val = 0;
783     while(isdigit(*start) && length > 0L)
784 	{
785 	val = val*10 + *start;
786 	start++;
787 	length--;
788 	}
789 
790     return val;
791     }
792 
793 /* Vector used to translate characters for comparison.
794    This is how we make all alphanumerics follow all else,
795    and ignore case in the first sorting.  */
796 int char_order[0x8000];
797 
798 void
init_char_order(void)799 init_char_order (void)
800     {
801     int i;
802 
803     for (i = 1; i < 0x8000; i++)
804 	char_order[i] = i;
805 
806     for (i = '0'; i <= '9'; i++)
807 	char_order[i] += 512;
808 
809     for (i = 'a'; i <= 'z'; i++)
810 	{
811 	char_order[i] = 512 + i;
812 	char_order[i + 'A' - 'a'] = 512 + i;
813 	}
814 
815     /* �ʲ��Υ���ܥ��̵�� (i.e. char_order[XXX] = 0) */
816     for (i = 0x2121; i <= 0x2132; i++) /* ���Ѷ��� �� �� */
817         char_order[i] = 0;
818     for (i = 0x213D; i <= 0x215B; i++) /* �� �� �� */
819         char_order[i] = 0;
820 
821     /*	¥����ٹ����������Ⱦ�������Ҳ�̾/ʿ��̾�ν���	*/
822     /*	    1.  ʿ��̾���Ҳ�̾��Ʊ���˰���  */
823     /*		��/��(2421/2521)����/��(2473/2573)  */
824     for (i = 0x2421; i < 0x2474; i++)
825 	char_order[i + 0x100] = i;
826 
827     /*	    2.	¥����ٹ����������Ⱦ��������	*/
828     /* ������ ��	������	*/
829     for (i = 0x2421; i < 0x242B; i += 2)
830 	char_order[i] = char_order[i + 0x100]    = i+1;
831     /* ������ ��	������	*/
832     for (i = 0x242C; i < 0x2443; i += 2)
833 	char_order[i] = char_order[i + 0x100]    = i-1;
834     /* �� */
835     char_order[0x2443]	= char_order[0x2543]	= 0x2443+1;
836     /* �š��� ��	�ġ��� */
837     for (i = 0x2445; i < 0x244A; i += 2)
838 	char_order[i] = char_order[i + 0x100]    = i-1;
839     /* �Фѡ��ܤ�	��  �ϡ���  */
840     for (i = 0x2450; i < 0x245E; i += 3)
841 	{
842 	char_order[i]	= char_order[i + 0x100]	    = i-1;
843 	char_order[i+1]	= char_order[i+1 + 0x100]   = i-1;
844 	}
845     /* ����� ��	�����	*/
846     for (i = 0x2463; i < 0x2469; i += 2)
847 	char_order[i] = char_order[i + 0x100] = i+1;
848     /* ��	��  ��	*/
849     char_order[0x246E]	= char_order[0x256E]	= 0x246E +1;
850 
851     /*	    3.	�ü��Ҳ�̾�ΰ���    */
852     char_order[0x2574]	= 0x2426;	/* ��	��  �� */
853     char_order[0x2575]	= 0x242B;	/* ��	��  �� */
854     char_order[0x2576]	= 0x2431;	/* ��	��  �� */
855 
856     /*	JISX0208�Υ���ե��٥åȡ�����  */
857     for (i = 0x2330; i <= 0x2339; i++) /* ������ -> 0-9 */
858         char_order[i] = i - 0x2330 + 512;
859     for (i = 0x2341; i <= 0x235A; i++) /* �����ڡ������ -> A-Z */
860         char_order[i] = char_order[i + (0x2361 - 0x2341)]
861 	  = 512 + (i -  0x2341) + 'a';
862 
863     /* Ĺ����������ӥ롼����ǹԤʤ� */
864 
865     init_Vowel_Table();
866     }
867 
868 /* Compare two fields (each specified as a start pointer and a character count)
869    according to KEYFIELD.
870    The sign of the value reports the relation between the fields. */
871 
872 int
compare_field(keyfield,start1,length1,pos1,start2,length2,pos2)873 compare_field (keyfield, start1, length1, pos1, start2, length2, pos2)
874      struct keyfield *keyfield;
875      u_short *start1;
876      long length1;
877      long pos1;
878      u_short *start2;
879      long length2;
880      long pos2;
881 {
882   if (keyfields->positional)
883     {
884       if (pos1 > pos2)
885 	return 1;
886       else
887 	return -1;
888     }
889   if (keyfield->numeric)
890     {
891       long value = find_value (start1, length1) - find_value (start2, length2);
892       if (value > 0)
893 	return 1;
894       if (value < 0)
895 	return -1;
896       return 0;
897     }
898   else
899     {
900     /* lexical �äƤ�? */
901       u_short *p1 = start1;
902       u_short *p2 = start2;
903       u_short *e1 = start1 + length1;
904       u_short *e2 = start2 + length2;
905       u_short	lastchar1 = 0, lastchar2 = 0;
906       int   c1, c2;
907 
908       do
909 	  {
910 	  int	co1, co2;
911 
912 	  /* char_order[X] �� 0 �Τ�Ρʥ���ܥ�ˤ�̵�뤹�� */
913 	  c1 = c2 = 0;
914 	  while (p1 < e1)
915 	    {
916 	      if (char_order[c1 = *p1++]) break;
917 	      c1 = 0;
918 	    }
919 	  while (p2 < e2)
920 	    {
921 	      if (char_order[c2 = *p2++]) break;
922 	      c2 = 0;
923 	    }
924 
925 	  /* Ĺ������ */
926 	  if (c1 == 0x213C)			/* �� */
927 	      c1    = Vowel_of(lastchar1);
928 	  if (c2 == 0x213C)			/* �� */
929 	      c2    = Vowel_of(lastchar2);
930 	  co1	= char_order[c1];
931 	  co2   = char_order[c2];
932 
933 	  if (co1 != co2)
934 	      return co1 - co2;
935 	  else
936 	      lastchar1 = c1,	lastchar2 = c2;
937 
938 	  } while(c1);
939 
940       /* Strings are equal except possibly for case.  */
941       p1 = start1;
942       p2 = start2;
943       while (1)
944 	{
945 	  int c1, c2;
946 
947 	  if (p1 == e1)
948 	    c1 = 0;
949 	  else
950 	    c1 = *p1++;
951 	  if (p2 == e2)
952 	    c2 = 0;
953 	  else
954 	    c2 = *p2++;
955 
956 	  if (c1 != c2)
957 	    /* Reverse sign here so upper case comes out last.  */
958 	    return c2 - c1;
959 	  if (!c1)
960 	    break;
961 	}
962 
963       return 0;
964     }
965 }
966 
967 /* A `struct linebuffer' is a structure which holds a line of text.
968    `readline' reads a line from a stream into a linebuffer
969    and works regardless of the length of the line.  */
970 
971 struct linebuffer
972 {
973   long size;
974   u_short *buffer;
975 };
976 
977 /* Initialize LINEBUFFER for use. */
978 
979 void
initbuffer(linebuffer)980 initbuffer (linebuffer)
981      struct linebuffer *linebuffer;
982 {
983   linebuffer->size = 200;
984   linebuffer->buffer = (u_short *) xmalloc (200*sizeof(u_short));
985 }
986 
987 /* Read a line of text from STREAM into LINEBUFFER.
988    Return the length of the line.  */
989 
990 long
readline(linebuffer,stream)991 readline (linebuffer, stream)
992      struct linebuffer *linebuffer;
993      FILE *stream;
994 {
995   u_short *buffer = linebuffer->buffer;
996   u_short *p = linebuffer->buffer;
997   u_short *end = p + linebuffer->size;
998 
999   while (1)
1000     {
1001       int c = getc (stream);
1002       if (p == end)
1003 	{
1004 	  buffer =
1005 	      (u_short *) xrealloc (buffer,
1006 				    (linebuffer->size *= 2)*sizeof(u_short));
1007 	  p += buffer - linebuffer->buffer;
1008 	  end += buffer - linebuffer->buffer;
1009 	  linebuffer->buffer = buffer;
1010 	}
1011       if (c < 0 || c == '\n')
1012 	{
1013 	  *p = 0;
1014 	  break;
1015 	}
1016       *p++ = c;
1017     }
1018 
1019   return p - buffer;
1020 }
1021 
1022 /* Sort an input file too big to sort in core.  */
1023 
1024 void
sort_offline(infile,nfiles,total,outfile)1025 sort_offline (infile, nfiles, total, outfile)
1026      char *infile;
1027      int nfiles;
1028      long total;
1029      char *outfile;
1030 {
1031   /* More than enough. */
1032   int ntemps = 2 * (total + MAX_IN_CORE_SORT - 1) / MAX_IN_CORE_SORT;
1033   char **tempfiles = (char **) xmalloc (ntemps * sizeof (char *));
1034   FILE *istream = fopen (infile, "r");
1035   int i;
1036   struct linebuffer lb;
1037   long linelength;
1038   int failure = 0;
1039 
1040   initbuffer (&lb);
1041 
1042   /* Read in one line of input data.  */
1043 
1044   linelength = readline (&lb, istream);
1045 
1046   if (lb.buffer[0] != '\\' && lb.buffer[0] != '@')
1047     {
1048       error ("%s: not a texinfo index file", infile);
1049       return;
1050     }
1051 
1052   /* Split up the input into `ntemps' temporary files, or maybe fewer,
1053      and put the new files' names into `tempfiles' */
1054 
1055   for (i = 0; i < ntemps; i++)
1056     {
1057       char *outname = maketempname (++tempcount);
1058       FILE *ostream = fopen (outname, "w");
1059       long tempsize = 0;
1060 
1061       if (!ostream)
1062 	pfatal_with_name (outname);
1063       tempfiles[i] = outname;
1064 
1065       /* Copy lines into this temp file as long as it does not make file
1066 	 "too big" or until there are no more lines.  */
1067 
1068       while (tempsize + linelength + 1 <= MAX_IN_CORE_SORT)
1069 	{
1070 	  tempsize += linelength + 1;
1071 	      {
1072 	      u_short	*tp;
1073 
1074 	      for (tp = lb.buffer; *tp; tp++)
1075 		  putc(*tp, ostream);
1076 	      }
1077 	  putc ('\n', ostream);
1078 
1079 	  /* Read another line of input data.  */
1080 
1081 	  linelength = readline (&lb, istream);
1082 	  if (!linelength && feof (istream))
1083 	    break;
1084 
1085 	  if (lb.buffer[0] != '\\' && lb.buffer[0] != '@')
1086 	    {
1087 	      error ("%s: not a texinfo index file", infile);
1088 	      failure = 1;
1089 	      goto fail;
1090 	    }
1091 	}
1092       fclose (ostream);
1093       if (feof (istream))
1094 	break;
1095     }
1096 
1097   free (lb.buffer);
1098 
1099 fail:
1100   /* Record number of temp files we actually needed.  */
1101 
1102   ntemps = i;
1103 
1104   /* Sort each tempfile into another tempfile.
1105     Delete the first set of tempfiles and put the names of the second
1106     into `tempfiles'. */
1107 
1108   for (i = 0; i < ntemps; i++)
1109     {
1110       char *newtemp = maketempname (++tempcount);
1111       sort_in_core (&tempfiles[i], MAX_IN_CORE_SORT, newtemp);
1112       if (!keep_tempfiles)
1113 	unlink (tempfiles[i]);
1114       tempfiles[i] = newtemp;
1115     }
1116 
1117   if (failure)
1118     return;
1119 
1120   /* Merge the tempfiles together and indexify. */
1121 
1122   merge_files (tempfiles, ntemps, outfile);
1123 }
1124 
1125 /* Sort INFILE, whose size is TOTAL,
1126    assuming that is small enough to be done in-core,
1127    then indexify it and send the output to OUTFILE (or to stdout).  */
1128 
1129 void
sort_in_core(infile,total,outfile)1130 sort_in_core (infile, total, outfile)
1131      char *infile;
1132      long total;
1133      char *outfile;
1134 {
1135   u_short **nextline;
1136   u_short *data = (u_short *) xmalloc ((total + 1)*sizeof(u_short));
1137   u_short *file_data;
1138   long file_size;
1139   int i;
1140   FILE *ostream = stdout;
1141   struct lineinfo *lineinfo;
1142   FILE *ifp;
1143   int	c;
1144 
1145   /* Read the contents of the file into the moby array `data'. */
1146   ifp	= fopen(infile, "r");
1147   if (!ifp)
1148     fatal ("failure reopening %s", infile);
1149   memset(data, 0, (total + 1)*sizeof(u_short));	/* zero clear */
1150   for (i = 0, c = fgetc(ifp); c != EOF; c = fgetc(ifp), i++)
1151       {
1152       if (EUC_BYTE(c))
1153 	  {
1154 	  data[i] = (c & 0x007F) << 8;
1155 	  c = fgetc(ifp) & 0x007F;
1156 	  }
1157       data[i]	+= c;
1158       }
1159   file_size = i;
1160   file_data = data;
1161   data[file_size] = 0;
1162 
1163   fclose(ifp);
1164 
1165   if (file_size > 0 && data[0] != '\\' && data[0] != '@')
1166     {
1167       error ("%s: not a texinfo index file", infile);
1168       return;
1169     }
1170 
1171   init_char_order ();
1172 
1173   /* Sort routines want to know this address. */
1174 
1175   text_base = data;
1176 
1177   /* Create the array of pointers to lines, with a default size
1178      frequently enough.  */
1179 
1180   nlines = total / 50;
1181   if (!nlines)
1182     nlines = 2;
1183   linearray = (u_short **) xmalloc (nlines * sizeof (u_short *));
1184 
1185   /* `nextline' points to the next free slot in this array.
1186      `nlines' is the allocated size.  */
1187 
1188   nextline = linearray;
1189 
1190   /* Parse the input file's data, and make entries for the lines.  */
1191 
1192   nextline = parsefile (nextline, file_data, file_size);
1193 
1194   if (nextline == 0)
1195     {
1196       error ("%s: not a texinfo index file", infile);
1197       return;
1198     }
1199 
1200   /* Sort the lines. */
1201 
1202   /* If we have enough space, find the first keyfield of each line in advance.
1203      Make a `struct lineinfo' for each line, which records the keyfield
1204      as well as the line, and sort them.  */
1205 
1206   lineinfo = (struct lineinfo *) malloc ((nextline - linearray) * sizeof (struct lineinfo));
1207 
1208   if (lineinfo)
1209     {
1210       struct lineinfo *lp;
1211       u_short **p;
1212 
1213       for (lp = lineinfo, p = linearray; p != nextline; lp++, p++)
1214 	{
1215 	  lp->text = *p;
1216 	  lp->key.text = find_field (keyfields, *p, &lp->keylen);
1217 	  if (keyfields->numeric)
1218 	    lp->key.number = find_value (lp->key.text, lp->keylen);
1219 	}
1220 
1221       qsort (lineinfo, nextline - linearray, sizeof (struct lineinfo), compare_prepared);
1222 
1223       for (lp = lineinfo, p = linearray; p != nextline; lp++, p++)
1224 	  *p = lp->text;
1225 
1226       free (lineinfo);
1227     }
1228   else
1229     qsort (linearray, nextline - linearray, sizeof (u_short *), compare_full);
1230 
1231   /* Open the output file. */
1232 
1233   if (outfile)
1234     {
1235       ostream = fopen (outfile, "w");
1236       if (!ostream)
1237 	pfatal_with_name (outfile);
1238     }
1239 
1240   writelines (linearray, nextline - linearray, ostream);
1241   if (outfile)
1242     fclose (ostream);
1243 
1244   free (linearray);
1245   free (data);
1246 }
1247 
1248 /* Parse an input string in core into lines.
1249    DATA is the input string, and SIZE is its length.
1250    Data goes in LINEARRAY starting at NEXTLINE.
1251    The value returned is the first entry in LINEARRAY still unused.
1252    Value 0 means input file contents are invalid.  */
1253 
1254 u_short **
parsefile(nextline,data,size)1255 parsefile (nextline, data, size)
1256      u_short **nextline;
1257      u_short *data;
1258      long size;
1259 {
1260   u_short *p, *end;
1261   u_short **line = nextline;
1262 
1263   p = data;
1264   end = p + size;
1265   *end = 0;
1266 
1267   while (p != end)
1268     {
1269       if (p[0] != '\\' && p[0] != '@')
1270 	return 0;
1271 
1272       *line = p;
1273       while (*p && *p != '\n')
1274 	p++;
1275       if (p != end)
1276 	p++;
1277 
1278       line++;
1279       if (line == linearray + nlines)
1280 	{
1281 	  u_short **old = linearray;
1282 	  linearray =
1283 	      (u_short **) xrealloc (linearray,
1284 				     sizeof (u_short *) * (nlines *= 4));
1285 	  line += linearray - old;
1286 	}
1287     }
1288 
1289   return line;
1290 }
1291 
1292 /* Indexification is a filter applied to the sorted lines
1293    as they are being written to the output file.
1294    Multiple entries for the same name, with different page numbers,
1295    get combined into a single entry with multiple page numbers.
1296    The first braced field, which is used for sorting, is discarded.
1297    However, its first character is examined, folded to lower case,
1298    and if it is different from that in the previous line fed to us
1299    a \initial line is written with one argument, the new initial.
1300 
1301    If an entry has four braced fields, then the second and third
1302    constitute primary and secondary names.
1303    In this case, each change of primary name
1304    generates a \primary line which contains only the primary name,
1305    and in between these are \secondary lines which contain
1306    just a secondary name and page numbers. */
1307 
1308 /* The last primary name we wrote a \primary entry for.
1309    If only one level of indexing is being done, this is the last name seen. */
1310 u_short *lastprimary;
1311 /* Length of storage allocated for lastprimary. */
1312 int lastprimarylength;
1313 
1314 /* Similar, for the secondary name. */
1315 u_short *lastsecondary;
1316 int lastsecondarylength;
1317 
1318 /* Zero if we are not in the middle of writing an entry.
1319    One if we have written the beginning of an entry but have not
1320    yet written any page numbers into it.
1321    Greater than one if we have written the beginning of an entry
1322    plus at least one page number. */
1323 int pending;
1324 
1325 /* The initial (for sorting purposes) of the last primary entry written.
1326    When this changes, a \initial {c} line is written */
1327 
1328 u_short *lastinitial;
1329 
1330 int lastinitiallength;
1331 
1332 /* When we need a string of length 1 for the value of lastinitial,
1333    store it here.  */
1334 
1335 u_short lastinitial1[2];
1336 
1337 /* Initialize static storage for writing an index. */
1338 
1339 void
init_index()1340 init_index ()
1341 {
1342   pending = 0;
1343   lastinitial = lastinitial1;
1344   lastinitial1[0] = 0;
1345   lastinitial1[1] = 0;
1346   lastinitiallength = 0;
1347   lastprimarylength = 100;
1348   lastprimary = (u_short *) xmalloc ((lastprimarylength + 1)*sizeof(u_short));
1349   memset (lastprimary, '\0', (lastprimarylength + 1)*sizeof(u_short));
1350   lastsecondarylength = 100;
1351   lastsecondary =
1352       (u_short *) xmalloc ((lastsecondarylength + 1)*sizeof(u_short));
1353   memset (lastsecondary, '\0', (lastsecondarylength + 1)*sizeof(u_short));
1354 }
1355 
1356 /* Indexify.  Merge entries for the same name,
1357    insert headers for each initial character, etc.  */
1358 
1359 void
indexify(u_short * line,FILE * ostream)1360 indexify (u_short *line, FILE *ostream)
1361 {
1362   u_short *primary, *secondary, *pagenumber;
1363   int primarylength, secondarylength = 0, pagelength;
1364   int nosecondary;
1365   int initiallength;
1366   u_short *initial;
1367   u_short initial1[2];
1368   register u_short *p;
1369 
1370   /* First, analyze the parts of the entry fed to us this time. */
1371 
1372   p = find_braced_pos (line, 0, 0, 0);
1373   if (*p == '{')
1374     {
1375       initial = p;
1376       /* Get length of inner pair of braces starting at `p',
1377 	 including that inner pair of braces.  */
1378       initiallength = find_braced_end (p + 1) + 1 - p;
1379     }
1380   else
1381     {
1382       initial = initial1;
1383       initial1[0] = *p & 0x7F7F;
1384       initial1[1] = 0;
1385       initiallength = 1;
1386 
1387       if (initial1[0] >= 'a' && initial1[0] <= 'z')
1388 	initial1[0] -= ' ';
1389       else if (initial1[0] > 0x2000)
1390 	initial1[0] = char_order[initial1[0]];
1391     }
1392 
1393   pagenumber = find_braced_pos (line, 1, 0, 0);
1394   pagelength = find_braced_end (pagenumber) - pagenumber;
1395   if (pagelength == 0)
1396     abort ();
1397 
1398   primary = find_braced_pos (line, 2, 0, 0);
1399   primarylength = find_braced_end (primary) - primary;
1400 
1401   secondary = find_braced_pos (line, 3, 0, 0);
1402   nosecondary = !*secondary;
1403   if (!nosecondary)
1404     secondarylength = find_braced_end (secondary) - secondary;
1405 
1406   /* If the primary is different from before, make a new primary entry. */
1407   if (wstrncmp (primary, lastprimary, primarylength))
1408     {
1409       /* Close off current secondary entry first, if one is open. */
1410       if (pending)
1411 	{
1412 	  fputs ("}\n", ostream);
1413 	  pending = 0;
1414 	}
1415 
1416       /* If this primary has a different initial, include an entry for
1417 	 the initial. */
1418       if (initiallength != lastinitiallength ||
1419 	  (*initial && wstrncmp (initial, lastinitial, initiallength)))
1420 	{
1421 	  fprintf (ostream, "\\initial {");
1422 	  wfwrite (initial, initiallength, 1, ostream);
1423 	  fprintf (ostream, "}\n", initial);
1424 	  if (initial == initial1)
1425 	    {
1426 	      lastinitial = lastinitial1;
1427 	      *lastinitial1 = *initial1;
1428 	    }
1429 	  else
1430 	    {
1431 	      lastinitial = initial;
1432 	    }
1433 	  lastinitiallength = initiallength;
1434 	}
1435 
1436       /* Make the entry for the primary.  */
1437       if (nosecondary)
1438 	fputs ("\\entry {", ostream);
1439       else
1440 	fputs ("\\primary {", ostream);
1441       wfwrite (primary, primarylength, 1, ostream);
1442       if (nosecondary)
1443 	{
1444 	  fputs ("}{", ostream);
1445 	  pending = 1;
1446 	}
1447       else
1448 	fputs ("}\n", ostream);
1449 
1450       /* Record name of most recent primary. */
1451       if (lastprimarylength < primarylength)
1452 	{
1453 	  lastprimarylength = primarylength + 100;
1454 	  lastprimary =
1455 	      (u_short *) xrealloc (lastprimary,
1456 				    (1 + lastprimarylength)*sizeof(u_short));
1457 	}
1458       memcpy (lastprimary, primary, primarylength*sizeof(u_short));
1459       lastprimary[primarylength] = 0;
1460 
1461       /* There is no current secondary within this primary, now. */
1462       lastsecondary[0] = 0;
1463     }
1464 
1465   /* Should not have an entry with no subtopic following one with a subtopic. */
1466 
1467   if (nosecondary && *lastsecondary)
1468       {
1469       char  tline[2001];
1470       int   len;
1471 
1472       len   = wtoc(tline, line, 2000);		/* 2000 is much enough ? */
1473       tline[len]   = '\0';
1474       error ("entry %s follows an entry with a secondary name", tline);
1475       }
1476 
1477   /* Start a new secondary entry if necessary. */
1478   if (!nosecondary && wstrncmp (secondary, lastsecondary, secondarylength))
1479     {
1480       if (pending)
1481 	{
1482 	  fputs ("}\n", ostream);
1483 	  pending = 0;
1484 	}
1485 
1486       /* Write the entry for the secondary.  */
1487       fputs ("\\secondary {", ostream);
1488       wfwrite (secondary, secondarylength, 1, ostream);
1489       fputs ("}{", ostream);
1490       pending = 1;
1491 
1492       /* Record name of most recent secondary. */
1493       if (lastsecondarylength < secondarylength)
1494 	{
1495 	  lastsecondarylength = secondarylength + 100;
1496 	  lastsecondary =
1497 	      (u_short *) xrealloc ((lastsecondary,
1498 				     1 + lastsecondarylength)*sizeof(u_short));
1499 	}
1500       memcpy (lastsecondary, secondary, secondarylength * sizeof(u_short));
1501       lastsecondary[secondarylength] = 0;
1502     }
1503 
1504   /* Here to add one more page number to the current entry. */
1505   if (pending++ != 1)
1506     fputs (", ", ostream);	/* Punctuate first, if this is not the first. */
1507   wfwrite (pagenumber, pagelength, 1, ostream);
1508 }
1509 
1510 /* Close out any unfinished output entry. */
1511 
1512 void
finish_index(ostream)1513 finish_index (ostream)
1514      FILE *ostream;
1515 {
1516   if (pending)
1517     fputs ("}\n", ostream);
1518   free (lastprimary);
1519   free (lastsecondary);
1520 }
1521 
1522 /* Copy the lines in the sorted order.
1523    Each line is copied out of the input file it was found in. */
1524 
1525 void
writelines(linearray,nlines,ostream)1526 writelines (linearray, nlines, ostream)
1527      u_short **linearray;
1528      int nlines;
1529      FILE *ostream;
1530 {
1531   u_short **stop_line = linearray + nlines;
1532   u_short **next_line;
1533 
1534   init_index ();
1535 
1536   /* Output the text of the lines, and free the buffer space. */
1537 
1538   for (next_line = linearray; next_line != stop_line; next_line++)
1539     {
1540       /* If -u was specified, output the line only if distinct from previous one.  */
1541       if (next_line == linearray
1542       /* Compare previous line with this one, using only the
1543          explicitly specd keyfields. */
1544 	  || compare_general (*(next_line - 1), *next_line, 0L, 0L, num_keyfields - 1))
1545 	{
1546 	  u_short *p = *next_line;
1547 	  u_short c;
1548 
1549 	  while ((c = *p++) && c != '\n')
1550 	    /* Do nothing. */ ;
1551 	  *(p - 1) = 0;
1552 	  indexify (*next_line, ostream);
1553 	}
1554     }
1555 
1556   finish_index (ostream);
1557 }
1558 
1559 /* Assume (and optionally verify) that each input file is sorted;
1560    merge them and output the result.
1561    Returns nonzero if any input file fails to be sorted.
1562 
1563    This is the high-level interface that can handle an unlimited
1564    number of files.  */
1565 
1566 #define MAX_DIRECT_MERGE 10
1567 
1568 int
merge_files(infiles,nfiles,outfile)1569 merge_files (infiles, nfiles, outfile)
1570      char **infiles;
1571      int nfiles;
1572      char *outfile;
1573 {
1574   char **tempfiles;
1575   int ntemps;
1576   int i;
1577   int value = 0;
1578   int start_tempcount = tempcount;
1579 
1580   if (nfiles <= MAX_DIRECT_MERGE)
1581     return merge_direct (infiles, nfiles, outfile);
1582 
1583   /* Merge groups of MAX_DIRECT_MERGE input files at a time,
1584      making a temporary file to hold each group's result.  */
1585 
1586   ntemps = (nfiles + MAX_DIRECT_MERGE - 1) / MAX_DIRECT_MERGE;
1587   tempfiles = (char **) xmalloc (ntemps * sizeof (char *));
1588   for (i = 0; i < ntemps; i++)
1589     {
1590       int nf = MAX_DIRECT_MERGE;
1591       if (i + 1 == ntemps)
1592 	nf = nfiles - i * MAX_DIRECT_MERGE;
1593       tempfiles[i] = maketempname (++tempcount);
1594       value |= merge_direct (&infiles[i * MAX_DIRECT_MERGE], nf, tempfiles[i]);
1595     }
1596 
1597   /* All temporary files that existed before are no longer needed
1598      since their contents have been merged into our new tempfiles.
1599      So delete them.  */
1600   flush_tempfiles (start_tempcount);
1601 
1602   /* Now merge the temporary files we created.  */
1603 
1604   merge_files (tempfiles, ntemps, outfile);
1605 
1606   free (tempfiles);
1607 
1608   return value;
1609 }
1610 
1611 /* Assume (and optionally verify) that each input file is sorted;
1612    merge them and output the result.
1613    Returns nonzero if any input file fails to be sorted.
1614 
1615    This version of merging will not work if the number of
1616    input files gets too high.  Higher level functions
1617    use it only with a bounded number of input files.  */
1618 
1619 int
merge_direct(infiles,nfiles,outfile)1620 merge_direct (infiles, nfiles, outfile)
1621      char **infiles;
1622      int nfiles;
1623      char *outfile;
1624 {
1625   struct linebuffer *lb1, *lb2;
1626   struct linebuffer **thisline, **prevline;
1627   FILE **streams;
1628   int i;
1629   int nleft;
1630   int lossage = 0;
1631   int *file_lossage;
1632   struct linebuffer *prev_out = 0;
1633   FILE *ostream = stdout;
1634 
1635   if (outfile)
1636     {
1637       ostream = fopen (outfile, "w");
1638     }
1639   if (!ostream)
1640     pfatal_with_name (outfile);
1641 
1642   init_index ();
1643 
1644   if (nfiles == 0)
1645     {
1646       if (outfile)
1647 	fclose (ostream);
1648       return 0;
1649     }
1650 
1651   /* For each file, make two line buffers.
1652      Also, for each file, there is an element of `thisline'
1653      which points at any time to one of the file's two buffers,
1654      and an element of `prevline' which points to the other buffer.
1655      `thisline' is supposed to point to the next available line from the file,
1656      while `prevline' holds the last file line used,
1657      which is remembered so that we can verify that the file is properly sorted. */
1658 
1659   /* lb1 and lb2 contain one buffer each per file. */
1660   lb1 = (struct linebuffer *) xmalloc (nfiles * sizeof (struct linebuffer));
1661   lb2 = (struct linebuffer *) xmalloc (nfiles * sizeof (struct linebuffer));
1662 
1663   /* thisline[i] points to the linebuffer holding the next available line in file i,
1664      or is zero if there are no lines left in that file.  */
1665   thisline = (struct linebuffer **)
1666     xmalloc (nfiles * sizeof (struct linebuffer *));
1667   /* prevline[i] points to the linebuffer holding the last used line
1668      from file i.  This is just for verifying that file i is properly
1669      sorted.  */
1670   prevline = (struct linebuffer **)
1671     xmalloc (nfiles * sizeof (struct linebuffer *));
1672   /* streams[i] holds the input stream for file i.  */
1673   streams = (FILE **) xmalloc (nfiles * sizeof (FILE *));
1674   /* file_lossage[i] is nonzero if we already know file i is not
1675      properly sorted.  */
1676   file_lossage = (int *) xmalloc (nfiles * sizeof (int));
1677 
1678   /* Allocate and initialize all that storage. */
1679 
1680   for (i = 0; i < nfiles; i++)
1681     {
1682       initbuffer (&lb1[i]);
1683       initbuffer (&lb2[i]);
1684       thisline[i] = &lb1[i];
1685       prevline[i] = &lb2[i];
1686       file_lossage[i] = 0;
1687       streams[i] = fopen (infiles[i], "r");
1688       if (!streams[i])
1689 	pfatal_with_name (infiles[i]);
1690 
1691       readline (thisline[i], streams[i]);
1692     }
1693 
1694   /* Keep count of number of files not at eof. */
1695   nleft = nfiles;
1696 
1697   while (nleft)
1698     {
1699       struct linebuffer *best = 0;
1700       struct linebuffer *exch;
1701       int bestfile = -1;
1702       int i;
1703 
1704       /* Look at the next avail line of each file; choose the least one.  */
1705 
1706       for (i = 0; i < nfiles; i++)
1707 	{
1708 	  if (thisline[i] &&
1709 	      (!best ||
1710 	       0 < compare_general (best->buffer, thisline[i]->buffer,
1711 				 (long) bestfile, (long) i, num_keyfields)))
1712 	    {
1713 	      best = thisline[i];
1714 	      bestfile = i;
1715 	    }
1716 	}
1717 
1718       /* Output that line, unless it matches the previous one and we
1719 	 don't want duplicates. */
1720 
1721       if (!(prev_out &&
1722 	    !compare_general (prev_out->buffer,
1723 			      best->buffer, 0L, 1L, num_keyfields - 1)))
1724 	indexify (best->buffer, ostream);
1725       prev_out = best;
1726 
1727       /* Now make the line the previous of its file, and fetch a new
1728 	 line from that file.  */
1729 
1730       exch = prevline[bestfile];
1731       prevline[bestfile] = thisline[bestfile];
1732       thisline[bestfile] = exch;
1733 
1734       while (1)
1735 	{
1736 	  /* If the file has no more, mark it empty. */
1737 
1738 	  if (feof (streams[bestfile]))
1739 	    {
1740 	      thisline[bestfile] = 0;
1741 	      /* Update the number of files still not empty. */
1742 	      nleft--;
1743 	      break;
1744 	    }
1745 	  readline (thisline[bestfile], streams[bestfile]);
1746 	  if (thisline[bestfile]->buffer[0] || !feof (streams[bestfile]))
1747 	    break;
1748 	}
1749     }
1750 
1751   finish_index (ostream);
1752 
1753   /* Free all storage and close all input streams. */
1754 
1755   for (i = 0; i < nfiles; i++)
1756     {
1757       fclose (streams[i]);
1758       free (lb1[i].buffer);
1759       free (lb2[i].buffer);
1760     }
1761   free (file_lossage);
1762   free (lb1);
1763   free (lb2);
1764   free (thisline);
1765   free (prevline);
1766   free (streams);
1767 
1768   if (outfile)
1769     fclose (ostream);
1770 
1771   return lossage;
1772 }
1773 
1774 /* Print error message and exit.  */
1775 
1776 void
fatal(format,arg)1777 fatal (format, arg)
1778      char *format, *arg;
1779 {
1780   error (format, arg);
1781   exit (TI_FATAL_ERROR);
1782 }
1783 
1784 /* Print error message.  FORMAT is printf control string, ARG is arg for it. */
1785 void
error(format,arg)1786 error (format, arg)
1787      char *format, *arg;
1788 {
1789   printf ("%s: ", program_name);
1790   printf (format, arg);
1791   if (format[strlen (format) -1] != '\n')
1792     printf ("\n");
1793 }
1794 
1795 void
perror_with_name(name)1796 perror_with_name (name)
1797      char *name;
1798 {
1799   char *s;
1800 
1801   s = strerror (errno);
1802   printf ("%s: ", program_name);
1803   printf ("%s; for file `%s'.\n", s, name);
1804 }
1805 
1806 void
pfatal_with_name(name)1807 pfatal_with_name (name)
1808      char *name;
1809 {
1810   char *s;
1811 
1812   s = strerror (errno);
1813   printf ("%s: ", program_name);
1814   printf ("%s; for file `%s'.\n", s, name);
1815   exit (TI_FATAL_ERROR);
1816 }
1817 
1818 /* Return a newly-allocated string whose contents concatenate those of
1819    S1, S2, S3.  */
1820 
1821 char *
concat(s1,s2,s3)1822 concat (s1, s2, s3)
1823      char *s1, *s2, *s3;
1824 {
1825   int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
1826   char *result = (char *) xmalloc (len1 + len2 + len3 + 1);
1827 
1828   strcpy (result, s1);
1829   strcpy (result + len1, s2);
1830   strcpy (result + len1 + len2, s3);
1831   *(result + len1 + len2 + len3) = 0;
1832 
1833   return result;
1834 }
1835 
1836 #if !defined (HAVE_STRERROR)
1837 extern char *sys_errlist[];
1838 extern int sys_nerr;
1839 
1840 char *
strerror(num)1841 strerror (num)
1842      int num;
1843 {
1844   if (num >= sys_nerr)
1845     return ("");
1846   else
1847     return (sys_errlist[num]);
1848 }
1849 #endif /* !HAVE_STRERROR */
1850 
1851 #if !defined (HAVE_STRCHR)
1852 char *
strrchr(string,character)1853 strrchr (string, character)
1854      char *string;
1855      int character;
1856 {
1857   register int i;
1858 
1859   for (i = strlen (string) - 1; i > -1; i--)
1860     if (string[i] == character)
1861       return (string + i);
1862 
1863   return ((char *)NULL);
1864 }
1865 #endif /* HAVE_STRCHR */
1866 
1867 /* Just like malloc, but kills the program in case of fatal error. */
1868 void *
xmalloc(nbytes)1869 xmalloc (nbytes)
1870      int nbytes;
1871 {
1872   void *temp = (void *) malloc (nbytes);
1873 
1874   if (nbytes && temp == (void *)NULL)
1875     memory_error ("xmalloc", nbytes);
1876 
1877   return (temp);
1878 }
1879 
1880 /* Like realloc (), but barfs if there isn't enough memory. */
1881 void *
xrealloc(pointer,nbytes)1882 xrealloc (pointer, nbytes)
1883      void *pointer;
1884      int nbytes;
1885 {
1886   void *temp;
1887 
1888   if (!pointer)
1889     temp = (void *)xmalloc (nbytes);
1890   else
1891     temp = (void *)realloc (pointer, nbytes);
1892 
1893   if (nbytes && !temp)
1894     memory_error ("xrealloc", nbytes);
1895 
1896   return (temp);
1897 }
1898 
1899 void
memory_error(callers_name,bytes_wanted)1900 memory_error (callers_name, bytes_wanted)
1901      char *callers_name;
1902      int bytes_wanted;
1903 {
1904   char printable_string[80];
1905 
1906   sprintf (printable_string,
1907 	   "Virtual memory exhausted in %s ()!  Needed %d bytes.",
1908 	   callers_name, bytes_wanted);
1909 
1910   error (printable_string);
1911   abort();
1912 }
1913 
1914 
1915 static int
wstrncmp(const u_short * s1,const u_short * s2,size_t n)1916 wstrncmp(const u_short *s1, const u_short *s2, size_t n)
1917     {
1918     int	    i;
1919     int	    d = 0;
1920 
1921     for (i=0; i<n && d == 0; i++)
1922 	d   = s1[i] - s2[i];
1923 
1924     return  d;
1925     }
1926 
1927 
1928 static int
wfwrite(u_short * ptr,size_t size,size_t nmemb,FILE * stream)1929 wfwrite(u_short *ptr, size_t  size,  size_t  nmemb,  FILE*stream)
1930     {
1931     char    *cptr;
1932     int	    ret, len;
1933 
1934     cptr    = (char *)xmalloc(size * nmemb * sizeof(u_short)); /* Ч����! */
1935     len	= wtoc(cptr, ptr, size*nmemb);
1936     assert(len <= (size * nmemb * sizeof(u_short)));
1937 
1938     ret	= fwrite(cptr, len, nmemb, stream);
1939     free(cptr);
1940     return  ret;
1941     }
1942 
1943 
1944 static int
wtoc(char * cptr,u_short const * ptr,int len)1945 wtoc(char *cptr, u_short const *ptr, int len)
1946     {
1947     char	*tptr;
1948     u_short	    c;
1949     int		    i;
1950 
1951     tptr    = cptr;
1952     for (i = 0; i < len; i++)
1953 	{
1954 	c   = ptr[i];
1955 	if (G1_CHAR(c))
1956 	    {
1957 	    *tptr++ = c >> 8 | 0x0080;	/* to EUC */
1958 	    c	&= 0x007F;
1959 	    c	|= 0x0080;
1960 	    }
1961 	*tptr++	= c;
1962 	}
1963 
1964     return  tptr - cptr;
1965     }
1966 
1967 
1968 static int
Vowel_of(int c)1969 Vowel_of(int c)
1970     {
1971     if (0x2521 <= c && c < 0x2577)		/* �Ҳ�̾�� */
1972 	c   -= 0x100;				/* ʿ��̾�� */
1973     /* ��������������3������������ʿ��̾�Ϥʤ����������ɤ�����Ȳ��ꤹ�� */
1974     /* ���̤�����ˤʤ�ʤ�Ȧ��	*/
1975 
1976     if (0x2421 <= c && c < 0x2474)
1977 	return	Vowel_Table[c - 0x2420];
1978     else
1979 	return	c;				/* ʿ��̾�ʳ��Ϥ��Τޤ� */
1980     }
1981 
1982 
1983 static void
init_Vowel_Table(void)1984 init_Vowel_Table(void)
1985     {
1986     int	    i;
1987 
1988     /* ������ */
1989     for (i = 0x2421 - 0x2420; i < 0x2445 - 0x2420; i += 10)
1990 	{
1991 	Vowel_Table[i]	    = Vowel_Table[i+1]    = 0x2422; /* ���� */
1992 	Vowel_Table[i+2]    = Vowel_Table[i+3]    = 0x2424; /* ���� */
1993 	Vowel_Table[i+4]    = Vowel_Table[i+5]    = 0x2426; /* ���� */
1994 	Vowel_Table[i+6]    = Vowel_Table[i+7]    = 0x2428; /* ���� */
1995 	Vowel_Table[i+8]    = Vowel_Table[i+9]    = 0x242A; /* ���� */
1996 	}
1997     Vowel_Table[0x243F - 0x2420]    = 0x2422;	/* �� */
1998     Vowel_Table[0x2440 - 0x2420]    = 0x2422;	/* �� */
1999     Vowel_Table[0x2441 - 0x2420]    = 0x2424;	/* �� */
2000     Vowel_Table[0x2442 - 0x2420]    = 0x2424;	/* �� */
2001     Vowel_Table[0x2443 - 0x2420]    = 0x2426;	/* �� */
2002     Vowel_Table[0x2444 - 0x2420]    = 0x2426;	/* �� */
2003     Vowel_Table[0x2445 - 0x2420]    = 0x2426;	/* �� */
2004     Vowel_Table[0x2446 - 0x2420]    = 0x2428;	/* �� */
2005     Vowel_Table[0x2447 - 0x2420]    = 0x2428;	/* �� */
2006     Vowel_Table[0x2448 - 0x2420]    = 0x242A;	/* �� */
2007     Vowel_Table[0x2449 - 0x2420]    = 0x242A;	/* �� */
2008     Vowel_Table[0x244A - 0x2420]    = 0x2422;	/* �� */
2009     Vowel_Table[0x244B - 0x2420]    = 0x2424;	/* �� */
2010     Vowel_Table[0x244C - 0x2420]    = 0x2426;	/* �� */
2011     Vowel_Table[0x244D - 0x2420]    = 0x2428;	/* �� */
2012     Vowel_Table[0x244E - 0x2420]    = 0x242A;	/* �� */
2013     /* �ϡ��� */
2014     for (i = 0x244F - 0x2420; i < 0x245E - 0x2420; i += 15)
2015 	{
2016 	Vowel_Table[i]	    = Vowel_Table[i+1]	= Vowel_Table[i+2]  = 0x2422;
2017 	Vowel_Table[i+3]    = Vowel_Table[i+4]  = Vowel_Table[i+5]  = 0x2424;
2018 	Vowel_Table[i+6]    = Vowel_Table[i+7]  = Vowel_Table[i+8]  = 0x2426;
2019 	Vowel_Table[i+9]    = Vowel_Table[i+11] = Vowel_Table[i+12] = 0x2428;
2020 	Vowel_Table[i+12]   = Vowel_Table[i+13] = Vowel_Table[i+14] = 0x242A;
2021 	}
2022     Vowel_Table[0x245E - 0x2420]    = 0x2422;	/* �� */
2023     Vowel_Table[0x245F - 0x2420]    = 0x2424;	/* �� */
2024     Vowel_Table[0x2460 - 0x2420]    = 0x2426;	/* �� */
2025     Vowel_Table[0x2461 - 0x2420]    = 0x2428;	/* �� */
2026     Vowel_Table[0x2462 - 0x2420]    = 0x242A;	/* �� */
2027     Vowel_Table[0x2463 - 0x2420]    = 0x2422;	/* �� */
2028     Vowel_Table[0x2464 - 0x2420]    = 0x2422;	/* �� */
2029     Vowel_Table[0x2465 - 0x2420]    = 0x2426;	/* �� */
2030     Vowel_Table[0x2466 - 0x2420]    = 0x2426;	/* �� */
2031     Vowel_Table[0x2467 - 0x2420]    = 0x242A;	/* �� */
2032     Vowel_Table[0x2468 - 0x2420]    = 0x242A;	/* �� */
2033     Vowel_Table[0x2469 - 0x2420]    = 0x2422;	/* �� */
2034     Vowel_Table[0x246A - 0x2420]    = 0x2424;	/* �� */
2035     Vowel_Table[0x246B - 0x2420]    = 0x2426;	/* �� */
2036     Vowel_Table[0x246C - 0x2420]    = 0x2428;	/* �� */
2037     Vowel_Table[0x246D - 0x2420]    = 0x242A;	/* �� */
2038     Vowel_Table[0x246E - 0x2420]    = 0x2422;	/* �� */
2039     Vowel_Table[0x246F - 0x2420]    = 0x2422;	/* �� */
2040     /* ����� �ϡ����Τޤޡ��첻�ؤ��Ѵ��Ϥ��ʤ��� */
2041     for (i = 0x2470 - 0x2420; i < 0x2477 - 0x2420; i++)
2042 	Vowel_Table[i]	= i;
2043     }
2044