1 /* Prepare TeX index dribble output into an actual index.
2
3 Version 1.45
4
5 Copyright (C) 1987, 1991, 1992 Free Software Foundation, Inc.
6
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307. */
20
21 /*
22 95.10.18 modified by Nobuyosi KAMEI <CXK03514@niftyserve.or.jp>
23 ���ܸ�EUC�������Ǥ���褦�ˤ�����
24 95.10.19 modified by K. Handa <handa@etl.go.jp>
25 \initial{...} �ν��Ϥ�Ĵ����
26 */
27
28
29 #include <stdio.h>
30 #include <ctype.h>
31 #include <errno.h>
32 #include <assert.h>
33 #include "getopt.h"
34
35 #define TEXINDEX_VERSION_STRING "GNU Texindex 2.0 for Texinfo release 3.4"
36 #define TEXINDEX_MODIFICATION_STRING "(modified for Japanese EUC encoding)"
37
38 #if defined (emacs)
39 # include "../src/config.h"
40 /* Some s/os.h files redefine these. */
41 # undef read
42 # undef close
43 # undef write
44 # undef open
45 #endif
46
47 #if defined (HAVE_STRING_H)
48 # include <string.h>
49 #endif /* HAVE_STRING_H */
50
51 #if !defined (HAVE_STRCHR)
52 char *strrchr ();
53 #endif /* !HAVE_STRCHR */
54
55 #if defined (STDC_HEADERS)
56 # include <stdlib.h>
57 #else /* !STDC_HEADERS */
58 char *getenv (), *malloc (), *realloc ();
59 #endif /* !STDC_HEADERS */
60
61 #if defined (HAVE_UNISTD_H)
62 # include <unistd.h>
63 #else /* !HAVE_UNISTD_H */
64 off_t lseek ();
65 #endif /* !HAVE_UNISTD_H */
66
67 #if !defined (HAVE_MEMSET)
68 #undef memset
69 #define memset(ptr, ignore, count) bzero (ptr, count)
70 #endif
71
72
73 char *mktemp ();
74
75 #if defined (VMS)
76 # include <file.h>
77 # define TI_NO_ERROR ((1 << 28) | 1)
78 # define TI_FATAL_ERROR ((1 << 28) | 4)
79 # define unlink delete
80 #else /* !VMS */
81 # if defined (HAVE_SYS_FCNTL_H)
82 # include <sys/types.h>
83 # include <sys/fcntl.h>
84 # endif /* HAVE_SYS_FCNTL_H */
85
86 # if defined (_AIX) || !defined (_POSIX_VERSION)
87 # include <sys/file.h>
88 # else /* !AIX && _POSIX_VERSION */
89 # if !defined (HAVE_SYS_FCNTL_H)
90 # include <fcntl.h>
91 # endif /* !HAVE_FCNTL_H */
92 # endif /* !_AIX && _POSIX_VERSION */
93 # define TI_NO_ERROR 0
94 # define TI_FATAL_ERROR 1
95 #endif /* !VMS */
96
97 #if !defined (SEEK_SET)
98 # define SEEK_SET 0
99 # define SEEK_CUR 1
100 # define SEEK_END 2
101 #endif /* !SEEK_SET */
102
103 #if !defined (errno)
104 extern int errno;
105 #endif
106 char *strerror ();
107
108 /* When sorting in core, this structure describes one line
109 and the position and length of its first keyfield. */
110 struct lineinfo
111 {
112 u_short *text; /* The actual text of the line. */
113 union {
114 u_short *text; /* The start of the key (for textual comparison). */
115 long number; /* The numeric value (for numeric comparison). */
116 } key;
117 long keylen; /* Length of KEY field. */
118 };
119
120 /* This structure describes a field to use as a sort key. */
121 struct keyfield
122 {
123 int startwords; /* Number of words to skip. */
124 int startchars; /* Number of additional chars to skip. */
125 int endwords; /* Number of words to ignore at end. */
126 int endchars; /* Ditto for characters of last word. */
127 char ignore_blanks; /* Non-zero means ignore spaces and tabs. */
128 char fold_case; /* Non-zero means case doesn't matter. */
129 char reverse; /* Non-zero means compare in reverse order. */
130 char numeric; /* Non-zeros means field is ASCII numeric. */
131 char positional; /* Sort according to file position. */
132 char braced; /* Count balanced-braced groupings as fields. */
133 };
134
135 /* Vector of keyfields to use. */
136 struct keyfield keyfields[3];
137
138 /* Number of keyfields stored in that vector. */
139 int num_keyfields = 3;
140
141 /* Vector of input file names, terminated with a null pointer. */
142 char **infiles;
143
144 /* Vector of corresponding output file names, or NULL, meaning default it
145 (add an `s' to the end). */
146 char **outfiles;
147
148 /* Length of `infiles'. */
149 int num_infiles;
150
151 /* Pointer to the array of pointers to lines being sorted. */
152 u_short **linearray;
153
154 /* The allocated length of `linearray'. */
155 long nlines;
156
157 /* Directory to use for temporary files. On Unix, it ends with a slash. */
158 char *tempdir;
159
160 /* Start of filename to use for temporary files. */
161 char *tempbase;
162
163 /* Number of last temporary file. */
164 int tempcount;
165
166 /* Number of last temporary file already deleted.
167 Temporary files are deleted by `flush_tempfiles' in order of creation. */
168 int last_deleted_tempcount;
169
170 /* During in-core sort, this points to the base of the data block
171 which contains all the lines of data. */
172 u_short *text_base;
173
174 /* Additional command switches .*/
175
176 /* Nonzero means do not delete tempfiles -- for debugging. */
177 int keep_tempfiles;
178
179 /* The name this program was run with. */
180 char *program_name;
181
182 /* ��̾ʸ�����첻�ơ��֥� */
183 int Vowel_Table[0x60];
184
185 /* Forward declarations of functions in this file. */
186
187 void decode_command ();
188 void sort_in_core ();
189 void sort_offline ();
190 u_short **parsefile ();
191 u_short *find_field ();
192 u_short *find_pos ();
193 long find_value ();
194 u_short *find_braced_pos (u_short *str, int words, int chars, int ignore_blanks);
195 u_short *find_braced_end ();
196 void writelines (u_short **linearray, int nlines, FILE *ostream);
197 int compare_field ();
198 int compare_full ();
199 long readline ();
200 int merge_files ();
201 int merge_direct ();
202 void pfatal_with_name ();
203 void fatal ();
204 void error ();
205 void *xmalloc (), *xrealloc ();
206 char *concat ();
207 char *maketempname ();
208 void flush_tempfiles ();
209 char *tempcopy ();
210 static void memory_error();
211 static int wstrncmp(const u_short *s1, const u_short *s2, size_t n);
212 static int wfwrite(u_short *ptr, size_t size, size_t nmemb, FILE*stream);
213 static int wtoc(char *cptr, u_short const *ptr, int len);
214 static int Vowel_of(int c);
215 static void init_Vowel_Table(void);
216
217 #define EUC_BYTE(c) (0x00A1 <= c && c <= 0x00FE)
218 #define G1_CHAR(c) (c & 0xFF00)
219
220 #define MAX_IN_CORE_SORT 500000
221
222 void
main(argc,argv)223 main (argc, argv)
224 int argc;
225 char **argv;
226 {
227 int i;
228
229 tempcount = 0;
230 last_deleted_tempcount = 0;
231
232 program_name = strrchr (argv[0], '/');
233 if (program_name != (char *)NULL)
234 program_name++;
235 else
236 program_name = argv[0];
237
238 /* Describe the kind of sorting to do. */
239 /* The first keyfield uses the first braced field and folds case. */
240 keyfields[0].braced = 1;
241 keyfields[0].fold_case = 1;
242 keyfields[0].endwords = -1;
243 keyfields[0].endchars = -1;
244
245 /* The second keyfield uses the second braced field, numerically. */
246 keyfields[1].braced = 1;
247 keyfields[1].numeric = 1;
248 keyfields[1].startwords = 1;
249 keyfields[1].endwords = -1;
250 keyfields[1].endchars = -1;
251
252 /* The third keyfield (which is ignored while discarding duplicates)
253 compares the whole line. */
254 keyfields[2].endwords = -1;
255 keyfields[2].endchars = -1;
256
257 decode_command (argc, argv);
258
259 tempbase = mktemp (concat ("txiXXXXXX", "", ""));
260
261 /* Process input files completely, one by one. */
262
263 for (i = 0; i < num_infiles; i++)
264 {
265 int desc;
266 long ptr;
267 char *outfile;
268
269 desc = open (infiles[i], O_RDONLY, 0);
270 if (desc < 0)
271 pfatal_with_name (infiles[i]);
272 lseek (desc, (off_t) 0, SEEK_END);
273 ptr = (long) lseek (desc, (off_t) 0, SEEK_CUR);
274
275 close (desc);
276
277 outfile = outfiles[i];
278 if (!outfile)
279 {
280 outfile = concat (infiles[i], "s", "");
281 }
282
283 if (ptr < MAX_IN_CORE_SORT)
284 /* Sort a small amount of data. */
285 sort_in_core (infiles[i], ptr, outfile);
286 else
287 sort_offline (infiles[i], ptr, outfile);
288 }
289
290 flush_tempfiles (tempcount);
291 exit (TI_NO_ERROR);
292 }
293
294 typedef struct
295 {
296 char *long_name;
297 char *short_name;
298 int *variable_ref;
299 int variable_value;
300 char *arg_name;
301 char *doc_string;
302 } TEXINDEX_OPTION;
303
304 TEXINDEX_OPTION texindex_options[] = {
305 { "--keep", "-k", &keep_tempfiles, 1, (char *)NULL,
306 "Keep temporary files around after processing" },
307 { "--no-keep", 0, &keep_tempfiles, 0, (char *)NULL,
308 "Do not keep temporary files around after processing (default)" },
309 { "--output", "-o", (int *)NULL, 0, "FILE",
310 "Send output to FILE" },
311 { "--version", (char *)NULL, (int *)NULL, 0, (char *)NULL,
312 "Show version information" },
313 { "--help", "-h", (int *)NULL, 0, (char *)NULL, "Produce this listing" },
314 { (char *)NULL, (char *)NULL, (int *)NULL, 0, (char *)NULL }
315 };
316
317 void
usage(result_value)318 usage (result_value)
319 int result_value;
320 {
321 register int i;
322
323 fprintf (stderr, "Usage: %s [OPTIONS] FILE...\n", program_name);
324 fprintf (stderr, " Generate a permuted index for the TeX files given.\n");
325 fprintf (stderr, " Usually FILE... is `foo.??' for the source file `foo.tex'.\n");
326 fprintf (stderr, " The OPTIONS are:\n");
327
328 for (i = 0; texindex_options[i].long_name; i++)
329 {
330 fprintf (stderr, " %s %s",
331 texindex_options[i].long_name,
332 texindex_options[i].arg_name ?
333 texindex_options[i].arg_name : "");
334
335 if (texindex_options[i].short_name)
336 fprintf (stderr, " \n or %s %s",
337 texindex_options[i].short_name,
338 texindex_options[i].arg_name ?
339 texindex_options[i].arg_name : "");
340 fprintf (stderr, "\t%s\n", texindex_options[i].doc_string);
341 }
342 exit (result_value);
343 }
344
345 /* Decode the command line arguments to set the parameter variables
346 and set up the vector of keyfields and the vector of input files. */
347
348 void
decode_command(argc,argv)349 decode_command (argc, argv)
350 int argc;
351 char **argv;
352 {
353 int arg_index = 1;
354 char **ip;
355 char **op;
356
357 /* Store default values into parameter variables. */
358
359 tempdir = getenv ("TMPDIR");
360 #ifdef VMS
361 if (tempdir == NULL)
362 tempdir = "sys$scratch:";
363 #else
364 if (tempdir == NULL)
365 tempdir = "/tmp/";
366 else
367 tempdir = concat (tempdir, "/", "");
368 #endif
369
370 keep_tempfiles = 0;
371
372 /* Allocate ARGC input files, which must be enough. */
373
374 infiles = (char **) xmalloc (argc * sizeof (char *));
375 outfiles = (char **) xmalloc (argc * sizeof (char *));
376 ip = infiles;
377 op = outfiles;
378
379 while (arg_index < argc)
380 {
381 char *arg = argv[arg_index++];
382
383 if (*arg == '-')
384 {
385 if (strcmp (arg, "--version") == 0)
386 {
387 fprintf (stderr, "%s", TEXINDEX_VERSION_STRING);
388 #ifdef TEXINDEX_MODIFICATION_STRING
389 fprintf (stderr, " %s", TEXINDEX_MODIFICATION_STRING);
390 #endif
391 fprintf (stderr, "\n");
392 exit (0);
393 }
394 else if ((strcmp (arg, "--keep") == 0) ||
395 (strcmp (arg, "-k") == 0))
396 {
397 keep_tempfiles = 1;
398 }
399 else if ((strcmp (arg, "--help") == 0) ||
400 (strcmp (arg, "-h") == 0))
401 {
402 usage (0);
403 }
404 else if ((strcmp (arg, "--output") == 0) ||
405 (strcmp (arg, "-o") == 0))
406 {
407 if (argv[arg_index] != (char *)NULL)
408 {
409 arg_index++;
410 if (op > outfiles)
411 *(op - 1) = argv[arg_index];
412 }
413 else
414 usage (1);
415 }
416 else
417 usage (1);
418 }
419 else
420 {
421 *ip++ = arg;
422 *op++ = (char *)NULL;
423 }
424 }
425
426 /* Record number of keyfields and terminate list of filenames. */
427 num_infiles = ip - infiles;
428 *ip = (char *)NULL;
429 if (num_infiles == 0)
430 usage (1);
431 }
432
433 /* Return a name for a temporary file. */
434
435 char *
maketempname(count)436 maketempname (count)
437 int count;
438 {
439 char tempsuffix[10];
440 sprintf (tempsuffix, "%d", count);
441 return concat (tempdir, tempbase, tempsuffix);
442 }
443
444 /* Delete all temporary files up to TO_COUNT. */
445
446 void
flush_tempfiles(to_count)447 flush_tempfiles (to_count)
448 int to_count;
449 {
450 if (keep_tempfiles)
451 return;
452 while (last_deleted_tempcount < to_count)
453 unlink (maketempname (++last_deleted_tempcount));
454 }
455
456 /* Copy the input file open on IDESC into a temporary file
457 and return the temporary file name. */
458
459 #define BUFSIZE 1024
460
461 char *
tempcopy(idesc)462 tempcopy (idesc)
463 int idesc;
464 {
465 char *outfile = maketempname (++tempcount);
466 int odesc;
467 char buffer[BUFSIZE];
468
469 odesc = open (outfile, O_WRONLY | O_CREAT, 0666);
470
471 if (odesc < 0)
472 pfatal_with_name (outfile);
473
474 while (1)
475 {
476 int nread = read (idesc, buffer, BUFSIZE);
477 write (odesc, buffer, nread);
478 if (!nread)
479 break;
480 }
481
482 close (odesc);
483
484 return outfile;
485 }
486
487 /* Compare LINE1 and LINE2 according to the specified set of keyfields. */
488
489 int
compare_full(line1,line2)490 compare_full (line1, line2)
491 u_short **line1, **line2;
492 {
493 int i;
494
495 /* Compare using the first keyfield;
496 if that does not distinguish the lines, try the second keyfield;
497 and so on. */
498
499 for (i = 0; i < num_keyfields; i++)
500 {
501 long length1, length2;
502 u_short *start1 = find_field (&keyfields[i], *line1, &length1);
503 u_short *start2 = find_field (&keyfields[i], *line2, &length2);
504 int tem = compare_field (&keyfields[i], start1, length1, *line1 - text_base,
505 start2, length2, *line2 - text_base);
506 if (tem)
507 {
508 if (keyfields[i].reverse)
509 return -tem;
510 return tem;
511 }
512 }
513
514 return 0; /* Lines match exactly. */
515 }
516
517 /* Compare LINE1 and LINE2, described by structures
518 in which the first keyfield is identified in advance.
519 For positional sorting, assumes that the order of the lines in core
520 reflects their nominal order. */
521
522 int
compare_prepared(line1,line2)523 compare_prepared (line1, line2)
524 struct lineinfo *line1, *line2;
525 {
526 int i;
527 int tem;
528 u_short *text1, *text2;
529
530 /* Compare using the first keyfield, which has been found for us already. */
531 if (keyfields->positional)
532 {
533 if (line1->text - text_base > line2->text - text_base)
534 tem = 1;
535 else
536 tem = -1;
537 }
538 else if (keyfields->numeric)
539 tem = line1->key.number - line2->key.number;
540 else
541 tem = compare_field (keyfields, line1->key.text, line1->keylen, 0,
542 line2->key.text, line2->keylen, 0);
543 if (tem)
544 {
545 if (keyfields->reverse)
546 return -tem;
547 return tem;
548 }
549
550 text1 = line1->text;
551 text2 = line2->text;
552
553 /* Compare using the second keyfield;
554 if that does not distinguish the lines, try the third keyfield;
555 and so on. */
556
557 for (i = 1; i < num_keyfields; i++)
558 {
559 long length1, length2;
560 u_short *start1 = find_field (&keyfields[i], text1, &length1);
561 u_short *start2 = find_field (&keyfields[i], text2, &length2);
562 int tem = compare_field (&keyfields[i], start1, length1, text1 - text_base,
563 start2, length2, text2 - text_base);
564 if (tem)
565 {
566 if (keyfields[i].reverse)
567 return -tem;
568 return tem;
569 }
570 }
571
572 return 0; /* Lines match exactly. */
573 }
574
575 /* Like compare_full but more general.
576 You can pass any strings, and you can say how many keyfields to use.
577 POS1 and POS2 should indicate the nominal positional ordering of
578 the two lines in the input. */
579
580 int
compare_general(str1,str2,pos1,pos2,use_keyfields)581 compare_general (str1, str2, pos1, pos2, use_keyfields)
582 u_short *str1, *str2;
583 long pos1, pos2;
584 int use_keyfields;
585 {
586 int i;
587
588 /* Compare using the first keyfield;
589 if that does not distinguish the lines, try the second keyfield;
590 and so on. */
591
592 for (i = 0; i < use_keyfields; i++)
593 {
594 long length1, length2;
595 u_short *start1 = find_field (&keyfields[i], str1, &length1);
596 u_short *start2 = find_field (&keyfields[i], str2, &length2);
597 int tem = compare_field (&keyfields[i], start1, length1, pos1,
598 start2, length2, pos2);
599 if (tem)
600 {
601 if (keyfields[i].reverse)
602 return -tem;
603 return tem;
604 }
605 }
606
607 return 0; /* Lines match exactly. */
608 }
609
610 /* Find the start and length of a field in STR according to KEYFIELD.
611 A pointer to the starting character is returned, and the length
612 is stored into the int that LENGTHPTR points to. */
613
614 u_short *
find_field(keyfield,str,lengthptr)615 find_field (keyfield, str, lengthptr)
616 struct keyfield *keyfield;
617 u_short *str;
618 long *lengthptr;
619 {
620 u_short *start;
621 u_short *end;
622 u_short *(*fun) ();
623
624 if (keyfield->braced)
625 fun = find_braced_pos;
626 else
627 fun = find_pos;
628
629 start = (*fun) (str, keyfield->startwords, keyfield->startchars,
630 keyfield->ignore_blanks);
631 if (keyfield->endwords < 0)
632 {
633 if (keyfield->braced)
634 end = find_braced_end (start);
635 else
636 {
637 end = start;
638 while (*end && *end != '\n')
639 end++;
640 }
641 }
642 else
643 {
644 end = (*fun) (str, keyfield->endwords, keyfield->endchars, 0);
645 if (end - str < start - str)
646 end = start;
647 }
648 *lengthptr = end - start;
649 return start;
650 }
651
652 /* Return a pointer to a specified place within STR,
653 skipping (from the beginning) WORDS words and then CHARS chars.
654 If IGNORE_BLANKS is nonzero, we skip all blanks
655 after finding the specified word. */
656
657 u_short *
find_pos(str,words,chars,ignore_blanks)658 find_pos (str, words, chars, ignore_blanks)
659 u_short *str;
660 int words, chars;
661 int ignore_blanks;
662 {
663 int i;
664 u_short *p = str;
665
666 for (i = 0; i < words; i++)
667 {
668 u_short c;
669 /* Find next bunch of nonblanks and skip them. */
670 while ((c = *p) == ' ' || c == '\t')
671 p++;
672 while ((c = *p) && c != '\n' && !(c == ' ' || c == '\t'))
673 p++;
674 if (!*p || *p == '\n')
675 return p;
676 }
677
678 while (*p == ' ' || *p == '\t')
679 p++;
680
681 for (i = 0; i < chars; i++)
682 {
683 if (!*p || *p == '\n')
684 break;
685 p++;
686 }
687 return p;
688 }
689
690 /* Like find_pos but assumes that each field is surrounded by braces
691 and that braces within fields are balanced. */
692
693 u_short *
find_braced_pos(str,words,chars,ignore_blanks)694 find_braced_pos (str, words, chars, ignore_blanks)
695 u_short *str;
696 int words, chars;
697 int ignore_blanks;
698 {
699 int i;
700 int bracelevel;
701 u_short *p = str;
702 u_short c;
703
704 for (i = 0; i < words; i++)
705 {
706 bracelevel = 1;
707 while ((c = *p++) != '{' && c != '\n' && c)
708 /* Do nothing. */ ;
709 if (c != '{')
710 return p - 1;
711 while (bracelevel)
712 {
713 c = *p++;
714 if (c == '{')
715 bracelevel++;
716 if (c == '}')
717 bracelevel--;
718 if (c == 0 || c == '\n')
719 return p - 1;
720 }
721 }
722
723 while ((c = *p++) != '{' && c != '\n' && c)
724 /* Do nothing. */ ;
725
726 if (c != '{')
727 return p - 1;
728
729 if (ignore_blanks)
730 while ((c = *p) == ' ' || c == '\t')
731 p++;
732
733 for (i = 0; i < chars; i++)
734 {
735 if (!*p || *p == '\n')
736 break;
737 p++;
738 }
739 return p;
740 }
741
742 /* Find the end of the balanced-brace field which starts at STR.
743 The position returned is just before the closing brace. */
744
745 u_short *
find_braced_end(str)746 find_braced_end (str)
747 u_short *str;
748 {
749 int bracelevel;
750 u_short *p = str;
751 u_short c;
752
753 bracelevel = 1;
754 while (bracelevel)
755 {
756 c = *p++;
757 if (c == '{')
758 bracelevel++;
759 if (c == '}')
760 bracelevel--;
761 if (c == 0 || c == '\n')
762 return p - 1;
763 }
764 return p - 1;
765 }
766
767 long
find_value(start,length)768 find_value (start, length)
769 u_short *start;
770 long length;
771 {
772 long val;
773
774 while (length != 0L)
775 {
776 if (isdigit (*start))
777 break;
778 length--;
779 start++;
780 }
781
782 val = 0;
783 while(isdigit(*start) && length > 0L)
784 {
785 val = val*10 + *start;
786 start++;
787 length--;
788 }
789
790 return val;
791 }
792
793 /* Vector used to translate characters for comparison.
794 This is how we make all alphanumerics follow all else,
795 and ignore case in the first sorting. */
796 int char_order[0x8000];
797
798 void
init_char_order(void)799 init_char_order (void)
800 {
801 int i;
802
803 for (i = 1; i < 0x8000; i++)
804 char_order[i] = i;
805
806 for (i = '0'; i <= '9'; i++)
807 char_order[i] += 512;
808
809 for (i = 'a'; i <= 'z'; i++)
810 {
811 char_order[i] = 512 + i;
812 char_order[i + 'A' - 'a'] = 512 + i;
813 }
814
815 /* �ʲ��Υ���ܥ��̵�� (i.e. char_order[XXX] = 0) */
816 for (i = 0x2121; i <= 0x2132; i++) /* ���Ѷ��� �� �� */
817 char_order[i] = 0;
818 for (i = 0x213D; i <= 0x215B; i++) /* �� �� �� */
819 char_order[i] = 0;
820
821 /* ¥����ٹ����������Ⱦ�������Ҳ�̾/ʿ��̾�ν��� */
822 /* 1. ʿ��̾���Ҳ�̾��Ʊ���˰��� */
823 /* ��/��(2421/2521)����/��(2473/2573) */
824 for (i = 0x2421; i < 0x2474; i++)
825 char_order[i + 0x100] = i;
826
827 /* 2. ¥����ٹ����������Ⱦ�������� */
828 /* ������ �� ������ */
829 for (i = 0x2421; i < 0x242B; i += 2)
830 char_order[i] = char_order[i + 0x100] = i+1;
831 /* ������ �� ������ */
832 for (i = 0x242C; i < 0x2443; i += 2)
833 char_order[i] = char_order[i + 0x100] = i-1;
834 /* �� */
835 char_order[0x2443] = char_order[0x2543] = 0x2443+1;
836 /* �š��� �� �ġ��� */
837 for (i = 0x2445; i < 0x244A; i += 2)
838 char_order[i] = char_order[i + 0x100] = i-1;
839 /* �Фѡ��ܤ� �� �ϡ��� */
840 for (i = 0x2450; i < 0x245E; i += 3)
841 {
842 char_order[i] = char_order[i + 0x100] = i-1;
843 char_order[i+1] = char_order[i+1 + 0x100] = i-1;
844 }
845 /* ����� �� ����� */
846 for (i = 0x2463; i < 0x2469; i += 2)
847 char_order[i] = char_order[i + 0x100] = i+1;
848 /* �� �� �� */
849 char_order[0x246E] = char_order[0x256E] = 0x246E +1;
850
851 /* 3. �ü��Ҳ�̾�ΰ��� */
852 char_order[0x2574] = 0x2426; /* �� �� �� */
853 char_order[0x2575] = 0x242B; /* �� �� �� */
854 char_order[0x2576] = 0x2431; /* �� �� �� */
855
856 /* JISX0208�Υ���ե��٥åȡ����� */
857 for (i = 0x2330; i <= 0x2339; i++) /* ������ -> 0-9 */
858 char_order[i] = i - 0x2330 + 512;
859 for (i = 0x2341; i <= 0x235A; i++) /* �����ڡ������ -> A-Z */
860 char_order[i] = char_order[i + (0x2361 - 0x2341)]
861 = 512 + (i - 0x2341) + 'a';
862
863 /* Ĺ����������ӥ롼����ǹԤʤ� */
864
865 init_Vowel_Table();
866 }
867
868 /* Compare two fields (each specified as a start pointer and a character count)
869 according to KEYFIELD.
870 The sign of the value reports the relation between the fields. */
871
872 int
compare_field(keyfield,start1,length1,pos1,start2,length2,pos2)873 compare_field (keyfield, start1, length1, pos1, start2, length2, pos2)
874 struct keyfield *keyfield;
875 u_short *start1;
876 long length1;
877 long pos1;
878 u_short *start2;
879 long length2;
880 long pos2;
881 {
882 if (keyfields->positional)
883 {
884 if (pos1 > pos2)
885 return 1;
886 else
887 return -1;
888 }
889 if (keyfield->numeric)
890 {
891 long value = find_value (start1, length1) - find_value (start2, length2);
892 if (value > 0)
893 return 1;
894 if (value < 0)
895 return -1;
896 return 0;
897 }
898 else
899 {
900 /* lexical �äƤ�? */
901 u_short *p1 = start1;
902 u_short *p2 = start2;
903 u_short *e1 = start1 + length1;
904 u_short *e2 = start2 + length2;
905 u_short lastchar1 = 0, lastchar2 = 0;
906 int c1, c2;
907
908 do
909 {
910 int co1, co2;
911
912 /* char_order[X] �� 0 �Τ�Ρʥ���ܥ�ˤ�̵�뤹�� */
913 c1 = c2 = 0;
914 while (p1 < e1)
915 {
916 if (char_order[c1 = *p1++]) break;
917 c1 = 0;
918 }
919 while (p2 < e2)
920 {
921 if (char_order[c2 = *p2++]) break;
922 c2 = 0;
923 }
924
925 /* ������ */
926 if (c1 == 0x213C) /* �� */
927 c1 = Vowel_of(lastchar1);
928 if (c2 == 0x213C) /* �� */
929 c2 = Vowel_of(lastchar2);
930 co1 = char_order[c1];
931 co2 = char_order[c2];
932
933 if (co1 != co2)
934 return co1 - co2;
935 else
936 lastchar1 = c1, lastchar2 = c2;
937
938 } while(c1);
939
940 /* Strings are equal except possibly for case. */
941 p1 = start1;
942 p2 = start2;
943 while (1)
944 {
945 int c1, c2;
946
947 if (p1 == e1)
948 c1 = 0;
949 else
950 c1 = *p1++;
951 if (p2 == e2)
952 c2 = 0;
953 else
954 c2 = *p2++;
955
956 if (c1 != c2)
957 /* Reverse sign here so upper case comes out last. */
958 return c2 - c1;
959 if (!c1)
960 break;
961 }
962
963 return 0;
964 }
965 }
966
967 /* A `struct linebuffer' is a structure which holds a line of text.
968 `readline' reads a line from a stream into a linebuffer
969 and works regardless of the length of the line. */
970
971 struct linebuffer
972 {
973 long size;
974 u_short *buffer;
975 };
976
977 /* Initialize LINEBUFFER for use. */
978
979 void
initbuffer(linebuffer)980 initbuffer (linebuffer)
981 struct linebuffer *linebuffer;
982 {
983 linebuffer->size = 200;
984 linebuffer->buffer = (u_short *) xmalloc (200*sizeof(u_short));
985 }
986
987 /* Read a line of text from STREAM into LINEBUFFER.
988 Return the length of the line. */
989
990 long
readline(linebuffer,stream)991 readline (linebuffer, stream)
992 struct linebuffer *linebuffer;
993 FILE *stream;
994 {
995 u_short *buffer = linebuffer->buffer;
996 u_short *p = linebuffer->buffer;
997 u_short *end = p + linebuffer->size;
998
999 while (1)
1000 {
1001 int c = getc (stream);
1002 if (p == end)
1003 {
1004 buffer =
1005 (u_short *) xrealloc (buffer,
1006 (linebuffer->size *= 2)*sizeof(u_short));
1007 p += buffer - linebuffer->buffer;
1008 end += buffer - linebuffer->buffer;
1009 linebuffer->buffer = buffer;
1010 }
1011 if (c < 0 || c == '\n')
1012 {
1013 *p = 0;
1014 break;
1015 }
1016 *p++ = c;
1017 }
1018
1019 return p - buffer;
1020 }
1021
1022 /* Sort an input file too big to sort in core. */
1023
1024 void
sort_offline(infile,nfiles,total,outfile)1025 sort_offline (infile, nfiles, total, outfile)
1026 char *infile;
1027 int nfiles;
1028 long total;
1029 char *outfile;
1030 {
1031 /* More than enough. */
1032 int ntemps = 2 * (total + MAX_IN_CORE_SORT - 1) / MAX_IN_CORE_SORT;
1033 char **tempfiles = (char **) xmalloc (ntemps * sizeof (char *));
1034 FILE *istream = fopen (infile, "r");
1035 int i;
1036 struct linebuffer lb;
1037 long linelength;
1038 int failure = 0;
1039
1040 initbuffer (&lb);
1041
1042 /* Read in one line of input data. */
1043
1044 linelength = readline (&lb, istream);
1045
1046 if (lb.buffer[0] != '\\' && lb.buffer[0] != '@')
1047 {
1048 error ("%s: not a texinfo index file", infile);
1049 return;
1050 }
1051
1052 /* Split up the input into `ntemps' temporary files, or maybe fewer,
1053 and put the new files' names into `tempfiles' */
1054
1055 for (i = 0; i < ntemps; i++)
1056 {
1057 char *outname = maketempname (++tempcount);
1058 FILE *ostream = fopen (outname, "w");
1059 long tempsize = 0;
1060
1061 if (!ostream)
1062 pfatal_with_name (outname);
1063 tempfiles[i] = outname;
1064
1065 /* Copy lines into this temp file as long as it does not make file
1066 "too big" or until there are no more lines. */
1067
1068 while (tempsize + linelength + 1 <= MAX_IN_CORE_SORT)
1069 {
1070 tempsize += linelength + 1;
1071 {
1072 u_short *tp;
1073
1074 for (tp = lb.buffer; *tp; tp++)
1075 putc(*tp, ostream);
1076 }
1077 putc ('\n', ostream);
1078
1079 /* Read another line of input data. */
1080
1081 linelength = readline (&lb, istream);
1082 if (!linelength && feof (istream))
1083 break;
1084
1085 if (lb.buffer[0] != '\\' && lb.buffer[0] != '@')
1086 {
1087 error ("%s: not a texinfo index file", infile);
1088 failure = 1;
1089 goto fail;
1090 }
1091 }
1092 fclose (ostream);
1093 if (feof (istream))
1094 break;
1095 }
1096
1097 free (lb.buffer);
1098
1099 fail:
1100 /* Record number of temp files we actually needed. */
1101
1102 ntemps = i;
1103
1104 /* Sort each tempfile into another tempfile.
1105 Delete the first set of tempfiles and put the names of the second
1106 into `tempfiles'. */
1107
1108 for (i = 0; i < ntemps; i++)
1109 {
1110 char *newtemp = maketempname (++tempcount);
1111 sort_in_core (&tempfiles[i], MAX_IN_CORE_SORT, newtemp);
1112 if (!keep_tempfiles)
1113 unlink (tempfiles[i]);
1114 tempfiles[i] = newtemp;
1115 }
1116
1117 if (failure)
1118 return;
1119
1120 /* Merge the tempfiles together and indexify. */
1121
1122 merge_files (tempfiles, ntemps, outfile);
1123 }
1124
1125 /* Sort INFILE, whose size is TOTAL,
1126 assuming that is small enough to be done in-core,
1127 then indexify it and send the output to OUTFILE (or to stdout). */
1128
1129 void
sort_in_core(infile,total,outfile)1130 sort_in_core (infile, total, outfile)
1131 char *infile;
1132 long total;
1133 char *outfile;
1134 {
1135 u_short **nextline;
1136 u_short *data = (u_short *) xmalloc ((total + 1)*sizeof(u_short));
1137 u_short *file_data;
1138 long file_size;
1139 int i;
1140 FILE *ostream = stdout;
1141 struct lineinfo *lineinfo;
1142 FILE *ifp;
1143 int c;
1144
1145 /* Read the contents of the file into the moby array `data'. */
1146 ifp = fopen(infile, "r");
1147 if (!ifp)
1148 fatal ("failure reopening %s", infile);
1149 memset(data, 0, (total + 1)*sizeof(u_short)); /* zero clear */
1150 for (i = 0, c = fgetc(ifp); c != EOF; c = fgetc(ifp), i++)
1151 {
1152 if (EUC_BYTE(c))
1153 {
1154 data[i] = (c & 0x007F) << 8;
1155 c = fgetc(ifp) & 0x007F;
1156 }
1157 data[i] += c;
1158 }
1159 file_size = i;
1160 file_data = data;
1161 data[file_size] = 0;
1162
1163 fclose(ifp);
1164
1165 if (file_size > 0 && data[0] != '\\' && data[0] != '@')
1166 {
1167 error ("%s: not a texinfo index file", infile);
1168 return;
1169 }
1170
1171 init_char_order ();
1172
1173 /* Sort routines want to know this address. */
1174
1175 text_base = data;
1176
1177 /* Create the array of pointers to lines, with a default size
1178 frequently enough. */
1179
1180 nlines = total / 50;
1181 if (!nlines)
1182 nlines = 2;
1183 linearray = (u_short **) xmalloc (nlines * sizeof (u_short *));
1184
1185 /* `nextline' points to the next free slot in this array.
1186 `nlines' is the allocated size. */
1187
1188 nextline = linearray;
1189
1190 /* Parse the input file's data, and make entries for the lines. */
1191
1192 nextline = parsefile (nextline, file_data, file_size);
1193
1194 if (nextline == 0)
1195 {
1196 error ("%s: not a texinfo index file", infile);
1197 return;
1198 }
1199
1200 /* Sort the lines. */
1201
1202 /* If we have enough space, find the first keyfield of each line in advance.
1203 Make a `struct lineinfo' for each line, which records the keyfield
1204 as well as the line, and sort them. */
1205
1206 lineinfo = (struct lineinfo *) malloc ((nextline - linearray) * sizeof (struct lineinfo));
1207
1208 if (lineinfo)
1209 {
1210 struct lineinfo *lp;
1211 u_short **p;
1212
1213 for (lp = lineinfo, p = linearray; p != nextline; lp++, p++)
1214 {
1215 lp->text = *p;
1216 lp->key.text = find_field (keyfields, *p, &lp->keylen);
1217 if (keyfields->numeric)
1218 lp->key.number = find_value (lp->key.text, lp->keylen);
1219 }
1220
1221 qsort (lineinfo, nextline - linearray, sizeof (struct lineinfo), compare_prepared);
1222
1223 for (lp = lineinfo, p = linearray; p != nextline; lp++, p++)
1224 *p = lp->text;
1225
1226 free (lineinfo);
1227 }
1228 else
1229 qsort (linearray, nextline - linearray, sizeof (u_short *), compare_full);
1230
1231 /* Open the output file. */
1232
1233 if (outfile)
1234 {
1235 ostream = fopen (outfile, "w");
1236 if (!ostream)
1237 pfatal_with_name (outfile);
1238 }
1239
1240 writelines (linearray, nextline - linearray, ostream);
1241 if (outfile)
1242 fclose (ostream);
1243
1244 free (linearray);
1245 free (data);
1246 }
1247
1248 /* Parse an input string in core into lines.
1249 DATA is the input string, and SIZE is its length.
1250 Data goes in LINEARRAY starting at NEXTLINE.
1251 The value returned is the first entry in LINEARRAY still unused.
1252 Value 0 means input file contents are invalid. */
1253
1254 u_short **
parsefile(nextline,data,size)1255 parsefile (nextline, data, size)
1256 u_short **nextline;
1257 u_short *data;
1258 long size;
1259 {
1260 u_short *p, *end;
1261 u_short **line = nextline;
1262
1263 p = data;
1264 end = p + size;
1265 *end = 0;
1266
1267 while (p != end)
1268 {
1269 if (p[0] != '\\' && p[0] != '@')
1270 return 0;
1271
1272 *line = p;
1273 while (*p && *p != '\n')
1274 p++;
1275 if (p != end)
1276 p++;
1277
1278 line++;
1279 if (line == linearray + nlines)
1280 {
1281 u_short **old = linearray;
1282 linearray =
1283 (u_short **) xrealloc (linearray,
1284 sizeof (u_short *) * (nlines *= 4));
1285 line += linearray - old;
1286 }
1287 }
1288
1289 return line;
1290 }
1291
1292 /* Indexification is a filter applied to the sorted lines
1293 as they are being written to the output file.
1294 Multiple entries for the same name, with different page numbers,
1295 get combined into a single entry with multiple page numbers.
1296 The first braced field, which is used for sorting, is discarded.
1297 However, its first character is examined, folded to lower case,
1298 and if it is different from that in the previous line fed to us
1299 a \initial line is written with one argument, the new initial.
1300
1301 If an entry has four braced fields, then the second and third
1302 constitute primary and secondary names.
1303 In this case, each change of primary name
1304 generates a \primary line which contains only the primary name,
1305 and in between these are \secondary lines which contain
1306 just a secondary name and page numbers. */
1307
1308 /* The last primary name we wrote a \primary entry for.
1309 If only one level of indexing is being done, this is the last name seen. */
1310 u_short *lastprimary;
1311 /* Length of storage allocated for lastprimary. */
1312 int lastprimarylength;
1313
1314 /* Similar, for the secondary name. */
1315 u_short *lastsecondary;
1316 int lastsecondarylength;
1317
1318 /* Zero if we are not in the middle of writing an entry.
1319 One if we have written the beginning of an entry but have not
1320 yet written any page numbers into it.
1321 Greater than one if we have written the beginning of an entry
1322 plus at least one page number. */
1323 int pending;
1324
1325 /* The initial (for sorting purposes) of the last primary entry written.
1326 When this changes, a \initial {c} line is written */
1327
1328 u_short *lastinitial;
1329
1330 int lastinitiallength;
1331
1332 /* When we need a string of length 1 for the value of lastinitial,
1333 store it here. */
1334
1335 u_short lastinitial1[2];
1336
1337 /* Initialize static storage for writing an index. */
1338
1339 void
init_index()1340 init_index ()
1341 {
1342 pending = 0;
1343 lastinitial = lastinitial1;
1344 lastinitial1[0] = 0;
1345 lastinitial1[1] = 0;
1346 lastinitiallength = 0;
1347 lastprimarylength = 100;
1348 lastprimary = (u_short *) xmalloc ((lastprimarylength + 1)*sizeof(u_short));
1349 memset (lastprimary, '\0', (lastprimarylength + 1)*sizeof(u_short));
1350 lastsecondarylength = 100;
1351 lastsecondary =
1352 (u_short *) xmalloc ((lastsecondarylength + 1)*sizeof(u_short));
1353 memset (lastsecondary, '\0', (lastsecondarylength + 1)*sizeof(u_short));
1354 }
1355
1356 /* Indexify. Merge entries for the same name,
1357 insert headers for each initial character, etc. */
1358
1359 void
indexify(u_short * line,FILE * ostream)1360 indexify (u_short *line, FILE *ostream)
1361 {
1362 u_short *primary, *secondary, *pagenumber;
1363 int primarylength, secondarylength = 0, pagelength;
1364 int nosecondary;
1365 int initiallength;
1366 u_short *initial;
1367 u_short initial1[2];
1368 register u_short *p;
1369
1370 /* First, analyze the parts of the entry fed to us this time. */
1371
1372 p = find_braced_pos (line, 0, 0, 0);
1373 if (*p == '{')
1374 {
1375 initial = p;
1376 /* Get length of inner pair of braces starting at `p',
1377 including that inner pair of braces. */
1378 initiallength = find_braced_end (p + 1) + 1 - p;
1379 }
1380 else
1381 {
1382 initial = initial1;
1383 initial1[0] = *p & 0x7F7F;
1384 initial1[1] = 0;
1385 initiallength = 1;
1386
1387 if (initial1[0] >= 'a' && initial1[0] <= 'z')
1388 initial1[0] -= ' ';
1389 else if (initial1[0] > 0x2000)
1390 initial1[0] = char_order[initial1[0]];
1391 }
1392
1393 pagenumber = find_braced_pos (line, 1, 0, 0);
1394 pagelength = find_braced_end (pagenumber) - pagenumber;
1395 if (pagelength == 0)
1396 abort ();
1397
1398 primary = find_braced_pos (line, 2, 0, 0);
1399 primarylength = find_braced_end (primary) - primary;
1400
1401 secondary = find_braced_pos (line, 3, 0, 0);
1402 nosecondary = !*secondary;
1403 if (!nosecondary)
1404 secondarylength = find_braced_end (secondary) - secondary;
1405
1406 /* If the primary is different from before, make a new primary entry. */
1407 if (wstrncmp (primary, lastprimary, primarylength))
1408 {
1409 /* Close off current secondary entry first, if one is open. */
1410 if (pending)
1411 {
1412 fputs ("}\n", ostream);
1413 pending = 0;
1414 }
1415
1416 /* If this primary has a different initial, include an entry for
1417 the initial. */
1418 if (initiallength != lastinitiallength ||
1419 (*initial && wstrncmp (initial, lastinitial, initiallength)))
1420 {
1421 fprintf (ostream, "\\initial {");
1422 wfwrite (initial, initiallength, 1, ostream);
1423 fprintf (ostream, "}\n", initial);
1424 if (initial == initial1)
1425 {
1426 lastinitial = lastinitial1;
1427 *lastinitial1 = *initial1;
1428 }
1429 else
1430 {
1431 lastinitial = initial;
1432 }
1433 lastinitiallength = initiallength;
1434 }
1435
1436 /* Make the entry for the primary. */
1437 if (nosecondary)
1438 fputs ("\\entry {", ostream);
1439 else
1440 fputs ("\\primary {", ostream);
1441 wfwrite (primary, primarylength, 1, ostream);
1442 if (nosecondary)
1443 {
1444 fputs ("}{", ostream);
1445 pending = 1;
1446 }
1447 else
1448 fputs ("}\n", ostream);
1449
1450 /* Record name of most recent primary. */
1451 if (lastprimarylength < primarylength)
1452 {
1453 lastprimarylength = primarylength + 100;
1454 lastprimary =
1455 (u_short *) xrealloc (lastprimary,
1456 (1 + lastprimarylength)*sizeof(u_short));
1457 }
1458 memcpy (lastprimary, primary, primarylength*sizeof(u_short));
1459 lastprimary[primarylength] = 0;
1460
1461 /* There is no current secondary within this primary, now. */
1462 lastsecondary[0] = 0;
1463 }
1464
1465 /* Should not have an entry with no subtopic following one with a subtopic. */
1466
1467 if (nosecondary && *lastsecondary)
1468 {
1469 char tline[2001];
1470 int len;
1471
1472 len = wtoc(tline, line, 2000); /* 2000 is much enough ? */
1473 tline[len] = '\0';
1474 error ("entry %s follows an entry with a secondary name", tline);
1475 }
1476
1477 /* Start a new secondary entry if necessary. */
1478 if (!nosecondary && wstrncmp (secondary, lastsecondary, secondarylength))
1479 {
1480 if (pending)
1481 {
1482 fputs ("}\n", ostream);
1483 pending = 0;
1484 }
1485
1486 /* Write the entry for the secondary. */
1487 fputs ("\\secondary {", ostream);
1488 wfwrite (secondary, secondarylength, 1, ostream);
1489 fputs ("}{", ostream);
1490 pending = 1;
1491
1492 /* Record name of most recent secondary. */
1493 if (lastsecondarylength < secondarylength)
1494 {
1495 lastsecondarylength = secondarylength + 100;
1496 lastsecondary =
1497 (u_short *) xrealloc ((lastsecondary,
1498 1 + lastsecondarylength)*sizeof(u_short));
1499 }
1500 memcpy (lastsecondary, secondary, secondarylength * sizeof(u_short));
1501 lastsecondary[secondarylength] = 0;
1502 }
1503
1504 /* Here to add one more page number to the current entry. */
1505 if (pending++ != 1)
1506 fputs (", ", ostream); /* Punctuate first, if this is not the first. */
1507 wfwrite (pagenumber, pagelength, 1, ostream);
1508 }
1509
1510 /* Close out any unfinished output entry. */
1511
1512 void
finish_index(ostream)1513 finish_index (ostream)
1514 FILE *ostream;
1515 {
1516 if (pending)
1517 fputs ("}\n", ostream);
1518 free (lastprimary);
1519 free (lastsecondary);
1520 }
1521
1522 /* Copy the lines in the sorted order.
1523 Each line is copied out of the input file it was found in. */
1524
1525 void
writelines(linearray,nlines,ostream)1526 writelines (linearray, nlines, ostream)
1527 u_short **linearray;
1528 int nlines;
1529 FILE *ostream;
1530 {
1531 u_short **stop_line = linearray + nlines;
1532 u_short **next_line;
1533
1534 init_index ();
1535
1536 /* Output the text of the lines, and free the buffer space. */
1537
1538 for (next_line = linearray; next_line != stop_line; next_line++)
1539 {
1540 /* If -u was specified, output the line only if distinct from previous one. */
1541 if (next_line == linearray
1542 /* Compare previous line with this one, using only the
1543 explicitly specd keyfields. */
1544 || compare_general (*(next_line - 1), *next_line, 0L, 0L, num_keyfields - 1))
1545 {
1546 u_short *p = *next_line;
1547 u_short c;
1548
1549 while ((c = *p++) && c != '\n')
1550 /* Do nothing. */ ;
1551 *(p - 1) = 0;
1552 indexify (*next_line, ostream);
1553 }
1554 }
1555
1556 finish_index (ostream);
1557 }
1558
1559 /* Assume (and optionally verify) that each input file is sorted;
1560 merge them and output the result.
1561 Returns nonzero if any input file fails to be sorted.
1562
1563 This is the high-level interface that can handle an unlimited
1564 number of files. */
1565
1566 #define MAX_DIRECT_MERGE 10
1567
1568 int
merge_files(infiles,nfiles,outfile)1569 merge_files (infiles, nfiles, outfile)
1570 char **infiles;
1571 int nfiles;
1572 char *outfile;
1573 {
1574 char **tempfiles;
1575 int ntemps;
1576 int i;
1577 int value = 0;
1578 int start_tempcount = tempcount;
1579
1580 if (nfiles <= MAX_DIRECT_MERGE)
1581 return merge_direct (infiles, nfiles, outfile);
1582
1583 /* Merge groups of MAX_DIRECT_MERGE input files at a time,
1584 making a temporary file to hold each group's result. */
1585
1586 ntemps = (nfiles + MAX_DIRECT_MERGE - 1) / MAX_DIRECT_MERGE;
1587 tempfiles = (char **) xmalloc (ntemps * sizeof (char *));
1588 for (i = 0; i < ntemps; i++)
1589 {
1590 int nf = MAX_DIRECT_MERGE;
1591 if (i + 1 == ntemps)
1592 nf = nfiles - i * MAX_DIRECT_MERGE;
1593 tempfiles[i] = maketempname (++tempcount);
1594 value |= merge_direct (&infiles[i * MAX_DIRECT_MERGE], nf, tempfiles[i]);
1595 }
1596
1597 /* All temporary files that existed before are no longer needed
1598 since their contents have been merged into our new tempfiles.
1599 So delete them. */
1600 flush_tempfiles (start_tempcount);
1601
1602 /* Now merge the temporary files we created. */
1603
1604 merge_files (tempfiles, ntemps, outfile);
1605
1606 free (tempfiles);
1607
1608 return value;
1609 }
1610
1611 /* Assume (and optionally verify) that each input file is sorted;
1612 merge them and output the result.
1613 Returns nonzero if any input file fails to be sorted.
1614
1615 This version of merging will not work if the number of
1616 input files gets too high. Higher level functions
1617 use it only with a bounded number of input files. */
1618
1619 int
merge_direct(infiles,nfiles,outfile)1620 merge_direct (infiles, nfiles, outfile)
1621 char **infiles;
1622 int nfiles;
1623 char *outfile;
1624 {
1625 struct linebuffer *lb1, *lb2;
1626 struct linebuffer **thisline, **prevline;
1627 FILE **streams;
1628 int i;
1629 int nleft;
1630 int lossage = 0;
1631 int *file_lossage;
1632 struct linebuffer *prev_out = 0;
1633 FILE *ostream = stdout;
1634
1635 if (outfile)
1636 {
1637 ostream = fopen (outfile, "w");
1638 }
1639 if (!ostream)
1640 pfatal_with_name (outfile);
1641
1642 init_index ();
1643
1644 if (nfiles == 0)
1645 {
1646 if (outfile)
1647 fclose (ostream);
1648 return 0;
1649 }
1650
1651 /* For each file, make two line buffers.
1652 Also, for each file, there is an element of `thisline'
1653 which points at any time to one of the file's two buffers,
1654 and an element of `prevline' which points to the other buffer.
1655 `thisline' is supposed to point to the next available line from the file,
1656 while `prevline' holds the last file line used,
1657 which is remembered so that we can verify that the file is properly sorted. */
1658
1659 /* lb1 and lb2 contain one buffer each per file. */
1660 lb1 = (struct linebuffer *) xmalloc (nfiles * sizeof (struct linebuffer));
1661 lb2 = (struct linebuffer *) xmalloc (nfiles * sizeof (struct linebuffer));
1662
1663 /* thisline[i] points to the linebuffer holding the next available line in file i,
1664 or is zero if there are no lines left in that file. */
1665 thisline = (struct linebuffer **)
1666 xmalloc (nfiles * sizeof (struct linebuffer *));
1667 /* prevline[i] points to the linebuffer holding the last used line
1668 from file i. This is just for verifying that file i is properly
1669 sorted. */
1670 prevline = (struct linebuffer **)
1671 xmalloc (nfiles * sizeof (struct linebuffer *));
1672 /* streams[i] holds the input stream for file i. */
1673 streams = (FILE **) xmalloc (nfiles * sizeof (FILE *));
1674 /* file_lossage[i] is nonzero if we already know file i is not
1675 properly sorted. */
1676 file_lossage = (int *) xmalloc (nfiles * sizeof (int));
1677
1678 /* Allocate and initialize all that storage. */
1679
1680 for (i = 0; i < nfiles; i++)
1681 {
1682 initbuffer (&lb1[i]);
1683 initbuffer (&lb2[i]);
1684 thisline[i] = &lb1[i];
1685 prevline[i] = &lb2[i];
1686 file_lossage[i] = 0;
1687 streams[i] = fopen (infiles[i], "r");
1688 if (!streams[i])
1689 pfatal_with_name (infiles[i]);
1690
1691 readline (thisline[i], streams[i]);
1692 }
1693
1694 /* Keep count of number of files not at eof. */
1695 nleft = nfiles;
1696
1697 while (nleft)
1698 {
1699 struct linebuffer *best = 0;
1700 struct linebuffer *exch;
1701 int bestfile = -1;
1702 int i;
1703
1704 /* Look at the next avail line of each file; choose the least one. */
1705
1706 for (i = 0; i < nfiles; i++)
1707 {
1708 if (thisline[i] &&
1709 (!best ||
1710 0 < compare_general (best->buffer, thisline[i]->buffer,
1711 (long) bestfile, (long) i, num_keyfields)))
1712 {
1713 best = thisline[i];
1714 bestfile = i;
1715 }
1716 }
1717
1718 /* Output that line, unless it matches the previous one and we
1719 don't want duplicates. */
1720
1721 if (!(prev_out &&
1722 !compare_general (prev_out->buffer,
1723 best->buffer, 0L, 1L, num_keyfields - 1)))
1724 indexify (best->buffer, ostream);
1725 prev_out = best;
1726
1727 /* Now make the line the previous of its file, and fetch a new
1728 line from that file. */
1729
1730 exch = prevline[bestfile];
1731 prevline[bestfile] = thisline[bestfile];
1732 thisline[bestfile] = exch;
1733
1734 while (1)
1735 {
1736 /* If the file has no more, mark it empty. */
1737
1738 if (feof (streams[bestfile]))
1739 {
1740 thisline[bestfile] = 0;
1741 /* Update the number of files still not empty. */
1742 nleft--;
1743 break;
1744 }
1745 readline (thisline[bestfile], streams[bestfile]);
1746 if (thisline[bestfile]->buffer[0] || !feof (streams[bestfile]))
1747 break;
1748 }
1749 }
1750
1751 finish_index (ostream);
1752
1753 /* Free all storage and close all input streams. */
1754
1755 for (i = 0; i < nfiles; i++)
1756 {
1757 fclose (streams[i]);
1758 free (lb1[i].buffer);
1759 free (lb2[i].buffer);
1760 }
1761 free (file_lossage);
1762 free (lb1);
1763 free (lb2);
1764 free (thisline);
1765 free (prevline);
1766 free (streams);
1767
1768 if (outfile)
1769 fclose (ostream);
1770
1771 return lossage;
1772 }
1773
1774 /* Print error message and exit. */
1775
1776 void
fatal(format,arg)1777 fatal (format, arg)
1778 char *format, *arg;
1779 {
1780 error (format, arg);
1781 exit (TI_FATAL_ERROR);
1782 }
1783
1784 /* Print error message. FORMAT is printf control string, ARG is arg for it. */
1785 void
error(format,arg)1786 error (format, arg)
1787 char *format, *arg;
1788 {
1789 printf ("%s: ", program_name);
1790 printf (format, arg);
1791 if (format[strlen (format) -1] != '\n')
1792 printf ("\n");
1793 }
1794
1795 void
perror_with_name(name)1796 perror_with_name (name)
1797 char *name;
1798 {
1799 char *s;
1800
1801 s = strerror (errno);
1802 printf ("%s: ", program_name);
1803 printf ("%s; for file `%s'.\n", s, name);
1804 }
1805
1806 void
pfatal_with_name(name)1807 pfatal_with_name (name)
1808 char *name;
1809 {
1810 char *s;
1811
1812 s = strerror (errno);
1813 printf ("%s: ", program_name);
1814 printf ("%s; for file `%s'.\n", s, name);
1815 exit (TI_FATAL_ERROR);
1816 }
1817
1818 /* Return a newly-allocated string whose contents concatenate those of
1819 S1, S2, S3. */
1820
1821 char *
concat(s1,s2,s3)1822 concat (s1, s2, s3)
1823 char *s1, *s2, *s3;
1824 {
1825 int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
1826 char *result = (char *) xmalloc (len1 + len2 + len3 + 1);
1827
1828 strcpy (result, s1);
1829 strcpy (result + len1, s2);
1830 strcpy (result + len1 + len2, s3);
1831 *(result + len1 + len2 + len3) = 0;
1832
1833 return result;
1834 }
1835
1836 #if !defined (HAVE_STRERROR)
1837 extern char *sys_errlist[];
1838 extern int sys_nerr;
1839
1840 char *
strerror(num)1841 strerror (num)
1842 int num;
1843 {
1844 if (num >= sys_nerr)
1845 return ("");
1846 else
1847 return (sys_errlist[num]);
1848 }
1849 #endif /* !HAVE_STRERROR */
1850
1851 #if !defined (HAVE_STRCHR)
1852 char *
strrchr(string,character)1853 strrchr (string, character)
1854 char *string;
1855 int character;
1856 {
1857 register int i;
1858
1859 for (i = strlen (string) - 1; i > -1; i--)
1860 if (string[i] == character)
1861 return (string + i);
1862
1863 return ((char *)NULL);
1864 }
1865 #endif /* HAVE_STRCHR */
1866
1867 /* Just like malloc, but kills the program in case of fatal error. */
1868 void *
xmalloc(nbytes)1869 xmalloc (nbytes)
1870 int nbytes;
1871 {
1872 void *temp = (void *) malloc (nbytes);
1873
1874 if (nbytes && temp == (void *)NULL)
1875 memory_error ("xmalloc", nbytes);
1876
1877 return (temp);
1878 }
1879
1880 /* Like realloc (), but barfs if there isn't enough memory. */
1881 void *
xrealloc(pointer,nbytes)1882 xrealloc (pointer, nbytes)
1883 void *pointer;
1884 int nbytes;
1885 {
1886 void *temp;
1887
1888 if (!pointer)
1889 temp = (void *)xmalloc (nbytes);
1890 else
1891 temp = (void *)realloc (pointer, nbytes);
1892
1893 if (nbytes && !temp)
1894 memory_error ("xrealloc", nbytes);
1895
1896 return (temp);
1897 }
1898
1899 void
memory_error(callers_name,bytes_wanted)1900 memory_error (callers_name, bytes_wanted)
1901 char *callers_name;
1902 int bytes_wanted;
1903 {
1904 char printable_string[80];
1905
1906 sprintf (printable_string,
1907 "Virtual memory exhausted in %s ()! Needed %d bytes.",
1908 callers_name, bytes_wanted);
1909
1910 error (printable_string);
1911 abort();
1912 }
1913
1914
1915 static int
wstrncmp(const u_short * s1,const u_short * s2,size_t n)1916 wstrncmp(const u_short *s1, const u_short *s2, size_t n)
1917 {
1918 int i;
1919 int d = 0;
1920
1921 for (i=0; i<n && d == 0; i++)
1922 d = s1[i] - s2[i];
1923
1924 return d;
1925 }
1926
1927
1928 static int
wfwrite(u_short * ptr,size_t size,size_t nmemb,FILE * stream)1929 wfwrite(u_short *ptr, size_t size, size_t nmemb, FILE*stream)
1930 {
1931 char *cptr;
1932 int ret, len;
1933
1934 cptr = (char *)xmalloc(size * nmemb * sizeof(u_short)); /* ����! */
1935 len = wtoc(cptr, ptr, size*nmemb);
1936 assert(len <= (size * nmemb * sizeof(u_short)));
1937
1938 ret = fwrite(cptr, len, nmemb, stream);
1939 free(cptr);
1940 return ret;
1941 }
1942
1943
1944 static int
wtoc(char * cptr,u_short const * ptr,int len)1945 wtoc(char *cptr, u_short const *ptr, int len)
1946 {
1947 char *tptr;
1948 u_short c;
1949 int i;
1950
1951 tptr = cptr;
1952 for (i = 0; i < len; i++)
1953 {
1954 c = ptr[i];
1955 if (G1_CHAR(c))
1956 {
1957 *tptr++ = c >> 8 | 0x0080; /* to EUC */
1958 c &= 0x007F;
1959 c |= 0x0080;
1960 }
1961 *tptr++ = c;
1962 }
1963
1964 return tptr - cptr;
1965 }
1966
1967
1968 static int
Vowel_of(int c)1969 Vowel_of(int c)
1970 {
1971 if (0x2521 <= c && c < 0x2577) /* �Ҳ�̾�� */
1972 c -= 0x100; /* ʿ��̾�� */
1973 /* ��������������3������������ʿ��̾�Ϥʤ����������ɤ�����Ȳ��ꤹ�� */
1974 /* ���̤�����ˤʤ�ʤ�Ȧ�� */
1975
1976 if (0x2421 <= c && c < 0x2474)
1977 return Vowel_Table[c - 0x2420];
1978 else
1979 return c; /* ʿ��̾�ʳ��Ϥ��Τޤ� */
1980 }
1981
1982
1983 static void
init_Vowel_Table(void)1984 init_Vowel_Table(void)
1985 {
1986 int i;
1987
1988 /* ������ */
1989 for (i = 0x2421 - 0x2420; i < 0x2445 - 0x2420; i += 10)
1990 {
1991 Vowel_Table[i] = Vowel_Table[i+1] = 0x2422; /* ���� */
1992 Vowel_Table[i+2] = Vowel_Table[i+3] = 0x2424; /* ���� */
1993 Vowel_Table[i+4] = Vowel_Table[i+5] = 0x2426; /* ���� */
1994 Vowel_Table[i+6] = Vowel_Table[i+7] = 0x2428; /* ���� */
1995 Vowel_Table[i+8] = Vowel_Table[i+9] = 0x242A; /* ���� */
1996 }
1997 Vowel_Table[0x243F - 0x2420] = 0x2422; /* �� */
1998 Vowel_Table[0x2440 - 0x2420] = 0x2422; /* �� */
1999 Vowel_Table[0x2441 - 0x2420] = 0x2424; /* �� */
2000 Vowel_Table[0x2442 - 0x2420] = 0x2424; /* �� */
2001 Vowel_Table[0x2443 - 0x2420] = 0x2426; /* �� */
2002 Vowel_Table[0x2444 - 0x2420] = 0x2426; /* �� */
2003 Vowel_Table[0x2445 - 0x2420] = 0x2426; /* �� */
2004 Vowel_Table[0x2446 - 0x2420] = 0x2428; /* �� */
2005 Vowel_Table[0x2447 - 0x2420] = 0x2428; /* �� */
2006 Vowel_Table[0x2448 - 0x2420] = 0x242A; /* �� */
2007 Vowel_Table[0x2449 - 0x2420] = 0x242A; /* �� */
2008 Vowel_Table[0x244A - 0x2420] = 0x2422; /* �� */
2009 Vowel_Table[0x244B - 0x2420] = 0x2424; /* �� */
2010 Vowel_Table[0x244C - 0x2420] = 0x2426; /* �� */
2011 Vowel_Table[0x244D - 0x2420] = 0x2428; /* �� */
2012 Vowel_Table[0x244E - 0x2420] = 0x242A; /* �� */
2013 /* �ϡ��� */
2014 for (i = 0x244F - 0x2420; i < 0x245E - 0x2420; i += 15)
2015 {
2016 Vowel_Table[i] = Vowel_Table[i+1] = Vowel_Table[i+2] = 0x2422;
2017 Vowel_Table[i+3] = Vowel_Table[i+4] = Vowel_Table[i+5] = 0x2424;
2018 Vowel_Table[i+6] = Vowel_Table[i+7] = Vowel_Table[i+8] = 0x2426;
2019 Vowel_Table[i+9] = Vowel_Table[i+11] = Vowel_Table[i+12] = 0x2428;
2020 Vowel_Table[i+12] = Vowel_Table[i+13] = Vowel_Table[i+14] = 0x242A;
2021 }
2022 Vowel_Table[0x245E - 0x2420] = 0x2422; /* �� */
2023 Vowel_Table[0x245F - 0x2420] = 0x2424; /* �� */
2024 Vowel_Table[0x2460 - 0x2420] = 0x2426; /* �� */
2025 Vowel_Table[0x2461 - 0x2420] = 0x2428; /* �� */
2026 Vowel_Table[0x2462 - 0x2420] = 0x242A; /* �� */
2027 Vowel_Table[0x2463 - 0x2420] = 0x2422; /* �� */
2028 Vowel_Table[0x2464 - 0x2420] = 0x2422; /* �� */
2029 Vowel_Table[0x2465 - 0x2420] = 0x2426; /* �� */
2030 Vowel_Table[0x2466 - 0x2420] = 0x2426; /* �� */
2031 Vowel_Table[0x2467 - 0x2420] = 0x242A; /* �� */
2032 Vowel_Table[0x2468 - 0x2420] = 0x242A; /* �� */
2033 Vowel_Table[0x2469 - 0x2420] = 0x2422; /* �� */
2034 Vowel_Table[0x246A - 0x2420] = 0x2424; /* �� */
2035 Vowel_Table[0x246B - 0x2420] = 0x2426; /* �� */
2036 Vowel_Table[0x246C - 0x2420] = 0x2428; /* �� */
2037 Vowel_Table[0x246D - 0x2420] = 0x242A; /* �� */
2038 Vowel_Table[0x246E - 0x2420] = 0x2422; /* �� */
2039 Vowel_Table[0x246F - 0x2420] = 0x2422; /* �� */
2040 /* ����� �ϡ����Τޤޡ��첻�ؤ��Ѵ��Ϥ��ʤ��� */
2041 for (i = 0x2470 - 0x2420; i < 0x2477 - 0x2420; i++)
2042 Vowel_Table[i] = i;
2043 }
2044