1/* Prepare TeX index dribble output into an actual index. 2 3 Version 1.45-j1.00 4 5 Copyright (C) 1987, 1991, 1992 Free Software Foundation, Inc. 6 7 This program is free software; you can redistribute it and/or modify 8 it under the terms of the GNU General Public License as published by 9 the Free Software Foundation; either version 2, or (at your option) 10 any later version. 11 12 This program is distributed in the hope that it will be useful, 13 but WITHOUT ANY WARRANTY; without even the implied warranty of 14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 GNU General Public License for more details. 16 17 You should have received a copy of the GNU General Public License 18 along with this program; if not, write to the Free Software 19 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307. */ 20 21 22#include <stdio.h> 23#include <ctype.h> 24#include <errno.h> 25#include "getopt.h" 26 27#define TEXINDEX_VERSION_STRING "GNU Texindex 2.0 for Texinfo release 3.4" 28 29#if defined (emacs) 30# include "../src/config.h" 31/* Some s/os.h files redefine these. */ 32# undef read 33# undef close 34# undef write 35# undef open 36#endif 37 38#if defined (HAVE_STRING_H) 39# include <string.h> 40#endif /* HAVE_STRING_H */ 41 42#if !defined (HAVE_STRCHR) 43char *strrchr (); 44#endif /* !HAVE_STRCHR */ 45 46#if defined (STDC_HEADERS) 47# include <stdlib.h> 48#else /* !STDC_HEADERS */ 49char *getenv (), *malloc (), *realloc (); 50#endif /* !STDC_HEADERS */ 51 52#if defined (HAVE_UNISTD_H) 53# include <unistd.h> 54#else /* !HAVE_UNISTD_H */ 55off_t lseek (); 56#endif /* !HAVE_UNISTD_H */ 57 58#if !defined (HAVE_MEMSET) 59#undef memset 60#define memset(ptr, ignore, count) bzero (ptr, count) 61#endif 62 63 64char *mktemp (); 65 66#if defined (VMS) 67# include <file.h> 68# define TI_NO_ERROR ((1 << 28) | 1) 69# define TI_FATAL_ERROR ((1 << 28) | 4) 70# define unlink delete 71#else /* !VMS */ 72# if defined (HAVE_SYS_FCNTL_H) 73# include <sys/types.h> 74# include <sys/fcntl.h> 75# endif /* HAVE_SYS_FCNTL_H */ 76 77# if defined (_AIX) || !defined (_POSIX_VERSION) 78# include <sys/file.h> 79# else /* !AIX && _POSIX_VERSION */ 80# if !defined (HAVE_SYS_FCNTL_H) 81# include <fcntl.h> 82# endif /* !HAVE_FCNTL_H */ 83# endif /* !_AIX && _POSIX_VERSION */ 84# define TI_NO_ERROR 0 85# define TI_FATAL_ERROR 1 86#endif /* !VMS */ 87 88#if !defined (SEEK_SET) 89# define SEEK_SET 0 90# define SEEK_CUR 1 91# define SEEK_END 2 92#endif /* !SEEK_SET */ 93 94#if !defined (errno) 95extern int errno; 96#endif 97char *strerror (); 98 99/* When sorting in core, this structure describes one line 100 and the position and length of its first keyfield. */ 101struct lineinfo 102{ 103 char *text; /* The actual text of the line. */ 104 union { 105 char *text; /* The start of the key (for textual comparison). */ 106 long number; /* The numeric value (for numeric comparison). */ 107 } key; 108 long keylen; /* Length of KEY field. */ 109}; 110 111/* This structure describes a field to use as a sort key. */ 112struct keyfield 113{ 114 int startwords; /* Number of words to skip. */ 115 int startchars; /* Number of additional chars to skip. */ 116 int endwords; /* Number of words to ignore at end. */ 117 int endchars; /* Ditto for characters of last word. */ 118 char ignore_blanks; /* Non-zero means ignore spaces and tabs. */ 119 char fold_case; /* Non-zero means case doesn't matter. */ 120 char reverse; /* Non-zero means compare in reverse order. */ 121 char numeric; /* Non-zeros means field is ASCII numeric. */ 122 char positional; /* Sort according to file position. */ 123 char braced; /* Count balanced-braced groupings as fields. */ 124}; 125 126/* Vector of keyfields to use. */ 127struct keyfield keyfields[3]; 128 129/* Number of keyfields stored in that vector. */ 130int num_keyfields = 3; 131 132/* Vector of input file names, terminated with a null pointer. */ 133char **infiles; 134 135/* Vector of corresponding output file names, or NULL, meaning default it 136 (add an `s' to the end). */ 137char **outfiles; 138 139/* Length of `infiles'. */ 140int num_infiles; 141 142/* Pointer to the array of pointers to lines being sorted. */ 143char **linearray; 144 145/* The allocated length of `linearray'. */ 146long nlines; 147 148/* Directory to use for temporary files. On Unix, it ends with a slash. */ 149char *tempdir; 150 151/* Start of filename to use for temporary files. */ 152char *tempbase; 153 154/* Number of last temporary file. */ 155int tempcount; 156 157/* Number of last temporary file already deleted. 158 Temporary files are deleted by `flush_tempfiles' in order of creation. */ 159int last_deleted_tempcount; 160 161/* During in-core sort, this points to the base of the data block 162 which contains all the lines of data. */ 163char *text_base; 164 165/* Additional command switches .*/ 166 167/* Nonzero means do not delete tempfiles -- for debugging. */ 168int keep_tempfiles; 169 170/* The name this program was run with. */ 171char *program_name; 172 173/* Forward declarations of functions in this file. */ 174 175void decode_command (); 176void sort_in_core (); 177void sort_offline (); 178char **parsefile (); 179char *find_field (); 180char *find_pos (); 181long find_value (); 182char *find_braced_pos (); 183char *find_braced_end (); 184void writelines (); 185int compare_field (); 186int compare_full (); 187long readline (); 188int merge_files (); 189int merge_direct (); 190void pfatal_with_name (); 191void fatal (); 192void error (); 193void *xmalloc (), *xrealloc (); 194char *concat (); 195char *maketempname (); 196void flush_tempfiles (); 197char *tempcopy (); 198 199#define MAX_IN_CORE_SORT 500000 200 201void 202main (argc, argv) 203 int argc; 204 char **argv; 205{ 206 int i; 207 208 tempcount = 0; 209 last_deleted_tempcount = 0; 210 211 program_name = strrchr (argv[0], '/'); 212 if (program_name != (char *)NULL) 213 program_name++; 214 else 215 program_name = argv[0]; 216 217 /* Describe the kind of sorting to do. */ 218 /* The first keyfield uses the first braced field and folds case. */ 219 keyfields[0].braced = 1; 220 keyfields[0].fold_case = 1; 221 keyfields[0].endwords = -1; 222 keyfields[0].endchars = -1; 223 224 /* The second keyfield uses the second braced field, numerically. */ 225 keyfields[1].braced = 1; 226 keyfields[1].numeric = 1; 227 keyfields[1].startwords = 1; 228 keyfields[1].endwords = -1; 229 keyfields[1].endchars = -1; 230 231 /* The third keyfield (which is ignored while discarding duplicates) 232 compares the whole line. */ 233 keyfields[2].endwords = -1; 234 keyfields[2].endchars = -1; 235 236 decode_command (argc, argv); 237 238 tempbase = mktemp (concat ("txiXXXXXX", "", "")); 239 240 /* Process input files completely, one by one. */ 241 242 for (i = 0; i < num_infiles; i++) 243 { 244 int desc; 245 long ptr; 246 char *outfile; 247 248 desc = open (infiles[i], O_RDONLY, 0); 249 if (desc < 0) 250 pfatal_with_name (infiles[i]); 251 lseek (desc, (off_t) 0, SEEK_END); 252 ptr = (long) lseek (desc, (off_t) 0, SEEK_CUR); 253 254 close (desc); 255 256 outfile = outfiles[i]; 257 if (!outfile) 258 { 259 outfile = concat (infiles[i], "s", ""); 260 } 261 262 if (ptr < MAX_IN_CORE_SORT) 263 /* Sort a small amount of data. */ 264 sort_in_core (infiles[i], ptr, outfile); 265 else 266 sort_offline (infiles[i], ptr, outfile); 267 } 268 269 flush_tempfiles (tempcount); 270 exit (TI_NO_ERROR); 271} 272 273typedef struct 274{ 275 char *long_name; 276 char *short_name; 277 int *variable_ref; 278 int variable_value; 279 char *arg_name; 280 char *doc_string; 281} TEXINDEX_OPTION; 282 283TEXINDEX_OPTION texindex_options[] = { 284 { "--keep", "-k", &keep_tempfiles, 1, (char *)NULL, 285 "Keep temporary files around after processing" }, 286 { "--no-keep", 0, &keep_tempfiles, 0, (char *)NULL, 287 "Do not keep temporary files around after processing (default)" }, 288 { "--output", "-o", (int *)NULL, 0, "FILE", 289 "Send output to FILE" }, 290 { "--version", (char *)NULL, (int *)NULL, 0, (char *)NULL, 291 "Show version information" }, 292 { "--help", "-h", (int *)NULL, 0, (char *)NULL, "Produce this listing" }, 293 { (char *)NULL, (char *)NULL, (int *)NULL, 0, (char *)NULL } 294}; 295 296void 297usage (result_value) 298 int result_value; 299{ 300 register int i; 301 302 fprintf (stderr, "Usage: %s [OPTIONS] FILE...\n", program_name); 303 fprintf (stderr, " Generate a permuted index for the TeX files given.\n"); 304 fprintf (stderr, " Usually FILE... is `foo.??' for the source file `foo.tex'.\n"); 305 fprintf (stderr, " The OPTIONS are:\n"); 306 307 for (i = 0; texindex_options[i].long_name; i++) 308 { 309 fprintf (stderr, " %s %s", 310 texindex_options[i].long_name, 311 texindex_options[i].arg_name ? 312 texindex_options[i].arg_name : ""); 313 314 if (texindex_options[i].short_name) 315 fprintf (stderr, " \n or %s %s", 316 texindex_options[i].short_name, 317 texindex_options[i].arg_name ? 318 texindex_options[i].arg_name : ""); 319 fprintf (stderr, "\t%s\n", texindex_options[i].doc_string); 320 } 321 exit (result_value); 322} 323 324/* Decode the command line arguments to set the parameter variables 325 and set up the vector of keyfields and the vector of input files. */ 326 327void 328decode_command (argc, argv) 329 int argc; 330 char **argv; 331{ 332 int arg_index = 1; 333 int optc; 334 char **ip; 335 char **op; 336 337 /* Store default values into parameter variables. */ 338 339 tempdir = getenv ("TMPDIR"); 340#ifdef VMS 341 if (tempdir == NULL) 342 tempdir = "sys$scratch:"; 343#else 344 if (tempdir == NULL) 345 tempdir = "/tmp/"; 346 else 347 tempdir = concat (tempdir, "/", ""); 348#endif 349 350 keep_tempfiles = 0; 351 352 /* Allocate ARGC input files, which must be enough. */ 353 354 infiles = (char **) xmalloc (argc * sizeof (char *)); 355 outfiles = (char **) xmalloc (argc * sizeof (char *)); 356 ip = infiles; 357 op = outfiles; 358 359 while (arg_index < argc) 360 { 361 char *arg = argv[arg_index++]; 362 363 if (*arg == '-') 364 { 365 if (strcmp (arg, "--version") == 0) 366 { 367 fprintf (stderr, "%s\n", TEXINDEX_VERSION_STRING); 368 exit (0); 369 } 370 else if ((strcmp (arg, "--keep") == 0) || 371 (strcmp (arg, "-k") == 0)) 372 { 373 keep_tempfiles = 1; 374 } 375 else if ((strcmp (arg, "--help") == 0) || 376 (strcmp (arg, "-h") == 0)) 377 { 378 usage (0); 379 } 380 else if ((strcmp (arg, "--output") == 0) || 381 (strcmp (arg, "-o") == 0)) 382 { 383 if (argv[arg_index] != (char *)NULL) 384 { 385 arg_index++; 386 if (op > outfiles) 387 *(op - 1) = argv[arg_index]; 388 } 389 else 390 usage (1); 391 } 392 else 393 usage (1); 394 } 395 else 396 { 397 *ip++ = arg; 398 *op++ = (char *)NULL; 399 } 400 } 401 402 /* Record number of keyfields and terminate list of filenames. */ 403 num_infiles = ip - infiles; 404 *ip = (char *)NULL; 405 if (num_infiles == 0) 406 usage (1); 407} 408 409/* Return a name for a temporary file. */ 410 411char * 412maketempname (count) 413 int count; 414{ 415 char tempsuffix[10]; 416 sprintf (tempsuffix, "%d", count); 417 return concat (tempdir, tempbase, tempsuffix); 418} 419 420/* Delete all temporary files up to TO_COUNT. */ 421 422void 423flush_tempfiles (to_count) 424 int to_count; 425{ 426 if (keep_tempfiles) 427 return; 428 while (last_deleted_tempcount < to_count) 429 unlink (maketempname (++last_deleted_tempcount)); 430} 431 432/* Copy the input file open on IDESC into a temporary file 433 and return the temporary file name. */ 434 435#define BUFSIZE 1024 436 437char * 438tempcopy (idesc) 439 int idesc; 440{ 441 char *outfile = maketempname (++tempcount); 442 int odesc; 443 char buffer[BUFSIZE]; 444 445 odesc = open (outfile, O_WRONLY | O_CREAT, 0666); 446 447 if (odesc < 0) 448 pfatal_with_name (outfile); 449 450 while (1) 451 { 452 int nread = read (idesc, buffer, BUFSIZE); 453 write (odesc, buffer, nread); 454 if (!nread) 455 break; 456 } 457 458 close (odesc); 459 460 return outfile; 461} 462 463/* Compare LINE1 and LINE2 according to the specified set of keyfields. */ 464 465int 466compare_full (line1, line2) 467 char **line1, **line2; 468{ 469 int i; 470 471 /* Compare using the first keyfield; 472 if that does not distinguish the lines, try the second keyfield; 473 and so on. */ 474 475 for (i = 0; i < num_keyfields; i++) 476 { 477 long length1, length2; 478 char *start1 = find_field (&keyfields[i], *line1, &length1); 479 char *start2 = find_field (&keyfields[i], *line2, &length2); 480 int tem = compare_field (&keyfields[i], start1, length1, *line1 - text_base, 481 start2, length2, *line2 - text_base); 482 if (tem) 483 { 484 if (keyfields[i].reverse) 485 return -tem; 486 return tem; 487 } 488 } 489 490 return 0; /* Lines match exactly. */ 491} 492 493/* Compare LINE1 and LINE2, described by structures 494 in which the first keyfield is identified in advance. 495 For positional sorting, assumes that the order of the lines in core 496 reflects their nominal order. */ 497 498int 499compare_prepared (line1, line2) 500 struct lineinfo *line1, *line2; 501{ 502 int i; 503 int tem; 504 char *text1, *text2; 505 506 /* Compare using the first keyfield, which has been found for us already. */ 507 if (keyfields->positional) 508 { 509 if (line1->text - text_base > line2->text - text_base) 510 tem = 1; 511 else 512 tem = -1; 513 } 514 else if (keyfields->numeric) 515 tem = line1->key.number - line2->key.number; 516 else 517 tem = compare_field (keyfields, line1->key.text, line1->keylen, 0, 518 line2->key.text, line2->keylen, 0); 519 if (tem) 520 { 521 if (keyfields->reverse) 522 return -tem; 523 return tem; 524 } 525 526 text1 = line1->text; 527 text2 = line2->text; 528 529 /* Compare using the second keyfield; 530 if that does not distinguish the lines, try the third keyfield; 531 and so on. */ 532 533 for (i = 1; i < num_keyfields; i++) 534 { 535 long length1, length2; 536 char *start1 = find_field (&keyfields[i], text1, &length1); 537 char *start2 = find_field (&keyfields[i], text2, &length2); 538 int tem = compare_field (&keyfields[i], start1, length1, text1 - text_base, 539 start2, length2, text2 - text_base); 540 if (tem) 541 { 542 if (keyfields[i].reverse) 543 return -tem; 544 return tem; 545 } 546 } 547 548 return 0; /* Lines match exactly. */ 549} 550 551/* Like compare_full but more general. 552 You can pass any strings, and you can say how many keyfields to use. 553 POS1 and POS2 should indicate the nominal positional ordering of 554 the two lines in the input. */ 555 556int 557compare_general (str1, str2, pos1, pos2, use_keyfields) 558 char *str1, *str2; 559 long pos1, pos2; 560 int use_keyfields; 561{ 562 int i; 563 564 /* Compare using the first keyfield; 565 if that does not distinguish the lines, try the second keyfield; 566 and so on. */ 567 568 for (i = 0; i < use_keyfields; i++) 569 { 570 long length1, length2; 571 char *start1 = find_field (&keyfields[i], str1, &length1); 572 char *start2 = find_field (&keyfields[i], str2, &length2); 573 int tem = compare_field (&keyfields[i], start1, length1, pos1, 574 start2, length2, pos2); 575 if (tem) 576 { 577 if (keyfields[i].reverse) 578 return -tem; 579 return tem; 580 } 581 } 582 583 return 0; /* Lines match exactly. */ 584} 585 586/* Find the start and length of a field in STR according to KEYFIELD. 587 A pointer to the starting character is returned, and the length 588 is stored into the int that LENGTHPTR points to. */ 589 590char * 591find_field (keyfield, str, lengthptr) 592 struct keyfield *keyfield; 593 char *str; 594 long *lengthptr; 595{ 596 char *start; 597 char *end; 598 char *(*fun) (); 599 600 if (keyfield->braced) 601 fun = find_braced_pos; 602 else 603 fun = find_pos; 604 605 start = (*fun) (str, keyfield->startwords, keyfield->startchars, 606 keyfield->ignore_blanks); 607 if (keyfield->endwords < 0) 608 { 609 if (keyfield->braced) 610 end = find_braced_end (start); 611 else 612 { 613 end = start; 614 while (*end && *end != '\n') 615 end++; 616 } 617 } 618 else 619 { 620 end = (*fun) (str, keyfield->endwords, keyfield->endchars, 0); 621 if (end - str < start - str) 622 end = start; 623 } 624 *lengthptr = end - start; 625 return start; 626} 627 628/* Return a pointer to a specified place within STR, 629 skipping (from the beginning) WORDS words and then CHARS chars. 630 If IGNORE_BLANKS is nonzero, we skip all blanks 631 after finding the specified word. */ 632 633char * 634find_pos (str, words, chars, ignore_blanks) 635 char *str; 636 int words, chars; 637 int ignore_blanks; 638{ 639 int i; 640 char *p = str; 641 642 for (i = 0; i < words; i++) 643 { 644 char c; 645 /* Find next bunch of nonblanks and skip them. */ 646 while ((c = *p) == ' ' || c == '\t') 647 p++; 648 while ((c = *p) && c != '\n' && !(c == ' ' || c == '\t')) 649 p++; 650 if (!*p || *p == '\n') 651 return p; 652 } 653 654 while (*p == ' ' || *p == '\t') 655 p++; 656 657 for (i = 0; i < chars; i++) 658 { 659 if (!*p || *p == '\n') 660 break; 661 p++; 662 } 663 return p; 664} 665 666/* Like find_pos but assumes that each field is surrounded by braces 667 and that braces within fields are balanced. */ 668 669char * 670find_braced_pos (str, words, chars, ignore_blanks) 671 char *str; 672 int words, chars; 673 int ignore_blanks; 674{ 675 int i; 676 int bracelevel; 677 char *p = str; 678 char c; 679 680 for (i = 0; i < words; i++) 681 { 682 bracelevel = 1; 683 while ((c = *p++) != '{' && c != '\n' && c) 684 /* Do nothing. */ ; 685 if (c != '{') 686 return p - 1; 687 while (bracelevel) 688 { 689 c = *p++; 690#ifdef SJIS 691 if ( iskanji(c)) { 692 p++; 693 continue; 694 } 695#endif 696 if (c == '{') 697 bracelevel++; 698 if (c == '}') 699 bracelevel--; 700 if (c == 0 || c == '\n') 701 return p - 1; 702 } 703 } 704 705 while ((c = *p++) != '{' && c != '\n' && c) 706 /* Do nothing. */ ; 707 708 if (c != '{') 709 return p - 1; 710 711 if (ignore_blanks) 712 while ((c = *p) == ' ' || c == '\t') 713 p++; 714 715 for (i = 0; i < chars; i++) 716 { 717 if (!*p || *p == '\n') 718 break; 719 p++; 720 } 721 return p; 722} 723 724/* Find the end of the balanced-brace field which starts at STR. 725 The position returned is just before the closing brace. */ 726 727char * 728find_braced_end (str) 729 char *str; 730{ 731 int bracelevel; 732 char *p = str; 733 char c; 734 735 bracelevel = 1; 736 while (bracelevel) 737 { 738 c = *p++; 739#ifdef SJIS 740 if (iskanji(c)) { 741 p++; 742 continue; 743 } 744#endif 745 if (c == '{') 746 bracelevel++; 747 if (c == '}') 748 bracelevel--; 749 if (c == 0 || c == '\n') 750 return p - 1; 751 } 752 return p - 1; 753} 754 755long 756find_value (start, length) 757 char *start; 758 long length; 759{ 760 while (length != 0L) 761 { 762 if (isdigit (*start)) 763 return atol (start); 764 length--; 765 start++; 766 } 767 return 0l; 768} 769 770/* Vector used to translate characters for comparison. 771 This is how we make all alphanumerics follow all else, 772 and ignore case in the first sorting. */ 773int char_order[0x10000]; /* japanese */ 774 775#ifdef EUC 776#define KANJI 777#endif 778 779#ifdef SJIS 780#define KANJI 781#endif 782 783#ifdef KANJI 784 785#ifndef EUC 786# ifndef SJIS 787# define EUC 788# endif 789#else 790# ifdef SJIS 791# error 792# endif 793#endif 794 795#ifdef EUC 796#define JIS_NUM_0 0xa3b0 797#define JIS_NUM_9 (JIS_NUM_0 + '9'- '0') 798#define JIS_ALPH_A 0xa3c1 799#define JIS_ALPH_Z (JIS_ALPH_A + 'Z'-'A') 800#define JIS_ALPH_a 0xa3e1 801#define JIS_HIRA_SMALL_A 0xa4a1 802#define JIS_HIRA_NN 0xa4f3 803#define JIS_KATA_SMALL_A 0xa5a1 804#define JIS_KATA_SMALL_KE 0xa5f6 805#define JIS_HIRA_U 0xa4a6 806#define JIS_KATA_U 0xa5a6 807#define JIS_KATA_VU 0xa5f4 808 809#define JIS_KATA_UPPER 0xa5 810#define JIS_HIRA_UPPER 0xa4 811 812#define JIS_HIRA_A 0xa4a2 813#define JIS_KATA_A 0xa5a2 814 815#define JIS_DAKUTEN 0xa1ab 816#define JIS_HANDAKUTEN 0xa1ac 817#define JIS_CHOUON 0xa2ac 818 819int kana[] = { 820/* . [ ] , . wo a */ 821 0xa1a1, 0xa1a3, 0xa1d6, 0xa1d7, 0xa1a2, 0xa1a6, 0xa5f2, 0xa5a1, 822/* i u e o ya yu yo tsu */ 823 0xa5a3, 0xa5a5, 0xa5a7, 0xa5a9, 0xa5e3, 0xa5e5, 0xa5e7, 0xa5c3, 824/* - a i u e o ka ki */ 825 0xa1bc, 0xa5a2, 0xa5a4, 0xa5a6, 0xa5a8, 0xa5aa, 0xa5ab, 0xa5ad, 826/* ku ke ko sa si su se so */ 827 0xa5af, 0xa5b1, 0xa5b3, 0xa5b5, 0xa5b7, 0xa5b9, 0xa5bb, 0xa5bd, 828/* ta ti tu te to na ni nu */ 829 0xa5bf, 0xa5c1, 0xa5c4, 0xa5c6, 0xa5c8, 0xa5ca, 0xa5cb, 0xa5cc, 830/* ne no ha hi hu he ho ma */ 831 0xa5cd, 0xa5ce, 0xa5cf, 0xa5d2, 0xa5d5, 0xa5d8, 0xa5db, 0xa5de, 832/* mi mu me mo ya yu yo ra */ 833 0xa5df, 0xa5e0, 0xa5e1, 0xa5e2, 0xa5e4, 0xa5e6, 0xa5e8, 0xa5e9, 834/* ri ru re ro wa nn "" maru */ 835 0xa5ea, 0xa5eb, 0xa5ec, 0xa5ed, 0xa5ef, 0xa5f3, JIS_DAKUTEN,JIS_HANDAKUTEN 836}; 837 838int daku[] = {0xa5ac, 0xa5ae, 0xa5b0, 0xa5b2, 0xa5b4, /* $B%,(J*/ 839 0xa5b6, 0xa5b8, 0xa5ba, 0xa5bc, 0xa5be, /* $B%6(J */ 840 0xa5c0, 0xa5c2, 0xa5c5, 0xa5c7, 0xa5c9, /* $B%@(J */ 841 0xa5d0, 0xa5d3, 0xa5d6, 0xa5d9, 0xa5dc}; /* $B%P(J */ 842int handaku[] = {0xa5d1, 0xa5d4, 0xa5d7, 0xa5da, 0xa5dd}; /* $B%Q(J */ 843int dakuall[] = {0xa4ac, 0xa4ae, 0xa4b0, 0xa4b2, 0xa4b4, /* $B$,(J */ 844 0xa4b6, 0xa4b8, 0xa4ba, 0xa4bc, 0xa4be, /* $B$6(J */ 845 0xa4c0, 0xa4c2, 0xa4c5, 0xa4c7, 0xa4c9, /* $B$@(J */ 846 0xa4d0, 0xa4d3, 0xa4d6, 0xa4d9, 0xa4dc, /* $B$P(J */ 847 0xa5ac, 0xa5ae, 0xa5b0, 0xa5b2, 0xa5b4, /* $B%,(J */ 848 0xa5b6, 0xa5b8, 0xa5ba, 0xa5bc, 0xa5be, /* $B%6(J */ 849 0xa5c0, 0xa5c2, 0xa5c5, 0xa5c7, 0xa5c9, /* $B%@(J */ 850 0xa5d0, 0xa5d3, 0xa5d6, 0xa5d9, 0xa5dc}; /* $B%P(J */ 851int handakuall[] = {0xa4d1, 0xa4d4, 0xa4d7, 0xa4da, 0xa4dd, /* $B$Q(J */ 852 0xa5d1, 0xa5d4, 0xa5d7, 0xa5da, 0xa5dd}; /* $B%Q(J */ 853 854 855iskanji(c) /* japanses extention */ 856int c; 857{ 858 if ( !(c & 0x80 )) return 0; 859 if ( c == 0x8e ) return 0; 860 return 1; 861} 862 863is201kana(c) /* japanses extention */ 864int c; 865{ 866 if ( c == 0x8e ) return 1; 867 else return 0; 868} 869#else /* SJIS */ 870#define JIS_NUM_0 0x824f 871#define JIS_NUM_9 (JIS_NUM_0 + '9'- '0') 872#define JIS_ALPH_A 0x8260 873#define JIS_ALPH_Z (JIS_ALPH_A + 'Z'-'A') 874#define JIS_ALPH_a 0x8281 875#define JIS_HIRA_SMALL_A 0x829f 876#define JIS_HIRA_NN 0x82f1 877#define JIS_KATA_SMALL_A 0x8340 878#define JIS_KATA_SMALL_KE 0x8396 879#define JIS_HIRA_U 0x82a4 880#define JIS_KATA_U 0x83a5 881#define JIS_KATA_VU 0x8394 882 883#define JIS_KATA_UPPER 0x83 884#define JIS_HIRA_UPPER 0x82 885 886#define JIS_HIRA_A 0x82a0 887#define JIS_KATA_A 0x8341 888 889#define JIS_DAKUTEN 0x814a 890#define JIS_HANDAKUTEN 0x814b 891#define JIS_CHOUON 0x815b 892 893int kana[] = { 894/* . [ ] , . wo a */ 895 0x813f, 0x8142, 0x8175, 0x8176, 0x8141, 0x8145, 0x8392, 0x8340, 896/* i u e o ya yu yo tsu */ 897 0x8342, 0x8344, 0x8346, 0x8348, 0x8383, 0x8385, 0x8387, 0x8362, 898/* - a i u e o ka ki */ 899 0x815b, 0x8341, 0x8343, 0x8345, 0x8347, 0x8349, 0x834a, 0x834c, 900/* ku ke ko sa si su se so */ 901 0x834e, 0x8350, 0x8352, 0x8354, 0x8356, 0x8358, 0x835a, 0x835c, 902/* ta ti tu te to na ni nu */ 903 0x835e, 0x8360, 0x8363, 0x8365, 0x8367, 0x8369, 0x836a, 0x836b, 904/* ne no ha hi hu he ho ma */ 905 0x836c, 0x836d, 0x836e, 0x8371, 0x8374, 0x8377, 0x837a, 0x837d, 906/* mi mu me mo ya yu yo ra */ 907 0x837e, 0x8380, 0x8381, 0x8382, 0x8384, 0x8386, 0x8388, 0x8389, 908/* ri ru re ro wa nn "" maru */ 909 0x838a, 0x838b, 0x838c, 0x838d, 0x838f, 0x8393, JIS_DAKUTEN,JIS_HANDAKUTEN 910}; 911 912int daku[] = {0x834b, 0x834d, 0x834f, 0x8351, 0x8353, /* $B%,(J*/ 913 0x8355, 0x8357, 0x8359, 0x835b, 0x835d, /* $B%6(J */ 914 0x835f, 0x8361, 0x8364, 0x8366, 0x8367, /* $B%@(J */ 915 0x836f, 0x8372, 0x8375, 0x8378, 0x837b}; /* $B%P(J */ 916int handaku[] = {0x8370, 0x8373, 0x8376, 0x8379, 0x837c}; /* $B%Q(J */ 917int dakuall[] = {0x82aa, 0xa2ac, 0x82a3, 0x82b0, 0x82b2, /* $B$,(J */ 918 0x82b4, 0x82b6, 0x82b8, 0x82ba, 0x82bc, /* $B$6(J */ 919 0x82be, 0x82c0, 0x82c3, 0x82c5, 0x82c7, /* $B$@(J */ 920 0x82ce, 0x82d1, 0x82d4, 0x82d7, 0x82da, /* $B$P(J */ 921 922 0x834b, 0x834d, 0x834f, 0x8351, 0x8353, /* $B%,(J*/ 923 0x8355, 0x8357, 0x8359, 0x835b, 0x835d, /* $B%6(J */ 924 0x835f, 0x8361, 0x8364, 0x8366, 0x8367, /* $B%@(J */ 925 0x836f, 0x8372, 0x8375, 0x8378, 0x837b}; /* $B%P(J */ 926int handakuall[] = {0x82cf, 0x82d2, 0x82d5, 0x82d8, 0x82db, /* $B$Q(J */ 927 0x8370, 0x8373, 0x8376, 0x8379, 0x837c}; /* $B%Q(J */ 928 929iskanji(c) /* japanses extention */ 930int c; 931{ 932 c &= 0xff; 933 if (( c >= 0x81 && c <= 0x9f ) || 934 ( c >= 0xe0 && c <= 0xea )) return 1; 935 return 0; 936} 937 938is201kana(c) /* japanses extention */ 939int c; 940{ 941 if ( c >= 0xa0 && c <= 0xdf ) return 1; 942 else return 0; 943} 944#endif 945 946 947kana_char_order() /* japanese */ 948{ 949 unsigned int c,i,j,cc; 950 951 for ( c = 0x8000 ; c < 0x10000 ; c++ ) 952 char_order[c] = c; 953 954 /* NUMERIC */ 955 for ( c = JIS_NUM_0 , i = '0' + 512; c <= JIS_NUM_9 ; i++,c++) 956 char_order[c] = i; 957 958 /* ALPHABET */ 959 for ( c = JIS_ALPH_A , i = 'a' + 512; c <= JIS_ALPH_Z ; i++,c++) { 960 char_order[c] = i; 961 char_order[c+JIS_ALPH_a - JIS_ALPH_A] = i; 962 } 963 964 /* KANA */ 965 for ( c = JIS_HIRA_SMALL_A ; c <= JIS_HIRA_NN ; c++ ) { 966 char_order[c] = c+512; 967 cc = c+JIS_KATA_SMALL_A-JIS_HIRA_SMALL_A; 968#ifdef SJIS 969 if ( cc > 0x837e ) cc ++; 970#endif 971 char_order[cc] = c+512; 972 if (isdaku(c) ) { 973 char_order[c] = c+512-1; 974 char_order[cc] = c+512-1; 975 } else if (ishandaku(c) ) { 976 char_order[c] = c+512-2; 977 char_order[cc] = c+512-2; 978 } 979 } 980 for ( c = JIS_KATA_VU ; c <= JIS_KATA_SMALL_KE ; c++ ) { 981 char_order[c] = c+512; 982 if (c == JIS_KATA_VU ) { 983 char_order[c] = JIS_KATA_U + 512; 984 } 985 } 986} 987 988 989#ifdef EUC 990#define KANA_BYTES 2 991#else 992#define KANA_BYTES 2 993#endif 994 995static convert_htoz(str,ret) 996unsigned char *str; 997int *ret; 998{ 999 int c,c2; 1000 1001 int num = KANA_BYTES; 1002 1003#ifdef EUC 1004 str++; 1005#endif 1006 c = *str++; 1007 if (!is201kana(*str)) { 1008 if (c >= 0xa0 && c <= 0xdf) 1009 *ret = kana[c - 0xa0]; 1010 else *ret = 0x8e00 + c; 1011 return num; 1012 } 1013#ifdef EUC 1014 str++; 1015#endif 1016 num += KANA_BYTES; 1017 1018 if ( (c2 = *str++) == 0xde) { /* $BByE@(J */ 1019 if (c >= 0xb6 && c <= 0xba) /* line-ga */ 1020 c = daku[c - 0xb6]; 1021 else if (c >= 0xbb && c <= 0xbf) /* line-za */ 1022 c = daku[c - 0xbb+5]; 1023 else if (c >= 0xc0 && c <= 0xc4) /* line-da */ 1024 c = daku[c - 0xc0+10]; 1025 else if (c >= 0xca && c <= 0xce) /* line-ba */ 1026 c = daku[c - 0xca+15]; 1027 else if ( c == 0xb3 ) /* vu */ 1028 c = JIS_KATA_VU; 1029 } else if (c2 == 0xdf) { /* $BH>ByE@(J */ 1030 if (c >= 0xca && c <= 0xce) /* line-pa */ 1031 c = handaku[c - 0xca]; 1032 } else if ( c2 == 0xb1 ) { /* $BD92;(J */ 1033 if (c >= 0xa0 && c <= 0xdd) 1034 *ret = kana[c - 0xa0]; 1035 else *ret = 0x8e00 + c; 1036 } 1037 if ( c < 0x100 ) c += 0x8e00; 1038 1039 return num; 1040} 1041 1042isadddaku(c) 1043{ 1044 int i; 1045 for ( i = 0 ; i < sizeof(dakuall) / sizeof(dakuall[1]) ; i++ ) 1046 if ( dakuall[i]-1 == c ) return 1; 1047 return 0; 1048} 1049 1050isaddhandaku(c) 1051{ 1052 int i; 1053 for ( i = 0 ; i < sizeof(handakuall) / sizeof(handakuall[1]) ; i++ ) 1054 if ( handakuall[i]-2 == c ) return 1; 1055 return 0; 1056} 1057 1058isdaku(c) 1059{ 1060 int i; 1061 for ( i = 0 ; i < sizeof(dakuall) / sizeof(dakuall[1]) ; i++ ) 1062 if ( dakuall[i] == c ) return 1; 1063 return 0; 1064} 1065 1066ishandaku(c) 1067{ 1068 int i; 1069 for ( i = 0 ; i < sizeof(handakuall) / sizeof(handakuall[1]) ; i++ ) 1070 if ( handakuall[i] == c ) return 1; 1071 return 0; 1072} 1073 1074int mbchar(str,ret) 1075unsigned char *str; 1076int *ret; 1077{ 1078 int c = *str++; 1079 int cc,ccc; 1080 if ( !iskanji(c) && !is201kana(c)) { 1081 *ret = c & 0xff; 1082 return 1; 1083 } 1084 if ( iskanji (c) ) { 1085 cc = ((c & 0xff) << 8) | ((*str++) & 0xff); 1086 ccc = (((*str++ ) & 0xff) << 8 ) | ((*str++) & 0xff); 1087 1088 if ( ccc == JIS_DAKUTEN ) { 1089 if ( isadddaku(cc)) cc++; 1090 else if ( cc == JIS_KATA_U || cc == JIS_HIRA_U ) 1091 cc = JIS_KATA_VU; 1092 *ret = cc; 1093 return 4; 1094 } else if ( ccc == JIS_HANDAKUTEN ) { 1095 if ( isaddhandaku(cc)) cc+=2; 1096 *ret = cc; 1097 return 4; 1098 } else if ( ccc == JIS_CHOUON ) { 1099 *ret = cc; 1100 return 4; 1101 } else { 1102 *ret = cc; 1103 return 2; 1104 } 1105 } else if ( is201kana(c)) { 1106 /* EUC */ 1107 return convert_htoz(str-1,ret); 1108 } 1109} 1110 1111xinitial(str,c) 1112unsigned char *str; 1113int c; 1114{ 1115 int up; 1116 if (isdaku(c)) c--; 1117 else if (ishandaku(c)) c-=2; 1118 else if ( c == JIS_KATA_VU ) c = JIS_KATA_U; 1119 1120 if ((up= (c & 0xff00) >>8) == JIS_KATA_UPPER ) { /* katakana */ 1121 c += JIS_HIRA_SMALL_A - JIS_KATA_SMALL_A; 1122#ifdef SJIS 1123 if ( c > 0x837e ) c ++; 1124#endif 1125 } 1126 *str++ = (c >> 8 ) & 0xfff; 1127 *str++ = c & 0xff; 1128 *str= 0; 1129} 1130 1131make_initial(src,dst) 1132unsigned char *src,*dst; 1133{ 1134 int len; 1135 int c; 1136 1137 len = mbchar(src,&c); 1138 if ( len == 1 ) { 1139 dst[0] =c; 1140 dst[1] = 0; 1141 } else { 1142 xinitial(dst,c); 1143 len = 2; 1144 } 1145 return len; 1146} 1147#endif 1148 1149void 1150init_char_order () 1151{ 1152 int i; 1153 for (i = 1; i < 256; i++) 1154 char_order[i] = i; 1155 1156 for (i = '0'; i <= '9'; i++) 1157 char_order[i] += 512; 1158 1159 for (i = 'a'; i <= 'z'; i++) 1160 { 1161 char_order[i] = 512 + i; 1162 char_order[i + 'A' - 'a'] = 512 + i; 1163 } 1164#ifdef KANJI 1165 kana_char_order(); /* japanese */ 1166#endif 1167} 1168 1169/* Compare two fields (each specified as a start pointer and a character count) 1170 according to KEYFIELD. 1171 The sign of the value reports the relation between the fields. */ 1172 1173int 1174compare_field (keyfield, start1, length1, pos1, start2, length2, pos2) 1175 struct keyfield *keyfield; 1176 char *start1; 1177 long length1; 1178 long pos1; 1179 char *start2; 1180 long length2; 1181 long pos2; 1182{ 1183 if (keyfields->positional) 1184 { 1185 if (pos1 > pos2) 1186 return 1; 1187 else 1188 return -1; 1189 } 1190 if (keyfield->numeric) 1191 { 1192 long value = find_value (start1, length1) - find_value (start2, length2); 1193 if (value > 0) 1194 return 1; 1195 if (value < 0) 1196 return -1; 1197 return 0; 1198 } 1199 else 1200 { 1201 char *p1 = start1; 1202 char *p2 = start2; 1203 char *e1 = start1 + length1; 1204 char *e2 = start2 + length2; 1205 1206 while (1) 1207 { 1208 int c1, c2; 1209 1210 if (p1 == e1) 1211 c1 = 0; 1212 else 1213#ifdef KANJI 1214 p1 += mbchar(p1,&c1); /* japanese */ 1215#else 1216 c1 = *p1++; 1217#endif 1218 1219 if (p2 == e2) 1220 c2 = 0; 1221 else 1222#ifdef KANJI 1223 p2 += mbchar(p2,&c2); /* japanese */ 1224#else 1225 c2 = *p2++; 1226#endif 1227 1228 if (char_order[c1] != char_order[c2]) 1229 return char_order[c1] - char_order[c2]; 1230 if (!c1) 1231 break; 1232 } 1233 1234 /* Strings are equal except possibly for case. */ 1235 p1 = start1; 1236 p2 = start2; 1237 while (1) 1238 { 1239 int c1, c2; 1240 1241 if (p1 == e1) 1242 c1 = 0; 1243 else 1244#ifdef KANJI 1245 p1 += mbchar(p1,&c1); /* japanese */ 1246#else 1247 c1 = *p1++; 1248#endif 1249 if (p2 == e2) 1250 c2 = 0; 1251 else 1252#ifdef KANJI 1253 p2 += mbchar(p2,&c2); /* japanese */ 1254#else 1255 c2 = *p2++; 1256#endif 1257#ifdef KANJI 1258 if ( iskanji(c1) || is201kana(c1)) { 1259 if (c1 != c2) 1260 /* Reverse sign here so upper case comes out last. */ 1261 return c2 - c1; 1262 } else { 1263 if (c1 != c2) 1264 return c1 - c2; 1265 } 1266#else 1267 1268 if (c1 != c2) 1269 /* Reverse sign here so upper case comes out last. */ 1270 return c2 - c1; 1271#endif 1272 if (!c1) 1273 break; 1274 } 1275 1276 return 0; 1277 } 1278} 1279 1280/* A `struct linebuffer' is a structure which holds a line of text. 1281 `readline' reads a line from a stream into a linebuffer 1282 and works regardless of the length of the line. */ 1283 1284struct linebuffer 1285{ 1286 long size; 1287 char *buffer; 1288}; 1289 1290/* Initialize LINEBUFFER for use. */ 1291 1292void 1293initbuffer (linebuffer) 1294 struct linebuffer *linebuffer; 1295{ 1296 linebuffer->size = 200; 1297 linebuffer->buffer = (char *) xmalloc (200); 1298} 1299 1300/* Read a line of text from STREAM into LINEBUFFER. 1301 Return the length of the line. */ 1302 1303long 1304readline (linebuffer, stream) 1305 struct linebuffer *linebuffer; 1306 FILE *stream; 1307{ 1308 char *buffer = linebuffer->buffer; 1309 char *p = linebuffer->buffer; 1310 char *end = p + linebuffer->size; 1311 1312 while (1) 1313 { 1314 int c = getc (stream); 1315 if (p == end) 1316 { 1317 buffer = (char *) xrealloc (buffer, linebuffer->size *= 2); 1318 p += buffer - linebuffer->buffer; 1319 end += buffer - linebuffer->buffer; 1320 linebuffer->buffer = buffer; 1321 } 1322 if (c < 0 || c == '\n') 1323 { 1324 *p = 0; 1325 break; 1326 } 1327 *p++ = c; 1328 } 1329 1330 return p - buffer; 1331} 1332 1333/* Sort an input file too big to sort in core. */ 1334 1335void 1336sort_offline (infile, nfiles, total, outfile) 1337 char *infile; 1338 int nfiles; 1339 long total; 1340 char *outfile; 1341{ 1342 /* More than enough. */ 1343 int ntemps = 2 * (total + MAX_IN_CORE_SORT - 1) / MAX_IN_CORE_SORT; 1344 char **tempfiles = (char **) xmalloc (ntemps * sizeof (char *)); 1345 FILE *istream = fopen (infile, "r"); 1346 int i; 1347 struct linebuffer lb; 1348 long linelength; 1349 int failure = 0; 1350 1351 initbuffer (&lb); 1352 1353 /* Read in one line of input data. */ 1354 1355 linelength = readline (&lb, istream); 1356 1357 if (lb.buffer[0] != '\\' && lb.buffer[0] != '@') 1358 { 1359 error ("%s: not a texinfo index file", infile); 1360 return; 1361 } 1362 1363 /* Split up the input into `ntemps' temporary files, or maybe fewer, 1364 and put the new files' names into `tempfiles' */ 1365 1366 for (i = 0; i < ntemps; i++) 1367 { 1368 char *outname = maketempname (++tempcount); 1369 FILE *ostream = fopen (outname, "w"); 1370 long tempsize = 0; 1371 1372 if (!ostream) 1373 pfatal_with_name (outname); 1374 tempfiles[i] = outname; 1375 1376 /* Copy lines into this temp file as long as it does not make file 1377 "too big" or until there are no more lines. */ 1378 1379 while (tempsize + linelength + 1 <= MAX_IN_CORE_SORT) 1380 { 1381 tempsize += linelength + 1; 1382 fputs (lb.buffer, ostream); 1383 putc ('\n', ostream); 1384 1385 /* Read another line of input data. */ 1386 1387 linelength = readline (&lb, istream); 1388 if (!linelength && feof (istream)) 1389 break; 1390 1391 if (lb.buffer[0] != '\\' && lb.buffer[0] != '@') 1392 { 1393 error ("%s: not a texinfo index file", infile); 1394 failure = 1; 1395 goto fail; 1396 } 1397 } 1398 fclose (ostream); 1399 if (feof (istream)) 1400 break; 1401 } 1402 1403 free (lb.buffer); 1404 1405fail: 1406 /* Record number of temp files we actually needed. */ 1407 1408 ntemps = i; 1409 1410 /* Sort each tempfile into another tempfile. 1411 Delete the first set of tempfiles and put the names of the second 1412 into `tempfiles'. */ 1413 1414 for (i = 0; i < ntemps; i++) 1415 { 1416 char *newtemp = maketempname (++tempcount); 1417 sort_in_core (&tempfiles[i], MAX_IN_CORE_SORT, newtemp); 1418 if (!keep_tempfiles) 1419 unlink (tempfiles[i]); 1420 tempfiles[i] = newtemp; 1421 } 1422 1423 if (failure) 1424 return; 1425 1426 /* Merge the tempfiles together and indexify. */ 1427 1428 merge_files (tempfiles, ntemps, outfile); 1429} 1430 1431/* Sort INFILE, whose size is TOTAL, 1432 assuming that is small enough to be done in-core, 1433 then indexify it and send the output to OUTFILE (or to stdout). */ 1434 1435void 1436sort_in_core (infile, total, outfile) 1437 char *infile; 1438 long total; 1439 char *outfile; 1440{ 1441 char **nextline; 1442 char *data = (char *) xmalloc (total + 1); 1443 char *file_data; 1444 long file_size; 1445 int i; 1446 FILE *ostream = stdout; 1447 struct lineinfo *lineinfo; 1448 1449 /* Read the contents of the file into the moby array `data'. */ 1450 1451 int desc = open (infile, O_RDONLY, 0); 1452 1453 if (desc < 0) 1454 fatal ("failure reopening %s", infile); 1455 for (file_size = 0;;) 1456 { 1457 i = read (desc, data + file_size, total - file_size); 1458 if (i <= 0) 1459 break; 1460 file_size += i; 1461 } 1462 file_data = data; 1463 data[file_size] = 0; 1464 1465 close (desc); 1466 1467 if (file_size > 0 && data[0] != '\\' && data[0] != '@') 1468 { 1469 error ("%s: not a texinfo index file", infile); 1470 return; 1471 } 1472 1473 init_char_order (); 1474 1475 /* Sort routines want to know this address. */ 1476 1477 text_base = data; 1478 1479 /* Create the array of pointers to lines, with a default size 1480 frequently enough. */ 1481 1482 nlines = total / 50; 1483 if (!nlines) 1484 nlines = 2; 1485 linearray = (char **) xmalloc (nlines * sizeof (char *)); 1486 1487 /* `nextline' points to the next free slot in this array. 1488 `nlines' is the allocated size. */ 1489 1490 nextline = linearray; 1491 1492 /* Parse the input file's data, and make entries for the lines. */ 1493 1494 nextline = parsefile (infile, nextline, file_data, file_size); 1495 if (nextline == 0) 1496 { 1497 error ("%s: not a texinfo index file", infile); 1498 return; 1499 } 1500 1501 /* Sort the lines. */ 1502 1503 /* If we have enough space, find the first keyfield of each line in advance. 1504 Make a `struct lineinfo' for each line, which records the keyfield 1505 as well as the line, and sort them. */ 1506 1507 lineinfo = (struct lineinfo *) malloc ((nextline - linearray) * sizeof (struct lineinfo)); 1508 1509 if (lineinfo) 1510 { 1511 struct lineinfo *lp; 1512 char **p; 1513 1514 for (lp = lineinfo, p = linearray; p != nextline; lp++, p++) 1515 { 1516 lp->text = *p; 1517 lp->key.text = find_field (keyfields, *p, &lp->keylen); 1518 if (keyfields->numeric) 1519 lp->key.number = find_value (lp->key.text, lp->keylen); 1520 } 1521 1522 qsort (lineinfo, nextline - linearray, sizeof (struct lineinfo), compare_prepared); 1523 1524 for (lp = lineinfo, p = linearray; p != nextline; lp++, p++) 1525 *p = lp->text; 1526 1527 free (lineinfo); 1528 } 1529 else 1530 qsort (linearray, nextline - linearray, sizeof (char *), compare_full); 1531 1532 /* Open the output file. */ 1533 1534 if (outfile) 1535 { 1536 ostream = fopen (outfile, "w"); 1537 if (!ostream) 1538 pfatal_with_name (outfile); 1539 } 1540 1541 writelines (linearray, nextline - linearray, ostream); 1542 if (outfile) 1543 fclose (ostream); 1544 1545 free (linearray); 1546 free (data); 1547} 1548 1549/* Parse an input string in core into lines. 1550 DATA is the input string, and SIZE is its length. 1551 Data goes in LINEARRAY starting at NEXTLINE. 1552 The value returned is the first entry in LINEARRAY still unused. 1553 Value 0 means input file contents are invalid. */ 1554 1555char ** 1556parsefile (filename, nextline, data, size) 1557 char *filename; 1558 char **nextline; 1559 char *data; 1560 long size; 1561{ 1562 char *p, *end; 1563 char **line = nextline; 1564 1565 p = data; 1566 end = p + size; 1567 *end = 0; 1568 1569 while (p != end) 1570 { 1571 if (p[0] != '\\' && p[0] != '@') 1572 return 0; 1573 1574 *line = p; 1575 while (*p && *p != '\n') 1576 p++; 1577 if (p != end) 1578 p++; 1579 1580 line++; 1581 if (line == linearray + nlines) 1582 { 1583 char **old = linearray; 1584 linearray = (char **) xrealloc (linearray, sizeof (char *) * (nlines *= 4)); 1585 line += linearray - old; 1586 } 1587 } 1588 1589 return line; 1590} 1591 1592/* Indexification is a filter applied to the sorted lines 1593 as they are being written to the output file. 1594 Multiple entries for the same name, with different page numbers, 1595 get combined into a single entry with multiple page numbers. 1596 The first braced field, which is used for sorting, is discarded. 1597 However, its first character is examined, folded to lower case, 1598 and if it is different from that in the previous line fed to us 1599 a \initial line is written with one argument, the new initial. 1600 1601 If an entry has four braced fields, then the second and third 1602 constitute primary and secondary names. 1603 In this case, each change of primary name 1604 generates a \primary line which contains only the primary name, 1605 and in between these are \secondary lines which contain 1606 just a secondary name and page numbers. */ 1607 1608/* The last primary name we wrote a \primary entry for. 1609 If only one level of indexing is being done, this is the last name seen. */ 1610char *lastprimary; 1611/* Length of storage allocated for lastprimary. */ 1612int lastprimarylength; 1613 1614/* Similar, for the secondary name. */ 1615char *lastsecondary; 1616int lastsecondarylength; 1617 1618/* Zero if we are not in the middle of writing an entry. 1619 One if we have written the beginning of an entry but have not 1620 yet written any page numbers into it. 1621 Greater than one if we have written the beginning of an entry 1622 plus at least one page number. */ 1623int pending; 1624 1625/* The initial (for sorting purposes) of the last primary entry written. 1626 When this changes, a \initial {c} line is written */ 1627 1628char *lastinitial; 1629 1630int lastinitiallength; 1631 1632/* When we need a string of length 1 for the value of lastinitial, 1633 store it here. */ 1634 1635char lastinitial1[3]; /* japanese */ 1636 1637/* Initialize static storage for writing an index. */ 1638 1639void 1640init_index () 1641{ 1642 pending = 0; 1643 lastinitial = lastinitial1; 1644 lastinitial1[0] = 0; 1645 lastinitial1[1] = 0; 1646 lastinitiallength = 0; 1647 lastprimarylength = 100; 1648 lastprimary = (char *) xmalloc (lastprimarylength + 1); 1649 memset (lastprimary, '\0', lastprimarylength + 1); 1650 lastsecondarylength = 100; 1651 lastsecondary = (char *) xmalloc (lastsecondarylength + 1); 1652 memset (lastsecondary, '\0', lastsecondarylength + 1); 1653} 1654 1655/* Indexify. Merge entries for the same name, 1656 insert headers for each initial character, etc. */ 1657 1658void 1659indexify (line, ostream) 1660 char *line; 1661 FILE *ostream; 1662{ 1663 char *primary, *secondary, *pagenumber; 1664 int primarylength, secondarylength = 0, pagelength; 1665 int nosecondary; 1666 int initiallength; 1667 char *initial; 1668 char initial1[3]; /* japanese */ 1669 register char *p; 1670 1671 /* First, analyze the parts of the entry fed to us this time. */ 1672 1673 p = find_braced_pos (line, 0, 0, 0); 1674 if (*p == '{') 1675 { 1676 initial = p; 1677 /* Get length of inner pair of braces starting at `p', 1678 including that inner pair of braces. */ 1679 initiallength = find_braced_end (p + 1) + 1 - p; 1680 } 1681 else 1682 { 1683 initial = initial1; 1684#ifdef KANJI 1685 initiallength = make_initial(p,initial1); /* japanese */ 1686#else 1687 initial = initial1; 1688 initial1[0] = *p; 1689 initial1[1] = 0; 1690 initiallength = 1; 1691#endif 1692 1693 if (initial1[0] >= 'a' && initial1[0] <= 'z') 1694 initial1[0] -= 040; 1695 } 1696 1697 pagenumber = find_braced_pos (line, 1, 0, 0); 1698 pagelength = find_braced_end (pagenumber) - pagenumber; 1699 if (pagelength == 0) 1700 abort (); 1701 1702 primary = find_braced_pos (line, 2, 0, 0); 1703 primarylength = find_braced_end (primary) - primary; 1704 1705 secondary = find_braced_pos (line, 3, 0, 0); 1706 nosecondary = !*secondary; 1707 if (!nosecondary) 1708 secondarylength = find_braced_end (secondary) - secondary; 1709 1710 /* If the primary is different from before, make a new primary entry. */ 1711 if (strncmp (primary, lastprimary, primarylength)) 1712 { 1713 /* Close off current secondary entry first, if one is open. */ 1714 if (pending) 1715 { 1716 fputs ("}\n", ostream); 1717 pending = 0; 1718 } 1719 1720 /* If this primary has a different initial, include an entry for 1721 the initial. */ 1722 if (initiallength != lastinitiallength || 1723 strncmp (initial, lastinitial, initiallength)) 1724 { 1725 fprintf (ostream, "\\initial {"); 1726 fwrite (initial, 1, initiallength, ostream); 1727 fprintf (ostream, "}\n", initial); 1728 if (initial == initial1) 1729 { 1730 lastinitial = lastinitial1; 1731 *lastinitial1 = *initial1; 1732 lastinitial1[1] = initial1[1]; 1733 lastinitial1[2] = initial1[2]; 1734 } 1735 else 1736 { 1737 lastinitial = initial; 1738 } 1739 lastinitiallength = initiallength; 1740 } 1741 1742 /* Make the entry for the primary. */ 1743 if (nosecondary) 1744 fputs ("\\entry {", ostream); 1745 else 1746 fputs ("\\primary {", ostream); 1747 fwrite (primary, primarylength, 1, ostream); 1748 if (nosecondary) 1749 { 1750 fputs ("}{", ostream); 1751 pending = 1; 1752 } 1753 else 1754 fputs ("}\n", ostream); 1755 1756 /* Record name of most recent primary. */ 1757 if (lastprimarylength < primarylength) 1758 { 1759 lastprimarylength = primarylength + 100; 1760 lastprimary = (char *) xrealloc (lastprimary, 1761 1 + lastprimarylength); 1762 } 1763 strncpy (lastprimary, primary, primarylength); 1764 lastprimary[primarylength] = 0; 1765 1766 /* There is no current secondary within this primary, now. */ 1767 lastsecondary[0] = 0; 1768 } 1769 1770 /* Should not have an entry with no subtopic following one with a subtopic. */ 1771 1772 if (nosecondary && *lastsecondary) 1773 error ("entry %s follows an entry with a secondary name", line); 1774 1775 /* Start a new secondary entry if necessary. */ 1776 if (!nosecondary && strncmp (secondary, lastsecondary, secondarylength)) 1777 { 1778 if (pending) 1779 { 1780 fputs ("}\n", ostream); 1781 pending = 0; 1782 } 1783 1784 /* Write the entry for the secondary. */ 1785 fputs ("\\secondary {", ostream); 1786 fwrite (secondary, secondarylength, 1, ostream); 1787 fputs ("}{", ostream); 1788 pending = 1; 1789 1790 /* Record name of most recent secondary. */ 1791 if (lastsecondarylength < secondarylength) 1792 { 1793 lastsecondarylength = secondarylength + 100; 1794 lastsecondary = (char *) xrealloc (lastsecondary, 1795 1 + lastsecondarylength); 1796 } 1797 strncpy (lastsecondary, secondary, secondarylength); 1798 lastsecondary[secondarylength] = 0; 1799 } 1800 1801 /* Here to add one more page number to the current entry. */ 1802 if (pending++ != 1) 1803 fputs (", ", ostream); /* Punctuate first, if this is not the first. */ 1804 fwrite (pagenumber, pagelength, 1, ostream); 1805} 1806 1807/* Close out any unfinished output entry. */ 1808 1809void 1810finish_index (ostream) 1811 FILE *ostream; 1812{ 1813 if (pending) 1814 fputs ("}\n", ostream); 1815 free (lastprimary); 1816 free (lastsecondary); 1817} 1818 1819/* Copy the lines in the sorted order. 1820 Each line is copied out of the input file it was found in. */ 1821 1822void 1823writelines (linearray, nlines, ostream) 1824 char **linearray; 1825 int nlines; 1826 FILE *ostream; 1827{ 1828 char **stop_line = linearray + nlines; 1829 char **next_line; 1830 1831 init_index (); 1832 1833 /* Output the text of the lines, and free the buffer space. */ 1834 1835 for (next_line = linearray; next_line != stop_line; next_line++) 1836 { 1837 /* If -u was specified, output the line only if distinct from previous one. */ 1838 if (next_line == linearray 1839 /* Compare previous line with this one, using only the 1840 explicitly specd keyfields. */ 1841 || compare_general (*(next_line - 1), *next_line, 0L, 0L, num_keyfields - 1)) 1842 { 1843 char *p = *next_line; 1844 char c; 1845 1846 while ((c = *p++) && c != '\n') 1847 /* Do nothing. */ ; 1848 *(p - 1) = 0; 1849 indexify (*next_line, ostream); 1850 } 1851 } 1852 1853 finish_index (ostream); 1854} 1855 1856/* Assume (and optionally verify) that each input file is sorted; 1857 merge them and output the result. 1858 Returns nonzero if any input file fails to be sorted. 1859 1860 This is the high-level interface that can handle an unlimited 1861 number of files. */ 1862 1863#define MAX_DIRECT_MERGE 10 1864 1865int 1866merge_files (infiles, nfiles, outfile) 1867 char **infiles; 1868 int nfiles; 1869 char *outfile; 1870{ 1871 char **tempfiles; 1872 int ntemps; 1873 int i; 1874 int value = 0; 1875 int start_tempcount = tempcount; 1876 1877 if (nfiles <= MAX_DIRECT_MERGE) 1878 return merge_direct (infiles, nfiles, outfile); 1879 1880 /* Merge groups of MAX_DIRECT_MERGE input files at a time, 1881 making a temporary file to hold each group's result. */ 1882 1883 ntemps = (nfiles + MAX_DIRECT_MERGE - 1) / MAX_DIRECT_MERGE; 1884 tempfiles = (char **) xmalloc (ntemps * sizeof (char *)); 1885 for (i = 0; i < ntemps; i++) 1886 { 1887 int nf = MAX_DIRECT_MERGE; 1888 if (i + 1 == ntemps) 1889 nf = nfiles - i * MAX_DIRECT_MERGE; 1890 tempfiles[i] = maketempname (++tempcount); 1891 value |= merge_direct (&infiles[i * MAX_DIRECT_MERGE], nf, tempfiles[i]); 1892 } 1893 1894 /* All temporary files that existed before are no longer needed 1895 since their contents have been merged into our new tempfiles. 1896 So delete them. */ 1897 flush_tempfiles (start_tempcount); 1898 1899 /* Now merge the temporary files we created. */ 1900 1901 merge_files (tempfiles, ntemps, outfile); 1902 1903 free (tempfiles); 1904 1905 return value; 1906} 1907 1908/* Assume (and optionally verify) that each input file is sorted; 1909 merge them and output the result. 1910 Returns nonzero if any input file fails to be sorted. 1911 1912 This version of merging will not work if the number of 1913 input files gets too high. Higher level functions 1914 use it only with a bounded number of input files. */ 1915 1916int 1917merge_direct (infiles, nfiles, outfile) 1918 char **infiles; 1919 int nfiles; 1920 char *outfile; 1921{ 1922 struct linebuffer *lb1, *lb2; 1923 struct linebuffer **thisline, **prevline; 1924 FILE **streams; 1925 int i; 1926 int nleft; 1927 int lossage = 0; 1928 int *file_lossage; 1929 struct linebuffer *prev_out = 0; 1930 FILE *ostream = stdout; 1931 1932 if (outfile) 1933 { 1934 ostream = fopen (outfile, "w"); 1935 } 1936 if (!ostream) 1937 pfatal_with_name (outfile); 1938 1939 init_index (); 1940 1941 if (nfiles == 0) 1942 { 1943 if (outfile) 1944 fclose (ostream); 1945 return 0; 1946 } 1947 1948 /* For each file, make two line buffers. 1949 Also, for each file, there is an element of `thisline' 1950 which points at any time to one of the file's two buffers, 1951 and an element of `prevline' which points to the other buffer. 1952 `thisline' is supposed to point to the next available line from the file, 1953 while `prevline' holds the last file line used, 1954 which is remembered so that we can verify that the file is properly sorted. */ 1955 1956 /* lb1 and lb2 contain one buffer each per file. */ 1957 lb1 = (struct linebuffer *) xmalloc (nfiles * sizeof (struct linebuffer)); 1958 lb2 = (struct linebuffer *) xmalloc (nfiles * sizeof (struct linebuffer)); 1959 1960 /* thisline[i] points to the linebuffer holding the next available line in file i, 1961 or is zero if there are no lines left in that file. */ 1962 thisline = (struct linebuffer **) 1963 xmalloc (nfiles * sizeof (struct linebuffer *)); 1964 /* prevline[i] points to the linebuffer holding the last used line 1965 from file i. This is just for verifying that file i is properly 1966 sorted. */ 1967 prevline = (struct linebuffer **) 1968 xmalloc (nfiles * sizeof (struct linebuffer *)); 1969 /* streams[i] holds the input stream for file i. */ 1970 streams = (FILE **) xmalloc (nfiles * sizeof (FILE *)); 1971 /* file_lossage[i] is nonzero if we already know file i is not 1972 properly sorted. */ 1973 file_lossage = (int *) xmalloc (nfiles * sizeof (int)); 1974 1975 /* Allocate and initialize all that storage. */ 1976 1977 for (i = 0; i < nfiles; i++) 1978 { 1979 initbuffer (&lb1[i]); 1980 initbuffer (&lb2[i]); 1981 thisline[i] = &lb1[i]; 1982 prevline[i] = &lb2[i]; 1983 file_lossage[i] = 0; 1984 streams[i] = fopen (infiles[i], "r"); 1985 if (!streams[i]) 1986 pfatal_with_name (infiles[i]); 1987 1988 readline (thisline[i], streams[i]); 1989 } 1990 1991 /* Keep count of number of files not at eof. */ 1992 nleft = nfiles; 1993 1994 while (nleft) 1995 { 1996 struct linebuffer *best = 0; 1997 struct linebuffer *exch; 1998 int bestfile = -1; 1999 int i; 2000 2001 /* Look at the next avail line of each file; choose the least one. */ 2002 2003 for (i = 0; i < nfiles; i++) 2004 { 2005 if (thisline[i] && 2006 (!best || 2007 0 < compare_general (best->buffer, thisline[i]->buffer, 2008 (long) bestfile, (long) i, num_keyfields))) 2009 { 2010 best = thisline[i]; 2011 bestfile = i; 2012 } 2013 } 2014 2015 /* Output that line, unless it matches the previous one and we 2016 don't want duplicates. */ 2017 2018 if (!(prev_out && 2019 !compare_general (prev_out->buffer, 2020 best->buffer, 0L, 1L, num_keyfields - 1))) 2021 indexify (best->buffer, ostream); 2022 prev_out = best; 2023 2024 /* Now make the line the previous of its file, and fetch a new 2025 line from that file. */ 2026 2027 exch = prevline[bestfile]; 2028 prevline[bestfile] = thisline[bestfile]; 2029 thisline[bestfile] = exch; 2030 2031 while (1) 2032 { 2033 /* If the file has no more, mark it empty. */ 2034 2035 if (feof (streams[bestfile])) 2036 { 2037 thisline[bestfile] = 0; 2038 /* Update the number of files still not empty. */ 2039 nleft--; 2040 break; 2041 } 2042 readline (thisline[bestfile], streams[bestfile]); 2043 if (thisline[bestfile]->buffer[0] || !feof (streams[bestfile])) 2044 break; 2045 } 2046 } 2047 2048 finish_index (ostream); 2049 2050 /* Free all storage and close all input streams. */ 2051 2052 for (i = 0; i < nfiles; i++) 2053 { 2054 fclose (streams[i]); 2055 free (lb1[i].buffer); 2056 free (lb2[i].buffer); 2057 } 2058 free (file_lossage); 2059 free (lb1); 2060 free (lb2); 2061 free (thisline); 2062 free (prevline); 2063 free (streams); 2064 2065 if (outfile) 2066 fclose (ostream); 2067 2068 return lossage; 2069} 2070 2071/* Print error message and exit. */ 2072 2073void 2074fatal (format, arg) 2075 char *format, *arg; 2076{ 2077 error (format, arg); 2078 exit (TI_FATAL_ERROR); 2079} 2080 2081/* Print error message. FORMAT is printf control string, ARG is arg for it. */ 2082void 2083error (format, arg) 2084 char *format, *arg; 2085{ 2086 printf ("%s: ", program_name); 2087 printf (format, arg); 2088 if (format[strlen (format) -1] != '\n') 2089 printf ("\n"); 2090} 2091 2092void 2093perror_with_name (name) 2094 char *name; 2095{ 2096 char *s; 2097 2098 s = strerror (errno); 2099 printf ("%s: ", program_name); 2100 printf ("%s; for file `%s'.\n", s, name); 2101} 2102 2103void 2104pfatal_with_name (name) 2105 char *name; 2106{ 2107 char *s; 2108 2109 s = strerror (errno); 2110 printf ("%s: ", program_name); 2111 printf ("%s; for file `%s'.\n", s, name); 2112 exit (TI_FATAL_ERROR); 2113} 2114 2115/* Return a newly-allocated string whose contents concatenate those of 2116 S1, S2, S3. */ 2117 2118char * 2119concat (s1, s2, s3) 2120 char *s1, *s2, *s3; 2121{ 2122 int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3); 2123 char *result = (char *) xmalloc (len1 + len2 + len3 + 1); 2124 2125 strcpy (result, s1); 2126 strcpy (result + len1, s2); 2127 strcpy (result + len1 + len2, s3); 2128 *(result + len1 + len2 + len3) = 0; 2129 2130 return result; 2131} 2132 2133#if !defined (HAVE_STRERROR) 2134extern char *sys_errlist[]; 2135extern int sys_nerr; 2136 2137char * 2138strerror (num) 2139 int num; 2140{ 2141 if (num >= sys_nerr) 2142 return (""); 2143 else 2144 return (sys_errlist[num]); 2145} 2146#endif /* !HAVE_STRERROR */ 2147 2148#if !defined (HAVE_STRCHR) 2149char * 2150strrchr (string, character) 2151 char *string; 2152 int character; 2153{ 2154 register int i; 2155 2156 for (i = strlen (string) - 1; i > -1; i--) 2157 if (string[i] == character) 2158 return (string + i); 2159 2160 return ((char *)NULL); 2161} 2162#endif /* HAVE_STRCHR */ 2163 2164/* Just like malloc, but kills the program in case of fatal error. */ 2165void * 2166xmalloc (nbytes) 2167 int nbytes; 2168{ 2169 void *temp = (void *) malloc (nbytes); 2170 2171 if (nbytes && temp == (void *)NULL) 2172 memory_error ("xmalloc", nbytes); 2173 2174 return (temp); 2175} 2176 2177/* Like realloc (), but barfs if there isn't enough memory. */ 2178void * 2179xrealloc (pointer, nbytes) 2180 void *pointer; 2181 int nbytes; 2182{ 2183 void *temp; 2184 2185 if (!pointer) 2186 temp = (void *)xmalloc (nbytes); 2187 else 2188 temp = (void *)realloc (pointer, nbytes); 2189 2190 if (nbytes && !temp) 2191 memory_error ("xrealloc", nbytes); 2192 2193 return (temp); 2194} 2195 2196memory_error (callers_name, bytes_wanted) 2197 char *callers_name; 2198 int bytes_wanted; 2199{ 2200 char printable_string[80]; 2201 2202 sprintf (printable_string, 2203 "Virtual memory exhausted in %s ()! Needed %d bytes.", 2204 callers_name, bytes_wanted); 2205 2206 error (printable_string); 2207 abort (); 2208} 2209 2210