1 /*
2 *
3 * This file is part of
4 * MakeIndex - A formatter and format independent index processor
5 *
6 * Copyright (C) 1989 by Chen & Harrison International Systems, Inc.
7 * Copyright (C) 1988 by Olivetti Research Center
8 * Copyright (C) 1987 by Regents of the University of California
9 *
10 * Author:
11 * Pehong Chen
12 * Chen & Harrison International Systems, Inc.
13 * Palo Alto, California
14 * USA
15 * (phc@renoir.berkeley.edu or chen@orc.olivetti.com)
16 *
17 * Contributors:
18 * Please refer to the CONTRIB file that comes with this release
19 * for a list of people who have contributed to this and/or previous
20 * release(s) of MakeIndex.
21 *
22 * All rights reserved by the copyright holders. See the copyright
23 * notice distributed with this software for a complete description of
24 * the conditions under which it is made available.
25 *
26 */
27
28 #include "mkind.h"
29 #include "scanid.h"
30
/*
 * CHECK_LENGTH - jump to the OVERFLOW label once the write index has run
 * past the usable length of the destination field.
 *
 * The expansion refers to `i', `len_field' and the label `OVERFLOW' by
 * name, so all three must be in scope wherever the macro is used.  The body
 * is wrapped in do { } while (0) so that `CHECK_LENGTH();' is exactly one
 * statement and can never capture an `else' that follows it.
 */
#define CHECK_LENGTH() do { if (i > len_field) goto OVERFLOW; } while (0)
32
33 int idx_lc; /* line count */
34 int idx_tc; /* total entry count */
35 int idx_ec; /* erroneous entry count */
36 int idx_dc; /* number of dots printed so far */
37
38 static int first_entry = TRUE;
39 static int comp_len;
40 static char key[ARGUMENT_MAX];
41 static char no[NUMBER_MAX];
42 extern char *strchr ARGS((const char* s,int c));
43
44 NODE_PTR head;
45 NODE_PTR tail;
46
47 static void flush_to_eol ARGS((void));
48 static int make_key ARGS((void));
49 static void make_string ARGS((char **ppstr,int n));
50 static int scan_alpha_lower ARGS((char *no,short *npg,short *count));
51 static int scan_alpha_upper ARGS((char *no,short *npg,short *count));
52 static int scan_arabic ARGS((char *no,short *npg,short *count));
53 static int scan_arg1 ARGS((void));
54 static int scan_arg2 ARGS((void));
55 static int scan_field ARGS((int *n,char field[],int len_field,
56 int ck_level, int ck_encap,int ck_actual));
57 static int scan_key ARGS((struct KFIELD *data));
58 static int scan_no ARGS((char *no,short *npg,short *count,short *type));
59 static int scan_roman_lower ARGS((char *no,short *npg,short *count));
60 static int scan_roman_upper ARGS((char *no,short *npg,short *count));
61 static void search_quote ARGS((char sort_key[],char actual_key[]));
62
63 #if (OS_BS2000 | OS_MVSXA)
64 char UPCC[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
65 #endif
66
67 void
scan_idx()68 scan_idx()
69 {
70 char keyword[ARRAY_MAX];
71 int c;
72 int i = 0;
73 int not_eof = TRUE;
74 int arg_count = -1;
75
76 MESSAGE("Scanning input file %s...", idx_fn);
77 idx_lc = idx_tc = idx_ec = idx_dc = 0;
78 comp_len = strlen(page_comp);
79 while (not_eof) {
80 switch (c = GET_CHAR(idx_fp)) {
81 case EOF:
82 if (arg_count == 2) {
83 idx_lc++;
84 if (make_key())
85 IDX_DOT(DOT_MAX);
86 arg_count = -1;
87 } else
88 not_eof = FALSE;
89 break;
90
91 case LFD:
92 idx_lc++;
93 if (arg_count == 2) {
94 if (make_key())
95 IDX_DOT(DOT_MAX);
96 arg_count = -1;
97 } else if (arg_count > -1) {
98 IDX_ERROR("Missing arguments -- need two (premature LFD).\n",
99 NULL);
100 arg_count = -1;
101 }
102 case TAB:
103 case SPC:
104 break;
105
106 default:
107 switch (arg_count) {
108 case -1:
109 i = 0;
110 keyword[i++] = (char) c;
111 arg_count++;
112 idx_tc++;
113 break;
114 case 0:
115 if (c == idx_aopen) {
116 arg_count++;
117 keyword[i] = NUL;
118 if (STREQ(keyword, idx_keyword)) {
119 if (!scan_arg1()) {
120 arg_count = -1;
121 }
122 } else {
123 IDX_SKIPLINE;
124 IDX_ERROR("Unknown index keyword %s.\n", keyword);
125 }
126 } else {
127 if (i < ARRAY_MAX)
128 keyword[i++] = (char) c;
129 else {
130 IDX_SKIPLINE;
131 IDX_ERROR2("Index keyword %s too long (max %d).\n",
132 keyword, ARRAY_MAX);
133 }
134 }
135 break;
136 case 1:
137 if (c == idx_aopen) {
138 arg_count++;
139 if (!scan_arg2()) {
140 arg_count = -1;
141 }
142 } else {
143 IDX_SKIPLINE;
144 IDX_ERROR(
145 "No opening delimiter for second argument (illegal character `%c').\n", c);
146 }
147 break;
148 case 2:
149 IDX_SKIPLINE;
150 IDX_ERROR(
151 "No closing delimiter for second argument (illegal character `%c').\n", c);
152 break;
153 }
154 break;
155 }
156 }
157
158 /* fixup the total counts */
159 idx_tt += idx_tc;
160 idx_et += idx_ec;
161
162 DONE(idx_tc - idx_ec, "entries accepted", idx_ec, "rejected");
163 CLOSE(idx_fp);
164 }
165
166 static void
flush_to_eol()167 flush_to_eol() /* flush to end-of-line, or end-of-file, whichever is first */
168 {
169 int a;
170
171 while ( ((a = GET_CHAR(idx_fp)) != LFD) && (a != EOF) )
172 /* NO-OP */;
173 }
174
175 static int
make_key()176 make_key()
177 {
178 NODE_PTR ptr;
179 int i;
180
181 /* allocate and initialize a node */
182
183 #ifdef DEBUG
184 totmem += sizeof(NODE);
185 (void)fprintf(stderr,"make_key(): malloc(%d)\ttotmem = %ld\n",
186 sizeof(NODE),totmem);
187 #endif /* DEBUG */
188
189 if ((ptr = (NODE_PTR) malloc(sizeof(NODE))) == NULL)
190 FATAL("Not enough core...abort.\n", "");
191
192 for (i = 0; i < FIELD_MAX; i++)
193 {
194 ptr->data.sf[i] = ""; /* initialize fields to pointers */;
195 ptr->data.af[i] = ""; /* to constant empty strings */
196 }
197 ptr->data.encap = "";
198 ptr->data.lpg[0] = NUL;
199 ptr->data.count = 0;
200 ptr->data.type = EMPTY;
201
202 /* process index key */
203 if (!scan_key(&(ptr->data)))
204 return (FALSE);
205
206 /* determine group type */
207 ptr->data.group = group_type(ptr->data.sf[0]);
208
209 /* process page number */
210 strcpy(ptr->data.lpg, no);
211 if (!scan_no(no, ptr->data.npg, &(ptr->data.count), &(ptr->data.type)))
212 return (FALSE);
213
214 if (first_entry) {
215 head = tail = ptr;
216 first_entry = FALSE;
217 } else {
218 tail->next = ptr;
219 tail = ptr;
220 }
221 ptr->data.lc = idx_lc;
222 ptr->data.fn = idx_fn;
223 tail->next = NULL;
224
225 return (TRUE);
226 }
227
228 static void
make_string(ppstr,n)229 make_string(ppstr,n) /* allocate n-byte string if *ppstr */
230 char **ppstr; /* points to an empty string */
231 int n;
232 {
233 if ((*ppstr)[0] == NUL) /* then we have an empty string */
234 {
235 (*ppstr) = (char*)malloc(n);
236 if ((*ppstr) == (char*)NULL)
237 FATAL("Not enough core...abort.\n", "");
238 (*ppstr)[0] = NUL;
239 }
240 }
241
242 static int
scan_key(data)243 scan_key(data)
244 FIELD_PTR data;
245 {
246 int i = 0; /* current level */
247 int n = 0; /* index to the key[] array */
248 int second_round = FALSE;
249 int last = FIELD_MAX - 1;
250
251 while (TRUE) {
252 if (key[n] == NUL)
253 break;
254 if (key[n] == idx_encap)
255 {
256 n++;
257 make_string(&(data->encap),strlen(key) + 1);
258 if (scan_field(&n, data->encap, strlen(key), FALSE, FALSE, FALSE))
259 break;
260 else
261 return (FALSE);
262 }
263 if (key[n] == idx_actual) {
264 n++;
265 if (i == last)
266 {
267 make_string(&(data->af[i]),strlen(key) + 1);
268 if (!scan_field(&n, data->af[i], strlen(key),
269 FALSE, TRUE, FALSE))
270 return (FALSE);
271 }
272 else
273 {
274 make_string(&(data->af[i]),strlen(key) + 1);
275 if (!scan_field(&n, data->af[i], strlen(key),
276 TRUE, TRUE, FALSE))
277 return (FALSE);
278 }
279 } else {
280 /* Next nesting level */
281 if (second_round) {
282 i++;
283 n++;
284 }
285 if (i == last)
286 {
287 make_string(&(data->sf[i]),strlen(key) + 1);
288 if (!scan_field(&n, data->sf[i], strlen(key),
289 FALSE, TRUE, TRUE))
290 return (FALSE);
291 }
292 else
293 {
294 make_string(&(data->sf[i]),strlen(key) + 1);
295 if (!scan_field(&n, data->sf[i], strlen(key),
296 TRUE, TRUE, TRUE))
297 return (FALSE);
298 }
299 second_round = TRUE;
300 if (german_sort && strchr(data->sf[i], '"'))
301 {
302 make_string(&(data->af[i]),strlen(data->sf[i]) + 1);
303 search_quote(data->sf[i], data->af[i]);
304 }
305 }
306 }
307
308 /* check for empty fields which shouldn't be empty */
309 if (*data->sf[0] == NUL) {
310 NULL_RTN;
311 }
312 for (i = 1; i < FIELD_MAX - 1; i++)
313 if ((*data->sf[i] == NUL) &&
314 ((*data->af[i] != NUL) || (*data->sf[i + 1] != NUL))) {
315 NULL_RTN;
316 }
317 /* i == FIELD_MAX-1 */
318 if ((*data->sf[i] == NUL) && (*data->af[i] != NUL)) {
319 NULL_RTN;
320 }
321 return (TRUE);
322 }
323
324 static int
scan_field(n,field,len_field,ck_level,ck_encap,ck_actual)325 scan_field(n, field, len_field, ck_level, ck_encap, ck_actual)
326 int *n;
327 char field[];
328 int len_field; /* length of field[], EXCLUDING space for final NUL */
329 int ck_level;
330 int ck_encap;
331 int ck_actual;
332 {
333 int i = 0;
334 int nbsh; /* backslash count */
335
336 if (compress_blanks && ((key[*n] == SPC) || (key[*n] == TAB)))
337 ++* n;
338
339 while (TRUE) {
340 nbsh = 0;
341 while (key[*n] == idx_escape)
342 {
343 nbsh++;
344 field[i++] = key[*n];
345 CHECK_LENGTH();
346 ++*n;
347 }
348
349 if (key[*n] == idx_quote)
350 {
351 if (nbsh % 2 == 0)
352 field[i++] = key[++*n];
353 else
354 field[i++] = key[*n];
355 CHECK_LENGTH();
356 }
357 else if ((ck_level && (key[*n] == idx_level)) ||
358 (ck_encap && (key[*n] == idx_encap)) ||
359 (ck_actual && (key[*n] == idx_actual)) ||
360 (key[*n] == NUL))
361 {
362 if ((i > 0) && compress_blanks && (field[i - 1] == SPC))
363 field[i - 1] = NUL;
364 else
365 field[i] = NUL;
366 return (TRUE);
367 } else {
368 field[i++] = key[*n];
369 CHECK_LENGTH();
370 if ((!ck_level) && (key[*n] == idx_level)) {
371 IDX_ERROR2("Extra `%c' at position %d of first argument.\n",
372 idx_level, *n + 1);
373 return (FALSE);
374 } else if ((!ck_encap) && (key[*n] == idx_encap)) {
375 IDX_ERROR2("Extra `%c' at position %d of first argument.\n",
376 idx_encap, *n + 1);
377 return (FALSE);
378 } else if ((!ck_actual) && (key[*n] == idx_actual)) {
379 IDX_ERROR2("Extra `%c' at position %d of first argument.\n",
380 idx_actual, *n + 1);
381 return (FALSE);
382 }
383 }
384 /* check if max field length is reached */
385 if (i > len_field)
386 {
387 OVERFLOW:
388 if (!ck_encap) {
389 IDX_ERROR("Encapsulator of page number too long (max. %d).\n",
390 len_field);
391 } else if (ck_actual) {
392 IDX_ERROR("Index sort key too long (max. %d).\n", len_field);
393 } else {
394 IDX_ERROR("Text of key entry too long (max. %d).\n", len_field);
395 }
396 return (FALSE);
397 }
398 ++*n;
399 }
400 #if IBM_PC_TURBO
401 return (FALSE); /* not reached, but keeps compiler happy */
402 #endif
403 }
404
405 int
group_type(str)406 group_type(str)
407 char *str;
408 {
409 int i = 0;
410
411 while ((str[i] != NUL) && ISDIGIT(str[i]))
412 i++;
413
414 if (str[i] == NUL) {
415 sscanf(str, "%d", &i);
416 return (i);
417 } else if (ISSYMBOL(str[0]))
418 return (SYMBOL);
419 else
420 return (ALPHA);
421 }
422
423 static int
scan_no(no,npg,count,type)424 scan_no(no, npg, count, type)
425 char no[];
426 short npg[];
427 short *count;
428 short *type;
429 {
430 int i = 1;
431
432 if (isdigit(no[0])) {
433 *type = ARAB;
434 if (!scan_arabic(no, npg, count))
435 return (FALSE);
436 /* simple heuristic to determine if a letter is Roman or Alpha */
437 } else if (IS_ROMAN_LOWER(no[0]) && (!IS_COMPOSITOR)) {
438 *type = ROML;
439 if (!scan_roman_lower(no, npg, count))
440 return (FALSE);
441 /* simple heuristic to determine if a letter is Roman or Alpha */
442 } else if (IS_ROMAN_UPPER(no[0]) &&
443 ((no[0] == ROMAN_I) || (!IS_COMPOSITOR))) {
444 *type = ROMU;
445 if (!scan_roman_upper(no, npg, count))
446 return (FALSE);
447 } else if (IS_ALPHA_LOWER(no[0])) {
448 *type = ALPL;
449 if (!scan_alpha_lower(no, npg, count))
450 return (FALSE);
451 } else if (IS_ALPHA_UPPER(no[0])) {
452 *type = ALPU;
453 if (!scan_alpha_upper(no, npg, count))
454 return (FALSE);
455 } else {
456 IDX_ERROR("Illegal page number %s.\n", no);
457 return (FALSE);
458 }
459 return (TRUE);
460 }
461
462
463 static int
scan_arabic(no,npg,count)464 scan_arabic(no, npg, count)
465 char no[];
466 short npg[];
467 short *count;
468 {
469 short i = 0;
470 char str[ARABIC_MAX+1]; /* space for trailing NUL */
471
472 while ((no[i] != NUL) && (i <= ARABIC_MAX) && (!IS_COMPOSITOR)) {
473 if (isdigit(no[i])) {
474 str[i] = no[i];
475 i++;
476 } else {
477 IDX_ERROR2("Illegal Arabic digit: position %d in %s.\n", i + 1, no);
478 return (FALSE);
479 }
480 }
481 if (i > ARABIC_MAX) {
482 IDX_ERROR2("Arabic page number %s too big (max %d digits).\n",
483 no, ARABIC_MAX);
484 return (FALSE);
485 }
486 str[i] = NUL;
487
488 ENTER(strtoint(str) + page_offset[ARAB]);
489
490 if (IS_COMPOSITOR)
491 return (scan_no(&no[i + comp_len], npg, count, &i));
492 else
493 return (TRUE);
494 }
495
496
497 static int
scan_roman_lower(no,npg,count)498 scan_roman_lower(no, npg, count)
499 char no[];
500 short npg[];
501 short *count;
502 {
503 short i = 0;
504 int inp = 0;
505 int prev = 0;
506 int new;
507
508 while ((no[i] != NUL) && (i < ROMAN_MAX) && (!IS_COMPOSITOR)) {
509 if ((IS_ROMAN_LOWER(no[i])) &&
510 ((new = ROMAN_LOWER_VAL(no[i])) != 0)) {
511 if (prev == 0)
512 prev = new;
513 else {
514 if (prev < new) {
515 prev = new - prev;
516 new = 0;
517 }
518 inp += prev;
519 prev = new;
520 }
521 } else {
522 IDX_ERROR2("Illegal Roman number: position %d in %s.\n", i + 1, no);
523 return (FALSE);
524 }
525 i++;
526 }
527 if (i == ROMAN_MAX) {
528 IDX_ERROR2("Roman page number %s too big (max %d digits).\n",
529 no, ROMAN_MAX);
530 return (FALSE);
531 }
532 inp += prev;
533
534 ENTER(inp + page_offset[ROML]);
535
536 if (IS_COMPOSITOR)
537 return (scan_no(&no[i + comp_len], npg, count, &i));
538 else
539 return (TRUE);
540 }
541
542
543 static int
scan_roman_upper(no,npg,count)544 scan_roman_upper(no, npg, count)
545 char no[];
546 short npg[];
547 short *count;
548 {
549 short i = 0;
550 int inp = 0;
551 int prev = 0;
552
553 int new;
554
555 while ((no[i] != NUL) && (i < ROMAN_MAX) && (!IS_COMPOSITOR)) {
556 if ((IS_ROMAN_UPPER(no[i])) &&
557 ((new = ROMAN_UPPER_VAL(no[i])) != 0)) {
558 if (prev == 0)
559 prev = new;
560 else {
561 if (prev < new) {
562 prev = new - prev;
563 new = 0;
564 }
565 inp += prev;
566 prev = new;
567 }
568 } else {
569 IDX_ERROR2("Illegal Roman number: position %d in %s.\n", i + 1, no);
570 return (FALSE);
571 }
572 i++;
573 }
574 if (i == ROMAN_MAX) {
575 IDX_ERROR2("Roman page number %s too big (max %d digits).\n",
576 no, ROMAN_MAX);
577 return (FALSE);
578 }
579 inp += prev;
580
581 ENTER(inp + page_offset[ROMU]);
582
583 if (IS_COMPOSITOR)
584 return (scan_no(&no[i + comp_len], npg, count, &i));
585 else
586 return (TRUE);
587 }
588
589
590 static int
scan_alpha_lower(no,npg,count)591 scan_alpha_lower(no, npg, count)
592 char no[];
593 short npg[];
594 short *count;
595 {
596 short i;
597
598 ENTER(ALPHA_VAL(no[0]) + page_offset[ALPL]);
599
600 i = 1;
601 if (IS_COMPOSITOR)
602 return (scan_no(&no[comp_len + 1], npg, count, &i));
603 else
604 return (TRUE);
605 }
606
607
608 static int
scan_alpha_upper(no,npg,count)609 scan_alpha_upper(no, npg, count)
610 char no[];
611 short npg[];
612 short *count;
613 {
614 short i;
615
616 ENTER(ALPHA_VAL(no[0]) + page_offset[ALPU]);
617
618 i = 1;
619 if (IS_COMPOSITOR)
620 return (scan_no(&no[comp_len + 1], npg, count, &i));
621 else
622 return (TRUE);
623 }
624
625
626 static int
scan_arg1()627 scan_arg1()
628 {
629 int i = 0;
630 int n = 0; /* delimiter count */
631 int a;
632
633 if (compress_blanks)
634 while (((a = GET_CHAR(idx_fp)) == SPC) || (a == TAB));
635 else
636 a = GET_CHAR(idx_fp);
637
638 while ((i < ARGUMENT_MAX) && (a != EOF))
639 {
640 if ((a == idx_quote) || (a == idx_escape))
641 { /* take next character literally */
642 key[i++] = (char) a; /* but preserve quote or escape */
643 a = GET_CHAR(idx_fp);
644 key[i++] = (char) a; /* save literal character */
645 }
646 else if (a == idx_aopen)
647 { /* opening delimiters within the argument list */
648 key[i++] = (char) a;
649 n++;
650 }
651 else if (a == idx_aclose)
652 {
653 if (n == 0) /* end of argument */
654 {
655 if (compress_blanks && key[i - 1] == SPC)
656 key[i - 1] = NUL;
657 else
658 key[i] = NUL;
659 return (TRUE);
660 }
661 else /* nested delimiters */
662 {
663 key[i++] = (char) a;
664 n--;
665 }
666 }
667 else
668 {
669 switch (a)
670 {
671 case LFD:
672 idx_lc++;
673 IDX_ERROR("Incomplete first argument (premature LFD).\n", "");
674 return (FALSE);
675 case TAB:
676 case SPC:
677 /* compress successive SPC's to one SPC */
678 if (compress_blanks)
679 {
680 if ((i > 0) && (key[i - 1] != SPC) && (key[i - 1] != TAB))
681 key[i++] = SPC;
682 break;
683 }
684 default:
685 key[i++] = (char) a;
686 break;
687 }
688 }
689 a = GET_CHAR(idx_fp);
690 }
691
692 flush_to_eol(); /* Skip to end of line */
693 idx_lc++;
694 IDX_ERROR("First argument too long (max %d).\n", ARGUMENT_MAX);
695 return (FALSE);
696 }
697
698
699 static int
scan_arg2()700 scan_arg2()
701 {
702 int i = 0;
703 int a;
704 int hit_blank = FALSE;
705
706 while (((a = GET_CHAR(idx_fp)) == SPC) || (a == TAB));
707
708 while (i < NUMBER_MAX) {
709 if (a == idx_aclose) {
710 no[i] = NUL;
711 return (TRUE);
712 } else
713 switch (a) {
714 case LFD:
715 idx_lc++;
716 IDX_ERROR("Incomplete second argument (premature LFD).\n", "");
717 return (FALSE);
718 case TAB:
719 case SPC:
720 hit_blank = TRUE;
721 break;
722 default:
723 if (hit_blank) {
724 flush_to_eol(); /* Skip to end of line */
725 idx_lc++;
726 IDX_ERROR("Illegal space within numerals in second argument.\n", "");
727 return (FALSE);
728 }
729 no[i++] = (char) a;
730 break;
731 }
732 a = GET_CHAR(idx_fp);
733 }
734 flush_to_eol(); /* Skip to end of line */
735 idx_lc++;
736 IDX_ERROR("Second argument too long (max %d).\n", NUMBER_MAX);
737 return (FALSE);
738 }
739
740
741 static void
search_quote(sort_key,actual_key)742 search_quote(sort_key, actual_key)
743 char sort_key[];
744 char actual_key[];
745 {
746 char *ptr; /* pointer to sort_key */
747 char *sort; /* contains sorting text */
748 int char_found = FALSE;
749
750 strcpy(actual_key, sort_key);
751 ptr = strchr(sort_key, '"'); /* look for first double quote */
752 while (ptr != (char*)NULL)
753 {
754 sort = "";
755 switch (*(ptr + 1))
756 { /* skip to umlaut or sharp S */
757 case 'a':
758 case 'A':
759 sort = isupper(*(ptr + 1)) ? "Ae" : "ae";
760 break;
761 case 'o':
762 case 'O':
763 sort = isupper(*(ptr + 1)) ? "Oe" : "oe";
764 break;
765 case 'u':
766 case 'U':
767 sort = isupper(*(ptr + 1)) ? "Ue" : "ue";
768 break;
769 case 's':
770 sort = "ss";
771 break;
772 default:
773 break;
774 }
775 if (sort[0] != NUL)
776 {
777 char_found = TRUE;
778 *ptr = sort[0]; /* write to sort_key */
779 *(ptr + 1) = sort[1];
780 }
781 ptr = strchr(ptr + 1, '"'); /* next double quote */
782 }
783 if (!char_found) /* reset actual_key ? */
784 actual_key[0] = NUL;
785 return;
786 }
787