1 // -*- C++ -*-
2 /* Copyright (C) 1989, 1990, 1991, 1992 Free Software Foundation, Inc.
3 Written by James Clark (jjc@jclark.com)
4
5 This file is part of groff.
6
7 groff is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
11
12 groff is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License along
18 with groff; see the file COPYING. If not, write to the Free Software
19 Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
20
21 #include "refer.h"
22 #include "refid.h"
23 #include "ref.h"
24 #include "token.h"
25 #include "search.h"
26 #include "command.h"
27
// Control characters embedded in pending_line (and in the diverted
// output) to mark where label text has to be substituted later by
// label_processing_state.
const char PRE_LABEL_MARKER = '\013';
const char POST_LABEL_MARKER = '\014';
const char LABEL_MARKER = '\015'; // label_type is added on

// Flag bits reported by make_reference(); do_file() masks them off before
// treating the remaining bits as a label_type.
#define FORCE_LEFT_BRACKET 04
#define FORCE_RIGHT_BRACKET 010

// Stream that processed text is written to: stdout, or the temporary
// file installed by divert_to_temporary_file().
static FILE *outfp = stdout;

// Settings.  Several of these are assigned directly by the option
// parsing in main().
string capitalize_fields;
string reverse_fields;
string abbreviate_fields;
string period_before_last_name = ". ";
string period_before_initial = ".";
string period_before_hyphen = "";
string period_before_other = ". ";
string sort_fields;
int annotation_field = -1;
string annotation_macro;
string discard_fields = "XYZ";
// Text substituted for PRE_LABEL_MARKER, for POST_LABEL_MARKER, and
// between adjacent labels respectively.
string pre_label = "\\*([.";
string post_label = "\\*(.]";
string sep_label = ", ";
// Non-zero when references are collected and emitted together by
// output_references() rather than immediately after each citation.
int accumulate = 0;
int move_punctuation = 0;
int abbreviate_label_ranges = 0;
string label_range_indicator;
int label_in_text = 1;
int label_in_reference = 1;
int date_as_label = 0;
int sort_adjacent_labels = 0;
// Join exactly two authors with this.
string join_authors_exactly_two = " and ";
// When there are more than two authors join the last two with this.
string join_authors_last_two = ", and ";
// Otherwise join authors with this.
string join_authors_default = ", ";
string separate_label_second_parts = ", ";
// Use this string to represent that there are other authors.
string et_al = " et al";
// Use et al only if it can replace at least this many authors.
int et_al_min_elide = 2;
// Use et al only if the total number of authors is at least this.
int et_al_min_total = 3;


// Set by the -C option; relaxes the test for what may follow the
// `.lf', `.R1' and `.R2' requests in do_file().
int compatible_flag = 0;

int short_label_flag = 0;

// Cleared by the -R option; when zero, `.R1' lines are not treated
// specially by do_file().
static int recognize_R1_R2 = 1;

// Databases that find_reference() searches.  The default database is
// added at most once, by possibly_load_default_database(), and only
// while search_default is non-zero.
search_list database_list;
int search_default = 1;
static int default_database_loaded = 0;

// Growable array of the references cited so far; see store_citation().
static reference **citation = 0;
static int ncitations = 0;
static int citation_max = 0;

// Open-addressing hash table of accumulated references; see
// store_reference().
static reference **reference_hash_table = 0;
static int hash_table_size;
static int nreferences = 0;

// Non-zero when output_pending_line() must emit a `.lf' line.
static int need_syncing = 0;
// The most recent ordinary input line, held back so that a citation can
// be attached to it, and the `.lf' lines seen since then.
string pending_line;
string pending_lf_lines;

static void output_pending_line();
static unsigned immediately_handle_reference(const string &);
static void immediately_output_references();
static unsigned store_reference(const string &);
static void divert_to_temporary_file();
static reference *make_reference(const string &, unsigned *);
static void usage();
static void do_file(const char *);
static void split_punct(string &line, string &punct);
static void output_citation_group(reference **v, int n, label_type, FILE *fp);
static void possibly_load_default_database();
107
main(int argc,char ** argv)108 int main(int argc, char **argv)
109 {
110 program_name = argv[0];
111 static char stderr_buf[BUFSIZ];
112 setbuf(stderr, stderr_buf);
113 outfp = stdout;
114 int finished_options = 0;
115 int bib_flag = 0;
116 int done_spec = 0;
117
118 for (--argc, ++argv;
119 !finished_options && argc > 0 && argv[0][0] == '-'
120 && argv[0][1] != '\0';
121 argv++, argc--) {
122 const char *opt = argv[0] + 1;
123 while (opt != 0 && *opt != '\0') {
124 switch (*opt) {
125 case 'C':
126 compatible_flag = 1;
127 opt++;
128 break;
129 case 'B':
130 bib_flag = 1;
131 label_in_reference = 0;
132 label_in_text = 0;
133 ++opt;
134 if (*opt == '\0') {
135 annotation_field = 'X';
136 annotation_macro = "AP";
137 }
138 else if (csalnum(opt[0]) && opt[1] == '.' && opt[2] != '\0') {
139 annotation_field = opt[0];
140 annotation_macro = opt + 2;
141 }
142 opt = 0;
143 break;
144 case 'P':
145 move_punctuation = 1;
146 opt++;
147 break;
148 case 'R':
149 recognize_R1_R2 = 0;
150 opt++;
151 break;
152 case 'S':
153 // Not a very useful spec.
154 set_label_spec("(A.n|Q)', '(D.y|D)");
155 done_spec = 1;
156 pre_label = " (";
157 post_label = ")";
158 sep_label = "; ";
159 opt++;
160 break;
161 case 'V':
162 verify_flag = 1;
163 opt++;
164 break;
165 case 'f':
166 {
167 const char *num = 0;
168 if (*++opt == '\0') {
169 if (argc > 1) {
170 num = *++argv;
171 --argc;
172 }
173 else {
174 error("option `f' requires an argument");
175 usage();
176 }
177 }
178 else {
179 num = opt;
180 opt = 0;
181 }
182 for (const char *ptr = num; *ptr; ptr++)
183 if (!csdigit(*ptr)) {
184 error("bad character `%1' in argument to -f option", *ptr);
185 break;
186 }
187 if (*ptr == '\0') {
188 string spec;
189 spec = '%';
190 spec += num;
191 spec += '\0';
192 set_label_spec(spec.contents());
193 done_spec = 1;
194 }
195 break;
196 }
197 case 'b':
198 label_in_text = 0;
199 label_in_reference = 0;
200 opt++;
201 break;
202 case 'e':
203 accumulate = 1;
204 opt++;
205 break;
206 case 'c':
207 capitalize_fields = ++opt;
208 opt = 0;
209 break;
210 case 'k':
211 {
212 char buf[5];
213 if (csalpha(*++opt))
214 buf[0] = *opt++;
215 else {
216 if (*opt != '\0')
217 error("bad field name `%1'", *opt++);
218 buf[0] = 'L';
219 }
220 buf[1] = '~';
221 buf[2] = '%';
222 buf[3] = 'a';
223 buf[4] = '\0';
224 set_label_spec(buf);
225 done_spec = 1;
226 }
227 break;
228 case 'a':
229 {
230 for (const char *ptr = ++opt; *ptr; ptr++)
231 if (!csdigit(*ptr)) {
232 error("argument to `a' option not a number");
233 break;
234 }
235 if (*ptr == '\0') {
236 reverse_fields = 'A';
237 reverse_fields += opt;
238 }
239 opt = 0;
240 }
241 break;
242 case 'i':
243 linear_ignore_fields = ++opt;
244 opt = 0;
245 break;
246 case 'l':
247 {
248 char buf[INT_DIGITS*2 + 11]; // A.n+2D.y-3%a
249 strcpy(buf, "A.n");
250 if (*++opt != '\0' && *opt != ',') {
251 char *ptr;
252 long n = strtol(opt, &ptr, 10);
253 if (n == 0 && ptr == opt) {
254 error("bad integer `%1' in `l' option", opt);
255 opt = 0;
256 break;
257 }
258 if (n < 0)
259 n = 0;
260 opt = ptr;
261 sprintf(strchr(buf, '\0'), "+%d", n);
262 }
263 strcat(buf, "D.y");
264 if (*opt == ',')
265 opt++;
266 if (*opt != '\0') {
267 char *ptr;
268 long n = strtol(opt, &ptr, 10);
269 if (n == 0 && ptr == opt) {
270 error("bad integer `%1' in `l' option", opt);
271 opt = 0;
272 break;
273 }
274 if (n < 0)
275 n = 0;
276 sprintf(strchr(buf, '\0'), "-%d", n);
277 opt = ptr;
278 if (*opt != '\0')
279 error("argument to `l' option not of form `m,n'");
280 }
281 strcat(buf, "%a");
282 if (!set_label_spec(buf))
283 assert(0);
284 done_spec = 1;
285 }
286 break;
287 case 'n':
288 search_default = 0;
289 opt++;
290 break;
291 case 'p':
292 {
293 const char *filename = 0;
294 if (*++opt == '\0') {
295 if (argc > 1) {
296 filename = *++argv;
297 argc--;
298 }
299 else {
300 error("option `p' requires an argument");
301 usage();
302 }
303 }
304 else {
305 filename = opt;
306 opt = 0;
307 }
308 database_list.add_file(filename);
309 }
310 break;
311 case 's':
312 if (*++opt == '\0')
313 sort_fields = "AD";
314 else {
315 sort_fields = opt;
316 opt = 0;
317 }
318 accumulate = 1;
319 break;
320 case 't':
321 {
322 char *ptr;
323 long n = strtol(opt, &ptr, 10);
324 if (n == 0 && ptr == opt) {
325 error("bad integer `%1' in `t' option", opt);
326 opt = 0;
327 break;
328 }
329 if (n < 1)
330 n = 1;
331 linear_truncate_len = int(n);
332 opt = ptr;
333 break;
334 }
335 case 'v':
336 {
337 extern const char *version_string;
338 fprintf(stderr, "GNU refer version %s\n", version_string);
339 fflush(stderr);
340 opt++;
341 break;
342 }
343 case '-':
344 if (opt[1] == '\0') {
345 finished_options = 1;
346 opt++;
347 break;
348 }
349 // fall through
350 default:
351 error("unrecognized option `%1'", *opt);
352 usage();
353 break;
354 }
355 }
356 }
357 if (!done_spec)
358 set_label_spec("%1");
359 if (argc <= 0) {
360 if (bib_flag)
361 do_bib("-");
362 else
363 do_file("-");
364 }
365 else {
366 for (int i = 0; i < argc; i++) {
367 if (bib_flag)
368 do_bib(argv[i]);
369 else
370 do_file(argv[i]);
371 }
372 }
373 if (accumulate)
374 output_references();
375 if (fflush(stdout) < 0)
376 fatal("output error");
377 exit(0);
378 }
379
usage()380 static void usage()
381 {
382 fprintf(stderr,
383 "usage: %s [-benvCPRS] [-aN] [-cXYZ] [-fN] [-iXYZ] [-kX] [-lM,N] [-p file]\n"
384 " [-sXYZ] [-tN] [-BL.M] [files ...]\n",
385 program_name);
386 exit(1);
387 }
388
possibly_load_default_database()389 static void possibly_load_default_database()
390 {
391 if (search_default && !default_database_loaded) {
392 char *filename = getenv("REFER");
393 if (filename)
394 database_list.add_file(filename);
395 else
396 database_list.add_file(DEFAULT_INDEX, 1);
397 default_database_loaded = 1;
398 }
399 }
400
is_list(const string & str)401 static int is_list(const string &str)
402 {
403 const char *start = str.contents();
404 const char *end = start + str.length();
405 while (end > start && csspace(end[-1]))
406 end--;
407 while (start < end && csspace(*start))
408 start++;
409 return end - start == 6 && memcmp(start, "$LIST$", 6) == 0;
410 }
411
do_file(const char * filename)412 static void do_file(const char *filename)
413 {
414 FILE *fp;
415 if (strcmp(filename, "-") == 0) {
416 fp = stdin;
417 }
418 else {
419 errno = 0;
420 fp = fopen(filename, "r");
421 if (fp == 0) {
422 error("can't open `%1': %2", filename, strerror(errno));
423 return;
424 }
425 current_filename = filename;
426 }
427 fprintf(outfp, ".lf 1 %s\n", filename);
428 string line;
429 current_lineno = 0;
430 for (;;) {
431 line.clear();
432 for (;;) {
433 int c = getc(fp);
434 if (c == EOF) {
435 if (line.length() > 0)
436 line += '\n';
437 break;
438 }
439 if (illegal_input_char(c))
440 error("illegal input character code %1", c);
441 else {
442 line += c;
443 if (c == '\n')
444 break;
445 }
446 }
447 int len = line.length();
448 if (len == 0)
449 break;
450 current_lineno++;
451 if (len >= 2 && line[0] == '.' && line[1] == '[') {
452 int start_lineno = current_lineno;
453 int start_of_line = 1;
454 string str;
455 string post;
456 string pre(line.contents() + 2, line.length() - 3);
457 for (;;) {
458 int c = getc(fp);
459 if (c == EOF) {
460 error_with_file_and_line(current_filename, start_lineno,
461 "missing `.]' line");
462 break;
463 }
464 if (start_of_line)
465 current_lineno++;
466 if (start_of_line && c == '.') {
467 int d = getc(fp);
468 if (d == ']') {
469 while ((d = getc(fp)) != '\n' && d != EOF) {
470 if (illegal_input_char(d))
471 error("illegal input character code %1", d);
472 else
473 post += d;
474 }
475 break;
476 }
477 if (d != EOF)
478 ungetc(d, fp);
479 }
480 if (illegal_input_char(c))
481 error("illegal input character code %1", c);
482 else
483 str += c;
484 start_of_line = (c == '\n');
485 }
486 if (is_list(str)) {
487 output_pending_line();
488 if (accumulate)
489 output_references();
490 else
491 error("found `$LIST$' but not accumulating references");
492 }
493 else {
494 unsigned flags = (accumulate
495 ? store_reference(str)
496 : immediately_handle_reference(str));
497 if (label_in_text) {
498 if (accumulate && outfp == stdout)
499 divert_to_temporary_file();
500 if (pending_line.length() == 0) {
501 warning("can't attach citation to previous line");
502 }
503 else
504 pending_line.set_length(pending_line.length() - 1);
505 string punct;
506 if (move_punctuation)
507 split_punct(pending_line, punct);
508 int have_text = pre.length() > 0 || post.length() > 0;
509 label_type lt = label_type(flags & ~(FORCE_LEFT_BRACKET
510 |FORCE_RIGHT_BRACKET));
511 if ((flags & FORCE_LEFT_BRACKET) || !have_text)
512 pending_line += PRE_LABEL_MARKER;
513 pending_line += pre;
514 pending_line += LABEL_MARKER + lt;
515 pending_line += post;
516 if ((flags & FORCE_RIGHT_BRACKET) || !have_text)
517 pending_line += POST_LABEL_MARKER;
518 pending_line += punct;
519 pending_line += '\n';
520 }
521 }
522 need_syncing = 1;
523 }
524 else if (len >= 4
525 && line[0] == '.' && line[1] == 'l' && line[2] == 'f'
526 && (compatible_flag || line[3] == '\n' || line[3] == ' ')) {
527 pending_lf_lines += line;
528 line += '\0';
529 if (interpret_lf_args(line.contents() + 3))
530 current_lineno--;
531 }
532 else if (recognize_R1_R2
533 && len >= 4
534 && line[0] == '.' && line[1] == 'R' && line[2] == '1'
535 && (compatible_flag || line[3] == '\n' || line[3] == ' ')) {
536 line.clear();
537 int start_of_line = 1;
538 int start_lineno = current_lineno;
539 for (;;) {
540 int c = getc(fp);
541 if (c != EOF && start_of_line)
542 current_lineno++;
543 if (start_of_line && c == '.') {
544 c = getc(fp);
545 if (c == 'R') {
546 c = getc(fp);
547 if (c == '2') {
548 c = getc(fp);
549 if (compatible_flag || c == ' ' || c == '\n' || c == EOF) {
550 while (c != EOF && c != '\n')
551 c = getc(fp);
552 break;
553 }
554 else {
555 line += '.';
556 line += 'R';
557 line += '2';
558 }
559 }
560 else {
561 line += '.';
562 line += 'R';
563 }
564 }
565 else
566 line += '.';
567 }
568 if (c == EOF) {
569 error_with_file_and_line(current_filename, start_lineno,
570 "missing `.R2' line");
571 break;
572 }
573 if (illegal_input_char(c))
574 error("illegal input character code %1", int(c));
575 else {
576 line += c;
577 start_of_line = c == '\n';
578 }
579 }
580 output_pending_line();
581 if (accumulate)
582 output_references();
583 else
584 nreferences = 0;
585 process_commands(line, current_filename, start_lineno + 1);
586 need_syncing = 1;
587 }
588 else {
589 output_pending_line();
590 pending_line = line;
591 }
592 }
593 need_syncing = 0;
594 output_pending_line();
595 if (fp != stdin)
596 fclose(fp);
597 }
598
// State machine that copies characters to a stream, replacing the label
// marker characters with label text.  Adjacent citations of the same
// label type are collected and emitted together through
// output_citation_group().
class label_processing_state {
  enum {
    NORMAL,                 // nothing pending
    PENDING_LABEL,          // a label marker has been seen
    PENDING_LABEL_POST,     // ... followed by POST_LABEL_MARKER
    PENDING_LABEL_POST_PRE, // ... followed by PRE_LABEL_MARKER
    PENDING_POST            // POST_LABEL_MARKER seen with no label pending
  } state;
  label_type type;		// type of pending labels
  int count;			// number of pending labels
  reference **rptr;		// pointer to next reference
  int rcount;			// number of references left
  FILE *fp;                     // stream the processed text is written to
  // Resolve the pending state against character c; returns non-zero if c
  // was consumed.
  int handle_pending(int c);
public:
  label_processing_state(reference **, int, FILE *);
  // Flushes whatever is still pending and asserts that every reference
  // was used.
  ~label_processing_state();
  // Feed the next character of text.
  void process(int c);
};
618
output_pending_line()619 static void output_pending_line()
620 {
621 if (label_in_text && !accumulate && ncitations > 0) {
622 label_processing_state state(citation, ncitations, outfp);
623 int len = pending_line.length();
624 for (int i = 0; i < len; i++)
625 state.process((unsigned char)(pending_line[i]));
626 }
627 else
628 put_string(pending_line, outfp);
629 pending_line.clear();
630 if (pending_lf_lines.length() > 0) {
631 put_string(pending_lf_lines, outfp);
632 pending_lf_lines.clear();
633 }
634 if (!accumulate)
635 immediately_output_references();
636 if (need_syncing) {
637 fprintf(outfp, ".lf %d %s\n", current_lineno, current_filename);
638 need_syncing = 0;
639 }
640 }
641
split_punct(string & line,string & punct)642 static void split_punct(string &line, string &punct)
643 {
644 const char *start = line.contents();
645 const char *end = start + line.length();
646 const char *ptr = start;
647 const char *last_token_start = 0;
648 for (;;) {
649 if (ptr >= end)
650 break;
651 last_token_start = ptr;
652 if (*ptr == PRE_LABEL_MARKER || *ptr == POST_LABEL_MARKER
653 || (*ptr >= LABEL_MARKER && *ptr < LABEL_MARKER + N_LABEL_TYPES))
654 ptr++;
655 else if (!get_token(&ptr, end))
656 break;
657 }
658 if (last_token_start) {
659 const token_info *ti = lookup_token(last_token_start, end);
660 if (ti->is_punct()) {
661 punct.append(last_token_start, end - last_token_start);
662 line.set_length(last_token_start - start);
663 }
664 }
665 }
666
// Redirect subsequent output to the stream returned by xtmpfile().
// output_references() later reads it back and copies it to stdout.
static void divert_to_temporary_file()
{
  outfp = xtmpfile();
}
671
store_citation(reference * ref)672 static void store_citation(reference *ref)
673 {
674 if (ncitations >= citation_max) {
675 if (citation == 0)
676 citation = new reference*[citation_max = 100];
677 else {
678 reference **old_citation = citation;
679 citation_max *= 2;
680 citation = new reference *[citation_max];
681 memcpy(citation, old_citation, ncitations*sizeof(reference *));
682 a_delete old_citation;
683 }
684 }
685 citation[ncitations++] = ref;
686 }
687
store_reference(const string & str)688 static unsigned store_reference(const string &str)
689 {
690 if (reference_hash_table == 0) {
691 reference_hash_table = new reference *[17];
692 hash_table_size = 17;
693 for (int i = 0; i < hash_table_size; i++)
694 reference_hash_table[i] = 0;
695 }
696 unsigned flags;
697 reference *ref = make_reference(str, &flags);
698 ref->compute_hash_code();
699 unsigned h = ref->hash();
700 for (reference **ptr = reference_hash_table + (h % hash_table_size);
701 *ptr != 0;
702 ((ptr == reference_hash_table)
703 ? (ptr = reference_hash_table + hash_table_size - 1)
704 : --ptr))
705 if (same_reference(**ptr, *ref))
706 break;
707 if (*ptr != 0) {
708 if (ref->is_merged())
709 warning("fields ignored because reference already used");
710 delete ref;
711 ref = *ptr;
712 }
713 else {
714 *ptr = ref;
715 ref->set_number(nreferences);
716 nreferences++;
717 ref->pre_compute_label();
718 ref->compute_sort_key();
719 if (nreferences*2 >= hash_table_size) {
720 // Rehash it.
721 reference **old_table = reference_hash_table;
722 int old_size = hash_table_size;
723 hash_table_size = next_size(hash_table_size);
724 reference_hash_table = new reference*[hash_table_size];
725 int i;
726 for (i = 0; i < hash_table_size; i++)
727 reference_hash_table[i] = 0;
728 for (i = 0; i < old_size; i++)
729 if (old_table[i]) {
730 for (reference **p = (reference_hash_table
731 + (old_table[i]->hash() % hash_table_size));
732 *p;
733 ((p == reference_hash_table)
734 ? (p = reference_hash_table + hash_table_size - 1)
735 : --p))
736 ;
737 *p = old_table[i];
738 }
739 a_delete old_table;
740 }
741 }
742 if (label_in_text)
743 store_citation(ref);
744 return flags;
745 }
746
immediately_handle_reference(const string & str)747 unsigned immediately_handle_reference(const string &str)
748 {
749 unsigned flags;
750 reference *ref = make_reference(str, &flags);
751 ref->set_number(nreferences);
752 if (label_in_text || label_in_reference) {
753 ref->pre_compute_label();
754 ref->immediate_compute_label();
755 }
756 nreferences++;
757 store_citation(ref);
758 return flags;
759 }
760
immediately_output_references()761 static void immediately_output_references()
762 {
763 for (int i = 0; i < ncitations; i++) {
764 reference *ref = citation[i];
765 if (label_in_reference) {
766 fputs(".ds [F ", outfp);
767 const string &label = ref->get_label(NORMAL_LABEL);
768 if (label.length() > 0
769 && (label[0] == ' ' || label[0] == '\\' || label[0] == '"'))
770 putc('"', outfp);
771 put_string(label, outfp);
772 putc('\n', outfp);
773 }
774 ref->output(outfp);
775 delete ref;
776 }
777 ncitations = 0;
778 }
779
output_citation_group(reference ** v,int n,label_type type,FILE * fp)780 static void output_citation_group(reference **v, int n, label_type type,
781 FILE *fp)
782 {
783 if (sort_adjacent_labels) {
784 // Do an insertion sort. Usually n will be very small.
785 for (int i = 1; i < n; i++) {
786 int num = v[i]->get_number();
787 reference *temp = v[i];
788 for (int j = i - 1; j >= 0 && v[j]->get_number() > num; j--)
789 v[j + 1] = v[j];
790 v[j + 1] = temp;
791 }
792 }
793 // This messes up if !accumulate.
794 if (accumulate && n > 1) {
795 // remove duplicates
796 int j = 1;
797 for (int i = 1; i < n; i++)
798 if (v[i]->get_label(type) != v[i - 1]->get_label(type))
799 v[j++] = v[i];
800 n = j;
801 }
802 string merged_label;
803 for (int i = 0; i < n; i++) {
804 int nmerged = v[i]->merge_labels(v + i + 1, n - i - 1, type, merged_label);
805 if (nmerged > 0) {
806 put_string(merged_label, fp);
807 i += nmerged;
808 }
809 else
810 put_string(v[i]->get_label(type), fp);
811 if (i < n - 1)
812 put_string(sep_label, fp);
813 }
814 }
815
816
// Start in the NORMAL state with no pending labels.  p points to the n
// references that the label markers in the text will consume in order;
// f is the stream the processed text is written to.  The `type' member
// is left unset here; process() assigns it when a label marker arrives.
label_processing_state::label_processing_state(reference **p, int n, FILE *f)
: state(NORMAL), count(0), rptr(p), rcount(n), fp(f)
{
}
821
// Flush whatever is still pending by feeding EOF, which matches none of
// the marker characters, and check that all references were consumed.
label_processing_state::~label_processing_state()
{
  // The call is kept outside assert() so that it is not compiled away
  // together with the assertion.
  int handled = handle_pending(EOF);
  assert(!handled);
  assert(rcount == 0);
}
828
handle_pending(int c)829 int label_processing_state::handle_pending(int c)
830 {
831 switch (state) {
832 case NORMAL:
833 break;
834 case PENDING_LABEL:
835 if (c == POST_LABEL_MARKER) {
836 state = PENDING_LABEL_POST;
837 return 1;
838 }
839 else {
840 output_citation_group(rptr, count, type, fp);
841 rptr += count ;
842 rcount -= count;
843 state = NORMAL;
844 }
845 break;
846 case PENDING_LABEL_POST:
847 if (c == PRE_LABEL_MARKER) {
848 state = PENDING_LABEL_POST_PRE;
849 return 1;
850 }
851 else {
852 output_citation_group(rptr, count, type, fp);
853 rptr += count;
854 rcount -= count;
855 put_string(post_label, fp);
856 state = NORMAL;
857 }
858 break;
859 case PENDING_LABEL_POST_PRE:
860 if (c >= LABEL_MARKER
861 && c < LABEL_MARKER + N_LABEL_TYPES
862 && c - LABEL_MARKER == type) {
863 count += 1;
864 state = PENDING_LABEL;
865 return 1;
866 }
867 else {
868 output_citation_group(rptr, count, type, fp);
869 rptr += count;
870 rcount -= count;
871 put_string(sep_label, fp);
872 state = NORMAL;
873 }
874 break;
875 case PENDING_POST:
876 if (c == PRE_LABEL_MARKER) {
877 put_string(sep_label, fp);
878 state = NORMAL;
879 return 1;
880 }
881 else {
882 put_string(post_label, fp);
883 state = NORMAL;
884 }
885 break;
886 }
887 return 0;
888 }
889
process(int c)890 void label_processing_state::process(int c)
891 {
892 if (handle_pending(c))
893 return;
894 assert(state == NORMAL);
895 switch (c) {
896 case PRE_LABEL_MARKER:
897 put_string(pre_label, fp);
898 state = NORMAL;
899 break;
900 case POST_LABEL_MARKER:
901 state = PENDING_POST;
902 break;
903 case LABEL_MARKER:
904 case LABEL_MARKER + 1:
905 count = 1;
906 state = PENDING_LABEL;
907 type = label_type(c - LABEL_MARKER);
908 break;
909 default:
910 state = NORMAL;
911 putc(c, fp);
912 break;
913 }
914 }
915
916 extern "C" {
917
rcompare(const void * p1,const void * p2)918 static int rcompare(const void *p1, const void *p2)
919 {
920 return compare_reference(**(reference **)p1, **(reference **)p2);
921 }
922
923 }
924
output_references()925 void output_references()
926 {
927 assert(accumulate);
928 if (nreferences > 0) {
929 int j = 0;
930 int i;
931 for (i = 0; i < hash_table_size; i++)
932 if (reference_hash_table[i] != 0)
933 reference_hash_table[j++] = reference_hash_table[i];
934 assert(j == nreferences);
935 for (; j < hash_table_size; j++)
936 reference_hash_table[j] = 0;
937 qsort(reference_hash_table, nreferences, sizeof(reference*), rcompare);
938 for (i = 0; i < nreferences; i++)
939 reference_hash_table[i]->set_number(i);
940 compute_labels(reference_hash_table, nreferences);
941 }
942 if (outfp != stdout) {
943 rewind(outfp);
944 {
945 label_processing_state state(citation, ncitations, stdout);
946 int c;
947 while ((c = getc(outfp)) != EOF)
948 state.process(c);
949 }
950 ncitations = 0;
951 fclose(outfp);
952 outfp = stdout;
953 }
954 if (nreferences > 0) {
955 fputs(".]<\n", outfp);
956 for (int i = 0; i < nreferences; i++) {
957 if (sort_fields.length() > 0)
958 reference_hash_table[i]->print_sort_key_comment(outfp);
959 if (label_in_reference) {
960 fputs(".ds [F ", outfp);
961 const string &label = reference_hash_table[i]->get_label(NORMAL_LABEL);
962 if (label.length() > 0
963 && (label[0] == ' ' || label[0] == '\\' || label[0] == '"'))
964 putc('"', outfp);
965 put_string(label, outfp);
966 putc('\n', outfp);
967 }
968 reference_hash_table[i]->output(outfp);
969 delete reference_hash_table[i];
970 reference_hash_table[i] = 0;
971 }
972 fputs(".]>\n", outfp);
973 nreferences = 0;
974 }
975 clear_labels();
976 }
977
find_reference(const char * query,int query_len)978 static reference *find_reference(const char *query, int query_len)
979 {
980 // This is so that error messages look better.
981 while (query_len > 0 && csspace(query[query_len - 1]))
982 query_len--;
983 string str;
984 for (int i = 0; i < query_len; i++)
985 str += query[i] == '\n' ? ' ' : query[i];
986 str += '\0';
987 possibly_load_default_database();
988 search_list_iterator iter(&database_list, str.contents());
989 reference_id rid;
990 const char *start;
991 int len;
992 if (!iter.next(&start, &len, &rid)) {
993 error("no matches for `%1'", str.contents());
994 return 0;
995 }
996 const char *end = start + len;
997 while (start < end) {
998 if (*start == '%')
999 break;
1000 while (start < end && *start++ != '\n')
1001 ;
1002 }
1003 if (start >= end) {
1004 error("found a reference for `%1' but it didn't contain any fields",
1005 str.contents());
1006 return 0;
1007 }
1008 reference *result = new reference(start, end - start, &rid);
1009 if (iter.next(&start, &len, &rid))
1010 warning("multiple matches for `%1'", str.contents());
1011 return result;
1012 }
1013
make_reference(const string & str,unsigned * flagsp)1014 static reference *make_reference(const string &str, unsigned *flagsp)
1015 {
1016 const char *start = str.contents();
1017 const char *end = start + str.length();
1018 const char *ptr = start;
1019 while (ptr < end) {
1020 if (*ptr == '%')
1021 break;
1022 while (ptr < end && *ptr++ != '\n')
1023 ;
1024 }
1025 *flagsp = 0;
1026 for (; start < ptr; start++) {
1027 if (*start == '#')
1028 *flagsp = (SHORT_LABEL | (*flagsp & (FORCE_RIGHT_BRACKET
1029 | FORCE_LEFT_BRACKET)));
1030 else if (*start == '[')
1031 *flagsp |= FORCE_LEFT_BRACKET;
1032 else if (*start == ']')
1033 *flagsp |= FORCE_RIGHT_BRACKET;
1034 else if (!csspace(*start))
1035 break;
1036 }
1037 if (start >= end) {
1038 error("empty reference");
1039 return new reference;
1040 }
1041 reference *database_ref = 0;
1042 if (start < ptr)
1043 database_ref = find_reference(start, ptr - start);
1044 reference *inline_ref = 0;
1045 if (ptr < end)
1046 inline_ref = new reference(ptr, end - ptr);
1047 if (inline_ref) {
1048 if (database_ref) {
1049 database_ref->merge(*inline_ref);
1050 delete inline_ref;
1051 return database_ref;
1052 }
1053 else
1054 return inline_ref;
1055 }
1056 else if (database_ref)
1057 return database_ref;
1058 else
1059 return new reference;
1060 }
1061
do_ref(const string & str)1062 static void do_ref(const string &str)
1063 {
1064 if (accumulate)
1065 (void)store_reference(str);
1066 else {
1067 (void)immediately_handle_reference(str);
1068 immediately_output_references();
1069 }
1070 }
1071
trim_blanks(string & str)1072 static void trim_blanks(string &str)
1073 {
1074 const char *start = str.contents();
1075 const char *end = start + str.length();
1076 while (end > start && end[-1] != '\n' && csspace(end[-1]))
1077 --end;
1078 str.set_length(end - start);
1079 }
1080
do_bib(const char * filename)1081 void do_bib(const char *filename)
1082 {
1083 FILE *fp;
1084 if (strcmp(filename, "-") == 0)
1085 fp = stdin;
1086 else {
1087 errno = 0;
1088 fp = fopen(filename, "r");
1089 if (fp == 0) {
1090 error("can't open `%1': %2", filename, strerror(errno));
1091 return;
1092 }
1093 current_filename = filename;
1094 }
1095 enum {
1096 START, MIDDLE, BODY, BODY_START, BODY_BLANK, BODY_DOT
1097 } state = START;
1098 string body;
1099 for (;;) {
1100 int c = getc(fp);
1101 if (c == EOF)
1102 break;
1103 if (illegal_input_char(c)) {
1104 error("illegal input character code %1", c);
1105 continue;
1106 }
1107 switch (state) {
1108 case START:
1109 if (c == '%') {
1110 body = c;
1111 state = BODY;
1112 }
1113 else if (c != '\n')
1114 state = MIDDLE;
1115 break;
1116 case MIDDLE:
1117 if (c == '\n')
1118 state = START;
1119 break;
1120 case BODY:
1121 body += c;
1122 if (c == '\n')
1123 state = BODY_START;
1124 break;
1125 case BODY_START:
1126 if (c == '\n') {
1127 do_ref(body);
1128 state = START;
1129 }
1130 else if (c == '.')
1131 state = BODY_DOT;
1132 else if (csspace(c)) {
1133 state = BODY_BLANK;
1134 body += c;
1135 }
1136 else {
1137 body += c;
1138 state = BODY;
1139 }
1140 break;
1141 case BODY_BLANK:
1142 if (c == '\n') {
1143 trim_blanks(body);
1144 do_ref(body);
1145 state = START;
1146 }
1147 else if (csspace(c))
1148 body += c;
1149 else {
1150 body += c;
1151 state = BODY;
1152 }
1153 break;
1154 case BODY_DOT:
1155 if (c == ']') {
1156 do_ref(body);
1157 state = MIDDLE;
1158 }
1159 else {
1160 body += '.';
1161 body += c;
1162 state = c == '\n' ? BODY_START : BODY;
1163 }
1164 break;
1165 default:
1166 assert(0);
1167 }
1168 if (c == '\n')
1169 current_lineno++;
1170 }
1171 switch (state) {
1172 case START:
1173 case MIDDLE:
1174 break;
1175 case BODY:
1176 body += '\n';
1177 do_ref(body);
1178 break;
1179 case BODY_DOT:
1180 case BODY_START:
1181 do_ref(body);
1182 break;
1183 case BODY_BLANK:
1184 trim_blanks(body);
1185 do_ref(body);
1186 break;
1187 }
1188 fclose(fp);
1189 }
1190
1191 // from the Dragon Book
1192
// Hash the len characters starting at s.  Each character is added after
// shifting the hash left by four bits; whenever bits appear in the top
// nibble (mask 0xf0000000) they are folded back in 24 bits lower and
// cleared.
unsigned hash_string(const char *s, int len)
{
  unsigned h = 0;
  int i;
  for (i = 0; i < len; i++) {
    h = (h << 4) + s[i];
    unsigned top = h & 0xf0000000;
    if (top != 0)
      h = (h ^ (top >> 24)) ^ top;
  }
  return h;
}
1207
// Return the smallest entry of a fixed table of hash table sizes that is
// greater than n.  Asserts if n is not below the largest entry.
int next_size(int n)
{
  static const int table_sizes[] = {
    101, 503, 1009, 2003, 3001, 4001, 5003, 10007, 20011, 40009,
    80021, 160001, 500009, 1000003, 2000003, 4000037, 8000009,
    16000057, 32000011, 64000031, 128000003, 0
  };

  // p is examined after the loop, so it is declared outside it; a name
  // declared in a for-init-statement is not in scope after the loop.
  // The trailing 0 in the table stops the search.
  const int *p;
  for (p = table_sizes; *p <= n && *p != 0; p++)
    ;
  assert(*p != 0);
  return *p;
}
1221
1222