1 /* $NetBSD: refer.cpp,v 1.1.1.1 2016/01/13 18:41:49 christos Exp $ */
2
3 // -*- C++ -*-
4 /* Copyright (C) 1989-1992, 2000, 2001, 2002, 2004
5 Free Software Foundation, Inc.
6 Written by James Clark (jjc@jclark.com)
7
8 This file is part of groff.
9
10 groff is free software; you can redistribute it and/or modify it under
11 the terms of the GNU General Public License as published by the Free
12 Software Foundation; either version 2, or (at your option) any later
13 version.
14
15 groff is distributed in the hope that it will be useful, but WITHOUT ANY
16 WARRANTY; without even the implied warranty of MERCHANTABILITY or
17 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 for more details.
19
20 You should have received a copy of the GNU General Public License along
21 with groff; see the file COPYING. If not, write to the Free Software
22 Foundation, 51 Franklin St - Fifth Floor, Boston, MA 02110-1301, USA. */
23
24 #include "refer.h"
25 #include "refid.h"
26 #include "ref.h"
27 #include "token.h"
28 #include "search.h"
29 #include "command.h"
30
// Version text defined elsewhere with C linkage; main() prints it for
// the -v / --version options.
extern "C" const char *Version_string;

// Control characters that do_file() embeds in pending_line to mark where
// label text has to be substituted; label_processing_state interprets
// them when the line is finally written out.
const char PRE_LABEL_MARKER = '\013';
const char POST_LABEL_MARKER = '\014';
const char LABEL_MARKER = '\015'; // label_type is added on

// Flag bits that make_reference() may set in its flags result, in
// addition to the label type.  Octal values.
#define FORCE_LEFT_BRACKET 04
#define FORCE_RIGHT_BRACKET 010
39
// Current output stream: stdout, or the temporary file installed by
// divert_to_temporary_file().
static FILE *outfp = stdout;

// Formatting configuration.  Several of these are assigned from
// command-line options in main(); the letter is noted where that is
// the case.
string capitalize_fields;		// -c
string reverse_fields;			// -a
string abbreviate_fields;
string period_before_last_name = ". ";
string period_before_initial = ".";
string period_before_hyphen = "";
string period_before_other = ". ";
string sort_fields;			// -s
int annotation_field = -1;		// -B
string annotation_macro;		// -B
string discard_fields = "XYZ";
string pre_label = "\\*([.";		// written for PRE_LABEL_MARKER
string post_label = "\\*(.]";		// written for POST_LABEL_MARKER
string sep_label = ", ";		// written between adjacent labels
int accumulate = 0;			// -e, -s
int move_punctuation = 0;		// -P
int abbreviate_label_ranges = 0;
string label_range_indicator;
int label_in_text = 1;			// cleared by -b and -B
int label_in_reference = 1;		// cleared by -b and -B
int date_as_label = 0;
int sort_adjacent_labels = 0;
// Join exactly two authors with this.
string join_authors_exactly_two = " and ";
// When there are more than two authors join the last two with this.
string join_authors_last_two = ", and ";
// Otherwise join authors with this.
string join_authors_default = ", ";
string separate_label_second_parts = ", ";
// Use this string to represent that there are other authors.
string et_al = " et al";
// Use et al only if it can replace at least this many authors.
int et_al_min_elide = 2;
// Use et al only if the total number of authors is at least this.
int et_al_min_total = 3;


int compatible_flag = 0;		// -C

int short_label_flag = 0;

// Whether do_file() treats .R1/.R2 as command delimiters; cleared by -R.
static int recognize_R1_R2 = 1;

search_list database_list;		// -p adds files to this list
int search_default = 1;			// cleared by -n
// Set once possibly_load_default_database() has added the default.
static int default_database_loaded = 0;

// Growable array of the references cited since the last flush, in
// citation order (see store_citation()).
static reference **citation = 0;
static int ncitations = 0;
static int citation_max = 0;

// Open-addressing hash table of accumulated references (see
// store_reference()).
static reference **reference_hash_table = 0;
static int hash_table_size;
static int nreferences = 0;

// Nonzero when a .lf line has to be emitted to resynchronise line numbers.
static int need_syncing = 0;
// The most recent ordinary input line, held back so that a following
// citation can be attached to it.
string pending_line;
// .lf request lines seen since pending_line was stored.
string pending_lf_lines;
100
// Forward declarations of functions defined later in this file.
static void output_pending_line();
static unsigned immediately_handle_reference(const string &);
static void immediately_output_references();
static unsigned store_reference(const string &);
static void divert_to_temporary_file();
static reference *make_reference(const string &, unsigned *);
static void usage(FILE *stream);
static void do_file(const char *);
static void split_punct(string &line, string &punct);
static void output_citation_group(reference **v, int n, label_type, FILE *fp);
static void possibly_load_default_database();
112
main(int argc,char ** argv)113 int main(int argc, char **argv)
114 {
115 program_name = argv[0];
116 static char stderr_buf[BUFSIZ];
117 setbuf(stderr, stderr_buf);
118 outfp = stdout;
119 int finished_options = 0;
120 int bib_flag = 0;
121 int done_spec = 0;
122
123 for (--argc, ++argv;
124 !finished_options && argc > 0 && argv[0][0] == '-'
125 && argv[0][1] != '\0';
126 argv++, argc--) {
127 const char *opt = argv[0] + 1;
128 while (opt != 0 && *opt != '\0') {
129 switch (*opt) {
130 case 'C':
131 compatible_flag = 1;
132 opt++;
133 break;
134 case 'B':
135 bib_flag = 1;
136 label_in_reference = 0;
137 label_in_text = 0;
138 ++opt;
139 if (*opt == '\0') {
140 annotation_field = 'X';
141 annotation_macro = "AP";
142 }
143 else if (csalnum(opt[0]) && opt[1] == '.' && opt[2] != '\0') {
144 annotation_field = opt[0];
145 annotation_macro = opt + 2;
146 }
147 opt = 0;
148 break;
149 case 'P':
150 move_punctuation = 1;
151 opt++;
152 break;
153 case 'R':
154 recognize_R1_R2 = 0;
155 opt++;
156 break;
157 case 'S':
158 // Not a very useful spec.
159 set_label_spec("(A.n|Q)', '(D.y|D)");
160 done_spec = 1;
161 pre_label = " (";
162 post_label = ")";
163 sep_label = "; ";
164 opt++;
165 break;
166 case 'V':
167 verify_flag = 1;
168 opt++;
169 break;
170 case 'f':
171 {
172 const char *num = 0;
173 if (*++opt == '\0') {
174 if (argc > 1) {
175 num = *++argv;
176 --argc;
177 }
178 else {
179 error("option `f' requires an argument");
180 usage(stderr);
181 exit(1);
182 }
183 }
184 else {
185 num = opt;
186 opt = 0;
187 }
188 const char *ptr;
189 for (ptr = num; *ptr; ptr++)
190 if (!csdigit(*ptr)) {
191 error("bad character `%1' in argument to -f option", *ptr);
192 break;
193 }
194 if (*ptr == '\0') {
195 string spec;
196 spec = '%';
197 spec += num;
198 spec += '\0';
199 set_label_spec(spec.contents());
200 done_spec = 1;
201 }
202 break;
203 }
204 case 'b':
205 label_in_text = 0;
206 label_in_reference = 0;
207 opt++;
208 break;
209 case 'e':
210 accumulate = 1;
211 opt++;
212 break;
213 case 'c':
214 capitalize_fields = ++opt;
215 opt = 0;
216 break;
217 case 'k':
218 {
219 char buf[5];
220 if (csalpha(*++opt))
221 buf[0] = *opt++;
222 else {
223 if (*opt != '\0')
224 error("bad field name `%1'", *opt++);
225 buf[0] = 'L';
226 }
227 buf[1] = '~';
228 buf[2] = '%';
229 buf[3] = 'a';
230 buf[4] = '\0';
231 set_label_spec(buf);
232 done_spec = 1;
233 }
234 break;
235 case 'a':
236 {
237 const char *ptr;
238 for (ptr = ++opt; *ptr; ptr++)
239 if (!csdigit(*ptr)) {
240 error("argument to `a' option not a number");
241 break;
242 }
243 if (*ptr == '\0') {
244 reverse_fields = 'A';
245 reverse_fields += opt;
246 }
247 opt = 0;
248 }
249 break;
250 case 'i':
251 linear_ignore_fields = ++opt;
252 opt = 0;
253 break;
254 case 'l':
255 {
256 char buf[INT_DIGITS*2 + 11]; // A.n+2D.y-3%a
257 strcpy(buf, "A.n");
258 if (*++opt != '\0' && *opt != ',') {
259 char *ptr;
260 long n = strtol(opt, &ptr, 10);
261 if (n == 0 && ptr == opt) {
262 error("bad integer `%1' in `l' option", opt);
263 opt = 0;
264 break;
265 }
266 if (n < 0)
267 n = 0;
268 opt = ptr;
269 sprintf(strchr(buf, '\0'), "+%ld", n);
270 }
271 strcat(buf, "D.y");
272 if (*opt == ',')
273 opt++;
274 if (*opt != '\0') {
275 char *ptr;
276 long n = strtol(opt, &ptr, 10);
277 if (n == 0 && ptr == opt) {
278 error("bad integer `%1' in `l' option", opt);
279 opt = 0;
280 break;
281 }
282 if (n < 0)
283 n = 0;
284 sprintf(strchr(buf, '\0'), "-%ld", n);
285 opt = ptr;
286 if (*opt != '\0')
287 error("argument to `l' option not of form `m,n'");
288 }
289 strcat(buf, "%a");
290 if (!set_label_spec(buf))
291 assert(0);
292 done_spec = 1;
293 }
294 break;
295 case 'n':
296 search_default = 0;
297 opt++;
298 break;
299 case 'p':
300 {
301 const char *filename = 0;
302 if (*++opt == '\0') {
303 if (argc > 1) {
304 filename = *++argv;
305 argc--;
306 }
307 else {
308 error("option `p' requires an argument");
309 usage(stderr);
310 exit(1);
311 }
312 }
313 else {
314 filename = opt;
315 opt = 0;
316 }
317 database_list.add_file(filename);
318 }
319 break;
320 case 's':
321 if (*++opt == '\0')
322 sort_fields = "AD";
323 else {
324 sort_fields = opt;
325 opt = 0;
326 }
327 accumulate = 1;
328 break;
329 case 't':
330 {
331 char *ptr;
332 long n = strtol(opt, &ptr, 10);
333 if (n == 0 && ptr == opt) {
334 error("bad integer `%1' in `t' option", opt);
335 opt = 0;
336 break;
337 }
338 if (n < 1)
339 n = 1;
340 linear_truncate_len = int(n);
341 opt = ptr;
342 break;
343 }
344 case '-':
345 if (opt[1] == '\0') {
346 finished_options = 1;
347 opt++;
348 break;
349 }
350 if (strcmp(opt,"-version")==0) {
351 case 'v':
352 printf("GNU refer (groff) version %s\n", Version_string);
353 exit(0);
354 break;
355 }
356 if (strcmp(opt,"-help")==0) {
357 usage(stdout);
358 exit(0);
359 break;
360 }
361 // fall through
362 default:
363 error("unrecognized option `%1'", *opt);
364 usage(stderr);
365 exit(1);
366 break;
367 }
368 }
369 }
370 if (!done_spec)
371 set_label_spec("%1");
372 if (argc <= 0) {
373 if (bib_flag)
374 do_bib("-");
375 else
376 do_file("-");
377 }
378 else {
379 for (int i = 0; i < argc; i++) {
380 if (bib_flag)
381 do_bib(argv[i]);
382 else
383 do_file(argv[i]);
384 }
385 }
386 if (accumulate)
387 output_references();
388 if (fflush(stdout) < 0)
389 fatal("output error");
390 return 0;
391 }
392
usage(FILE * stream)393 static void usage(FILE *stream)
394 {
395 fprintf(stream,
396 "usage: %s [-benvCPRS] [-aN] [-cXYZ] [-fN] [-iXYZ] [-kX] [-lM,N] [-p file]\n"
397 " [-sXYZ] [-tN] [-BL.M] [files ...]\n",
398 program_name);
399 }
400
possibly_load_default_database()401 static void possibly_load_default_database()
402 {
403 if (search_default && !default_database_loaded) {
404 char *filename = getenv("REFER");
405 if (filename)
406 database_list.add_file(filename);
407 else
408 database_list.add_file(DEFAULT_INDEX, 1);
409 default_database_loaded = 1;
410 }
411 }
412
is_list(const string & str)413 static int is_list(const string &str)
414 {
415 const char *start = str.contents();
416 const char *end = start + str.length();
417 while (end > start && csspace(end[-1]))
418 end--;
419 while (start < end && csspace(*start))
420 start++;
421 return end - start == 6 && memcmp(start, "$LIST$", 6) == 0;
422 }
423
// Process one input file ("-" means standard input), copying it to outfp
// while handling three kinds of special lines:
//   .[ ... .]   a citation; either `$LIST$' or a reference description
//   .lf         line-number request, used to track current_lineno
//   .R1 ... .R2 a block of commands (unless disabled via recognize_R1_R2)
// Every other line is held in pending_line until the next line has been
// read, so that a following citation can be attached to it.
// A file that cannot be opened is reported and skipped.
static void do_file(const char *filename)
{
  FILE *fp;
  if (strcmp(filename, "-") == 0) {
    fp = stdin;
  }
  else {
    errno = 0;
    fp = fopen(filename, "r");
    if (fp == 0) {
      error("can't open `%1': %2", filename, strerror(errno));
      return;
    }
  }
  current_filename = filename;
  fprintf(outfp, ".lf 1 %s\n", filename);
  string line;
  current_lineno = 0;
  for (;;) {
    // Read one line into `line'.  Invalid characters are reported and
    // dropped.  A final line without a newline gets one appended, so a
    // non-empty `line' always ends in '\n'.
    line.clear();
    for (;;) {
      int c = getc(fp);
      if (c == EOF) {
	if (line.length() > 0)
	  line += '\n';
	break;
      }
      if (invalid_input_char(c))
	error("invalid input character code %1", c);
      else {
	line += c;
	if (c == '\n')
	  break;
      }
    }
    int len = line.length();
    if (len == 0)
      break;			// end of file
    current_lineno++;
    if (len >= 2 && line[0] == '.' && line[1] == '[') {
      // Citation.  `pre' is the text following ".[" on the opening line
      // (without the newline), `str' collects the lines up to the
      // closing ".]", and `post' is the text following ".]".
      int start_lineno = current_lineno;
      int start_of_line = 1;
      string str;
      string post;
      string pre(line.contents() + 2, line.length() - 3);
      for (;;) {
	int c = getc(fp);
	if (c == EOF) {
	  error_with_file_and_line(current_filename, start_lineno,
				   "missing `.]' line");
	  break;
	}
	if (start_of_line)
	  current_lineno++;
	if (start_of_line && c == '.') {
	  // Look one character ahead for the closing ".]".
	  int d = getc(fp);
	  if (d == ']') {
	    while ((d = getc(fp)) != '\n' && d != EOF) {
	      if (invalid_input_char(d))
		error("invalid input character code %1", d);
	      else
		post += d;
	    }
	    break;
	  }
	  // Not a terminator: push the lookahead back and treat the '.'
	  // as ordinary text.
	  if (d != EOF)
	    ungetc(d, fp);
	}
	if (invalid_input_char(c))
	  error("invalid input character code %1", c);
	else
	  str += c;
	start_of_line = (c == '\n');
      }
      if (is_list(str)) {
	output_pending_line();
	if (accumulate)
	  output_references();
	else
	  error("found `$LIST$' but not accumulating references");
      }
      else {
	unsigned flags = (accumulate
			  ? store_reference(str)
			  : immediately_handle_reference(str));
	if (label_in_text) {
	  // The first citation while accumulating switches output to a
	  // temporary file.
	  if (accumulate && outfp == stdout)
	    divert_to_temporary_file();
	  if (pending_line.length() == 0) {
	    warning("can't attach citation to previous line");
	  }
	  else
	    // Drop the trailing newline so the label markers can be
	    // appended to the end of the previous line.
	    pending_line.set_length(pending_line.length() - 1);
	  string punct;
	  if (move_punctuation)
	    split_punct(pending_line, punct);
	  int have_text = pre.length() > 0 || post.length() > 0;
	  // What remains of flags once the bracket bits are masked off is
	  // used as the label type.
	  label_type lt = label_type(flags & ~(FORCE_LEFT_BRACKET
					       |FORCE_RIGHT_BRACKET));
	  if ((flags & FORCE_LEFT_BRACKET) || !have_text)
	    pending_line += PRE_LABEL_MARKER;
	  pending_line += pre;
	  // The label type is encoded as an offset from LABEL_MARKER.
	  char lm = LABEL_MARKER + (int)lt;
	  pending_line += lm;
	  pending_line += post;
	  if ((flags & FORCE_RIGHT_BRACKET) || !have_text)
	    pending_line += POST_LABEL_MARKER;
	  pending_line += punct;
	  pending_line += '\n';
	}
      }
      need_syncing = 1;
    }
    else if (len >= 4
	     && line[0] == '.' && line[1] == 'l' && line[2] == 'f'
	     && (compatible_flag || line[3] == '\n' || line[3] == ' ')) {
      // .lf request: queue it for output after the pending line, and
      // pass its arguments (NUL-terminated) to interpret_lf_args().
      pending_lf_lines += line;
      line += '\0';
      if (interpret_lf_args(line.contents() + 3))
	current_lineno--;
    }
    else if (recognize_R1_R2
	     && len >= 4
	     && line[0] == '.' && line[1] == 'R' && line[2] == '1'
	     && (compatible_flag || line[3] == '\n' || line[3] == ' ')) {
      // Command block: collect everything up to the ".R2" line into
      // `line' and hand it to process_commands().
      line.clear();
      int start_of_line = 1;
      int start_lineno = current_lineno;
      for (;;) {
	int c = getc(fp);
	if (c != EOF && start_of_line)
	  current_lineno++;
	if (start_of_line && c == '.') {
	  // Possible ".R2".  Characters that were read while checking
	  // and turn out not to form the terminator are put back into
	  // `line'.
	  c = getc(fp);
	  if (c == 'R') {
	    c = getc(fp);
	    if (c == '2') {
	      c = getc(fp);
	      if (compatible_flag || c == ' ' || c == '\n' || c == EOF) {
		// Found the terminator: skip the rest of its line.
		while (c != EOF && c != '\n')
		  c = getc(fp);
		break;
	      }
	      else {
		line += '.';
		line += 'R';
		line += '2';
	      }
	    }
	    else {
	      line += '.';
	      line += 'R';
	    }
	  }
	  else
	    line += '.';
	}
	if (c == EOF) {
	  error_with_file_and_line(current_filename, start_lineno,
				   "missing `.R2' line");
	  break;
	}
	if (invalid_input_char(c))
	  error("invalid input character code %1", int(c));
	else {
	  line += c;
	  start_of_line = c == '\n';
	}
      }
      output_pending_line();
      if (accumulate)
	output_references();
      else
	nreferences = 0;
      process_commands(line, current_filename, start_lineno + 1);
      need_syncing = 1;
    }
    else {
      // Ordinary line: flush the previous one and hold this one back.
      output_pending_line();
      pending_line = line;
    }
  }
  need_syncing = 0;
  output_pending_line();
  if (fp != stdin)
    fclose(fp);
}
611
// Filter that copies a character stream to `fp', replacing the label
// marker characters (PRE_LABEL_MARKER, POST_LABEL_MARKER and
// LABEL_MARKER + type) with pre_label, post_label, sep_label and the
// label text of the references in the array it was constructed with.
// Adjacent citations of the same type are collected and written as one
// group by output_citation_group().
class label_processing_state {
  enum {
    NORMAL,			// nothing pending
    PENDING_LABEL,		// a label marker has been seen
    PENDING_LABEL_POST,		// ... followed by POST_LABEL_MARKER
    PENDING_LABEL_POST_PRE,	// ... followed by PRE_LABEL_MARKER
    PENDING_POST		// POST_LABEL_MARKER seen with no label pending
  } state;
  label_type type;		// type of pending labels
  int count;			// number of pending labels
  reference **rptr;		// pointer to next reference
  int rcount;			// number of references left
  FILE *fp;
  // Resolve the pending state against the next character `c'; returns
  // nonzero if `c' was consumed.
  int handle_pending(int c);
public:
  label_processing_state(reference **, int, FILE *);
  // Flushes anything still pending; asserts that every reference was used.
  ~label_processing_state();
  // Feed the next character of the stream.
  void process(int c);
};
631
output_pending_line()632 static void output_pending_line()
633 {
634 if (label_in_text && !accumulate && ncitations > 0) {
635 label_processing_state state(citation, ncitations, outfp);
636 int len = pending_line.length();
637 for (int i = 0; i < len; i++)
638 state.process((unsigned char)(pending_line[i]));
639 }
640 else
641 put_string(pending_line, outfp);
642 pending_line.clear();
643 if (pending_lf_lines.length() > 0) {
644 put_string(pending_lf_lines, outfp);
645 pending_lf_lines.clear();
646 }
647 if (!accumulate)
648 immediately_output_references();
649 if (need_syncing) {
650 fprintf(outfp, ".lf %d %s\n", current_lineno, current_filename);
651 need_syncing = 0;
652 }
653 }
654
split_punct(string & line,string & punct)655 static void split_punct(string &line, string &punct)
656 {
657 const char *start = line.contents();
658 const char *end = start + line.length();
659 const char *ptr = start;
660 const char *last_token_start = 0;
661 for (;;) {
662 if (ptr >= end)
663 break;
664 last_token_start = ptr;
665 if (*ptr == PRE_LABEL_MARKER || *ptr == POST_LABEL_MARKER
666 || (*ptr >= LABEL_MARKER && *ptr < LABEL_MARKER + N_LABEL_TYPES))
667 ptr++;
668 else if (!get_token(&ptr, end))
669 break;
670 }
671 if (last_token_start) {
672 const token_info *ti = lookup_token(last_token_start, end);
673 if (ti->is_punct()) {
674 punct.append(last_token_start, end - last_token_start);
675 line.set_length(last_token_start - start);
676 }
677 }
678 }
679
divert_to_temporary_file()680 static void divert_to_temporary_file()
681 {
682 outfp = xtmpfile();
683 }
684
store_citation(reference * ref)685 static void store_citation(reference *ref)
686 {
687 if (ncitations >= citation_max) {
688 if (citation == 0)
689 citation = new reference*[citation_max = 100];
690 else {
691 reference **old_citation = citation;
692 citation_max *= 2;
693 citation = new reference *[citation_max];
694 memcpy(citation, old_citation, ncitations*sizeof(reference *));
695 a_delete old_citation;
696 }
697 }
698 citation[ncitations++] = ref;
699 }
700
store_reference(const string & str)701 static unsigned store_reference(const string &str)
702 {
703 if (reference_hash_table == 0) {
704 reference_hash_table = new reference *[17];
705 hash_table_size = 17;
706 for (int i = 0; i < hash_table_size; i++)
707 reference_hash_table[i] = 0;
708 }
709 unsigned flags;
710 reference *ref = make_reference(str, &flags);
711 ref->compute_hash_code();
712 unsigned h = ref->hash();
713 reference **ptr;
714 for (ptr = reference_hash_table + (h % hash_table_size);
715 *ptr != 0;
716 ((ptr == reference_hash_table)
717 ? (ptr = reference_hash_table + hash_table_size - 1)
718 : --ptr))
719 if (same_reference(**ptr, *ref))
720 break;
721 if (*ptr != 0) {
722 if (ref->is_merged())
723 warning("fields ignored because reference already used");
724 delete ref;
725 ref = *ptr;
726 }
727 else {
728 *ptr = ref;
729 ref->set_number(nreferences);
730 nreferences++;
731 ref->pre_compute_label();
732 ref->compute_sort_key();
733 if (nreferences*2 >= hash_table_size) {
734 // Rehash it.
735 reference **old_table = reference_hash_table;
736 int old_size = hash_table_size;
737 hash_table_size = next_size(hash_table_size);
738 reference_hash_table = new reference*[hash_table_size];
739 int i;
740 for (i = 0; i < hash_table_size; i++)
741 reference_hash_table[i] = 0;
742 for (i = 0; i < old_size; i++)
743 if (old_table[i]) {
744 reference **p;
745 for (p = (reference_hash_table
746 + (old_table[i]->hash() % hash_table_size));
747 *p;
748 ((p == reference_hash_table)
749 ? (p = reference_hash_table + hash_table_size - 1)
750 : --p))
751 ;
752 *p = old_table[i];
753 }
754 a_delete old_table;
755 }
756 }
757 if (label_in_text)
758 store_citation(ref);
759 return flags;
760 }
761
immediately_handle_reference(const string & str)762 unsigned immediately_handle_reference(const string &str)
763 {
764 unsigned flags;
765 reference *ref = make_reference(str, &flags);
766 ref->set_number(nreferences);
767 if (label_in_text || label_in_reference) {
768 ref->pre_compute_label();
769 ref->immediate_compute_label();
770 }
771 nreferences++;
772 store_citation(ref);
773 return flags;
774 }
775
immediately_output_references()776 static void immediately_output_references()
777 {
778 for (int i = 0; i < ncitations; i++) {
779 reference *ref = citation[i];
780 if (label_in_reference) {
781 fputs(".ds [F ", outfp);
782 const string &label = ref->get_label(NORMAL_LABEL);
783 if (label.length() > 0
784 && (label[0] == ' ' || label[0] == '\\' || label[0] == '"'))
785 putc('"', outfp);
786 put_string(label, outfp);
787 putc('\n', outfp);
788 }
789 ref->output(outfp);
790 delete ref;
791 }
792 ncitations = 0;
793 }
794
output_citation_group(reference ** v,int n,label_type type,FILE * fp)795 static void output_citation_group(reference **v, int n, label_type type,
796 FILE *fp)
797 {
798 if (sort_adjacent_labels) {
799 // Do an insertion sort. Usually n will be very small.
800 for (int i = 1; i < n; i++) {
801 int num = v[i]->get_number();
802 reference *temp = v[i];
803 int j;
804 for (j = i - 1; j >= 0 && v[j]->get_number() > num; j--)
805 v[j + 1] = v[j];
806 v[j + 1] = temp;
807 }
808 }
809 // This messes up if !accumulate.
810 if (accumulate && n > 1) {
811 // remove duplicates
812 int j = 1;
813 for (int i = 1; i < n; i++)
814 if (v[i]->get_label(type) != v[i - 1]->get_label(type))
815 v[j++] = v[i];
816 n = j;
817 }
818 string merged_label;
819 for (int i = 0; i < n; i++) {
820 int nmerged = v[i]->merge_labels(v + i + 1, n - i - 1, type, merged_label);
821 if (nmerged > 0) {
822 put_string(merged_label, fp);
823 i += nmerged;
824 }
825 else
826 put_string(v[i]->get_label(type), fp);
827 if (i < n - 1)
828 put_string(sep_label, fp);
829 }
830 }
831
832
label_processing_state(reference ** p,int n,FILE * f)833 label_processing_state::label_processing_state(reference **p, int n, FILE *f)
834 : state(NORMAL), count(0), rptr(p), rcount(n), fp(f)
835 {
836 }
837
~label_processing_state()838 label_processing_state::~label_processing_state()
839 {
840 int handled = handle_pending(EOF);
841 assert(!handled);
842 assert(rcount == 0);
843 }
844
handle_pending(int c)845 int label_processing_state::handle_pending(int c)
846 {
847 switch (state) {
848 case NORMAL:
849 break;
850 case PENDING_LABEL:
851 if (c == POST_LABEL_MARKER) {
852 state = PENDING_LABEL_POST;
853 return 1;
854 }
855 else {
856 output_citation_group(rptr, count, type, fp);
857 rptr += count ;
858 rcount -= count;
859 state = NORMAL;
860 }
861 break;
862 case PENDING_LABEL_POST:
863 if (c == PRE_LABEL_MARKER) {
864 state = PENDING_LABEL_POST_PRE;
865 return 1;
866 }
867 else {
868 output_citation_group(rptr, count, type, fp);
869 rptr += count;
870 rcount -= count;
871 put_string(post_label, fp);
872 state = NORMAL;
873 }
874 break;
875 case PENDING_LABEL_POST_PRE:
876 if (c >= LABEL_MARKER
877 && c < LABEL_MARKER + N_LABEL_TYPES
878 && c - LABEL_MARKER == type) {
879 count += 1;
880 state = PENDING_LABEL;
881 return 1;
882 }
883 else {
884 output_citation_group(rptr, count, type, fp);
885 rptr += count;
886 rcount -= count;
887 put_string(sep_label, fp);
888 state = NORMAL;
889 }
890 break;
891 case PENDING_POST:
892 if (c == PRE_LABEL_MARKER) {
893 put_string(sep_label, fp);
894 state = NORMAL;
895 return 1;
896 }
897 else {
898 put_string(post_label, fp);
899 state = NORMAL;
900 }
901 break;
902 }
903 return 0;
904 }
905
process(int c)906 void label_processing_state::process(int c)
907 {
908 if (handle_pending(c))
909 return;
910 assert(state == NORMAL);
911 switch (c) {
912 case PRE_LABEL_MARKER:
913 put_string(pre_label, fp);
914 state = NORMAL;
915 break;
916 case POST_LABEL_MARKER:
917 state = PENDING_POST;
918 break;
919 case LABEL_MARKER:
920 case LABEL_MARKER + 1:
921 count = 1;
922 state = PENDING_LABEL;
923 type = label_type(c - LABEL_MARKER);
924 break;
925 default:
926 state = NORMAL;
927 putc(c, fp);
928 break;
929 }
930 }
931
932 extern "C" {
933
rcompare(const void * p1,const void * p2)934 int rcompare(const void *p1, const void *p2)
935 {
936 return compare_reference(**(reference **)p1, **(reference **)p2);
937 }
938
939 }
940
output_references()941 void output_references()
942 {
943 assert(accumulate);
944 if (!hash_table_size) {
945 error("nothing to reference (probably `bibliography' before `sort')");
946 accumulate = 0;
947 nreferences = 0;
948 return;
949 }
950 if (nreferences > 0) {
951 int j = 0;
952 int i;
953 for (i = 0; i < hash_table_size; i++)
954 if (reference_hash_table[i] != 0)
955 reference_hash_table[j++] = reference_hash_table[i];
956 assert(j == nreferences);
957 for (; j < hash_table_size; j++)
958 reference_hash_table[j] = 0;
959 qsort(reference_hash_table, nreferences, sizeof(reference*), rcompare);
960 for (i = 0; i < nreferences; i++)
961 reference_hash_table[i]->set_number(i);
962 compute_labels(reference_hash_table, nreferences);
963 }
964 if (outfp != stdout) {
965 rewind(outfp);
966 {
967 label_processing_state state(citation, ncitations, stdout);
968 int c;
969 while ((c = getc(outfp)) != EOF)
970 state.process(c);
971 }
972 ncitations = 0;
973 fclose(outfp);
974 outfp = stdout;
975 }
976 if (nreferences > 0) {
977 fputs(".]<\n", outfp);
978 for (int i = 0; i < nreferences; i++) {
979 if (sort_fields.length() > 0)
980 reference_hash_table[i]->print_sort_key_comment(outfp);
981 if (label_in_reference) {
982 fputs(".ds [F ", outfp);
983 const string &label = reference_hash_table[i]->get_label(NORMAL_LABEL);
984 if (label.length() > 0
985 && (label[0] == ' ' || label[0] == '\\' || label[0] == '"'))
986 putc('"', outfp);
987 put_string(label, outfp);
988 putc('\n', outfp);
989 }
990 reference_hash_table[i]->output(outfp);
991 delete reference_hash_table[i];
992 reference_hash_table[i] = 0;
993 }
994 fputs(".]>\n", outfp);
995 nreferences = 0;
996 }
997 clear_labels();
998 }
999
find_reference(const char * query,int query_len)1000 static reference *find_reference(const char *query, int query_len)
1001 {
1002 // This is so that error messages look better.
1003 while (query_len > 0 && csspace(query[query_len - 1]))
1004 query_len--;
1005 string str;
1006 for (int i = 0; i < query_len; i++)
1007 str += query[i] == '\n' ? ' ' : query[i];
1008 str += '\0';
1009 possibly_load_default_database();
1010 search_list_iterator iter(&database_list, str.contents());
1011 reference_id rid;
1012 const char *start;
1013 int len;
1014 if (!iter.next(&start, &len, &rid)) {
1015 error("no matches for `%1'", str.contents());
1016 return 0;
1017 }
1018 const char *end = start + len;
1019 while (start < end) {
1020 if (*start == '%')
1021 break;
1022 while (start < end && *start++ != '\n')
1023 ;
1024 }
1025 if (start >= end) {
1026 error("found a reference for `%1' but it didn't contain any fields",
1027 str.contents());
1028 return 0;
1029 }
1030 reference *result = new reference(start, end - start, &rid);
1031 if (iter.next(&start, &len, &rid))
1032 warning("multiple matches for `%1'", str.contents());
1033 return result;
1034 }
1035
make_reference(const string & str,unsigned * flagsp)1036 static reference *make_reference(const string &str, unsigned *flagsp)
1037 {
1038 const char *start = str.contents();
1039 const char *end = start + str.length();
1040 const char *ptr = start;
1041 while (ptr < end) {
1042 if (*ptr == '%')
1043 break;
1044 while (ptr < end && *ptr++ != '\n')
1045 ;
1046 }
1047 *flagsp = 0;
1048 for (; start < ptr; start++) {
1049 if (*start == '#')
1050 *flagsp = (SHORT_LABEL | (*flagsp & (FORCE_RIGHT_BRACKET
1051 | FORCE_LEFT_BRACKET)));
1052 else if (*start == '[')
1053 *flagsp |= FORCE_LEFT_BRACKET;
1054 else if (*start == ']')
1055 *flagsp |= FORCE_RIGHT_BRACKET;
1056 else if (!csspace(*start))
1057 break;
1058 }
1059 if (start >= end) {
1060 error("empty reference");
1061 return new reference;
1062 }
1063 reference *database_ref = 0;
1064 if (start < ptr)
1065 database_ref = find_reference(start, ptr - start);
1066 reference *inline_ref = 0;
1067 if (ptr < end)
1068 inline_ref = new reference(ptr, end - ptr);
1069 if (inline_ref) {
1070 if (database_ref) {
1071 database_ref->merge(*inline_ref);
1072 delete inline_ref;
1073 return database_ref;
1074 }
1075 else
1076 return inline_ref;
1077 }
1078 else if (database_ref)
1079 return database_ref;
1080 else
1081 return new reference;
1082 }
1083
do_ref(const string & str)1084 static void do_ref(const string &str)
1085 {
1086 if (accumulate)
1087 (void)store_reference(str);
1088 else {
1089 (void)immediately_handle_reference(str);
1090 immediately_output_references();
1091 }
1092 }
1093
trim_blanks(string & str)1094 static void trim_blanks(string &str)
1095 {
1096 const char *start = str.contents();
1097 const char *end = start + str.length();
1098 while (end > start && end[-1] != '\n' && csspace(end[-1]))
1099 --end;
1100 str.set_length(end - start);
1101 }
1102
do_bib(const char * filename)1103 void do_bib(const char *filename)
1104 {
1105 FILE *fp;
1106 if (strcmp(filename, "-") == 0)
1107 fp = stdin;
1108 else {
1109 errno = 0;
1110 fp = fopen(filename, "r");
1111 if (fp == 0) {
1112 error("can't open `%1': %2", filename, strerror(errno));
1113 return;
1114 }
1115 current_filename = filename;
1116 }
1117 enum {
1118 START, MIDDLE, BODY, BODY_START, BODY_BLANK, BODY_DOT
1119 } state = START;
1120 string body;
1121 for (;;) {
1122 int c = getc(fp);
1123 if (c == EOF)
1124 break;
1125 if (invalid_input_char(c)) {
1126 error("invalid input character code %1", c);
1127 continue;
1128 }
1129 switch (state) {
1130 case START:
1131 if (c == '%') {
1132 body = c;
1133 state = BODY;
1134 }
1135 else if (c != '\n')
1136 state = MIDDLE;
1137 break;
1138 case MIDDLE:
1139 if (c == '\n')
1140 state = START;
1141 break;
1142 case BODY:
1143 body += c;
1144 if (c == '\n')
1145 state = BODY_START;
1146 break;
1147 case BODY_START:
1148 if (c == '\n') {
1149 do_ref(body);
1150 state = START;
1151 }
1152 else if (c == '.')
1153 state = BODY_DOT;
1154 else if (csspace(c)) {
1155 state = BODY_BLANK;
1156 body += c;
1157 }
1158 else {
1159 body += c;
1160 state = BODY;
1161 }
1162 break;
1163 case BODY_BLANK:
1164 if (c == '\n') {
1165 trim_blanks(body);
1166 do_ref(body);
1167 state = START;
1168 }
1169 else if (csspace(c))
1170 body += c;
1171 else {
1172 body += c;
1173 state = BODY;
1174 }
1175 break;
1176 case BODY_DOT:
1177 if (c == ']') {
1178 do_ref(body);
1179 state = MIDDLE;
1180 }
1181 else {
1182 body += '.';
1183 body += c;
1184 state = c == '\n' ? BODY_START : BODY;
1185 }
1186 break;
1187 default:
1188 assert(0);
1189 }
1190 if (c == '\n')
1191 current_lineno++;
1192 }
1193 switch (state) {
1194 case START:
1195 case MIDDLE:
1196 break;
1197 case BODY:
1198 body += '\n';
1199 do_ref(body);
1200 break;
1201 case BODY_DOT:
1202 case BODY_START:
1203 do_ref(body);
1204 break;
1205 case BODY_BLANK:
1206 trim_blanks(body);
1207 do_ref(body);
1208 break;
1209 }
1210 fclose(fp);
1211 }
1212
1213 // from the Dragon Book
1214
// Hash the `len' characters starting at `s'.
//
// Each character is added after shifting the running value left by four
// bits; whenever any of the top four bits (mask 0xf0000000) become set,
// they are folded back in lower down and cleared.
unsigned hash_string(const char *s, int len)
{
  unsigned h = 0;
  for (int i = 0; i < len; i++) {
    h = (h << 4) + s[i];
    const unsigned top = h & 0xf0000000;
    if (top != 0)
      h ^= (top >> 24) ^ top;
  }
  return h;
}
1229
// Return the first entry of a fixed table of increasing sizes that is
// strictly greater than `n'.  Asserts if `n' is not smaller than the
// largest entry.
int next_size(int n)
{
  // The trailing 0 is a sentinel marking the end of the table.
  static const int sizes[] = {
    101, 503, 1009, 2003, 3001, 4001, 5003, 10007, 20011, 40009,
    80021, 160001, 500009, 1000003, 2000003, 4000037, 8000009,
    16000057, 32000011, 64000031, 128000003, 0
  };

  int idx = 0;
  while (sizes[idx] != 0 && sizes[idx] <= n)
    idx++;
  assert(sizes[idx] != 0);
  return sizes[idx];
}
1244
1245