xref: /386bsd/usr/src/usr.bin/groff/refer/refer.cc (revision a2142627)
1 // -*- C++ -*-
2 /* Copyright (C) 1989, 1990, 1991, 1992 Free Software Foundation, Inc.
3      Written by James Clark (jjc@jclark.com)
4 
5 This file is part of groff.
6 
7 groff is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
11 
12 groff is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16 
17 You should have received a copy of the GNU General Public License along
18 with groff; see the file COPYING.  If not, write to the Free Software
19 Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
20 
21 #include "refer.h"
22 #include "refid.h"
23 #include "ref.h"
24 #include "token.h"
25 #include "search.h"
26 #include "command.h"
27 
28 const char PRE_LABEL_MARKER = '\013';
29 const char POST_LABEL_MARKER = '\014';
30 const char LABEL_MARKER = '\015'; // label_type is added on
31 
32 #define FORCE_LEFT_BRACKET 04
33 #define FORCE_RIGHT_BRACKET 010
34 
35 static FILE *outfp = stdout;
36 
37 string capitalize_fields;
38 string reverse_fields;
39 string abbreviate_fields;
40 string period_before_last_name = ". ";
41 string period_before_initial = ".";
42 string period_before_hyphen = "";
43 string period_before_other = ". ";
44 string sort_fields;
45 int annotation_field = -1;
46 string annotation_macro;
47 string discard_fields = "XYZ";
48 string pre_label = "\\*([.";
49 string post_label = "\\*(.]";
50 string sep_label = ", ";
51 int accumulate = 0;
52 int move_punctuation = 0;
53 int abbreviate_label_ranges = 0;
54 string label_range_indicator;
55 int label_in_text = 1;
56 int label_in_reference = 1;
57 int date_as_label = 0;
58 int sort_adjacent_labels = 0;
59 // Join exactly two authors with this.
60 string join_authors_exactly_two = " and ";
61 // When there are more than two authors join the last two with this.
62 string join_authors_last_two = ", and ";
63 // Otherwise join authors with this.
64 string join_authors_default = ", ";
65 string separate_label_second_parts = ", ";
66 // Use this string to represent that there are other authors.
67 string et_al = " et al";
68 // Use et al only if it can replace at least this many authors.
69 int et_al_min_elide = 2;
70 // Use et al only if the total number of authors is at least this.
71 int et_al_min_total = 3;
72 
73 
74 int compatible_flag = 0;
75 
76 int short_label_flag = 0;
77 
78 static int recognize_R1_R2 = 1;
79 
80 search_list database_list;
81 int search_default = 1;
82 static int default_database_loaded = 0;
83 
84 static reference **citation = 0;
85 static int ncitations = 0;
86 static int citation_max = 0;
87 
88 static reference **reference_hash_table = 0;
89 static int hash_table_size;
90 static int nreferences = 0;
91 
92 static int need_syncing = 0;
93 string pending_line;
94 string pending_lf_lines;
95 
96 static void output_pending_line();
97 static unsigned immediately_handle_reference(const string &);
98 static void immediately_output_references();
99 static unsigned store_reference(const string &);
100 static void divert_to_temporary_file();
101 static reference *make_reference(const string &, unsigned *);
102 static void usage();
103 static void do_file(const char *);
104 static void split_punct(string &line, string &punct);
105 static void output_citation_group(reference **v, int n, label_type, FILE *fp);
106 static void possibly_load_default_database();
107 
main(int argc,char ** argv)108 int main(int argc, char **argv)
109 {
110   program_name = argv[0];
111   static char stderr_buf[BUFSIZ];
112   setbuf(stderr, stderr_buf);
113   outfp = stdout;
114   int finished_options = 0;
115   int bib_flag = 0;
116   int done_spec = 0;
117 
118   for (--argc, ++argv;
119        !finished_options && argc > 0 && argv[0][0] == '-'
120        && argv[0][1] != '\0';
121        argv++, argc--) {
122     const char *opt = argv[0] + 1;
123     while (opt != 0 && *opt != '\0') {
124       switch (*opt) {
125       case 'C':
126 	compatible_flag = 1;
127 	opt++;
128 	break;
129       case 'B':
130 	bib_flag = 1;
131 	label_in_reference = 0;
132 	label_in_text = 0;
133 	++opt;
134 	if (*opt == '\0') {
135 	  annotation_field = 'X';
136 	  annotation_macro = "AP";
137 	}
138 	else if (csalnum(opt[0]) && opt[1] == '.' && opt[2] != '\0') {
139 	  annotation_field = opt[0];
140 	  annotation_macro = opt + 2;
141 	}
142 	opt = 0;
143 	break;
144       case 'P':
145 	move_punctuation = 1;
146 	opt++;
147 	break;
148       case 'R':
149 	recognize_R1_R2 = 0;
150 	opt++;
151 	break;
152       case 'S':
153 	// Not a very useful spec.
154 	set_label_spec("(A.n|Q)', '(D.y|D)");
155 	done_spec = 1;
156 	pre_label = " (";
157 	post_label = ")";
158 	sep_label = "; ";
159 	opt++;
160 	break;
161       case 'V':
162 	verify_flag = 1;
163 	opt++;
164 	break;
165       case 'f':
166 	{
167 	  const char *num = 0;
168 	  if (*++opt == '\0') {
169 	    if (argc > 1) {
170 	      num = *++argv;
171 	      --argc;
172 	    }
173 	    else {
174 	      error("option `f' requires an argument");
175 	      usage();
176 	    }
177 	  }
178 	  else {
179 	    num = opt;
180 	    opt = 0;
181 	  }
182 	  for (const char *ptr = num; *ptr; ptr++)
183 	    if (!csdigit(*ptr)) {
184 	      error("bad character `%1' in argument to -f option", *ptr);
185 	      break;
186 	    }
187 	  if (*ptr == '\0') {
188 	    string spec;
189 	    spec = '%';
190 	    spec += num;
191 	    spec += '\0';
192 	    set_label_spec(spec.contents());
193 	    done_spec = 1;
194 	  }
195 	  break;
196 	}
197       case 'b':
198 	label_in_text = 0;
199 	label_in_reference = 0;
200 	opt++;
201 	break;
202       case 'e':
203 	accumulate = 1;
204 	opt++;
205 	break;
206       case 'c':
207 	capitalize_fields = ++opt;
208 	opt = 0;
209 	break;
210       case 'k':
211 	{
212 	  char buf[5];
213 	  if (csalpha(*++opt))
214 	    buf[0] = *opt++;
215 	  else {
216 	    if (*opt != '\0')
217 	      error("bad field name `%1'", *opt++);
218 	    buf[0] = 'L';
219 	  }
220 	  buf[1] = '~';
221 	  buf[2] = '%';
222 	  buf[3] = 'a';
223 	  buf[4] = '\0';
224 	  set_label_spec(buf);
225 	  done_spec = 1;
226 	}
227 	break;
228       case 'a':
229 	{
230 	  for (const char *ptr = ++opt; *ptr; ptr++)
231 	    if (!csdigit(*ptr)) {
232 	      error("argument to `a' option not a number");
233 	      break;
234 	    }
235 	  if (*ptr == '\0') {
236 	    reverse_fields = 'A';
237 	    reverse_fields += opt;
238 	  }
239 	  opt = 0;
240 	}
241 	break;
242       case 'i':
243 	linear_ignore_fields = ++opt;
244 	opt = 0;
245 	break;
246       case 'l':
247 	{
248 	  char buf[INT_DIGITS*2 + 11]; // A.n+2D.y-3%a
249 	  strcpy(buf, "A.n");
250 	  if (*++opt != '\0' && *opt != ',') {
251 	    char *ptr;
252 	    long n = strtol(opt, &ptr, 10);
253 	    if (n == 0 && ptr == opt) {
254 	      error("bad integer `%1' in `l' option", opt);
255 	      opt = 0;
256 	      break;
257 	    }
258 	    if (n < 0)
259 	      n = 0;
260 	    opt = ptr;
261 	    sprintf(strchr(buf, '\0'), "+%d", n);
262 	  }
263 	  strcat(buf, "D.y");
264 	  if (*opt == ',')
265 	    opt++;
266 	  if (*opt != '\0') {
267 	    char *ptr;
268 	    long n = strtol(opt, &ptr, 10);
269 	    if (n == 0 && ptr == opt) {
270 	      error("bad integer `%1' in `l' option", opt);
271 	      opt = 0;
272 	      break;
273 	    }
274 	    if (n < 0)
275 	      n = 0;
276 	    sprintf(strchr(buf, '\0'), "-%d", n);
277 	    opt = ptr;
278 	    if (*opt != '\0')
279 	      error("argument to `l' option not of form `m,n'");
280 	  }
281 	  strcat(buf, "%a");
282 	  if (!set_label_spec(buf))
283 	    assert(0);
284 	  done_spec = 1;
285 	}
286 	break;
287       case 'n':
288 	search_default = 0;
289 	opt++;
290 	break;
291       case 'p':
292 	{
293 	  const char *filename = 0;
294 	  if (*++opt == '\0') {
295 	    if (argc > 1) {
296 	      filename = *++argv;
297 	      argc--;
298 	    }
299 	    else {
300 	      error("option `p' requires an argument");
301 	      usage();
302 	    }
303 	  }
304 	  else {
305 	    filename = opt;
306 	    opt = 0;
307 	  }
308 	  database_list.add_file(filename);
309 	}
310 	break;
311       case 's':
312 	if (*++opt == '\0')
313 	  sort_fields = "AD";
314 	else {
315 	  sort_fields = opt;
316 	  opt = 0;
317 	}
318 	accumulate = 1;
319 	break;
320       case 't':
321 	{
322 	  char *ptr;
323 	  long n = strtol(opt, &ptr, 10);
324 	  if (n == 0 && ptr == opt) {
325 	    error("bad integer `%1' in `t' option", opt);
326 	    opt = 0;
327 	    break;
328 	  }
329 	  if (n < 1)
330 	    n = 1;
331 	  linear_truncate_len = int(n);
332 	  opt = ptr;
333 	  break;
334 	}
335       case 'v':
336 	{
337 	  extern const char *version_string;
338 	  fprintf(stderr, "GNU refer version %s\n", version_string);
339 	  fflush(stderr);
340 	  opt++;
341 	  break;
342 	}
343       case '-':
344 	if (opt[1] == '\0') {
345 	  finished_options = 1;
346 	  opt++;
347 	  break;
348 	}
349 	// fall through
350       default:
351 	error("unrecognized option `%1'", *opt);
352 	usage();
353 	break;
354       }
355     }
356   }
357   if (!done_spec)
358     set_label_spec("%1");
359   if (argc <= 0) {
360     if (bib_flag)
361       do_bib("-");
362     else
363       do_file("-");
364   }
365   else {
366     for (int i = 0; i < argc; i++) {
367       if (bib_flag)
368 	do_bib(argv[i]);
369       else
370 	do_file(argv[i]);
371     }
372   }
373   if (accumulate)
374     output_references();
375   if (fflush(stdout) < 0)
376     fatal("output error");
377   exit(0);
378 }
379 
usage()380 static void usage()
381 {
382   fprintf(stderr,
383 "usage: %s [-benvCPRS] [-aN] [-cXYZ] [-fN] [-iXYZ] [-kX] [-lM,N] [-p file]\n"
384 "       [-sXYZ] [-tN] [-BL.M] [files ...]\n",
385 	  program_name);
386   exit(1);
387 }
388 
possibly_load_default_database()389 static void possibly_load_default_database()
390 {
391   if (search_default && !default_database_loaded) {
392     char *filename = getenv("REFER");
393     if (filename)
394       database_list.add_file(filename);
395     else
396       database_list.add_file(DEFAULT_INDEX, 1);
397     default_database_loaded = 1;
398   }
399 }
400 
is_list(const string & str)401 static int is_list(const string &str)
402 {
403   const char *start = str.contents();
404   const char *end = start + str.length();
405   while (end > start && csspace(end[-1]))
406     end--;
407   while (start < end && csspace(*start))
408     start++;
409   return end - start == 6 && memcmp(start, "$LIST$", 6) == 0;
410 }
411 
do_file(const char * filename)412 static void do_file(const char *filename)
413 {
414   FILE *fp;
415   if (strcmp(filename, "-") == 0) {
416     fp = stdin;
417   }
418   else {
419     errno = 0;
420     fp = fopen(filename, "r");
421     if (fp == 0) {
422       error("can't open `%1': %2", filename, strerror(errno));
423       return;
424     }
425     current_filename = filename;
426   }
427   fprintf(outfp, ".lf 1 %s\n", filename);
428   string line;
429   current_lineno = 0;
430   for (;;) {
431     line.clear();
432     for (;;) {
433       int c = getc(fp);
434       if (c == EOF) {
435 	if (line.length() > 0)
436 	  line += '\n';
437 	break;
438       }
439       if (illegal_input_char(c))
440 	error("illegal input character code %1", c);
441       else {
442 	line += c;
443 	if (c == '\n')
444 	  break;
445       }
446     }
447     int len = line.length();
448     if (len == 0)
449       break;
450     current_lineno++;
451     if (len >= 2 && line[0] == '.' && line[1] == '[') {
452       int start_lineno = current_lineno;
453       int start_of_line = 1;
454       string str;
455       string post;
456       string pre(line.contents() + 2, line.length() - 3);
457       for (;;) {
458 	int c = getc(fp);
459 	if (c == EOF) {
460 	  error_with_file_and_line(current_filename, start_lineno,
461 				   "missing `.]' line");
462 	  break;
463 	}
464 	if (start_of_line)
465 	  current_lineno++;
466 	if (start_of_line && c == '.') {
467 	  int d = getc(fp);
468 	  if (d == ']') {
469 	    while ((d = getc(fp)) != '\n' && d != EOF) {
470 	      if (illegal_input_char(d))
471 		error("illegal input character code %1", d);
472 	      else
473 		post += d;
474 	    }
475 	    break;
476 	  }
477 	  if (d != EOF)
478 	    ungetc(d, fp);
479 	}
480 	if (illegal_input_char(c))
481 	  error("illegal input character code %1", c);
482 	else
483 	  str += c;
484 	start_of_line = (c == '\n');
485       }
486       if (is_list(str)) {
487 	output_pending_line();
488 	if (accumulate)
489 	  output_references();
490 	else
491 	  error("found `$LIST$' but not accumulating references");
492       }
493       else {
494 	unsigned flags = (accumulate
495 			  ? store_reference(str)
496 			  : immediately_handle_reference(str));
497 	if (label_in_text) {
498 	  if (accumulate && outfp == stdout)
499 	    divert_to_temporary_file();
500 	  if (pending_line.length() == 0) {
501 	    warning("can't attach citation to previous line");
502 	  }
503 	  else
504 	    pending_line.set_length(pending_line.length() - 1);
505 	  string punct;
506 	  if (move_punctuation)
507 	    split_punct(pending_line, punct);
508 	  int have_text = pre.length() > 0 || post.length() > 0;
509 	  label_type lt = label_type(flags & ~(FORCE_LEFT_BRACKET
510 					       |FORCE_RIGHT_BRACKET));
511 	  if ((flags & FORCE_LEFT_BRACKET) || !have_text)
512 	    pending_line += PRE_LABEL_MARKER;
513 	  pending_line += pre;
514 	  pending_line += LABEL_MARKER + lt;
515 	  pending_line += post;
516 	  if ((flags & FORCE_RIGHT_BRACKET) || !have_text)
517 	    pending_line += POST_LABEL_MARKER;
518 	  pending_line += punct;
519 	  pending_line += '\n';
520 	}
521       }
522       need_syncing = 1;
523     }
524     else if (len >= 4
525 	     && line[0] == '.' && line[1] == 'l' && line[2] == 'f'
526 	     && (compatible_flag || line[3] == '\n' || line[3] == ' ')) {
527       pending_lf_lines += line;
528       line += '\0';
529       if (interpret_lf_args(line.contents() + 3))
530 	current_lineno--;
531     }
532     else if (recognize_R1_R2
533 	     && len >= 4
534 	     && line[0] == '.' && line[1] == 'R' && line[2] == '1'
535 	     && (compatible_flag || line[3] == '\n' || line[3] == ' ')) {
536       line.clear();
537       int start_of_line = 1;
538       int start_lineno = current_lineno;
539       for (;;) {
540 	int c = getc(fp);
541 	if (c != EOF && start_of_line)
542 	  current_lineno++;
543 	if (start_of_line && c == '.') {
544 	  c = getc(fp);
545 	  if (c == 'R') {
546 	    c = getc(fp);
547 	    if (c == '2') {
548 	      c = getc(fp);
549 	      if (compatible_flag || c == ' ' || c == '\n' || c == EOF) {
550 		while (c != EOF && c != '\n')
551 		  c = getc(fp);
552 		break;
553 	      }
554 	      else {
555 		line += '.';
556 		line += 'R';
557 		line += '2';
558 	      }
559 	    }
560 	    else {
561 	      line += '.';
562 	      line += 'R';
563 	    }
564 	  }
565 	  else
566 	    line += '.';
567 	}
568 	if (c == EOF) {
569 	  error_with_file_and_line(current_filename, start_lineno,
570 				   "missing `.R2' line");
571 	  break;
572 	}
573 	if (illegal_input_char(c))
574 	  error("illegal input character code %1", int(c));
575 	else {
576 	  line += c;
577 	  start_of_line = c == '\n';
578 	}
579       }
580       output_pending_line();
581       if (accumulate)
582 	output_references();
583       else
584 	nreferences = 0;
585       process_commands(line, current_filename, start_lineno + 1);
586       need_syncing = 1;
587     }
588     else {
589       output_pending_line();
590       pending_line = line;
591     }
592   }
593   need_syncing = 0;
594   output_pending_line();
595   if (fp != stdin)
596     fclose(fp);
597 }
598 
599 class label_processing_state {
600   enum {
601     NORMAL,
602     PENDING_LABEL,
603     PENDING_LABEL_POST,
604     PENDING_LABEL_POST_PRE,
605     PENDING_POST
606     } state;
607   label_type type;		// type of pending labels
608   int count;			// number of pending labels
609   reference **rptr;		// pointer to next reference
610   int rcount;			// number of references left
611   FILE *fp;
612   int handle_pending(int c);
613 public:
614   label_processing_state(reference **, int, FILE *);
615   ~label_processing_state();
616   void process(int c);
617 };
618 
output_pending_line()619 static void output_pending_line()
620 {
621   if (label_in_text && !accumulate && ncitations > 0) {
622     label_processing_state state(citation, ncitations, outfp);
623     int len = pending_line.length();
624     for (int i = 0; i < len; i++)
625       state.process((unsigned char)(pending_line[i]));
626   }
627   else
628     put_string(pending_line, outfp);
629   pending_line.clear();
630   if (pending_lf_lines.length() > 0) {
631     put_string(pending_lf_lines, outfp);
632     pending_lf_lines.clear();
633   }
634   if (!accumulate)
635     immediately_output_references();
636   if (need_syncing) {
637     fprintf(outfp, ".lf %d %s\n", current_lineno, current_filename);
638     need_syncing = 0;
639   }
640 }
641 
split_punct(string & line,string & punct)642 static void split_punct(string &line, string &punct)
643 {
644   const char *start = line.contents();
645   const char *end = start + line.length();
646   const char *ptr = start;
647   const char *last_token_start = 0;
648   for (;;) {
649     if (ptr >= end)
650       break;
651     last_token_start = ptr;
652     if (*ptr == PRE_LABEL_MARKER || *ptr == POST_LABEL_MARKER
653 	|| (*ptr >= LABEL_MARKER && *ptr < LABEL_MARKER + N_LABEL_TYPES))
654       ptr++;
655     else if (!get_token(&ptr, end))
656       break;
657   }
658   if (last_token_start) {
659     const token_info *ti = lookup_token(last_token_start, end);
660     if (ti->is_punct()) {
661       punct.append(last_token_start, end - last_token_start);
662       line.set_length(last_token_start - start);
663     }
664   }
665 }
666 
divert_to_temporary_file()667 static void divert_to_temporary_file()
668 {
669   outfp = xtmpfile();
670 }
671 
store_citation(reference * ref)672 static void store_citation(reference *ref)
673 {
674   if (ncitations >= citation_max) {
675     if (citation == 0)
676       citation = new reference*[citation_max = 100];
677     else {
678       reference **old_citation = citation;
679       citation_max *= 2;
680       citation = new reference *[citation_max];
681       memcpy(citation, old_citation, ncitations*sizeof(reference *));
682       a_delete old_citation;
683     }
684   }
685   citation[ncitations++] = ref;
686 }
687 
store_reference(const string & str)688 static unsigned store_reference(const string &str)
689 {
690   if (reference_hash_table == 0) {
691     reference_hash_table = new reference *[17];
692     hash_table_size = 17;
693     for (int i = 0; i < hash_table_size; i++)
694       reference_hash_table[i] = 0;
695   }
696   unsigned flags;
697   reference *ref = make_reference(str, &flags);
698   ref->compute_hash_code();
699   unsigned h = ref->hash();
700   for (reference **ptr = reference_hash_table + (h % hash_table_size);
701        *ptr != 0;
702        ((ptr == reference_hash_table)
703 	? (ptr = reference_hash_table + hash_table_size - 1)
704 	: --ptr))
705     if (same_reference(**ptr, *ref))
706       break;
707   if (*ptr != 0) {
708     if (ref->is_merged())
709       warning("fields ignored because reference already used");
710     delete ref;
711     ref = *ptr;
712   }
713   else {
714     *ptr = ref;
715     ref->set_number(nreferences);
716     nreferences++;
717     ref->pre_compute_label();
718     ref->compute_sort_key();
719     if (nreferences*2 >= hash_table_size) {
720       // Rehash it.
721       reference **old_table = reference_hash_table;
722       int old_size = hash_table_size;
723       hash_table_size = next_size(hash_table_size);
724       reference_hash_table = new reference*[hash_table_size];
725       int i;
726       for (i = 0; i < hash_table_size; i++)
727 	reference_hash_table[i] = 0;
728       for (i = 0; i < old_size; i++)
729 	if (old_table[i]) {
730 	  for (reference **p = (reference_hash_table
731 				+ (old_table[i]->hash() % hash_table_size));
732 	       *p;
733 	       ((p == reference_hash_table)
734 		? (p = reference_hash_table + hash_table_size - 1)
735 		: --p))
736 	    ;
737 	  *p = old_table[i];
738 	}
739       a_delete old_table;
740     }
741   }
742   if (label_in_text)
743     store_citation(ref);
744   return flags;
745 }
746 
immediately_handle_reference(const string & str)747 unsigned immediately_handle_reference(const string &str)
748 {
749   unsigned flags;
750   reference *ref = make_reference(str, &flags);
751   ref->set_number(nreferences);
752   if (label_in_text || label_in_reference) {
753     ref->pre_compute_label();
754     ref->immediate_compute_label();
755   }
756   nreferences++;
757   store_citation(ref);
758   return flags;
759 }
760 
immediately_output_references()761 static void immediately_output_references()
762 {
763   for (int i = 0; i < ncitations; i++) {
764     reference *ref = citation[i];
765     if (label_in_reference) {
766       fputs(".ds [F ", outfp);
767       const string &label = ref->get_label(NORMAL_LABEL);
768       if (label.length() > 0
769 	  && (label[0] == ' ' || label[0] == '\\' || label[0] == '"'))
770 	putc('"', outfp);
771       put_string(label, outfp);
772       putc('\n', outfp);
773     }
774     ref->output(outfp);
775     delete ref;
776   }
777   ncitations = 0;
778 }
779 
output_citation_group(reference ** v,int n,label_type type,FILE * fp)780 static void output_citation_group(reference **v, int n, label_type type,
781 				  FILE *fp)
782 {
783   if (sort_adjacent_labels) {
784     // Do an insertion sort.  Usually n will be very small.
785     for (int i = 1; i < n; i++) {
786       int num = v[i]->get_number();
787       reference *temp = v[i];
788       for (int j = i - 1; j >= 0 && v[j]->get_number() > num; j--)
789 	v[j + 1] = v[j];
790       v[j + 1] = temp;
791     }
792   }
793   // This messes up if !accumulate.
794   if (accumulate && n > 1) {
795     // remove duplicates
796     int j = 1;
797     for (int i = 1; i < n; i++)
798       if (v[i]->get_label(type) != v[i - 1]->get_label(type))
799 	v[j++] = v[i];
800     n = j;
801   }
802   string merged_label;
803   for (int i = 0; i < n; i++) {
804     int nmerged = v[i]->merge_labels(v + i + 1, n - i - 1, type, merged_label);
805     if (nmerged > 0) {
806       put_string(merged_label, fp);
807       i += nmerged;
808     }
809     else
810       put_string(v[i]->get_label(type), fp);
811     if (i < n - 1)
812       put_string(sep_label, fp);
813   }
814 }
815 
816 
label_processing_state(reference ** p,int n,FILE * f)817 label_processing_state::label_processing_state(reference **p, int n, FILE *f)
818 : state(NORMAL), count(0), rptr(p), rcount(n), fp(f)
819 {
820 }
821 
~label_processing_state()822 label_processing_state::~label_processing_state()
823 {
824   int handled = handle_pending(EOF);
825   assert(!handled);
826   assert(rcount == 0);
827 }
828 
handle_pending(int c)829 int label_processing_state::handle_pending(int c)
830 {
831   switch (state) {
832   case NORMAL:
833     break;
834   case PENDING_LABEL:
835     if (c == POST_LABEL_MARKER) {
836       state = PENDING_LABEL_POST;
837       return 1;
838     }
839     else {
840       output_citation_group(rptr, count, type, fp);
841       rptr += count ;
842       rcount -= count;
843       state = NORMAL;
844     }
845     break;
846   case PENDING_LABEL_POST:
847     if (c == PRE_LABEL_MARKER) {
848       state = PENDING_LABEL_POST_PRE;
849       return 1;
850     }
851     else {
852       output_citation_group(rptr, count, type, fp);
853       rptr += count;
854       rcount -= count;
855       put_string(post_label, fp);
856       state = NORMAL;
857     }
858     break;
859   case PENDING_LABEL_POST_PRE:
860     if (c >= LABEL_MARKER
861 	&& c < LABEL_MARKER + N_LABEL_TYPES
862 	&& c - LABEL_MARKER == type) {
863       count += 1;
864       state = PENDING_LABEL;
865       return 1;
866     }
867     else {
868       output_citation_group(rptr, count, type, fp);
869       rptr += count;
870       rcount -= count;
871       put_string(sep_label, fp);
872       state = NORMAL;
873     }
874     break;
875   case PENDING_POST:
876     if (c == PRE_LABEL_MARKER) {
877       put_string(sep_label, fp);
878       state = NORMAL;
879       return 1;
880     }
881     else {
882       put_string(post_label, fp);
883       state = NORMAL;
884     }
885     break;
886   }
887   return 0;
888 }
889 
process(int c)890 void label_processing_state::process(int c)
891 {
892   if (handle_pending(c))
893     return;
894   assert(state == NORMAL);
895   switch (c) {
896   case PRE_LABEL_MARKER:
897     put_string(pre_label, fp);
898     state = NORMAL;
899     break;
900   case POST_LABEL_MARKER:
901     state = PENDING_POST;
902     break;
903   case LABEL_MARKER:
904   case LABEL_MARKER + 1:
905     count = 1;
906     state = PENDING_LABEL;
907     type = label_type(c - LABEL_MARKER);
908     break;
909   default:
910     state = NORMAL;
911     putc(c, fp);
912     break;
913   }
914 }
915 
916 extern "C" {
917 
rcompare(const void * p1,const void * p2)918 static int rcompare(const void *p1, const void *p2)
919 {
920   return compare_reference(**(reference **)p1, **(reference **)p2);
921 }
922 
923 }
924 
output_references()925 void output_references()
926 {
927   assert(accumulate);
928   if (nreferences > 0) {
929     int j = 0;
930     int i;
931     for (i = 0; i < hash_table_size; i++)
932       if (reference_hash_table[i] != 0)
933 	reference_hash_table[j++] = reference_hash_table[i];
934     assert(j == nreferences);
935     for (; j < hash_table_size; j++)
936       reference_hash_table[j] = 0;
937     qsort(reference_hash_table, nreferences, sizeof(reference*), rcompare);
938     for (i = 0; i < nreferences; i++)
939       reference_hash_table[i]->set_number(i);
940     compute_labels(reference_hash_table, nreferences);
941   }
942   if (outfp != stdout) {
943     rewind(outfp);
944     {
945       label_processing_state state(citation, ncitations, stdout);
946       int c;
947       while ((c = getc(outfp)) != EOF)
948 	state.process(c);
949     }
950     ncitations = 0;
951     fclose(outfp);
952     outfp = stdout;
953   }
954   if (nreferences > 0) {
955     fputs(".]<\n", outfp);
956     for (int i = 0; i < nreferences; i++) {
957       if (sort_fields.length() > 0)
958 	reference_hash_table[i]->print_sort_key_comment(outfp);
959       if (label_in_reference) {
960 	fputs(".ds [F ", outfp);
961 	const string &label = reference_hash_table[i]->get_label(NORMAL_LABEL);
962 	if (label.length() > 0
963 	    && (label[0] == ' ' || label[0] == '\\' || label[0] == '"'))
964 	  putc('"', outfp);
965 	put_string(label, outfp);
966 	putc('\n', outfp);
967       }
968       reference_hash_table[i]->output(outfp);
969       delete reference_hash_table[i];
970       reference_hash_table[i] = 0;
971     }
972     fputs(".]>\n", outfp);
973     nreferences = 0;
974   }
975   clear_labels();
976 }
977 
find_reference(const char * query,int query_len)978 static reference *find_reference(const char *query, int query_len)
979 {
980   // This is so that error messages look better.
981   while (query_len > 0 && csspace(query[query_len - 1]))
982     query_len--;
983   string str;
984   for (int i = 0; i < query_len; i++)
985     str += query[i] == '\n' ? ' ' : query[i];
986   str += '\0';
987   possibly_load_default_database();
988   search_list_iterator iter(&database_list, str.contents());
989   reference_id rid;
990   const char *start;
991   int len;
992   if (!iter.next(&start, &len, &rid)) {
993     error("no matches for `%1'", str.contents());
994     return 0;
995   }
996   const char *end = start + len;
997   while (start < end) {
998     if (*start == '%')
999       break;
1000     while (start < end && *start++ != '\n')
1001       ;
1002   }
1003   if (start >= end) {
1004     error("found a reference for `%1' but it didn't contain any fields",
1005 	  str.contents());
1006     return 0;
1007   }
1008   reference *result = new reference(start, end - start, &rid);
1009   if (iter.next(&start, &len, &rid))
1010     warning("multiple matches for `%1'", str.contents());
1011   return result;
1012 }
1013 
make_reference(const string & str,unsigned * flagsp)1014 static reference *make_reference(const string &str, unsigned *flagsp)
1015 {
1016   const char *start = str.contents();
1017   const char *end = start + str.length();
1018   const char *ptr = start;
1019   while (ptr < end) {
1020     if (*ptr == '%')
1021       break;
1022     while (ptr < end && *ptr++ != '\n')
1023       ;
1024   }
1025   *flagsp = 0;
1026   for (; start < ptr; start++) {
1027     if (*start == '#')
1028       *flagsp = (SHORT_LABEL | (*flagsp & (FORCE_RIGHT_BRACKET
1029 					   | FORCE_LEFT_BRACKET)));
1030     else if (*start == '[')
1031       *flagsp |= FORCE_LEFT_BRACKET;
1032     else if (*start == ']')
1033       *flagsp |= FORCE_RIGHT_BRACKET;
1034     else if (!csspace(*start))
1035       break;
1036   }
1037   if (start >= end) {
1038     error("empty reference");
1039     return new reference;
1040   }
1041   reference *database_ref = 0;
1042   if (start < ptr)
1043     database_ref = find_reference(start, ptr - start);
1044   reference *inline_ref = 0;
1045   if (ptr < end)
1046     inline_ref = new reference(ptr, end - ptr);
1047   if (inline_ref) {
1048     if (database_ref) {
1049       database_ref->merge(*inline_ref);
1050       delete inline_ref;
1051       return database_ref;
1052     }
1053     else
1054       return inline_ref;
1055   }
1056   else if (database_ref)
1057     return database_ref;
1058   else
1059     return new reference;
1060 }
1061 
do_ref(const string & str)1062 static void do_ref(const string &str)
1063 {
1064   if (accumulate)
1065     (void)store_reference(str);
1066   else {
1067     (void)immediately_handle_reference(str);
1068     immediately_output_references();
1069   }
1070 }
1071 
trim_blanks(string & str)1072 static void trim_blanks(string &str)
1073 {
1074   const char *start = str.contents();
1075   const char *end = start + str.length();
1076   while (end > start && end[-1] != '\n' && csspace(end[-1]))
1077     --end;
1078   str.set_length(end - start);
1079 }
1080 
do_bib(const char * filename)1081 void do_bib(const char *filename)
1082 {
1083   FILE *fp;
1084   if (strcmp(filename, "-") == 0)
1085     fp = stdin;
1086   else {
1087     errno = 0;
1088     fp = fopen(filename, "r");
1089     if (fp == 0) {
1090       error("can't open `%1': %2", filename, strerror(errno));
1091       return;
1092     }
1093     current_filename = filename;
1094   }
1095   enum {
1096     START, MIDDLE, BODY, BODY_START, BODY_BLANK, BODY_DOT
1097     } state = START;
1098   string body;
1099   for (;;) {
1100     int c = getc(fp);
1101     if (c == EOF)
1102       break;
1103     if (illegal_input_char(c)) {
1104       error("illegal input character code %1", c);
1105       continue;
1106     }
1107     switch (state) {
1108     case START:
1109       if (c == '%') {
1110 	body = c;
1111 	state = BODY;
1112       }
1113       else if (c != '\n')
1114 	state = MIDDLE;
1115       break;
1116     case MIDDLE:
1117       if (c == '\n')
1118 	state = START;
1119       break;
1120     case BODY:
1121       body += c;
1122       if (c == '\n')
1123 	state = BODY_START;
1124       break;
1125     case BODY_START:
1126       if (c == '\n') {
1127 	do_ref(body);
1128 	state = START;
1129       }
1130       else if (c == '.')
1131 	state = BODY_DOT;
1132       else if (csspace(c)) {
1133 	state = BODY_BLANK;
1134 	body += c;
1135       }
1136       else {
1137 	body += c;
1138 	state = BODY;
1139       }
1140       break;
1141     case BODY_BLANK:
1142       if (c == '\n') {
1143 	trim_blanks(body);
1144 	do_ref(body);
1145 	state = START;
1146       }
1147       else if (csspace(c))
1148 	body += c;
1149       else {
1150 	body += c;
1151 	state = BODY;
1152       }
1153       break;
1154     case BODY_DOT:
1155       if (c == ']') {
1156 	do_ref(body);
1157 	state = MIDDLE;
1158       }
1159       else {
1160 	body += '.';
1161 	body += c;
1162 	state = c == '\n' ? BODY_START : BODY;
1163       }
1164       break;
1165     default:
1166       assert(0);
1167     }
1168     if (c == '\n')
1169       current_lineno++;
1170   }
1171   switch (state) {
1172   case START:
1173   case MIDDLE:
1174     break;
1175   case BODY:
1176     body += '\n';
1177     do_ref(body);
1178     break;
1179   case BODY_DOT:
1180   case BODY_START:
1181     do_ref(body);
1182     break;
1183   case BODY_BLANK:
1184     trim_blanks(body);
1185     do_ref(body);
1186     break;
1187   }
1188   fclose(fp);
1189 }
1190 
1191 // from the Dragon Book
1192 
/* Hash the len bytes starting at s.
   For each byte the hash is shifted left four bits and the byte added;
   whenever any of the bits selected by 0xf0000000 become set, they are
   folded back in lower down and cleared. */
unsigned hash_string(const char *s, int len)
{
  unsigned h = 0;
  int i;
  for (i = 0; i < len; i++) {
    unsigned g;
    h = (h << 4) + s[i];
    g = h & 0xf0000000;
    if (g != 0) {
      h ^= g >> 24;
      h ^= g;
    }
  }
  return h;
}
1207 
/* Return the smallest entry of a fixed table of sizes that is greater
   than n.  The table is terminated by 0; if no entry is large enough
   the assertion fails. */
int next_size(int n)
{
  static const int table_sizes[] = {
    101, 503, 1009, 2003, 3001, 4001, 5003, 10007, 20011, 40009,
    80021, 160001, 500009, 1000003, 2000003, 4000037, 8000009,
    16000057, 32000011, 64000031, 128000003, 0
  };

  /* Declared before the loop because it is examined after it. */
  const int *p = table_sizes;
  while (*p != 0 && *p <= n)
    p++;
  assert(*p != 0);
  return *p;
}
1221 
1222