1 /*	$NetBSD: refer.cpp,v 1.1.1.1 2016/01/13 18:41:49 christos Exp $	*/
2 
3 // -*- C++ -*-
4 /* Copyright (C) 1989-1992, 2000, 2001, 2002, 2004
5    Free Software Foundation, Inc.
6      Written by James Clark (jjc@jclark.com)
7 
8 This file is part of groff.
9 
10 groff is free software; you can redistribute it and/or modify it under
11 the terms of the GNU General Public License as published by the Free
12 Software Foundation; either version 2, or (at your option) any later
13 version.
14 
15 groff is distributed in the hope that it will be useful, but WITHOUT ANY
16 WARRANTY; without even the implied warranty of MERCHANTABILITY or
17 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
18 for more details.
19 
20 You should have received a copy of the GNU General Public License along
21 with groff; see the file COPYING.  If not, write to the Free Software
22 Foundation, 51 Franklin St - Fifth Floor, Boston, MA 02110-1301, USA. */
23 
24 #include "refer.h"
25 #include "refid.h"
26 #include "ref.h"
27 #include "token.h"
28 #include "search.h"
29 #include "command.h"
30 
31 extern "C" const char *Version_string;
32 
// Control characters embedded in pending text lines by do_file() to mark
// where citation labels belong.  label_processing_state::process() later
// replaces them with the real label text.
const char PRE_LABEL_MARKER = '\013';
const char POST_LABEL_MARKER = '\014';
const char LABEL_MARKER = '\015'; // label_type is added on

// Flag bits that make_reference() may set in addition to the label type;
// do_file() masks them off before converting the flags to a label_type.
#define FORCE_LEFT_BRACKET 04
#define FORCE_RIGHT_BRACKET 010

// Current output stream: stdout, or the temporary file installed by
// divert_to_temporary_file() while labels in the text are still unresolved.
static FILE *outfp = stdout;

// Field-selection strings and formatting options.  Those assigned from the
// command line are set in main(); the rest are only defined here.
string capitalize_fields;		// argument of -c
string reverse_fields;			// "A" followed by the argument of -a
string abbreviate_fields;
string period_before_last_name = ". ";
string period_before_initial = ".";
string period_before_hyphen = "";
string period_before_other = ". ";
string sort_fields;			// argument of -s ("AD" if none given)
int annotation_field = -1;		// set by -B
string annotation_macro;		// set by -B
string discard_fields = "XYZ";
string pre_label = "\\*([.";		// written for PRE_LABEL_MARKER
string post_label = "\\*(.]";		// written after a label group
string sep_label = ", ";		// written between adjacent labels
int accumulate = 0;			// set by -e and -s
int move_punctuation = 0;		// set by -P
int abbreviate_label_ranges = 0;
string label_range_indicator;
int label_in_text = 1;			// cleared by -b and -B
int label_in_reference = 1;		// cleared by -b and -B
int date_as_label = 0;
int sort_adjacent_labels = 0;
// Join exactly two authors with this.
string join_authors_exactly_two = " and ";
// When there are more than two authors join the last two with this.
string join_authors_last_two = ", and ";
// Otherwise join authors with this.
string join_authors_default = ", ";
string separate_label_second_parts = ", ";
// Use this string to represent that there are other authors.
string et_al = " et al";
// Use et al only if it can replace at least this many authors.
int et_al_min_elide = 2;
// Use et al only if the total number of authors is at least this.
int et_al_min_total = 3;


int compatible_flag = 0;		// set by -C

int short_label_flag = 0;

// Cleared by -R; when zero, do_file() does not treat `.R1' specially.
static int recognize_R1_R2 = 1;

search_list database_list;		// -p adds files to this list
int search_default = 1;			// cleared by -n
// Ensures possibly_load_default_database() adds the default only once.
static int default_database_loaded = 0;

// Growable array of the references cited since the array was last emptied;
// maintained by store_citation().
static reference **citation = 0;
static int ncitations = 0;
static int citation_max = 0;

// Open-addressing hash table of accumulated references; maintained by
// store_reference() and emptied by output_references().
static reference **reference_hash_table = 0;
static int hash_table_size;
static int nreferences = 0;

// When set, output_pending_line() writes a `.lf' request so that the
// downstream line numbering matches the input again.
static int need_syncing = 0;
string pending_line;			// last text line, not yet written
string pending_lf_lines;		// `.lf' lines seen after pending_line

static void output_pending_line();
static unsigned immediately_handle_reference(const string &);
static void immediately_output_references();
static unsigned store_reference(const string &);
static void divert_to_temporary_file();
static reference *make_reference(const string &, unsigned *);
static void usage(FILE *stream);
static void do_file(const char *);
static void split_punct(string &line, string &punct);
static void output_citation_group(reference **v, int n, label_type, FILE *fp);
static void possibly_load_default_database();
112 
main(int argc,char ** argv)113 int main(int argc, char **argv)
114 {
115   program_name = argv[0];
116   static char stderr_buf[BUFSIZ];
117   setbuf(stderr, stderr_buf);
118   outfp = stdout;
119   int finished_options = 0;
120   int bib_flag = 0;
121   int done_spec = 0;
122 
123   for (--argc, ++argv;
124        !finished_options && argc > 0 && argv[0][0] == '-'
125        && argv[0][1] != '\0';
126        argv++, argc--) {
127     const char *opt = argv[0] + 1;
128     while (opt != 0 && *opt != '\0') {
129       switch (*opt) {
130       case 'C':
131 	compatible_flag = 1;
132 	opt++;
133 	break;
134       case 'B':
135 	bib_flag = 1;
136 	label_in_reference = 0;
137 	label_in_text = 0;
138 	++opt;
139 	if (*opt == '\0') {
140 	  annotation_field = 'X';
141 	  annotation_macro = "AP";
142 	}
143 	else if (csalnum(opt[0]) && opt[1] == '.' && opt[2] != '\0') {
144 	  annotation_field = opt[0];
145 	  annotation_macro = opt + 2;
146 	}
147 	opt = 0;
148 	break;
149       case 'P':
150 	move_punctuation = 1;
151 	opt++;
152 	break;
153       case 'R':
154 	recognize_R1_R2 = 0;
155 	opt++;
156 	break;
157       case 'S':
158 	// Not a very useful spec.
159 	set_label_spec("(A.n|Q)', '(D.y|D)");
160 	done_spec = 1;
161 	pre_label = " (";
162 	post_label = ")";
163 	sep_label = "; ";
164 	opt++;
165 	break;
166       case 'V':
167 	verify_flag = 1;
168 	opt++;
169 	break;
170       case 'f':
171 	{
172 	  const char *num = 0;
173 	  if (*++opt == '\0') {
174 	    if (argc > 1) {
175 	      num = *++argv;
176 	      --argc;
177 	    }
178 	    else {
179 	      error("option `f' requires an argument");
180 	      usage(stderr);
181 	      exit(1);
182 	    }
183 	  }
184 	  else {
185 	    num = opt;
186 	    opt = 0;
187 	  }
188 	  const char *ptr;
189 	  for (ptr = num; *ptr; ptr++)
190 	    if (!csdigit(*ptr)) {
191 	      error("bad character `%1' in argument to -f option", *ptr);
192 	      break;
193 	    }
194 	  if (*ptr == '\0') {
195 	    string spec;
196 	    spec = '%';
197 	    spec += num;
198 	    spec += '\0';
199 	    set_label_spec(spec.contents());
200 	    done_spec = 1;
201 	  }
202 	  break;
203 	}
204       case 'b':
205 	label_in_text = 0;
206 	label_in_reference = 0;
207 	opt++;
208 	break;
209       case 'e':
210 	accumulate = 1;
211 	opt++;
212 	break;
213       case 'c':
214 	capitalize_fields = ++opt;
215 	opt = 0;
216 	break;
217       case 'k':
218 	{
219 	  char buf[5];
220 	  if (csalpha(*++opt))
221 	    buf[0] = *opt++;
222 	  else {
223 	    if (*opt != '\0')
224 	      error("bad field name `%1'", *opt++);
225 	    buf[0] = 'L';
226 	  }
227 	  buf[1] = '~';
228 	  buf[2] = '%';
229 	  buf[3] = 'a';
230 	  buf[4] = '\0';
231 	  set_label_spec(buf);
232 	  done_spec = 1;
233 	}
234 	break;
235       case 'a':
236 	{
237 	  const char *ptr;
238 	  for (ptr = ++opt; *ptr; ptr++)
239 	    if (!csdigit(*ptr)) {
240 	      error("argument to `a' option not a number");
241 	      break;
242 	    }
243 	  if (*ptr == '\0') {
244 	    reverse_fields = 'A';
245 	    reverse_fields += opt;
246 	  }
247 	  opt = 0;
248 	}
249 	break;
250       case 'i':
251 	linear_ignore_fields = ++opt;
252 	opt = 0;
253 	break;
254       case 'l':
255 	{
256 	  char buf[INT_DIGITS*2 + 11]; // A.n+2D.y-3%a
257 	  strcpy(buf, "A.n");
258 	  if (*++opt != '\0' && *opt != ',') {
259 	    char *ptr;
260 	    long n = strtol(opt, &ptr, 10);
261 	    if (n == 0 && ptr == opt) {
262 	      error("bad integer `%1' in `l' option", opt);
263 	      opt = 0;
264 	      break;
265 	    }
266 	    if (n < 0)
267 	      n = 0;
268 	    opt = ptr;
269 	    sprintf(strchr(buf, '\0'), "+%ld", n);
270 	  }
271 	  strcat(buf, "D.y");
272 	  if (*opt == ',')
273 	    opt++;
274 	  if (*opt != '\0') {
275 	    char *ptr;
276 	    long n = strtol(opt, &ptr, 10);
277 	    if (n == 0 && ptr == opt) {
278 	      error("bad integer `%1' in `l' option", opt);
279 	      opt = 0;
280 	      break;
281 	    }
282 	    if (n < 0)
283 	      n = 0;
284 	    sprintf(strchr(buf, '\0'), "-%ld", n);
285 	    opt = ptr;
286 	    if (*opt != '\0')
287 	      error("argument to `l' option not of form `m,n'");
288 	  }
289 	  strcat(buf, "%a");
290 	  if (!set_label_spec(buf))
291 	    assert(0);
292 	  done_spec = 1;
293 	}
294 	break;
295       case 'n':
296 	search_default = 0;
297 	opt++;
298 	break;
299       case 'p':
300 	{
301 	  const char *filename = 0;
302 	  if (*++opt == '\0') {
303 	    if (argc > 1) {
304 	      filename = *++argv;
305 	      argc--;
306 	    }
307 	    else {
308 	      error("option `p' requires an argument");
309 	      usage(stderr);
310 	      exit(1);
311 	    }
312 	  }
313 	  else {
314 	    filename = opt;
315 	    opt = 0;
316 	  }
317 	  database_list.add_file(filename);
318 	}
319 	break;
320       case 's':
321 	if (*++opt == '\0')
322 	  sort_fields = "AD";
323 	else {
324 	  sort_fields = opt;
325 	  opt = 0;
326 	}
327 	accumulate = 1;
328 	break;
329       case 't':
330 	{
331 	  char *ptr;
332 	  long n = strtol(opt, &ptr, 10);
333 	  if (n == 0 && ptr == opt) {
334 	    error("bad integer `%1' in `t' option", opt);
335 	    opt = 0;
336 	    break;
337 	  }
338 	  if (n < 1)
339 	    n = 1;
340 	  linear_truncate_len = int(n);
341 	  opt = ptr;
342 	  break;
343 	}
344       case '-':
345 	if (opt[1] == '\0') {
346 	  finished_options = 1;
347 	  opt++;
348 	  break;
349 	}
350 	if (strcmp(opt,"-version")==0) {
351       case 'v':
352 	  printf("GNU refer (groff) version %s\n", Version_string);
353 	  exit(0);
354 	  break;
355 	}
356 	if (strcmp(opt,"-help")==0) {
357 	  usage(stdout);
358 	  exit(0);
359 	  break;
360 	}
361 	// fall through
362       default:
363 	error("unrecognized option `%1'", *opt);
364 	usage(stderr);
365 	exit(1);
366 	break;
367       }
368     }
369   }
370   if (!done_spec)
371     set_label_spec("%1");
372   if (argc <= 0) {
373     if (bib_flag)
374       do_bib("-");
375     else
376       do_file("-");
377   }
378   else {
379     for (int i = 0; i < argc; i++) {
380       if (bib_flag)
381 	do_bib(argv[i]);
382       else
383 	do_file(argv[i]);
384     }
385   }
386   if (accumulate)
387     output_references();
388   if (fflush(stdout) < 0)
389     fatal("output error");
390   return 0;
391 }
392 
usage(FILE * stream)393 static void usage(FILE *stream)
394 {
395   fprintf(stream,
396 "usage: %s [-benvCPRS] [-aN] [-cXYZ] [-fN] [-iXYZ] [-kX] [-lM,N] [-p file]\n"
397 "       [-sXYZ] [-tN] [-BL.M] [files ...]\n",
398 	  program_name);
399 }
400 
possibly_load_default_database()401 static void possibly_load_default_database()
402 {
403   if (search_default && !default_database_loaded) {
404     char *filename = getenv("REFER");
405     if (filename)
406       database_list.add_file(filename);
407     else
408       database_list.add_file(DEFAULT_INDEX, 1);
409     default_database_loaded = 1;
410   }
411 }
412 
is_list(const string & str)413 static int is_list(const string &str)
414 {
415   const char *start = str.contents();
416   const char *end = start + str.length();
417   while (end > start && csspace(end[-1]))
418     end--;
419   while (start < end && csspace(*start))
420     start++;
421   return end - start == 6 && memcmp(start, "$LIST$", 6) == 0;
422 }
423 
// Process one input file (or standard input when `filename' is "-"),
// copying it to `outfp' while handling three kinds of special lines:
//
//   .[ ... .]   a citation; the text between the lines is turned into a
//               reference (or, if it is `$LIST$', the accumulated
//               references are written out);
//   .lf         line-number requests, which are remembered and passed on;
//   .R1 ... .R2 a block of commands handed to process_commands(), unless
//               recognition of these lines has been turned off.
//
// Every other line becomes the new `pending_line'; it is held back for one
// step so that a following citation can be attached to the end of it.
static void do_file(const char *filename)
{
  FILE *fp;
  if (strcmp(filename, "-") == 0) {
    fp = stdin;
  }
  else {
    errno = 0;
    fp = fopen(filename, "r");
    if (fp == 0) {
      error("can't open `%1': %2", filename, strerror(errno));
      return;
    }
  }
  current_filename = filename;
  fprintf(outfp, ".lf 1 %s\n", filename);
  string line;
  current_lineno = 0;
  for (;;) {
    // Read one line, dropping invalid characters.  A final line without a
    // newline gets one appended, so `line' is empty only at end of file.
    line.clear();
    for (;;) {
      int c = getc(fp);
      if (c == EOF) {
	if (line.length() > 0)
	  line += '\n';
	break;
      }
      if (invalid_input_char(c))
	error("invalid input character code %1", c);
      else {
	line += c;
	if (c == '\n')
	  break;
      }
    }
    int len = line.length();
    if (len == 0)
      break;
    current_lineno++;
    if (len >= 2 && line[0] == '.' && line[1] == '[') {
      int start_lineno = current_lineno;
      int start_of_line = 1;
      string str;		// text between the `.[' and `.]' lines
      string post;		// text following `.]' on its line
      // Text following `.[' on its line, without the trailing newline.
      string pre(line.contents() + 2, line.length() - 3);
      for (;;) {
	int c = getc(fp);
	if (c == EOF) {
	  error_with_file_and_line(current_filename, start_lineno,
				   "missing `.]' line");
	  break;
	}
	if (start_of_line)
	  current_lineno++;
	if (start_of_line && c == '.') {
	  // Look one character ahead for `.]'; anything else is pushed back
	  // and the `.' is treated as ordinary text.
	  int d = getc(fp);
	  if (d == ']') {
	    while ((d = getc(fp)) != '\n' && d != EOF) {
	      if (invalid_input_char(d))
		error("invalid input character code %1", d);
	      else
		post += d;
	    }
	    break;
	  }
	  if (d != EOF)
	    ungetc(d, fp);
	}
	if (invalid_input_char(c))
	  error("invalid input character code %1", c);
	else
	  str += c;
	start_of_line = (c == '\n');
      }
      if (is_list(str)) {
	output_pending_line();
	if (accumulate)
	  output_references();
	else
	  error("found `$LIST$' but not accumulating references");
      }
      else {
	unsigned flags = (accumulate
			  ? store_reference(str)
			  : immediately_handle_reference(str));
	if (label_in_text) {
	  // When accumulating, the text is written to a temporary file
	  // because the labels are not known until output_references().
	  if (accumulate && outfp == stdout)
	    divert_to_temporary_file();
	  if (pending_line.length() == 0) {
	    warning("can't attach citation to previous line");
	  }
	  else
	    // Drop the trailing newline so the label joins the line.
	    pending_line.set_length(pending_line.length() - 1);
	  string punct;
	  if (move_punctuation)
	    split_punct(pending_line, punct);
	  int have_text = pre.length() > 0 || post.length() > 0;
	  // The low bits of `flags' are the label type; the FORCE_* bits
	  // are handled separately below.
	  label_type lt = label_type(flags & ~(FORCE_LEFT_BRACKET
					       |FORCE_RIGHT_BRACKET));
	  if ((flags & FORCE_LEFT_BRACKET) || !have_text)
	    pending_line += PRE_LABEL_MARKER;
	  pending_line += pre;
	  char lm = LABEL_MARKER + (int)lt;
	  pending_line += lm;
	  pending_line += post;
	  if ((flags & FORCE_RIGHT_BRACKET) || !have_text)
	    pending_line += POST_LABEL_MARKER;
	  pending_line += punct;
	  pending_line += '\n';
	}
      }
      need_syncing = 1;
    }
    else if (len >= 4
	     && line[0] == '.' && line[1] == 'l' && line[2] == 'f'
	     && (compatible_flag || line[3] == '\n' || line[3] == ' ')) {
      pending_lf_lines += line;
      // Terminate the line so its arguments can be parsed as a C string.
      line += '\0';
      if (interpret_lf_args(line.contents() + 3))
	current_lineno--;
    }
    else if (recognize_R1_R2
	     && len >= 4
	     && line[0] == '.' && line[1] == 'R' && line[2] == '1'
	     && (compatible_flag || line[3] == '\n' || line[3] == ' ')) {
      // Collect everything up to the `.R2' line into `line'.
      line.clear();
      int start_of_line = 1;
      int start_lineno = current_lineno;
      for (;;) {
	int c = getc(fp);
	if (c != EOF && start_of_line)
	  current_lineno++;
	if (start_of_line && c == '.') {
	  // Match `.R2' one character at a time; on a mismatch the
	  // characters consumed so far are put back into `line'.
	  c = getc(fp);
	  if (c == 'R') {
	    c = getc(fp);
	    if (c == '2') {
	      c = getc(fp);
	      if (compatible_flag || c == ' ' || c == '\n' || c == EOF) {
		// Discard the rest of the `.R2' line.
		while (c != EOF && c != '\n')
		  c = getc(fp);
		break;
	      }
	      else {
		line += '.';
		line += 'R';
		line += '2';
	      }
	    }
	    else {
	      line += '.';
	      line += 'R';
	    }
	  }
	  else
	    line += '.';
	}
	if (c == EOF) {
	  error_with_file_and_line(current_filename, start_lineno,
				   "missing `.R2' line");
	  break;
	}
	if (invalid_input_char(c))
	  error("invalid input character code %1", int(c));
	else {
	  line += c;
	  start_of_line = c == '\n';
	}
      }
      output_pending_line();
      if (accumulate)
	output_references();
      else
	nreferences = 0;
      process_commands(line, current_filename, start_lineno + 1);
      need_syncing = 1;
    }
    else {
      // Ordinary text: write the previous line and hold this one back.
      output_pending_line();
      pending_line = line;
    }
  }
  need_syncing = 0;
  output_pending_line();
  if (fp != stdin)
    fclose(fp);
}
611 
// State machine that copies characters to a stream one at a time, replacing
// the PRE_LABEL_MARKER / LABEL_MARKER / POST_LABEL_MARKER characters with
// the label text of successive references.  Adjacent labels of the same
// type are collected and written as one group.
class label_processing_state {
  enum {
    NORMAL,			// nothing pending
    PENDING_LABEL,		// label marker(s) seen, group not written yet
    PENDING_LABEL_POST,		// ... then a POST_LABEL_MARKER
    PENDING_LABEL_POST_PRE,	// ... then a PRE_LABEL_MARKER
    PENDING_POST		// POST_LABEL_MARKER seen with no label pending
    } state;
  label_type type;		// type of pending labels
  int count;			// number of pending labels
  reference **rptr;		// pointer to next reference
  int rcount;			// number of references left
  FILE *fp;			// stream the processed text is written to
  // Deals with any pending state given the next character `c'; returns
  // non-zero if `c' was consumed.
  int handle_pending(int c);
public:
  label_processing_state(reference **, int, FILE *);
  // Flushes pending output; asserts that every reference was consumed.
  ~label_processing_state();
  // Feed one character of input.
  void process(int c);
};
631 
output_pending_line()632 static void output_pending_line()
633 {
634   if (label_in_text && !accumulate && ncitations > 0) {
635     label_processing_state state(citation, ncitations, outfp);
636     int len = pending_line.length();
637     for (int i = 0; i < len; i++)
638       state.process((unsigned char)(pending_line[i]));
639   }
640   else
641     put_string(pending_line, outfp);
642   pending_line.clear();
643   if (pending_lf_lines.length() > 0) {
644     put_string(pending_lf_lines, outfp);
645     pending_lf_lines.clear();
646   }
647   if (!accumulate)
648     immediately_output_references();
649   if (need_syncing) {
650     fprintf(outfp, ".lf %d %s\n", current_lineno, current_filename);
651     need_syncing = 0;
652   }
653 }
654 
split_punct(string & line,string & punct)655 static void split_punct(string &line, string &punct)
656 {
657   const char *start = line.contents();
658   const char *end = start + line.length();
659   const char *ptr = start;
660   const char *last_token_start = 0;
661   for (;;) {
662     if (ptr >= end)
663       break;
664     last_token_start = ptr;
665     if (*ptr == PRE_LABEL_MARKER || *ptr == POST_LABEL_MARKER
666 	|| (*ptr >= LABEL_MARKER && *ptr < LABEL_MARKER + N_LABEL_TYPES))
667       ptr++;
668     else if (!get_token(&ptr, end))
669       break;
670   }
671   if (last_token_start) {
672     const token_info *ti = lookup_token(last_token_start, end);
673     if (ti->is_punct()) {
674       punct.append(last_token_start, end - last_token_start);
675       line.set_length(last_token_start - start);
676     }
677   }
678 }
679 
divert_to_temporary_file()680 static void divert_to_temporary_file()
681 {
682   outfp = xtmpfile();
683 }
684 
store_citation(reference * ref)685 static void store_citation(reference *ref)
686 {
687   if (ncitations >= citation_max) {
688     if (citation == 0)
689       citation = new reference*[citation_max = 100];
690     else {
691       reference **old_citation = citation;
692       citation_max *= 2;
693       citation = new reference *[citation_max];
694       memcpy(citation, old_citation, ncitations*sizeof(reference *));
695       a_delete old_citation;
696     }
697   }
698   citation[ncitations++] = ref;
699 }
700 
store_reference(const string & str)701 static unsigned store_reference(const string &str)
702 {
703   if (reference_hash_table == 0) {
704     reference_hash_table = new reference *[17];
705     hash_table_size = 17;
706     for (int i = 0; i < hash_table_size; i++)
707       reference_hash_table[i] = 0;
708   }
709   unsigned flags;
710   reference *ref = make_reference(str, &flags);
711   ref->compute_hash_code();
712   unsigned h = ref->hash();
713   reference **ptr;
714   for (ptr = reference_hash_table + (h % hash_table_size);
715        *ptr != 0;
716        ((ptr == reference_hash_table)
717 	? (ptr = reference_hash_table + hash_table_size - 1)
718 	: --ptr))
719     if (same_reference(**ptr, *ref))
720       break;
721   if (*ptr != 0) {
722     if (ref->is_merged())
723       warning("fields ignored because reference already used");
724     delete ref;
725     ref = *ptr;
726   }
727   else {
728     *ptr = ref;
729     ref->set_number(nreferences);
730     nreferences++;
731     ref->pre_compute_label();
732     ref->compute_sort_key();
733     if (nreferences*2 >= hash_table_size) {
734       // Rehash it.
735       reference **old_table = reference_hash_table;
736       int old_size = hash_table_size;
737       hash_table_size = next_size(hash_table_size);
738       reference_hash_table = new reference*[hash_table_size];
739       int i;
740       for (i = 0; i < hash_table_size; i++)
741 	reference_hash_table[i] = 0;
742       for (i = 0; i < old_size; i++)
743 	if (old_table[i]) {
744 	  reference **p;
745 	  for (p = (reference_hash_table
746 				+ (old_table[i]->hash() % hash_table_size));
747 	       *p;
748 	       ((p == reference_hash_table)
749 		? (p = reference_hash_table + hash_table_size - 1)
750 		: --p))
751 	    ;
752 	  *p = old_table[i];
753 	}
754       a_delete old_table;
755     }
756   }
757   if (label_in_text)
758     store_citation(ref);
759   return flags;
760 }
761 
immediately_handle_reference(const string & str)762 unsigned immediately_handle_reference(const string &str)
763 {
764   unsigned flags;
765   reference *ref = make_reference(str, &flags);
766   ref->set_number(nreferences);
767   if (label_in_text || label_in_reference) {
768     ref->pre_compute_label();
769     ref->immediate_compute_label();
770   }
771   nreferences++;
772   store_citation(ref);
773   return flags;
774 }
775 
immediately_output_references()776 static void immediately_output_references()
777 {
778   for (int i = 0; i < ncitations; i++) {
779     reference *ref = citation[i];
780     if (label_in_reference) {
781       fputs(".ds [F ", outfp);
782       const string &label = ref->get_label(NORMAL_LABEL);
783       if (label.length() > 0
784 	  && (label[0] == ' ' || label[0] == '\\' || label[0] == '"'))
785 	putc('"', outfp);
786       put_string(label, outfp);
787       putc('\n', outfp);
788     }
789     ref->output(outfp);
790     delete ref;
791   }
792   ncitations = 0;
793 }
794 
output_citation_group(reference ** v,int n,label_type type,FILE * fp)795 static void output_citation_group(reference **v, int n, label_type type,
796 				  FILE *fp)
797 {
798   if (sort_adjacent_labels) {
799     // Do an insertion sort.  Usually n will be very small.
800     for (int i = 1; i < n; i++) {
801       int num = v[i]->get_number();
802       reference *temp = v[i];
803       int j;
804       for (j = i - 1; j >= 0 && v[j]->get_number() > num; j--)
805 	v[j + 1] = v[j];
806       v[j + 1] = temp;
807     }
808   }
809   // This messes up if !accumulate.
810   if (accumulate && n > 1) {
811     // remove duplicates
812     int j = 1;
813     for (int i = 1; i < n; i++)
814       if (v[i]->get_label(type) != v[i - 1]->get_label(type))
815 	v[j++] = v[i];
816     n = j;
817   }
818   string merged_label;
819   for (int i = 0; i < n; i++) {
820     int nmerged = v[i]->merge_labels(v + i + 1, n - i - 1, type, merged_label);
821     if (nmerged > 0) {
822       put_string(merged_label, fp);
823       i += nmerged;
824     }
825     else
826       put_string(v[i]->get_label(type), fp);
827     if (i < n - 1)
828       put_string(sep_label, fp);
829   }
830 }
831 
832 
label_processing_state(reference ** p,int n,FILE * f)833 label_processing_state::label_processing_state(reference **p, int n, FILE *f)
834 : state(NORMAL), count(0), rptr(p), rcount(n), fp(f)
835 {
836 }
837 
~label_processing_state()838 label_processing_state::~label_processing_state()
839 {
840   int handled = handle_pending(EOF);
841   assert(!handled);
842   assert(rcount == 0);
843 }
844 
handle_pending(int c)845 int label_processing_state::handle_pending(int c)
846 {
847   switch (state) {
848   case NORMAL:
849     break;
850   case PENDING_LABEL:
851     if (c == POST_LABEL_MARKER) {
852       state = PENDING_LABEL_POST;
853       return 1;
854     }
855     else {
856       output_citation_group(rptr, count, type, fp);
857       rptr += count ;
858       rcount -= count;
859       state = NORMAL;
860     }
861     break;
862   case PENDING_LABEL_POST:
863     if (c == PRE_LABEL_MARKER) {
864       state = PENDING_LABEL_POST_PRE;
865       return 1;
866     }
867     else {
868       output_citation_group(rptr, count, type, fp);
869       rptr += count;
870       rcount -= count;
871       put_string(post_label, fp);
872       state = NORMAL;
873     }
874     break;
875   case PENDING_LABEL_POST_PRE:
876     if (c >= LABEL_MARKER
877 	&& c < LABEL_MARKER + N_LABEL_TYPES
878 	&& c - LABEL_MARKER == type) {
879       count += 1;
880       state = PENDING_LABEL;
881       return 1;
882     }
883     else {
884       output_citation_group(rptr, count, type, fp);
885       rptr += count;
886       rcount -= count;
887       put_string(sep_label, fp);
888       state = NORMAL;
889     }
890     break;
891   case PENDING_POST:
892     if (c == PRE_LABEL_MARKER) {
893       put_string(sep_label, fp);
894       state = NORMAL;
895       return 1;
896     }
897     else {
898       put_string(post_label, fp);
899       state = NORMAL;
900     }
901     break;
902   }
903   return 0;
904 }
905 
process(int c)906 void label_processing_state::process(int c)
907 {
908   if (handle_pending(c))
909     return;
910   assert(state == NORMAL);
911   switch (c) {
912   case PRE_LABEL_MARKER:
913     put_string(pre_label, fp);
914     state = NORMAL;
915     break;
916   case POST_LABEL_MARKER:
917     state = PENDING_POST;
918     break;
919   case LABEL_MARKER:
920   case LABEL_MARKER + 1:
921     count = 1;
922     state = PENDING_LABEL;
923     type = label_type(c - LABEL_MARKER);
924     break;
925   default:
926     state = NORMAL;
927     putc(c, fp);
928     break;
929   }
930 }
931 
932 extern "C" {
933 
rcompare(const void * p1,const void * p2)934 int rcompare(const void *p1, const void *p2)
935 {
936   return compare_reference(**(reference **)p1, **(reference **)p2);
937 }
938 
939 }
940 
output_references()941 void output_references()
942 {
943   assert(accumulate);
944   if (!hash_table_size) {
945     error("nothing to reference (probably `bibliography' before `sort')");
946     accumulate = 0;
947     nreferences = 0;
948     return;
949   }
950   if (nreferences > 0) {
951     int j = 0;
952     int i;
953     for (i = 0; i < hash_table_size; i++)
954       if (reference_hash_table[i] != 0)
955 	reference_hash_table[j++] = reference_hash_table[i];
956     assert(j == nreferences);
957     for (; j < hash_table_size; j++)
958       reference_hash_table[j] = 0;
959     qsort(reference_hash_table, nreferences, sizeof(reference*), rcompare);
960     for (i = 0; i < nreferences; i++)
961       reference_hash_table[i]->set_number(i);
962     compute_labels(reference_hash_table, nreferences);
963   }
964   if (outfp != stdout) {
965     rewind(outfp);
966     {
967       label_processing_state state(citation, ncitations, stdout);
968       int c;
969       while ((c = getc(outfp)) != EOF)
970 	state.process(c);
971     }
972     ncitations = 0;
973     fclose(outfp);
974     outfp = stdout;
975   }
976   if (nreferences > 0) {
977     fputs(".]<\n", outfp);
978     for (int i = 0; i < nreferences; i++) {
979       if (sort_fields.length() > 0)
980 	reference_hash_table[i]->print_sort_key_comment(outfp);
981       if (label_in_reference) {
982 	fputs(".ds [F ", outfp);
983 	const string &label = reference_hash_table[i]->get_label(NORMAL_LABEL);
984 	if (label.length() > 0
985 	    && (label[0] == ' ' || label[0] == '\\' || label[0] == '"'))
986 	  putc('"', outfp);
987 	put_string(label, outfp);
988 	putc('\n', outfp);
989       }
990       reference_hash_table[i]->output(outfp);
991       delete reference_hash_table[i];
992       reference_hash_table[i] = 0;
993     }
994     fputs(".]>\n", outfp);
995     nreferences = 0;
996   }
997   clear_labels();
998 }
999 
find_reference(const char * query,int query_len)1000 static reference *find_reference(const char *query, int query_len)
1001 {
1002   // This is so that error messages look better.
1003   while (query_len > 0 && csspace(query[query_len - 1]))
1004     query_len--;
1005   string str;
1006   for (int i = 0; i < query_len; i++)
1007     str += query[i] == '\n' ? ' ' : query[i];
1008   str += '\0';
1009   possibly_load_default_database();
1010   search_list_iterator iter(&database_list, str.contents());
1011   reference_id rid;
1012   const char *start;
1013   int len;
1014   if (!iter.next(&start, &len, &rid)) {
1015     error("no matches for `%1'", str.contents());
1016     return 0;
1017   }
1018   const char *end = start + len;
1019   while (start < end) {
1020     if (*start == '%')
1021       break;
1022     while (start < end && *start++ != '\n')
1023       ;
1024   }
1025   if (start >= end) {
1026     error("found a reference for `%1' but it didn't contain any fields",
1027 	  str.contents());
1028     return 0;
1029   }
1030   reference *result = new reference(start, end - start, &rid);
1031   if (iter.next(&start, &len, &rid))
1032     warning("multiple matches for `%1'", str.contents());
1033   return result;
1034 }
1035 
make_reference(const string & str,unsigned * flagsp)1036 static reference *make_reference(const string &str, unsigned *flagsp)
1037 {
1038   const char *start = str.contents();
1039   const char *end = start + str.length();
1040   const char *ptr = start;
1041   while (ptr < end) {
1042     if (*ptr == '%')
1043       break;
1044     while (ptr < end && *ptr++ != '\n')
1045       ;
1046   }
1047   *flagsp = 0;
1048   for (; start < ptr; start++) {
1049     if (*start == '#')
1050       *flagsp = (SHORT_LABEL | (*flagsp & (FORCE_RIGHT_BRACKET
1051 					   | FORCE_LEFT_BRACKET)));
1052     else if (*start == '[')
1053       *flagsp |= FORCE_LEFT_BRACKET;
1054     else if (*start == ']')
1055       *flagsp |= FORCE_RIGHT_BRACKET;
1056     else if (!csspace(*start))
1057       break;
1058   }
1059   if (start >= end) {
1060     error("empty reference");
1061     return new reference;
1062   }
1063   reference *database_ref = 0;
1064   if (start < ptr)
1065     database_ref = find_reference(start, ptr - start);
1066   reference *inline_ref = 0;
1067   if (ptr < end)
1068     inline_ref = new reference(ptr, end - ptr);
1069   if (inline_ref) {
1070     if (database_ref) {
1071       database_ref->merge(*inline_ref);
1072       delete inline_ref;
1073       return database_ref;
1074     }
1075     else
1076       return inline_ref;
1077   }
1078   else if (database_ref)
1079     return database_ref;
1080   else
1081     return new reference;
1082 }
1083 
do_ref(const string & str)1084 static void do_ref(const string &str)
1085 {
1086   if (accumulate)
1087     (void)store_reference(str);
1088   else {
1089     (void)immediately_handle_reference(str);
1090     immediately_output_references();
1091   }
1092 }
1093 
trim_blanks(string & str)1094 static void trim_blanks(string &str)
1095 {
1096   const char *start = str.contents();
1097   const char *end = start + str.length();
1098   while (end > start && end[-1] != '\n' && csspace(end[-1]))
1099     --end;
1100   str.set_length(end - start);
1101 }
1102 
do_bib(const char * filename)1103 void do_bib(const char *filename)
1104 {
1105   FILE *fp;
1106   if (strcmp(filename, "-") == 0)
1107     fp = stdin;
1108   else {
1109     errno = 0;
1110     fp = fopen(filename, "r");
1111     if (fp == 0) {
1112       error("can't open `%1': %2", filename, strerror(errno));
1113       return;
1114     }
1115     current_filename = filename;
1116   }
1117   enum {
1118     START, MIDDLE, BODY, BODY_START, BODY_BLANK, BODY_DOT
1119     } state = START;
1120   string body;
1121   for (;;) {
1122     int c = getc(fp);
1123     if (c == EOF)
1124       break;
1125     if (invalid_input_char(c)) {
1126       error("invalid input character code %1", c);
1127       continue;
1128     }
1129     switch (state) {
1130     case START:
1131       if (c == '%') {
1132 	body = c;
1133 	state = BODY;
1134       }
1135       else if (c != '\n')
1136 	state = MIDDLE;
1137       break;
1138     case MIDDLE:
1139       if (c == '\n')
1140 	state = START;
1141       break;
1142     case BODY:
1143       body += c;
1144       if (c == '\n')
1145 	state = BODY_START;
1146       break;
1147     case BODY_START:
1148       if (c == '\n') {
1149 	do_ref(body);
1150 	state = START;
1151       }
1152       else if (c == '.')
1153 	state = BODY_DOT;
1154       else if (csspace(c)) {
1155 	state = BODY_BLANK;
1156 	body += c;
1157       }
1158       else {
1159 	body += c;
1160 	state = BODY;
1161       }
1162       break;
1163     case BODY_BLANK:
1164       if (c == '\n') {
1165 	trim_blanks(body);
1166 	do_ref(body);
1167 	state = START;
1168       }
1169       else if (csspace(c))
1170 	body += c;
1171       else {
1172 	body += c;
1173 	state = BODY;
1174       }
1175       break;
1176     case BODY_DOT:
1177       if (c == ']') {
1178 	do_ref(body);
1179 	state = MIDDLE;
1180       }
1181       else {
1182 	body += '.';
1183 	body += c;
1184 	state = c == '\n' ? BODY_START : BODY;
1185       }
1186       break;
1187     default:
1188       assert(0);
1189     }
1190     if (c == '\n')
1191       current_lineno++;
1192   }
1193   switch (state) {
1194   case START:
1195   case MIDDLE:
1196     break;
1197   case BODY:
1198     body += '\n';
1199     do_ref(body);
1200     break;
1201   case BODY_DOT:
1202   case BODY_START:
1203     do_ref(body);
1204     break;
1205   case BODY_BLANK:
1206     trim_blanks(body);
1207     do_ref(body);
1208     break;
1209   }
1210   fclose(fp);
1211 }
1212 
1213 // from the Dragon Book
1214 
// Hash the `len' bytes starting at `s'.  For each byte the running value
// is shifted left four bits and the byte added; whenever the top four
// bits (mask 0xf0000000) become non-zero they are folded back into the
// value (shifted right by 24) and cleared.
unsigned hash_string(const char *s, int len)
{
  unsigned hash = 0;
  for (int i = 0; i < len; i++) {
    hash = (hash << 4) + s[i];
    const unsigned top = hash & 0xf0000000;
    if (top != 0)
      hash ^= (top >> 24) ^ top;
  }
  return hash;
}
1229 
// Return the smallest entry of a fixed table of hash-table sizes that is
// strictly greater than `n'.  The table ends at 128000003; asking for
// anything larger trips the assertion.
int next_size(int n)
{
  static const int sizes[] = {
    101, 503, 1009, 2003, 3001, 4001, 5003, 10007, 20011, 40009,
    80021, 160001, 500009, 1000003, 2000003, 4000037, 8000009,
    16000057, 32000011, 64000031, 128000003, 0
  };

  // The trailing 0 marks the end of the table.
  int idx = 0;
  while (sizes[idx] != 0 && sizes[idx] <= n)
    idx++;
  assert(sizes[idx] != 0);
  return sizes[idx];
}
1244 
1245