xref: /386bsd/usr/src/usr.bin/groff/refer/label.y (revision a2142627)
1 /* -*- C++ -*-
2    Copyright (C) 1989, 1990, 1991, 1992 Free Software Foundation, Inc.
3      Written by James Clark (jjc@jclark.com)
4 
5 This file is part of groff.
6 
7 groff is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
11 
12 groff is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16 
17 You should have received a copy of the GNU General Public License along
18 with groff; see the file COPYING.  If not, write to the Free Software
19 Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
20 
21 %{
22 
23 #include "refer.h"
24 #include "refid.h"
25 #include "ref.h"
26 #include "token.h"
27 
28 int yylex();
29 void yyerror(const char *);
30 int yyparse();
31 
32 static const char *format_serial(char c, int n);
33 
34 struct label_info {
35   int start;
36   int length;
37   int count;
38   int total;
39   label_info(const string &);
40 };
41 
42 label_info *lookup_label(const string &label);
43 
44 struct expression {
45   enum {
46     // Does the tentative label depend on the reference?
47     CONTAINS_VARIABLE = 01,
48     CONTAINS_STAR = 02,
49     CONTAINS_FORMAT = 04,
50     CONTAINS_AT = 010
51   };
~expressionexpression52   virtual ~expression() { }
53   virtual void evaluate(int, const reference &, string &,
54 			substring_position &) = 0;
analyzeexpression55   virtual unsigned analyze() { return 0; }
56 };
57 
58 class at_expr : public expression {
59 public:
at_expr()60   at_expr() { }
61   void evaluate(int, const reference &, string &, substring_position &);
analyze()62   unsigned analyze() { return CONTAINS_VARIABLE|CONTAINS_AT; }
63 };
64 
65 class format_expr : public expression {
66   char type;
67   int width;
68   int first_number;
69 public:
70   format_expr(char c, int w = 0, int f = 1)
type(c)71     : type(c), width(w), first_number(f) { }
72   void evaluate(int, const reference &, string &, substring_position &);
analyze()73   unsigned analyze() { return CONTAINS_FORMAT; }
74 };
75 
76 class field_expr : public expression {
77   int number;
78   char name;
79 public:
field_expr(char nm,int num)80   field_expr(char nm, int num) : name(nm), number(num) { }
81   void evaluate(int, const reference &, string &, substring_position &);
analyze()82   unsigned analyze() { return CONTAINS_VARIABLE; }
83 };
84 
85 class literal_expr : public expression {
86   string s;
87 public:
literal_expr(const char * ptr,int len)88   literal_expr(const char *ptr, int len) : s(ptr, len) { }
89   void evaluate(int, const reference &, string &, substring_position &);
90 };
91 
92 class unary_expr : public expression {
93 protected:
94   expression *expr;
95 public:
unary_expr(expression * e)96   unary_expr(expression *e) : expr(e) { }
~unary_expr()97   ~unary_expr() { delete expr; }
98   void evaluate(int, const reference &, string &, substring_position &) = 0;
analyze()99   unsigned analyze() { return expr ? expr->analyze() : 0; }
100 };
101 
102 // This caches the analysis of an expression.
103 
104 class analyzed_expr : public unary_expr {
105   unsigned flags;
106 public:
107   analyzed_expr(expression *);
108   void evaluate(int, const reference &, string &, substring_position &);
analyze()109   unsigned analyze() { return flags; }
110 };
111 
112 class star_expr : public unary_expr {
113 public:
star_expr(expression * e)114   star_expr(expression *e) : unary_expr(e) { }
115   void evaluate(int, const reference &, string &, substring_position &);
analyze()116   unsigned analyze() {
117     return ((expr ? (expr->analyze() & ~CONTAINS_VARIABLE) : 0)
118 	    | CONTAINS_STAR);
119   }
120 };
121 
122 typedef void map_t(const char *, const char *, string &);
123 
124 class map_expr : public unary_expr {
125   map_t *func;
126 public:
map_expr(expression * e,map_t * f)127   map_expr(expression *e, map_t *f) : unary_expr(e), func(f) { }
128   void evaluate(int, const reference &, string &, substring_position &);
129 };
130 
131 typedef const char *extractor_t(const char *, const char *, const char **);
132 
133 class extractor_expr : public unary_expr {
134   int part;
135   extractor_t *func;
136 public:
137   enum { BEFORE = +1, MATCH = 0, AFTER = -1 };
extractor_expr(expression * e,extractor_t * f,int pt)138   extractor_expr(expression *e, extractor_t *f, int pt)
139     : unary_expr(e), func(f), part(pt) { }
140   void evaluate(int, const reference &, string &, substring_position &);
141 };
142 
143 class truncate_expr : public unary_expr {
144   int n;
145 public:
truncate_expr(expression * e,int i)146   truncate_expr(expression *e, int i) : n(i), unary_expr(e) { }
147   void evaluate(int, const reference &, string &, substring_position &);
148 };
149 
150 class separator_expr : public unary_expr {
151 public:
separator_expr(expression * e)152   separator_expr(expression *e) : unary_expr(e) { }
153   void evaluate(int, const reference &, string &, substring_position &);
154 };
155 
156 class binary_expr : public expression {
157 protected:
158   expression *expr1;
159   expression *expr2;
160 public:
binary_expr(expression * e1,expression * e2)161   binary_expr(expression *e1, expression *e2) : expr1(e1), expr2(e2) { }
~binary_expr()162   ~binary_expr() { delete expr1; delete expr2; }
163   void evaluate(int, const reference &, string &, substring_position &) = 0;
analyze()164   unsigned analyze() {
165     return (expr1 ? expr1->analyze() : 0) | (expr2 ? expr2->analyze() : 0);
166   }
167 };
168 
169 class alternative_expr : public binary_expr {
170 public:
alternative_expr(expression * e1,expression * e2)171   alternative_expr(expression *e1, expression *e2) : binary_expr(e1, e2) { }
172   void evaluate(int, const reference &, string &, substring_position &);
173 };
174 
175 class list_expr : public binary_expr {
176 public:
list_expr(expression * e1,expression * e2)177   list_expr(expression *e1, expression *e2) : binary_expr(e1, e2) { }
178   void evaluate(int, const reference &, string &, substring_position &);
179 };
180 
181 class substitute_expr : public binary_expr {
182 public:
substitute_expr(expression * e1,expression * e2)183   substitute_expr(expression *e1, expression *e2) : binary_expr(e1, e2) { }
184   void evaluate(int, const reference &, string &, substring_position &);
185 };
186 
187 class ternary_expr : public expression {
188 protected:
189   expression *expr1;
190   expression *expr2;
191   expression *expr3;
192 public:
ternary_expr(expression * e1,expression * e2,expression * e3)193   ternary_expr(expression *e1, expression *e2, expression *e3)
194     : expr1(e1), expr2(e2), expr3(e3) { }
~ternary_expr()195   ~ternary_expr() { delete expr1; delete expr2; delete expr3; }
196   void evaluate(int, const reference &, string &, substring_position &) = 0;
analyze()197   unsigned analyze() {
198     return ((expr1 ? expr1->analyze() : 0)
199 	    | (expr2 ? expr2->analyze() : 0)
200 	    | (expr3 ? expr3->analyze() : 0));
201   }
202 };
203 
204 class conditional_expr : public ternary_expr {
205 public:
conditional_expr(expression * e1,expression * e2,expression * e3)206   conditional_expr(expression *e1, expression *e2, expression *e3)
207     : ternary_expr(e1, e2, e3) { }
208   void evaluate(int, const reference &, string &, substring_position &);
209 };
210 
211 static expression *parsed_label = 0;
212 static expression *parsed_date_label = 0;
213 static expression *parsed_short_label = 0;
214 
215 static expression *parse_result;
216 
217 string literals;
218 
219 %}
220 
221 %union {
222   int num;
223   expression *expr;
224   struct { int ndigits; int val; } dig;
225   struct { int start; int len; } str;
226 }
227 
228 /* uppercase or lowercase letter */
229 %token <num> TOKEN_LETTER
230 /* literal characters */
231 %token <str> TOKEN_LITERAL
232 /* digit */
233 %token <num> TOKEN_DIGIT
234 
235 %type <expr> conditional
236 %type <expr> alternative
237 %type <expr> list
238 %type <expr> string
239 %type <expr> substitute
240 %type <expr> optional_conditional
241 %type <num> number
242 %type <dig> digits
243 %type <num> optional_number
244 %type <num> flag
245 
246 %%
247 
/* Start symbol: a possibly empty label expression.  A non-empty tree is
   wrapped in an analyzed_expr and left in parse_result. */
expr:
	optional_conditional
		{
		  if ($1)
		    parse_result = new analyzed_expr($1);
		  else
		    parse_result = 0;
		}
	;

/* a ? b : c, where b may be empty and c may itself be a conditional. */
conditional:
	alternative
		{ $$ = $1; }
	| alternative '?' optional_conditional ':' conditional
		{ $$ = new conditional_expr($1, $3, $5); }
	;

/* An empty expression is represented by a null pointer. */
optional_conditional:
	/* empty */
		{ $$ = 0; }
	| conditional
		{ $$ = $1; }
	;

/* a | b and a & b, left associative.  a & b is a conditional with no
   else branch. */
alternative:
	list
		{ $$ = $1; }
	| alternative '|' list
		{ $$ = new alternative_expr($1, $3); }
	| alternative '&' list
		{ $$ = new conditional_expr($1, $3, 0); }
	;

/* Juxtaposition of two expressions. */
list:
	substitute
		{ $$ = $1; }
	| list substitute
		{ $$ = new list_expr($1, $2); }
	;

/* a ~ b, left associative. */
substitute:
	string
		{ $$ = $1; }
	| substitute '~' string
		{ $$ = new substitute_expr($1, $3); }
	;
289 
/* Primary expressions and their postfix operators. */
string:
	'@'
		{ $$ = new at_expr; }
	| TOKEN_LITERAL
		{
		  /* The lexer stored the text in `literals';
		     literal_expr takes its own copy. */
		  const char *text = literals.contents() + $1.start;
		  $$ = new literal_expr(text, $1.len);
		}
	| TOKEN_LETTER
		{ $$ = new field_expr($1, 0); }
	| TOKEN_LETTER number
		{
		  /* Numbers in the specification count from 1. */
		  $$ = new field_expr($1, $2 - 1);
		}
	| '%' TOKEN_LETTER
		{
		  if ($2 == 'I' || $2 == 'i' || $2 == 'A' || $2 == 'a')
		    $$ = new format_expr($2);
		  else {
		    command_error("unrecognized format `%1'", char($2));
		    $$ = new format_expr('a');
		  }
		}

	| '%' digits
		{
		  $$ = new format_expr('0', $2.ndigits, $2.val);
		}
	| string '.' flag TOKEN_LETTER optional_number
		{
		  /* Choose the function first, then build the node. */
		  map_t *mapper = 0;
		  extractor_t *extractor = 0;
		  switch ($4) {
		  case 'l':
		    mapper = lowercase;
		    break;
		  case 'u':
		    mapper = uppercase;
		    break;
		  case 'c':
		    mapper = capitalize;
		    break;
		  case 'r':
		    mapper = reverse_name;
		    break;
		  case 'a':
		    mapper = abbreviate_name;
		    break;
		  case 'y':
		    extractor = find_year;
		    break;
		  case 'n':
		    extractor = find_last_name;
		    break;
		  }
		  if (mapper)
		    $$ = new map_expr($1, mapper);
		  else if (extractor)
		    $$ = new extractor_expr($1, extractor, $3);
		  else {
		    /* Unknown function letter: keep the operand as is. */
		    $$ = $1;
		    command_error("unknown function `%1'", char($4));
		  }
		}

	| string '+' number
		{ $$ = new truncate_expr($1, $3); }
	| string '-' number
		{ $$ = new truncate_expr($1, -$3); }
	| string '*'
		{ $$ = new star_expr($1); }
	| '(' optional_conditional ')'
		{ $$ = $2; }
	| '<' optional_conditional '>'
		{ $$ = new separator_expr($2); }
	;
364 
/* A number, or -1 if there is none. */
optional_number:
	/* empty */
		{ $$ = -1; }
	| number
		{ $$ = $1; }
	;

/* Decimal number assembled from single digit tokens. */
number:
	TOKEN_DIGIT
		{ $$ = $1; }
	| number TOKEN_DIGIT
		{ $$ = 10*$1 + $2; }
	;

/* Like number, but also records how many digits were written. */
digits:
	TOKEN_DIGIT
		{ $$.val = $1; $$.ndigits = 1; }
	| digits TOKEN_DIGIT
		{ $$.val = 10*$1.val + $2; $$.ndigits = 1 + $1.ndigits; }
	;


/* Optional sign before a function letter; the values are those of
   extractor_expr::BEFORE, MATCH and AFTER. */
flag:
	/* empty */
		{ $$ = 0; }
	| '+'
		{ $$ = 1; }
	| '-'
		{ $$ = -1; }
	;
395 
396 %%
397 
398 /* bison defines const to be empty unless __STDC__ is defined, which it
399 isn't under cfront */
400 
401 #ifdef const
402 #undef const
403 #endif
404 
405 const char *spec_ptr;
406 const char *spec_end;
407 const char *spec_cur;
408 
yylex()409 int yylex()
410 {
411   while (spec_ptr < spec_end && csspace(*spec_ptr))
412     spec_ptr++;
413   spec_cur = spec_ptr;
414   if (spec_ptr >= spec_end)
415     return 0;
416   unsigned char c = *spec_ptr++;
417   if (csalpha(c)) {
418     yylval.num = c;
419     return TOKEN_LETTER;
420   }
421   if (csdigit(c)) {
422     yylval.num = c - '0';
423     return TOKEN_DIGIT;
424   }
425   if (c == '\'') {
426     yylval.str.start = literals.length();
427     for (; spec_ptr < spec_end; spec_ptr++) {
428       if (*spec_ptr == '\'') {
429 	if (++spec_ptr < spec_end && *spec_ptr == '\'')
430 	  literals += '\'';
431 	else {
432 	  yylval.str.len = literals.length() - yylval.str.start;
433 	  return TOKEN_LITERAL;
434 	}
435       }
436       else
437 	literals += *spec_ptr;
438     }
439     yylval.str.len = literals.length() - yylval.str.start;
440     return TOKEN_LITERAL;
441   }
442   return c;
443 }
444 
set_label_spec(const char * label_spec)445 int set_label_spec(const char *label_spec)
446 {
447   spec_cur = spec_ptr = label_spec;
448   spec_end = strchr(label_spec, '\0');
449   literals.clear();
450   if (yyparse())
451     return 0;
452   delete parsed_label;
453   parsed_label = parse_result;
454   return 1;
455 }
456 
set_date_label_spec(const char * label_spec)457 int set_date_label_spec(const char *label_spec)
458 {
459   spec_cur = spec_ptr = label_spec;
460   spec_end = strchr(label_spec, '\0');
461   literals.clear();
462   if (yyparse())
463     return 0;
464   delete parsed_date_label;
465   parsed_date_label = parse_result;
466   return 1;
467 }
468 
set_short_label_spec(const char * label_spec)469 int set_short_label_spec(const char *label_spec)
470 {
471   spec_cur = spec_ptr = label_spec;
472   spec_end = strchr(label_spec, '\0');
473   literals.clear();
474   if (yyparse())
475     return 0;
476   delete parsed_short_label;
477   parsed_short_label = parse_result;
478   return 1;
479 }
480 
yyerror(const char * message)481 void yyerror(const char *message)
482 {
483   if (spec_cur < spec_end)
484     command_error("label specification %1 before `%2'", message, spec_cur);
485   else
486     command_error("label specification %1 at end of string",
487 		  message, spec_cur);
488 }
489 
evaluate(int tentative,const reference & ref,string & result,substring_position &)490 void at_expr::evaluate(int tentative, const reference &ref,
491 		       string &result, substring_position &)
492 {
493   if (tentative)
494     ref.canonicalize_authors(result);
495   else {
496     const char *end, *start = ref.get_authors(&end);
497     if (start)
498       result.append(start, end - start);
499   }
500 }
501 
evaluate(int tentative,const reference & ref,string & result,substring_position &)502 void format_expr::evaluate(int tentative, const reference &ref,
503 			   string &result, substring_position &)
504 {
505   if (tentative)
506     return;
507   const label_info *lp = ref.get_label_ptr();
508   int num = lp == 0 ? ref.get_number() : lp->count;
509   if (type != '0')
510     result += format_serial(type, num + 1);
511   else {
512     const char *ptr = itoa(num + first_number);
513     int pad = width - strlen(ptr);
514     while (--pad >= 0)
515       result += '0';
516     result += ptr;
517   }
518 }
519 
format_serial(char c,int n)520 static const char *format_serial(char c, int n)
521 {
522   assert(n > 0);
523   static char buf[128]; // more than enough.
524   switch (c) {
525   case 'i':
526   case 'I':
527     {
528       char *p = buf;
529       // troff uses z and w to represent 10000 and 5000 in Roman
530       // numerals; I can find no historical basis for this usage
531       const char *s = c == 'i' ? "zwmdclxvi" : "ZWMDCLXVI";
532       if (n >= 40000)
533 	return itoa(n);
534       while (n >= 10000) {
535 	*p++ = s[0];
536 	n -= 10000;
537       }
538       for (int i = 1000; i > 0; i /= 10, s += 2) {
539 	int m = n/i;
540 	n -= m*i;
541 	switch (m) {
542 	case 3:
543 	  *p++ = s[2];
544 	  /* falls through */
545 	case 2:
546 	  *p++ = s[2];
547 	  /* falls through */
548 	case 1:
549 	  *p++ = s[2];
550 	  break;
551 	case 4:
552 	  *p++ = s[2];
553 	  *p++ = s[1];
554 	  break;
555 	case 8:
556 	  *p++ = s[1];
557 	  *p++ = s[2];
558 	  *p++ = s[2];
559 	  *p++ = s[2];
560 	  break;
561 	case 7:
562 	  *p++ = s[1];
563 	  *p++ = s[2];
564 	  *p++ = s[2];
565 	  break;
566 	case 6:
567 	  *p++ = s[1];
568 	  *p++ = s[2];
569 	  break;
570 	case 5:
571 	  *p++ = s[1];
572 	  break;
573 	case 9:
574 	  *p++ = s[2];
575 	  *p++ = s[0];
576 	}
577       }
578       *p = 0;
579       break;
580     }
581   case 'a':
582   case 'A':
583     {
584       char *p = buf;
585       // this is derived from troff/reg.c
586       while (n > 0) {
587 	int d = n % 26;
588 	if (d == 0)
589 	  d = 26;
590 	n -= d;
591 	n /= 26;
592 	*p++ = c + d - 1;	// ASCII dependent
593       }
594       *p-- = 0;
595       // Reverse it.
596       char *q = buf;
597       while (q < p) {
598 	char temp = *q;
599 	*q = *p;
600 	*p = temp;
601 	--p;
602 	++q;
603       }
604       break;
605     }
606   default:
607     assert(0);
608   }
609   return buf;
610 }
611 
evaluate(int,const reference & ref,string & result,substring_position &)612 void field_expr::evaluate(int, const reference &ref,
613 			  string &result, substring_position &)
614 {
615   const char *end;
616   const char *start = ref.get_field(name, &end);
617   if (start) {
618     start = nth_field(number, start, &end);
619     if (start)
620       result.append(start, end - start);
621   }
622 }
623 
evaluate(int,const reference &,string & result,substring_position &)624 void literal_expr::evaluate(int, const reference &,
625 			    string &result, substring_position &)
626 {
627   result += s;
628 }
629 
analyzed_expr(expression * e)630 analyzed_expr::analyzed_expr(expression *e)
631 : unary_expr(e), flags(e ? e->analyze() : 0)
632 {
633 }
634 
evaluate(int tentative,const reference & ref,string & result,substring_position & pos)635 void analyzed_expr::evaluate(int tentative, const reference &ref,
636 			     string &result, substring_position &pos)
637 {
638   if (expr)
639     expr->evaluate(tentative, ref, result, pos);
640 }
641 
evaluate(int tentative,const reference & ref,string & result,substring_position & pos)642 void star_expr::evaluate(int tentative, const reference &ref,
643 			 string &result, substring_position &pos)
644 {
645   const label_info *lp = ref.get_label_ptr();
646   if (!tentative
647       && (lp == 0 || lp->total > 1)
648       && expr)
649     expr->evaluate(tentative, ref, result, pos);
650 }
651 
evaluate(int tentative,const reference & ref,string & result,substring_position & pos)652 void separator_expr::evaluate(int tentative, const reference &ref,
653 			      string &result, substring_position &pos)
654 {
655   int start_length = result.length();
656   int is_first = pos.start < 0;
657   if (expr)
658     expr->evaluate(tentative, ref, result, pos);
659   if (is_first) {
660     pos.start = start_length;
661     pos.length = result.length() - start_length;
662   }
663 }
664 
evaluate(int tentative,const reference & ref,string & result,substring_position &)665 void map_expr::evaluate(int tentative, const reference &ref,
666 			string &result, substring_position &)
667 {
668   if (expr) {
669     string temp;
670     substring_position temp_pos;
671     expr->evaluate(tentative, ref, temp, temp_pos);
672     (*func)(temp.contents(), temp.contents() + temp.length(), result);
673   }
674 }
675 
evaluate(int tentative,const reference & ref,string & result,substring_position &)676 void extractor_expr::evaluate(int tentative, const reference &ref,
677 			      string &result, substring_position &)
678 {
679   if (expr) {
680     string temp;
681     substring_position temp_pos;
682     expr->evaluate(tentative, ref, temp, temp_pos);
683     const char *end, *start = (*func)(temp.contents(),
684 				      temp.contents() + temp.length(),
685 				      &end);
686     switch (part) {
687     case BEFORE:
688       if (start)
689 	result.append(temp.contents(), start - temp.contents());
690       else
691 	result += temp;
692       break;
693     case MATCH:
694       if (start)
695 	result.append(start, end - start);
696       break;
697     case AFTER:
698       if (start)
699 	result.append(end, temp.contents() + temp.length() - end);
700       break;
701     default:
702       assert(0);
703     }
704   }
705 }
706 
first_part(int len,const char * ptr,const char * end,string & result)707 static void first_part(int len, const char *ptr, const char *end,
708 			  string &result)
709 {
710   for (;;) {
711     const char *token_start = ptr;
712     if (!get_token(&ptr, end))
713       break;
714     const token_info *ti = lookup_token(token_start, ptr);
715     int counts = ti->sortify_non_empty(token_start, ptr);
716     if (counts && --len < 0)
717       break;
718     if (counts || ti->is_accent())
719       result.append(token_start, ptr - token_start);
720   }
721 }
722 
last_part(int len,const char * ptr,const char * end,string & result)723 static void last_part(int len, const char *ptr, const char *end,
724 		      string &result)
725 {
726   const char *start = ptr;
727   int count = 0;
728   for (;;) {
729     const char *token_start = ptr;
730     if (!get_token(&ptr, end))
731       break;
732     const token_info *ti = lookup_token(token_start, ptr);
733     if (ti->sortify_non_empty(token_start, ptr))
734       count++;
735   }
736   ptr = start;
737   int skip = count - len;
738   if (skip > 0) {
739     for (;;) {
740       const char *token_start = ptr;
741       if (!get_token(&ptr, end))
742 	assert(0);
743       const token_info *ti = lookup_token(token_start, ptr);
744       if (ti->sortify_non_empty(token_start, ptr) && --skip < 0) {
745 	ptr = token_start;
746 	break;
747       }
748     }
749   }
750   first_part(len, ptr, end, result);
751 }
752 
evaluate(int tentative,const reference & ref,string & result,substring_position &)753 void truncate_expr::evaluate(int tentative, const reference &ref,
754 			     string &result, substring_position &)
755 {
756   if (expr) {
757     string temp;
758     substring_position temp_pos;
759     expr->evaluate(tentative, ref, temp, temp_pos);
760     const char *start = temp.contents();
761     const char *end = start + temp.length();
762     if (n > 0)
763       first_part(n, start, end, result);
764     else if (n < 0)
765       last_part(-n, start, end, result);
766   }
767 }
768 
evaluate(int tentative,const reference & ref,string & result,substring_position & pos)769 void alternative_expr::evaluate(int tentative, const reference &ref,
770 				string &result, substring_position &pos)
771 {
772   int start_length = result.length();
773   if (expr1)
774     expr1->evaluate(tentative, ref, result, pos);
775   if (result.length() == start_length && expr2)
776     expr2->evaluate(tentative, ref, result, pos);
777 }
778 
evaluate(int tentative,const reference & ref,string & result,substring_position & pos)779 void list_expr::evaluate(int tentative, const reference &ref,
780 			 string &result, substring_position &pos)
781 {
782   if (expr1)
783     expr1->evaluate(tentative, ref, result, pos);
784   if (expr2)
785     expr2->evaluate(tentative, ref, result, pos);
786 }
787 
evaluate(int tentative,const reference & ref,string & result,substring_position & pos)788 void substitute_expr::evaluate(int tentative, const reference &ref,
789 			       string &result, substring_position &pos)
790 {
791   int start_length = result.length();
792   if (expr1)
793     expr1->evaluate(tentative, ref, result, pos);
794   if (result.length() > start_length && result[result.length() - 1] == '-') {
795     // ought to see if pos covers the -
796     result.set_length(result.length() - 1);
797     if (expr2)
798       expr2->evaluate(tentative, ref, result, pos);
799   }
800 }
801 
evaluate(int tentative,const reference & ref,string & result,substring_position & pos)802 void conditional_expr::evaluate(int tentative, const reference &ref,
803 				string &result, substring_position &pos)
804 {
805   string temp;
806   substring_position temp_pos;
807   if (expr1)
808     expr1->evaluate(tentative, ref, temp, temp_pos);
809   if (temp.length() > 0) {
810     if (expr2)
811       expr2->evaluate(tentative, ref, result, pos);
812   }
813   else {
814     if (expr3)
815       expr3->evaluate(tentative, ref, result, pos);
816   }
817 }
818 
pre_compute_label()819 void reference::pre_compute_label()
820 {
821   if (parsed_label != 0
822       && (parsed_label->analyze() & expression::CONTAINS_VARIABLE)) {
823     label.clear();
824     substring_position temp_pos;
825     parsed_label->evaluate(1, *this, label, temp_pos);
826     label_ptr = lookup_label(label);
827   }
828 }
829 
compute_label()830 void reference::compute_label()
831 {
832   label.clear();
833   if (parsed_label)
834     parsed_label->evaluate(0, *this, label, separator_pos);
835   if (short_label_flag && parsed_short_label)
836     parsed_short_label->evaluate(0, *this, short_label, short_separator_pos);
837   if (date_as_label) {
838     string new_date;
839     if (parsed_date_label) {
840       substring_position temp_pos;
841       parsed_date_label->evaluate(0, *this, new_date, temp_pos);
842     }
843     set_date(new_date);
844   }
845   if (label_ptr)
846     label_ptr->count += 1;
847 }
848 
immediate_compute_label()849 void reference::immediate_compute_label()
850 {
851   if (label_ptr)
852     label_ptr->total = 2;	// force use of disambiguator
853   compute_label();
854 }
855 
merge_labels(reference ** v,int n,label_type type,string & result)856 int reference::merge_labels(reference **v, int n, label_type type,
857 			    string &result)
858 {
859   if (abbreviate_label_ranges)
860     return merge_labels_by_number(v, n, type, result);
861   else
862     return merge_labels_by_parts(v, n, type, result);
863 }
864 
merge_labels_by_number(reference ** v,int n,label_type type,string & result)865 int reference::merge_labels_by_number(reference **v, int n, label_type type,
866 				      string &result)
867 {
868   if (n <= 1)
869     return 0;
870   int num = get_number();
871   // Only merge three or more labels.
872   if (v[0]->get_number() != num + 1
873       || v[1]->get_number() != num + 2)
874     return 0;
875   for (int i = 2; i < n; i++)
876     if (v[i]->get_number() != num + i + 1)
877       break;
878   result = get_label(type);
879   result += label_range_indicator;
880   result += v[i - 1]->get_label(type);
881   return i;
882 }
883 
get_separator_pos(label_type type)884 const substring_position &reference::get_separator_pos(label_type type) const
885 {
886   if (type == SHORT_LABEL && short_label_flag)
887     return short_separator_pos;
888   else
889     return separator_pos;
890 }
891 
get_label(label_type type)892 const string &reference::get_label(label_type type) const
893 {
894   if (type == SHORT_LABEL && short_label_flag)
895     return short_label;
896   else
897     return label;
898 }
899 
merge_labels_by_parts(reference ** v,int n,label_type type,string & result)900 int reference::merge_labels_by_parts(reference **v, int n, label_type type,
901 				     string &result)
902 {
903   if (n <= 0)
904     return 0;
905   const string &lb = get_label(type);
906   const substring_position &sp = get_separator_pos(type);
907   if (sp.start < 0
908       || sp.start != v[0]->get_separator_pos(type).start
909       || memcmp(lb.contents(), v[0]->get_label(type).contents(),
910 		sp.start) != 0)
911     return 0;
912   result = lb;
913   int i = 0;
914   do {
915     result += separate_label_second_parts;
916     const substring_position &s = v[i]->get_separator_pos(type);
917     int sep_end_pos = s.start + s.length;
918     result.append(v[i]->get_label(type).contents() + sep_end_pos,
919 		  v[i]->get_label(type).length() - sep_end_pos);
920   } while (++i < n
921 	   && sp.start == v[i]->get_separator_pos(type).start
922 	   && memcmp(lb.contents(), v[i]->get_label(type).contents(),
923 		     sp.start) == 0);
924   return i;
925 }
926 
927 string label_pool;
928 
label_info(const string & s)929 label_info::label_info(const string &s)
930 : count(0), total(1), length(s.length()), start(label_pool.length())
931 {
932   label_pool += s;
933 }
934 
935 static label_info **label_table = 0;
936 static int label_table_size = 0;
937 static int label_table_used = 0;
938 
lookup_label(const string & label)939 label_info *lookup_label(const string &label)
940 {
941   if (label_table == 0) {
942     label_table = new label_info *[17];
943     label_table_size = 17;
944     for (int i = 0; i < 17; i++)
945       label_table[i] = 0;
946   }
947   unsigned h = hash_string(label.contents(), label.length()) % label_table_size;
948   for (label_info **ptr = label_table + h;
949        *ptr != 0;
950        (ptr == label_table)
951        ? (ptr = label_table + label_table_size - 1)
952        : ptr--)
953     if ((*ptr)->length == label.length()
954 	&& memcmp(label_pool.contents() + (*ptr)->start, label.contents(),
955 		  label.length()) == 0) {
956       (*ptr)->total += 1;
957       return *ptr;
958     }
959   label_info *result = *ptr = new label_info(label);
960   if (++label_table_used * 2 > label_table_size) {
961     // Rehash the table.
962     label_info **old_table = label_table;
963     int old_size = label_table_size;
964     label_table_size = next_size(label_table_size);
965     label_table = new label_info *[label_table_size];
966     int i;
967     for (i = 0; i < label_table_size; i++)
968       label_table[i] = 0;
969     for (i = 0; i < old_size; i++)
970       if (old_table[i]) {
971 	unsigned h = hash_string(label_pool.contents() + old_table[i]->start,
972 				 old_table[i]->length);
973 	for (label_info **p = label_table + (h % label_table_size);
974 	     *p != 0;
975 	     (p == label_table)
976 	     ? (p = label_table + label_table_size - 1)
977 	     : --p)
978 	    ;
979 	*p = old_table[i];
980 	}
981     a_delete old_table;
982   }
983   return result;
984 }
985 
clear_labels()986 void clear_labels()
987 {
988   for (int i = 0; i < label_table_size; i++) {
989     delete label_table[i];
990     label_table[i] = 0;
991   }
992   label_table_used = 0;
993   label_pool.clear();
994 }
995 
996 static void consider_authors(reference **start, reference **end, int i);
997 
compute_labels(reference ** v,int n)998 void compute_labels(reference **v, int n)
999 {
1000   if (parsed_label
1001       && (parsed_label->analyze() & expression::CONTAINS_AT)
1002       && sort_fields.length() >= 2
1003       && sort_fields[0] == 'A'
1004       && sort_fields[1] == '+')
1005     consider_authors(v, v + n, 0);
1006   for (int i = 0; i < n; i++)
1007     v[i]->compute_label();
1008 }
1009 
1010 
/* A reference with a list of authors <A0,A1,...,AN> _needs_ author i
where 0 <= i <= N if there exists a reference with a list of authors
<B0,B1,...,BM> such that <A0,A1,...,AN> != <B0,B1,...,BM> and M >= i
and Aj = Bj for 0 <= j < i.  In this case we can't say ``A0,
A1,...,A(i-1) et al'' because this would match both <A0,A1,...,AN> and
<B0,B1,...,BM>.  If a reference needs author i we only have to call
need_author(j) for some j >= i such that the reference also needs
author j. */
1019 
1020 /* This function handles 2 tasks:
1021 determine which authors are needed (cannot be elided with et al.);
1022 determine which authors can have only last names in the labels.
1023 
1024 References >= start and < end have the same first i author names.
1025 Also they're sorted by A+. */
1026 
consider_authors(reference ** start,reference ** end,int i)1027 static void consider_authors(reference **start, reference **end, int i)
1028 {
1029   if (start >= end)
1030     return;
1031   reference **p = start;
1032   if (i >= (*p)->get_nauthors()) {
1033     for (++p; p < end && i >= (*p)->get_nauthors(); p++)
1034       ;
1035     if (p < end && i > 0) {
1036       // If we have an author list <A B C> and an author list <A B C D>,
1037       // then both lists need C.
1038       for (reference **q = start; q < end; q++)
1039 	(*q)->need_author(i - 1);
1040     }
1041     start = p;
1042   }
1043   while (p < end) {
1044     reference **last_name_start = p;
1045     reference **name_start = p;
1046     for (++p;
1047 	 p < end && i < (*p)->get_nauthors()
1048 	 && same_author_last_name(**last_name_start, **p, i);
1049 	 p++) {
1050       if (!same_author_name(**name_start, **p, i)) {
1051 	consider_authors(name_start, p, i + 1);
1052 	name_start = p;
1053       }
1054     }
1055     consider_authors(name_start, p, i + 1);
1056     if (last_name_start == name_start) {
1057       for (reference **q = last_name_start; q < p; q++)
1058 	(*q)->set_last_name_unambiguous(i);
1059     }
1060     // If we have an author list <A B C D> and <A B C E>, then the lists
1061     // need author D and E respectively.
1062     if (name_start > start || p < end) {
1063       for (reference **q = last_name_start; q < p; q++)
1064 	(*q)->need_author(i);
1065     }
1066   }
1067 }
1068 
same_author_last_name(const reference & r1,const reference & r2,int n)1069 int same_author_last_name(const reference &r1, const reference &r2, int n)
1070 {
1071   const char *ae1;
1072   const char *as1 = r1.get_sort_field(0, n, 0, &ae1);
1073   assert(as1 != 0);
1074   const char *ae2;
1075   const char *as2 = r2.get_sort_field(0, n, 0, &ae2);
1076   assert(as2 != 0);
1077   return ae1 - as1 == ae2 - as2 && memcmp(as1, as2, ae1 - as1) == 0;
1078 }
1079 
same_author_name(const reference & r1,const reference & r2,int n)1080 int same_author_name(const reference &r1, const reference &r2, int n)
1081 {
1082   const char *ae1;
1083   const char *as1 = r1.get_sort_field(0, n, -1, &ae1);
1084   assert(as1 != 0);
1085   const char *ae2;
1086   const char *as2 = r2.get_sort_field(0, n, -1, &ae2);
1087   assert(as2 != 0);
1088   return ae1 - as1 == ae2 - as2 && memcmp(as1, as2, ae1 - as1) == 0;
1089 }
1090 
1091 
set(int i)1092 void int_set::set(int i)
1093 {
1094   assert(i >= 0);
1095   int bytei = i >> 3;
1096   if (bytei >= v.length()) {
1097     int old_length = v.length();
1098     v.set_length(bytei + 1);
1099     for (int j = old_length; j <= bytei; j++)
1100       v[j] = 0;
1101   }
1102   v[bytei] |= 1 << (i & 7);
1103 }
1104 
get(int i)1105 int int_set::get(int i) const
1106 {
1107   assert(i >= 0);
1108   int bytei = i >> 3;
1109   return bytei >= v.length() ? 0 : (v[bytei] & (1 << (i & 7))) != 0;
1110 }
1111 
set_last_name_unambiguous(int i)1112 void reference::set_last_name_unambiguous(int i)
1113 {
1114   last_name_unambiguous.set(i);
1115 }
1116 
need_author(int n)1117 void reference::need_author(int n)
1118 {
1119   if (n > last_needed_author)
1120     last_needed_author = n;
1121 }
1122 
get_authors(const char ** end)1123 const char *reference::get_authors(const char **end) const
1124 {
1125   if (!computed_authors) {
1126     ((reference *)this)->computed_authors = 1;
1127     string &result = ((reference *)this)->authors;
1128     int na = get_nauthors();
1129     result.clear();
1130     for (int i = 0; i < na; i++) {
1131       if (last_name_unambiguous.get(i)) {
1132 	const char *e, *start = get_author_last_name(i, &e);
1133 	assert(start != 0);
1134 	result.append(start, e - start);
1135       }
1136       else {
1137 	const char *e, *start = get_author(i, &e);
1138 	assert(start != 0);
1139 	result.append(start, e - start);
1140       }
1141       if (i == last_needed_author
1142 	  && et_al.length() > 0
1143 	  && et_al_min_elide > 0
1144 	  && last_needed_author + et_al_min_elide < na
1145 	  && na >= et_al_min_total) {
1146 	result += et_al;
1147 	break;
1148       }
1149       if (i < na - 1) {
1150 	if (na == 2)
1151 	  result += join_authors_exactly_two;
1152 	else if (i < na - 2)
1153 	  result += join_authors_default;
1154 	else
1155 	  result += join_authors_last_two;
1156       }
1157     }
1158   }
1159   const char *start = authors.contents();
1160   *end = start + authors.length();
1161   return start;
1162 }
1163 
get_nauthors()1164 int reference::get_nauthors() const
1165 {
1166   if (nauthors < 0) {
1167     const char *dummy;
1168     for (int na = 0; get_author(na, &dummy) != 0; na++)
1169       ;
1170     ((reference *)this)->nauthors = na;
1171   }
1172   return nauthors;
1173 }
1174