1 /* -*- C++ -*-
2 Copyright (C) 1989, 1990, 1991, 1992 Free Software Foundation, Inc.
3 Written by James Clark (jjc@jclark.com)
4
5 This file is part of groff.
6
7 groff is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
11
12 groff is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License along
18 with groff; see the file COPYING. If not, write to the Free Software
19 Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
20
21 %{
22
23 #include "refer.h"
24 #include "refid.h"
25 #include "ref.h"
26 #include "token.h"
27
28 int yylex();
29 void yyerror(const char *);
30 int yyparse();
31
32 static const char *format_serial(char c, int n);
33
34 struct label_info {
35 int start;
36 int length;
37 int count;
38 int total;
39 label_info(const string &);
40 };
41
42 label_info *lookup_label(const string &label);
43
44 struct expression {
45 enum {
46 // Does the tentative label depend on the reference?
47 CONTAINS_VARIABLE = 01,
48 CONTAINS_STAR = 02,
49 CONTAINS_FORMAT = 04,
50 CONTAINS_AT = 010
51 };
~expressionexpression52 virtual ~expression() { }
53 virtual void evaluate(int, const reference &, string &,
54 substring_position &) = 0;
analyzeexpression55 virtual unsigned analyze() { return 0; }
56 };
57
58 class at_expr : public expression {
59 public:
at_expr()60 at_expr() { }
61 void evaluate(int, const reference &, string &, substring_position &);
analyze()62 unsigned analyze() { return CONTAINS_VARIABLE|CONTAINS_AT; }
63 };
64
65 class format_expr : public expression {
66 char type;
67 int width;
68 int first_number;
69 public:
70 format_expr(char c, int w = 0, int f = 1)
type(c)71 : type(c), width(w), first_number(f) { }
72 void evaluate(int, const reference &, string &, substring_position &);
analyze()73 unsigned analyze() { return CONTAINS_FORMAT; }
74 };
75
76 class field_expr : public expression {
77 int number;
78 char name;
79 public:
field_expr(char nm,int num)80 field_expr(char nm, int num) : name(nm), number(num) { }
81 void evaluate(int, const reference &, string &, substring_position &);
analyze()82 unsigned analyze() { return CONTAINS_VARIABLE; }
83 };
84
85 class literal_expr : public expression {
86 string s;
87 public:
literal_expr(const char * ptr,int len)88 literal_expr(const char *ptr, int len) : s(ptr, len) { }
89 void evaluate(int, const reference &, string &, substring_position &);
90 };
91
92 class unary_expr : public expression {
93 protected:
94 expression *expr;
95 public:
unary_expr(expression * e)96 unary_expr(expression *e) : expr(e) { }
~unary_expr()97 ~unary_expr() { delete expr; }
98 void evaluate(int, const reference &, string &, substring_position &) = 0;
analyze()99 unsigned analyze() { return expr ? expr->analyze() : 0; }
100 };
101
102 // This caches the analysis of an expression.
103
104 class analyzed_expr : public unary_expr {
105 unsigned flags;
106 public:
107 analyzed_expr(expression *);
108 void evaluate(int, const reference &, string &, substring_position &);
analyze()109 unsigned analyze() { return flags; }
110 };
111
112 class star_expr : public unary_expr {
113 public:
star_expr(expression * e)114 star_expr(expression *e) : unary_expr(e) { }
115 void evaluate(int, const reference &, string &, substring_position &);
analyze()116 unsigned analyze() {
117 return ((expr ? (expr->analyze() & ~CONTAINS_VARIABLE) : 0)
118 | CONTAINS_STAR);
119 }
120 };
121
122 typedef void map_t(const char *, const char *, string &);
123
124 class map_expr : public unary_expr {
125 map_t *func;
126 public:
map_expr(expression * e,map_t * f)127 map_expr(expression *e, map_t *f) : unary_expr(e), func(f) { }
128 void evaluate(int, const reference &, string &, substring_position &);
129 };
130
131 typedef const char *extractor_t(const char *, const char *, const char **);
132
133 class extractor_expr : public unary_expr {
134 int part;
135 extractor_t *func;
136 public:
137 enum { BEFORE = +1, MATCH = 0, AFTER = -1 };
extractor_expr(expression * e,extractor_t * f,int pt)138 extractor_expr(expression *e, extractor_t *f, int pt)
139 : unary_expr(e), func(f), part(pt) { }
140 void evaluate(int, const reference &, string &, substring_position &);
141 };
142
143 class truncate_expr : public unary_expr {
144 int n;
145 public:
truncate_expr(expression * e,int i)146 truncate_expr(expression *e, int i) : n(i), unary_expr(e) { }
147 void evaluate(int, const reference &, string &, substring_position &);
148 };
149
150 class separator_expr : public unary_expr {
151 public:
separator_expr(expression * e)152 separator_expr(expression *e) : unary_expr(e) { }
153 void evaluate(int, const reference &, string &, substring_position &);
154 };
155
156 class binary_expr : public expression {
157 protected:
158 expression *expr1;
159 expression *expr2;
160 public:
binary_expr(expression * e1,expression * e2)161 binary_expr(expression *e1, expression *e2) : expr1(e1), expr2(e2) { }
~binary_expr()162 ~binary_expr() { delete expr1; delete expr2; }
163 void evaluate(int, const reference &, string &, substring_position &) = 0;
analyze()164 unsigned analyze() {
165 return (expr1 ? expr1->analyze() : 0) | (expr2 ? expr2->analyze() : 0);
166 }
167 };
168
169 class alternative_expr : public binary_expr {
170 public:
alternative_expr(expression * e1,expression * e2)171 alternative_expr(expression *e1, expression *e2) : binary_expr(e1, e2) { }
172 void evaluate(int, const reference &, string &, substring_position &);
173 };
174
175 class list_expr : public binary_expr {
176 public:
list_expr(expression * e1,expression * e2)177 list_expr(expression *e1, expression *e2) : binary_expr(e1, e2) { }
178 void evaluate(int, const reference &, string &, substring_position &);
179 };
180
181 class substitute_expr : public binary_expr {
182 public:
substitute_expr(expression * e1,expression * e2)183 substitute_expr(expression *e1, expression *e2) : binary_expr(e1, e2) { }
184 void evaluate(int, const reference &, string &, substring_position &);
185 };
186
187 class ternary_expr : public expression {
188 protected:
189 expression *expr1;
190 expression *expr2;
191 expression *expr3;
192 public:
ternary_expr(expression * e1,expression * e2,expression * e3)193 ternary_expr(expression *e1, expression *e2, expression *e3)
194 : expr1(e1), expr2(e2), expr3(e3) { }
~ternary_expr()195 ~ternary_expr() { delete expr1; delete expr2; delete expr3; }
196 void evaluate(int, const reference &, string &, substring_position &) = 0;
analyze()197 unsigned analyze() {
198 return ((expr1 ? expr1->analyze() : 0)
199 | (expr2 ? expr2->analyze() : 0)
200 | (expr3 ? expr3->analyze() : 0));
201 }
202 };
203
204 class conditional_expr : public ternary_expr {
205 public:
conditional_expr(expression * e1,expression * e2,expression * e3)206 conditional_expr(expression *e1, expression *e2, expression *e3)
207 : ternary_expr(e1, e2, e3) { }
208 void evaluate(int, const reference &, string &, substring_position &);
209 };
210
211 static expression *parsed_label = 0;
212 static expression *parsed_date_label = 0;
213 static expression *parsed_short_label = 0;
214
215 static expression *parse_result;
216
217 string literals;
218
219 %}
220
221 %union {
222 int num;
223 expression *expr;
224 struct { int ndigits; int val; } dig;
225 struct { int start; int len; } str;
226 }
227
228 /* uppercase or lowercase letter */
229 %token <num> TOKEN_LETTER
230 /* literal characters */
231 %token <str> TOKEN_LITERAL
232 /* digit */
233 %token <num> TOKEN_DIGIT
234
235 %type <expr> conditional
236 %type <expr> alternative
237 %type <expr> list
238 %type <expr> string
239 %type <expr> substitute
240 %type <expr> optional_conditional
241 %type <num> number
242 %type <dig> digits
243 %type <num> optional_number
244 %type <num> flag
245
246 %%
247
248 expr:
249 optional_conditional
250 { parse_result = ($1 ? new analyzed_expr($1) : 0); }
251 ;
252
253 conditional:
254 alternative
255 { $$ = $1; }
256 | alternative '?' optional_conditional ':' conditional
257 { $$ = new conditional_expr($1, $3, $5); }
258 ;
259
260 optional_conditional:
261 /* empty */
262 { $$ = 0; }
263 | conditional
264 { $$ = $1; }
265 ;
266
267 alternative:
268 list
269 { $$ = $1; }
270 | alternative '|' list
271 { $$ = new alternative_expr($1, $3); }
272 | alternative '&' list
273 { $$ = new conditional_expr($1, $3, 0); }
274 ;
275
276 list:
277 substitute
278 { $$ = $1; }
279 | list substitute
280 { $$ = new list_expr($1, $2); }
281 ;
282
283 substitute:
284 string
285 { $$ = $1; }
286 | substitute '~' string
287 { $$ = new substitute_expr($1, $3); }
288 ;
289
290 string:
291 '@'
292 { $$ = new at_expr; }
293 | TOKEN_LITERAL
294 {
295 $$ = new literal_expr(literals.contents() + $1.start,
296 $1.len);
297 }
298 | TOKEN_LETTER
299 { $$ = new field_expr($1, 0); }
300 | TOKEN_LETTER number
301 { $$ = new field_expr($1, $2 - 1); }
302 | '%' TOKEN_LETTER
303 {
304 switch ($2) {
305 case 'I':
306 case 'i':
307 case 'A':
308 case 'a':
309 $$ = new format_expr($2);
310 break;
311 default:
312 command_error("unrecognized format `%1'", char($2));
313 $$ = new format_expr('a');
314 break;
315 }
316 }
317
318 | '%' digits
319 {
320 $$ = new format_expr('0', $2.ndigits, $2.val);
321 }
322 | string '.' flag TOKEN_LETTER optional_number
323 {
324 switch ($4) {
325 case 'l':
326 $$ = new map_expr($1, lowercase);
327 break;
328 case 'u':
329 $$ = new map_expr($1, uppercase);
330 break;
331 case 'c':
332 $$ = new map_expr($1, capitalize);
333 break;
334 case 'r':
335 $$ = new map_expr($1, reverse_name);
336 break;
337 case 'a':
338 $$ = new map_expr($1, abbreviate_name);
339 break;
340 case 'y':
341 $$ = new extractor_expr($1, find_year, $3);
342 break;
343 case 'n':
344 $$ = new extractor_expr($1, find_last_name, $3);
345 break;
346 default:
347 $$ = $1;
348 command_error("unknown function `%1'", char($4));
349 break;
350 }
351 }
352
353 | string '+' number
354 { $$ = new truncate_expr($1, $3); }
355 | string '-' number
356 { $$ = new truncate_expr($1, -$3); }
357 | string '*'
358 { $$ = new star_expr($1); }
359 | '(' optional_conditional ')'
360 { $$ = $2; }
361 | '<' optional_conditional '>'
362 { $$ = new separator_expr($2); }
363 ;
364
365 optional_number:
366 /* empty */
367 { $$ = -1; }
368 | number
369 { $$ = $1; }
370 ;
371
372 number:
373 TOKEN_DIGIT
374 { $$ = $1; }
375 | number TOKEN_DIGIT
376 { $$ = $1*10 + $2; }
377 ;
378
379 digits:
380 TOKEN_DIGIT
381 { $$.ndigits = 1; $$.val = $1; }
382 | digits TOKEN_DIGIT
383 { $$.ndigits = $1.ndigits + 1; $$.val = $1.val*10 + $2; }
384 ;
385
386
387 flag:
388 /* empty */
389 { $$ = 0; }
390 | '+'
391 { $$ = 1; }
392 | '-'
393 { $$ = -1; }
394 ;
395
396 %%
397
398 /* bison defines const to be empty unless __STDC__ is defined, which it
399 isn't under cfront */
400
401 #ifdef const
402 #undef const
403 #endif
404
405 const char *spec_ptr;
406 const char *spec_end;
407 const char *spec_cur;
408
yylex()409 int yylex()
410 {
411 while (spec_ptr < spec_end && csspace(*spec_ptr))
412 spec_ptr++;
413 spec_cur = spec_ptr;
414 if (spec_ptr >= spec_end)
415 return 0;
416 unsigned char c = *spec_ptr++;
417 if (csalpha(c)) {
418 yylval.num = c;
419 return TOKEN_LETTER;
420 }
421 if (csdigit(c)) {
422 yylval.num = c - '0';
423 return TOKEN_DIGIT;
424 }
425 if (c == '\'') {
426 yylval.str.start = literals.length();
427 for (; spec_ptr < spec_end; spec_ptr++) {
428 if (*spec_ptr == '\'') {
429 if (++spec_ptr < spec_end && *spec_ptr == '\'')
430 literals += '\'';
431 else {
432 yylval.str.len = literals.length() - yylval.str.start;
433 return TOKEN_LITERAL;
434 }
435 }
436 else
437 literals += *spec_ptr;
438 }
439 yylval.str.len = literals.length() - yylval.str.start;
440 return TOKEN_LITERAL;
441 }
442 return c;
443 }
444
set_label_spec(const char * label_spec)445 int set_label_spec(const char *label_spec)
446 {
447 spec_cur = spec_ptr = label_spec;
448 spec_end = strchr(label_spec, '\0');
449 literals.clear();
450 if (yyparse())
451 return 0;
452 delete parsed_label;
453 parsed_label = parse_result;
454 return 1;
455 }
456
set_date_label_spec(const char * label_spec)457 int set_date_label_spec(const char *label_spec)
458 {
459 spec_cur = spec_ptr = label_spec;
460 spec_end = strchr(label_spec, '\0');
461 literals.clear();
462 if (yyparse())
463 return 0;
464 delete parsed_date_label;
465 parsed_date_label = parse_result;
466 return 1;
467 }
468
set_short_label_spec(const char * label_spec)469 int set_short_label_spec(const char *label_spec)
470 {
471 spec_cur = spec_ptr = label_spec;
472 spec_end = strchr(label_spec, '\0');
473 literals.clear();
474 if (yyparse())
475 return 0;
476 delete parsed_short_label;
477 parsed_short_label = parse_result;
478 return 1;
479 }
480
yyerror(const char * message)481 void yyerror(const char *message)
482 {
483 if (spec_cur < spec_end)
484 command_error("label specification %1 before `%2'", message, spec_cur);
485 else
486 command_error("label specification %1 at end of string",
487 message, spec_cur);
488 }
489
evaluate(int tentative,const reference & ref,string & result,substring_position &)490 void at_expr::evaluate(int tentative, const reference &ref,
491 string &result, substring_position &)
492 {
493 if (tentative)
494 ref.canonicalize_authors(result);
495 else {
496 const char *end, *start = ref.get_authors(&end);
497 if (start)
498 result.append(start, end - start);
499 }
500 }
501
evaluate(int tentative,const reference & ref,string & result,substring_position &)502 void format_expr::evaluate(int tentative, const reference &ref,
503 string &result, substring_position &)
504 {
505 if (tentative)
506 return;
507 const label_info *lp = ref.get_label_ptr();
508 int num = lp == 0 ? ref.get_number() : lp->count;
509 if (type != '0')
510 result += format_serial(type, num + 1);
511 else {
512 const char *ptr = itoa(num + first_number);
513 int pad = width - strlen(ptr);
514 while (--pad >= 0)
515 result += '0';
516 result += ptr;
517 }
518 }
519
format_serial(char c,int n)520 static const char *format_serial(char c, int n)
521 {
522 assert(n > 0);
523 static char buf[128]; // more than enough.
524 switch (c) {
525 case 'i':
526 case 'I':
527 {
528 char *p = buf;
529 // troff uses z and w to represent 10000 and 5000 in Roman
530 // numerals; I can find no historical basis for this usage
531 const char *s = c == 'i' ? "zwmdclxvi" : "ZWMDCLXVI";
532 if (n >= 40000)
533 return itoa(n);
534 while (n >= 10000) {
535 *p++ = s[0];
536 n -= 10000;
537 }
538 for (int i = 1000; i > 0; i /= 10, s += 2) {
539 int m = n/i;
540 n -= m*i;
541 switch (m) {
542 case 3:
543 *p++ = s[2];
544 /* falls through */
545 case 2:
546 *p++ = s[2];
547 /* falls through */
548 case 1:
549 *p++ = s[2];
550 break;
551 case 4:
552 *p++ = s[2];
553 *p++ = s[1];
554 break;
555 case 8:
556 *p++ = s[1];
557 *p++ = s[2];
558 *p++ = s[2];
559 *p++ = s[2];
560 break;
561 case 7:
562 *p++ = s[1];
563 *p++ = s[2];
564 *p++ = s[2];
565 break;
566 case 6:
567 *p++ = s[1];
568 *p++ = s[2];
569 break;
570 case 5:
571 *p++ = s[1];
572 break;
573 case 9:
574 *p++ = s[2];
575 *p++ = s[0];
576 }
577 }
578 *p = 0;
579 break;
580 }
581 case 'a':
582 case 'A':
583 {
584 char *p = buf;
585 // this is derived from troff/reg.c
586 while (n > 0) {
587 int d = n % 26;
588 if (d == 0)
589 d = 26;
590 n -= d;
591 n /= 26;
592 *p++ = c + d - 1; // ASCII dependent
593 }
594 *p-- = 0;
595 // Reverse it.
596 char *q = buf;
597 while (q < p) {
598 char temp = *q;
599 *q = *p;
600 *p = temp;
601 --p;
602 ++q;
603 }
604 break;
605 }
606 default:
607 assert(0);
608 }
609 return buf;
610 }
611
evaluate(int,const reference & ref,string & result,substring_position &)612 void field_expr::evaluate(int, const reference &ref,
613 string &result, substring_position &)
614 {
615 const char *end;
616 const char *start = ref.get_field(name, &end);
617 if (start) {
618 start = nth_field(number, start, &end);
619 if (start)
620 result.append(start, end - start);
621 }
622 }
623
evaluate(int,const reference &,string & result,substring_position &)624 void literal_expr::evaluate(int, const reference &,
625 string &result, substring_position &)
626 {
627 result += s;
628 }
629
analyzed_expr(expression * e)630 analyzed_expr::analyzed_expr(expression *e)
631 : unary_expr(e), flags(e ? e->analyze() : 0)
632 {
633 }
634
evaluate(int tentative,const reference & ref,string & result,substring_position & pos)635 void analyzed_expr::evaluate(int tentative, const reference &ref,
636 string &result, substring_position &pos)
637 {
638 if (expr)
639 expr->evaluate(tentative, ref, result, pos);
640 }
641
evaluate(int tentative,const reference & ref,string & result,substring_position & pos)642 void star_expr::evaluate(int tentative, const reference &ref,
643 string &result, substring_position &pos)
644 {
645 const label_info *lp = ref.get_label_ptr();
646 if (!tentative
647 && (lp == 0 || lp->total > 1)
648 && expr)
649 expr->evaluate(tentative, ref, result, pos);
650 }
651
evaluate(int tentative,const reference & ref,string & result,substring_position & pos)652 void separator_expr::evaluate(int tentative, const reference &ref,
653 string &result, substring_position &pos)
654 {
655 int start_length = result.length();
656 int is_first = pos.start < 0;
657 if (expr)
658 expr->evaluate(tentative, ref, result, pos);
659 if (is_first) {
660 pos.start = start_length;
661 pos.length = result.length() - start_length;
662 }
663 }
664
evaluate(int tentative,const reference & ref,string & result,substring_position &)665 void map_expr::evaluate(int tentative, const reference &ref,
666 string &result, substring_position &)
667 {
668 if (expr) {
669 string temp;
670 substring_position temp_pos;
671 expr->evaluate(tentative, ref, temp, temp_pos);
672 (*func)(temp.contents(), temp.contents() + temp.length(), result);
673 }
674 }
675
evaluate(int tentative,const reference & ref,string & result,substring_position &)676 void extractor_expr::evaluate(int tentative, const reference &ref,
677 string &result, substring_position &)
678 {
679 if (expr) {
680 string temp;
681 substring_position temp_pos;
682 expr->evaluate(tentative, ref, temp, temp_pos);
683 const char *end, *start = (*func)(temp.contents(),
684 temp.contents() + temp.length(),
685 &end);
686 switch (part) {
687 case BEFORE:
688 if (start)
689 result.append(temp.contents(), start - temp.contents());
690 else
691 result += temp;
692 break;
693 case MATCH:
694 if (start)
695 result.append(start, end - start);
696 break;
697 case AFTER:
698 if (start)
699 result.append(end, temp.contents() + temp.length() - end);
700 break;
701 default:
702 assert(0);
703 }
704 }
705 }
706
first_part(int len,const char * ptr,const char * end,string & result)707 static void first_part(int len, const char *ptr, const char *end,
708 string &result)
709 {
710 for (;;) {
711 const char *token_start = ptr;
712 if (!get_token(&ptr, end))
713 break;
714 const token_info *ti = lookup_token(token_start, ptr);
715 int counts = ti->sortify_non_empty(token_start, ptr);
716 if (counts && --len < 0)
717 break;
718 if (counts || ti->is_accent())
719 result.append(token_start, ptr - token_start);
720 }
721 }
722
last_part(int len,const char * ptr,const char * end,string & result)723 static void last_part(int len, const char *ptr, const char *end,
724 string &result)
725 {
726 const char *start = ptr;
727 int count = 0;
728 for (;;) {
729 const char *token_start = ptr;
730 if (!get_token(&ptr, end))
731 break;
732 const token_info *ti = lookup_token(token_start, ptr);
733 if (ti->sortify_non_empty(token_start, ptr))
734 count++;
735 }
736 ptr = start;
737 int skip = count - len;
738 if (skip > 0) {
739 for (;;) {
740 const char *token_start = ptr;
741 if (!get_token(&ptr, end))
742 assert(0);
743 const token_info *ti = lookup_token(token_start, ptr);
744 if (ti->sortify_non_empty(token_start, ptr) && --skip < 0) {
745 ptr = token_start;
746 break;
747 }
748 }
749 }
750 first_part(len, ptr, end, result);
751 }
752
evaluate(int tentative,const reference & ref,string & result,substring_position &)753 void truncate_expr::evaluate(int tentative, const reference &ref,
754 string &result, substring_position &)
755 {
756 if (expr) {
757 string temp;
758 substring_position temp_pos;
759 expr->evaluate(tentative, ref, temp, temp_pos);
760 const char *start = temp.contents();
761 const char *end = start + temp.length();
762 if (n > 0)
763 first_part(n, start, end, result);
764 else if (n < 0)
765 last_part(-n, start, end, result);
766 }
767 }
768
evaluate(int tentative,const reference & ref,string & result,substring_position & pos)769 void alternative_expr::evaluate(int tentative, const reference &ref,
770 string &result, substring_position &pos)
771 {
772 int start_length = result.length();
773 if (expr1)
774 expr1->evaluate(tentative, ref, result, pos);
775 if (result.length() == start_length && expr2)
776 expr2->evaluate(tentative, ref, result, pos);
777 }
778
evaluate(int tentative,const reference & ref,string & result,substring_position & pos)779 void list_expr::evaluate(int tentative, const reference &ref,
780 string &result, substring_position &pos)
781 {
782 if (expr1)
783 expr1->evaluate(tentative, ref, result, pos);
784 if (expr2)
785 expr2->evaluate(tentative, ref, result, pos);
786 }
787
evaluate(int tentative,const reference & ref,string & result,substring_position & pos)788 void substitute_expr::evaluate(int tentative, const reference &ref,
789 string &result, substring_position &pos)
790 {
791 int start_length = result.length();
792 if (expr1)
793 expr1->evaluate(tentative, ref, result, pos);
794 if (result.length() > start_length && result[result.length() - 1] == '-') {
795 // ought to see if pos covers the -
796 result.set_length(result.length() - 1);
797 if (expr2)
798 expr2->evaluate(tentative, ref, result, pos);
799 }
800 }
801
evaluate(int tentative,const reference & ref,string & result,substring_position & pos)802 void conditional_expr::evaluate(int tentative, const reference &ref,
803 string &result, substring_position &pos)
804 {
805 string temp;
806 substring_position temp_pos;
807 if (expr1)
808 expr1->evaluate(tentative, ref, temp, temp_pos);
809 if (temp.length() > 0) {
810 if (expr2)
811 expr2->evaluate(tentative, ref, result, pos);
812 }
813 else {
814 if (expr3)
815 expr3->evaluate(tentative, ref, result, pos);
816 }
817 }
818
pre_compute_label()819 void reference::pre_compute_label()
820 {
821 if (parsed_label != 0
822 && (parsed_label->analyze() & expression::CONTAINS_VARIABLE)) {
823 label.clear();
824 substring_position temp_pos;
825 parsed_label->evaluate(1, *this, label, temp_pos);
826 label_ptr = lookup_label(label);
827 }
828 }
829
compute_label()830 void reference::compute_label()
831 {
832 label.clear();
833 if (parsed_label)
834 parsed_label->evaluate(0, *this, label, separator_pos);
835 if (short_label_flag && parsed_short_label)
836 parsed_short_label->evaluate(0, *this, short_label, short_separator_pos);
837 if (date_as_label) {
838 string new_date;
839 if (parsed_date_label) {
840 substring_position temp_pos;
841 parsed_date_label->evaluate(0, *this, new_date, temp_pos);
842 }
843 set_date(new_date);
844 }
845 if (label_ptr)
846 label_ptr->count += 1;
847 }
848
immediate_compute_label()849 void reference::immediate_compute_label()
850 {
851 if (label_ptr)
852 label_ptr->total = 2; // force use of disambiguator
853 compute_label();
854 }
855
merge_labels(reference ** v,int n,label_type type,string & result)856 int reference::merge_labels(reference **v, int n, label_type type,
857 string &result)
858 {
859 if (abbreviate_label_ranges)
860 return merge_labels_by_number(v, n, type, result);
861 else
862 return merge_labels_by_parts(v, n, type, result);
863 }
864
merge_labels_by_number(reference ** v,int n,label_type type,string & result)865 int reference::merge_labels_by_number(reference **v, int n, label_type type,
866 string &result)
867 {
868 if (n <= 1)
869 return 0;
870 int num = get_number();
871 // Only merge three or more labels.
872 if (v[0]->get_number() != num + 1
873 || v[1]->get_number() != num + 2)
874 return 0;
875 for (int i = 2; i < n; i++)
876 if (v[i]->get_number() != num + i + 1)
877 break;
878 result = get_label(type);
879 result += label_range_indicator;
880 result += v[i - 1]->get_label(type);
881 return i;
882 }
883
get_separator_pos(label_type type)884 const substring_position &reference::get_separator_pos(label_type type) const
885 {
886 if (type == SHORT_LABEL && short_label_flag)
887 return short_separator_pos;
888 else
889 return separator_pos;
890 }
891
get_label(label_type type)892 const string &reference::get_label(label_type type) const
893 {
894 if (type == SHORT_LABEL && short_label_flag)
895 return short_label;
896 else
897 return label;
898 }
899
merge_labels_by_parts(reference ** v,int n,label_type type,string & result)900 int reference::merge_labels_by_parts(reference **v, int n, label_type type,
901 string &result)
902 {
903 if (n <= 0)
904 return 0;
905 const string &lb = get_label(type);
906 const substring_position &sp = get_separator_pos(type);
907 if (sp.start < 0
908 || sp.start != v[0]->get_separator_pos(type).start
909 || memcmp(lb.contents(), v[0]->get_label(type).contents(),
910 sp.start) != 0)
911 return 0;
912 result = lb;
913 int i = 0;
914 do {
915 result += separate_label_second_parts;
916 const substring_position &s = v[i]->get_separator_pos(type);
917 int sep_end_pos = s.start + s.length;
918 result.append(v[i]->get_label(type).contents() + sep_end_pos,
919 v[i]->get_label(type).length() - sep_end_pos);
920 } while (++i < n
921 && sp.start == v[i]->get_separator_pos(type).start
922 && memcmp(lb.contents(), v[i]->get_label(type).contents(),
923 sp.start) == 0);
924 return i;
925 }
926
927 string label_pool;
928
label_info(const string & s)929 label_info::label_info(const string &s)
930 : count(0), total(1), length(s.length()), start(label_pool.length())
931 {
932 label_pool += s;
933 }
934
935 static label_info **label_table = 0;
936 static int label_table_size = 0;
937 static int label_table_used = 0;
938
lookup_label(const string & label)939 label_info *lookup_label(const string &label)
940 {
941 if (label_table == 0) {
942 label_table = new label_info *[17];
943 label_table_size = 17;
944 for (int i = 0; i < 17; i++)
945 label_table[i] = 0;
946 }
947 unsigned h = hash_string(label.contents(), label.length()) % label_table_size;
948 for (label_info **ptr = label_table + h;
949 *ptr != 0;
950 (ptr == label_table)
951 ? (ptr = label_table + label_table_size - 1)
952 : ptr--)
953 if ((*ptr)->length == label.length()
954 && memcmp(label_pool.contents() + (*ptr)->start, label.contents(),
955 label.length()) == 0) {
956 (*ptr)->total += 1;
957 return *ptr;
958 }
959 label_info *result = *ptr = new label_info(label);
960 if (++label_table_used * 2 > label_table_size) {
961 // Rehash the table.
962 label_info **old_table = label_table;
963 int old_size = label_table_size;
964 label_table_size = next_size(label_table_size);
965 label_table = new label_info *[label_table_size];
966 int i;
967 for (i = 0; i < label_table_size; i++)
968 label_table[i] = 0;
969 for (i = 0; i < old_size; i++)
970 if (old_table[i]) {
971 unsigned h = hash_string(label_pool.contents() + old_table[i]->start,
972 old_table[i]->length);
973 for (label_info **p = label_table + (h % label_table_size);
974 *p != 0;
975 (p == label_table)
976 ? (p = label_table + label_table_size - 1)
977 : --p)
978 ;
979 *p = old_table[i];
980 }
981 a_delete old_table;
982 }
983 return result;
984 }
985
clear_labels()986 void clear_labels()
987 {
988 for (int i = 0; i < label_table_size; i++) {
989 delete label_table[i];
990 label_table[i] = 0;
991 }
992 label_table_used = 0;
993 label_pool.clear();
994 }
995
996 static void consider_authors(reference **start, reference **end, int i);
997
compute_labels(reference ** v,int n)998 void compute_labels(reference **v, int n)
999 {
1000 if (parsed_label
1001 && (parsed_label->analyze() & expression::CONTAINS_AT)
1002 && sort_fields.length() >= 2
1003 && sort_fields[0] == 'A'
1004 && sort_fields[1] == '+')
1005 consider_authors(v, v + n, 0);
1006 for (int i = 0; i < n; i++)
1007 v[i]->compute_label();
1008 }
1009
1010
1011 /* A reference with a list of authors <A0,A1,...,AN> _needs_ author i
1012 where 0 <= i <= N if there exists a reference with a list of authors
1013 <B0,B1,...,BM> such that <A0,A1,...,AN> != <B0,B1,...,BM> and M >= i
1014 and Aj = Bj for 0 <= j < i. In this case if we can't say ``A0,
1015 A1,...,A(i-1) et al'' because this would match both <A0,A1,...,AN> and
1016 <B0,B1,...,BM>. If a reference needs author i we only have to call
1017 need_author(j) for some j >= i such that the reference also needs
1018 author j. */
1019
1020 /* This function handles 2 tasks:
1021 determine which authors are needed (cannot be elided with et al.);
1022 determine which authors can have only last names in the labels.
1023
1024 References >= start and < end have the same first i author names.
1025 Also they're sorted by A+. */
1026
consider_authors(reference ** start,reference ** end,int i)1027 static void consider_authors(reference **start, reference **end, int i)
1028 {
1029 if (start >= end)
1030 return;
1031 reference **p = start;
1032 if (i >= (*p)->get_nauthors()) {
1033 for (++p; p < end && i >= (*p)->get_nauthors(); p++)
1034 ;
1035 if (p < end && i > 0) {
1036 // If we have an author list <A B C> and an author list <A B C D>,
1037 // then both lists need C.
1038 for (reference **q = start; q < end; q++)
1039 (*q)->need_author(i - 1);
1040 }
1041 start = p;
1042 }
1043 while (p < end) {
1044 reference **last_name_start = p;
1045 reference **name_start = p;
1046 for (++p;
1047 p < end && i < (*p)->get_nauthors()
1048 && same_author_last_name(**last_name_start, **p, i);
1049 p++) {
1050 if (!same_author_name(**name_start, **p, i)) {
1051 consider_authors(name_start, p, i + 1);
1052 name_start = p;
1053 }
1054 }
1055 consider_authors(name_start, p, i + 1);
1056 if (last_name_start == name_start) {
1057 for (reference **q = last_name_start; q < p; q++)
1058 (*q)->set_last_name_unambiguous(i);
1059 }
1060 // If we have an author list <A B C D> and <A B C E>, then the lists
1061 // need author D and E respectively.
1062 if (name_start > start || p < end) {
1063 for (reference **q = last_name_start; q < p; q++)
1064 (*q)->need_author(i);
1065 }
1066 }
1067 }
1068
same_author_last_name(const reference & r1,const reference & r2,int n)1069 int same_author_last_name(const reference &r1, const reference &r2, int n)
1070 {
1071 const char *ae1;
1072 const char *as1 = r1.get_sort_field(0, n, 0, &ae1);
1073 assert(as1 != 0);
1074 const char *ae2;
1075 const char *as2 = r2.get_sort_field(0, n, 0, &ae2);
1076 assert(as2 != 0);
1077 return ae1 - as1 == ae2 - as2 && memcmp(as1, as2, ae1 - as1) == 0;
1078 }
1079
same_author_name(const reference & r1,const reference & r2,int n)1080 int same_author_name(const reference &r1, const reference &r2, int n)
1081 {
1082 const char *ae1;
1083 const char *as1 = r1.get_sort_field(0, n, -1, &ae1);
1084 assert(as1 != 0);
1085 const char *ae2;
1086 const char *as2 = r2.get_sort_field(0, n, -1, &ae2);
1087 assert(as2 != 0);
1088 return ae1 - as1 == ae2 - as2 && memcmp(as1, as2, ae1 - as1) == 0;
1089 }
1090
1091
set(int i)1092 void int_set::set(int i)
1093 {
1094 assert(i >= 0);
1095 int bytei = i >> 3;
1096 if (bytei >= v.length()) {
1097 int old_length = v.length();
1098 v.set_length(bytei + 1);
1099 for (int j = old_length; j <= bytei; j++)
1100 v[j] = 0;
1101 }
1102 v[bytei] |= 1 << (i & 7);
1103 }
1104
get(int i)1105 int int_set::get(int i) const
1106 {
1107 assert(i >= 0);
1108 int bytei = i >> 3;
1109 return bytei >= v.length() ? 0 : (v[bytei] & (1 << (i & 7))) != 0;
1110 }
1111
set_last_name_unambiguous(int i)1112 void reference::set_last_name_unambiguous(int i)
1113 {
1114 last_name_unambiguous.set(i);
1115 }
1116
need_author(int n)1117 void reference::need_author(int n)
1118 {
1119 if (n > last_needed_author)
1120 last_needed_author = n;
1121 }
1122
get_authors(const char ** end)1123 const char *reference::get_authors(const char **end) const
1124 {
1125 if (!computed_authors) {
1126 ((reference *)this)->computed_authors = 1;
1127 string &result = ((reference *)this)->authors;
1128 int na = get_nauthors();
1129 result.clear();
1130 for (int i = 0; i < na; i++) {
1131 if (last_name_unambiguous.get(i)) {
1132 const char *e, *start = get_author_last_name(i, &e);
1133 assert(start != 0);
1134 result.append(start, e - start);
1135 }
1136 else {
1137 const char *e, *start = get_author(i, &e);
1138 assert(start != 0);
1139 result.append(start, e - start);
1140 }
1141 if (i == last_needed_author
1142 && et_al.length() > 0
1143 && et_al_min_elide > 0
1144 && last_needed_author + et_al_min_elide < na
1145 && na >= et_al_min_total) {
1146 result += et_al;
1147 break;
1148 }
1149 if (i < na - 1) {
1150 if (na == 2)
1151 result += join_authors_exactly_two;
1152 else if (i < na - 2)
1153 result += join_authors_default;
1154 else
1155 result += join_authors_last_two;
1156 }
1157 }
1158 }
1159 const char *start = authors.contents();
1160 *end = start + authors.length();
1161 return start;
1162 }
1163
get_nauthors()1164 int reference::get_nauthors() const
1165 {
1166 if (nauthors < 0) {
1167 const char *dummy;
1168 for (int na = 0; get_author(na, &dummy) != 0; na++)
1169 ;
1170 ((reference *)this)->nauthors = na;
1171 }
1172 return nauthors;
1173 }
1174