1 // -*- C++ -*-
2 /* Copyright (C) 1989-2018 Free Software Foundation, Inc.
3 Written by James Clark (jjc@jclark.com)
4
5 This file is part of groff.
6
7 groff is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation, either version 3 of the License, or
10 (at your option) any later version.
11
12 groff is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19
#include "refer.h"
#include "refid.h"
#include "search.h"
#include "command.h"

#include <limits.h>
24
// Character set of valid field names; check_args() uses it to validate
// arguments of type 'f' and 'F'.  Initialized from csalpha.
cset cs_field_name = csalpha;
26
// One entry on the input stack: a buffer of command text together with
// the file name and starting line number used for diagnostics.
class input_item {
  input_item *next;     // item below this one; maintained by input_stack
  char *filename;       // copy made with strsave(); freed by the destructor
  int first_lineno;     // line number of the first line of buffer
  string buffer;        // the text being read
  const char *ptr;      // next character to be read from buffer
  const char *end;      // one past the last character of buffer
public:
  input_item(string &, const char *, int = 1);
  ~input_item();
  int get_char();       // read and consume one character, or EOF
  int peek_char();      // read one character without consuming it, or EOF
  void skip_char();     // consume one character; no bounds check
  int get_location(const char **, int *);

  friend class input_stack;
};
44
input_item(string & s,const char * fn,int ln)45 input_item::input_item(string &s, const char *fn, int ln)
46 : filename(strsave(fn)), first_lineno(ln)
47 {
48 buffer.move(s);
49 ptr = buffer.contents();
50 end = ptr + buffer.length();
51 }
52
// Release the copy of the file name made by strsave() in the constructor.
input_item::~input_item()
{
  a_delete filename;
}
57
peek_char()58 inline int input_item::peek_char()
59 {
60 if (ptr >= end)
61 return EOF;
62 else
63 return (unsigned char)*ptr;
64 }
65
get_char()66 inline int input_item::get_char()
67 {
68 if (ptr >= end)
69 return EOF;
70 else
71 return (unsigned char)*ptr++;
72 }
73
// Consume one character.  There is no bounds check here: the caller is
// expected to have seen a character via peek_char() first.
inline void input_item::skip_char()
{
  ptr++;
}
78
get_location(const char ** filenamep,int * linenop)79 int input_item::get_location(const char **filenamep, int *linenop)
80 {
81 *filenamep = filename;
82 if (ptr == buffer.contents())
83 *linenop = first_lineno;
84 else {
85 int ln = first_lineno;
86 const char *e = ptr - 1;
87 for (const char *p = buffer.contents(); p < e; p++)
88 if (*p == '\n')
89 ln++;
90 *linenop = ln;
91 }
92 return 1;
93 }
94
// The stack of pending command input.  Every member is static, so there
// is a single stack; top is the item currently being read.
class input_stack {
  static input_item *top;
public:
  // Discard every item on the stack.
  static void init();
  // Read a character, popping exhausted items; -1 when nothing is left.
  static int get_char();
  // Like get_char() but without consuming the character.
  static int peek_char();
  // Consume one character of the top item; top is dereferenced
  // unchecked, so it must be non-null.
  static void skip_char() { top->skip_char(); }
  // Read a whole file ("-" means standard input) and push it.
  static void push_file(const char *);
  // Push a string with the given file name and starting line number.
  static void push_string(string &, const char *, int);
  // Report an error at the location of the current input, if any.
  static void error(const char *format,
                    const errarg &arg1 = empty_errarg,
                    const errarg &arg2 = empty_errarg,
                    const errarg &arg3 = empty_errarg);
};

// The stack starts out empty.
input_item *input_stack::top = 0;
111
init()112 void input_stack::init()
113 {
114 while (top) {
115 input_item *tem = top;
116 top = top->next;
117 delete tem;
118 }
119 }
120
get_char()121 int input_stack::get_char()
122 {
123 while (top) {
124 int c = top->get_char();
125 if (c >= 0)
126 return c;
127 input_item *tem = top;
128 top = top->next;
129 delete tem;
130 }
131 return -1;
132 }
133
peek_char()134 int input_stack::peek_char()
135 {
136 while (top) {
137 int c = top->peek_char();
138 if (c >= 0)
139 return c;
140 input_item *tem = top;
141 top = top->next;
142 delete tem;
143 }
144 return -1;
145 }
146
push_file(const char * fn)147 void input_stack::push_file(const char *fn)
148 {
149 FILE *fp;
150 if (strcmp(fn, "-") == 0) {
151 fp = stdin;
152 fn = "<standard input>";
153 }
154 else {
155 errno = 0;
156 fp = fopen(fn, "r");
157 if (fp == 0) {
158 error("can't open '%1': %2", fn, strerror(errno));
159 return;
160 }
161 }
162 string buf;
163 int bol = 1;
164 int lineno = 1;
165 for (;;) {
166 int c = getc(fp);
167 if (bol && c == '.') {
168 // replace lines beginning with .R1 or .R2 with a blank line
169 c = getc(fp);
170 if (c == 'R') {
171 c = getc(fp);
172 if (c == '1' || c == '2') {
173 int cc = c;
174 c = getc(fp);
175 if (compatible_flag || c == ' ' || c == '\n' || c == EOF) {
176 while (c != '\n' && c != EOF)
177 c = getc(fp);
178 }
179 else {
180 buf += '.';
181 buf += 'R';
182 buf += cc;
183 }
184 }
185 else {
186 buf += '.';
187 buf += 'R';
188 }
189 }
190 else
191 buf += '.';
192 }
193 if (c == EOF)
194 break;
195 if (invalid_input_char(c))
196 error_with_file_and_line(fn, lineno,
197 "invalid input character code %1", int(c));
198 else {
199 buf += c;
200 if (c == '\n') {
201 bol = 1;
202 lineno++;
203 }
204 else
205 bol = 0;
206 }
207 }
208 if (fp != stdin)
209 fclose(fp);
210 if (buf.length() > 0 && buf[buf.length() - 1] != '\n')
211 buf += '\n';
212 input_item *it = new input_item(buf, fn);
213 it->next = top;
214 top = it;
215 }
216
// Push the text of S on the stack.  FILENAME and LINENO give the
// location reported for its first line in diagnostics.
void input_stack::push_string(string &s, const char *filename, int lineno)
{
  input_item *it = new input_item(s, filename, lineno);
  it->next = top;
  top = it;
}
223
error(const char * format,const errarg & arg1,const errarg & arg2,const errarg & arg3)224 void input_stack::error(const char *format, const errarg &arg1,
225 const errarg &arg2, const errarg &arg3)
226 {
227 const char *filename;
228 int lineno;
229 for (input_item *it = top; it; it = it->next)
230 if (it->get_location(&filename, &lineno)) {
231 error_with_file_and_line(filename, lineno, format, arg1, arg2, arg3);
232 return;
233 }
234 ::error(format, arg1, arg2, arg3);
235 }
236
// Free-function wrapper that forwards to input_stack::error(), so the
// message carries the location of the command input being read.
void command_error(const char *format, const errarg &arg1,
                   const errarg &arg2, const errarg &arg3)
{
  input_stack::error(format, arg1, arg2, arg3);
}
242
243 // # not recognized in ""
244 // \<newline> is recognized in ""
245 // # does not conceal newline
246 // if missing closing quote, word extends to end of line
247 // no special treatment of \ other than before newline
248 // \<newline> not recognized after #
249 // ; allowed as alternative to newline
250 // ; not recognized in ""
251 // don't clear word_buffer; just append on
252 // return -1 for EOF, 0 for newline, 1 for word
253
get_word(string & word_buffer)254 int get_word(string &word_buffer)
255 {
256 int c = input_stack::get_char();
257 for (;;) {
258 if (c == '#') {
259 do {
260 c = input_stack::get_char();
261 } while (c != '\n' && c != EOF);
262 break;
263 }
264 if (c == '\\' && input_stack::peek_char() == '\n')
265 input_stack::skip_char();
266 else if (c != ' ' && c != '\t')
267 break;
268 c = input_stack::get_char();
269 }
270 if (c == EOF)
271 return -1;
272 if (c == '\n' || c == ';')
273 return 0;
274 if (c == '"') {
275 for (;;) {
276 c = input_stack::peek_char();
277 if (c == EOF || c == '\n')
278 break;
279 input_stack::skip_char();
280 if (c == '"') {
281 int d = input_stack::peek_char();
282 if (d == '"')
283 input_stack::skip_char();
284 else
285 break;
286 }
287 else if (c == '\\') {
288 int d = input_stack::peek_char();
289 if (d == '\n')
290 input_stack::skip_char();
291 else
292 word_buffer += '\\';
293 }
294 else
295 word_buffer += c;
296 }
297 return 1;
298 }
299 word_buffer += c;
300 for (;;) {
301 c = input_stack::peek_char();
302 if (c == ' ' || c == '\t' || c == '\n' || c == '#' || c == ';')
303 break;
304 input_stack::skip_char();
305 if (c == '\\') {
306 int d = input_stack::peek_char();
307 if (d == '\n')
308 input_stack::skip_char();
309 else
310 word_buffer += '\\';
311 }
312 else
313 word_buffer += c;
314 }
315 return 1;
316 }
317
// One command argument.  command_loop() stores each argument as a
// string in s; check_args() overwrites it with the parsed value in n for
// arguments whose type letter is 'i'.
union argument {
  const char *s;
  int n;
};
322
323 // This is for debugging.
324
echo_command(int argc,argument * argv)325 static void echo_command(int argc, argument *argv)
326 {
327 for (int i = 0; i < argc; i++)
328 fprintf(stderr, "%s\n", argv[i].s);
329 }
330
include_command(int argc,argument * argv)331 static void include_command(int argc, argument *argv)
332 {
333 assert(argc == 1);
334 input_stack::push_file(argv[0].s);
335 }
336
capitalize_command(int argc,argument * argv)337 static void capitalize_command(int argc, argument *argv)
338 {
339 if (argc > 0)
340 capitalize_fields = argv[0].s;
341 else
342 capitalize_fields.clear();
343 }
344
// Handler for "accumulate" (takes no arguments): set the accumulate flag.
static void accumulate_command(int, argument *)
{
  accumulate = 1;
}
349
// Handler for "no-accumulate" (takes no arguments): clear the accumulate
// flag.
static void no_accumulate_command(int, argument *)
{
  accumulate = 0;
}
354
// Handler for "move-punctuation" (takes no arguments): set the
// move_punctuation flag.
static void move_punctuation_command(int, argument *)
{
  move_punctuation = 1;
}
359
// Handler for "no-move-punctuation" (takes no arguments): clear the
// move_punctuation flag.
static void no_move_punctuation_command(int, argument *)
{
  move_punctuation = 0;
}
364
sort_command(int argc,argument * argv)365 static void sort_command(int argc, argument *argv)
366 {
367 if (argc == 0)
368 sort_fields = "AD";
369 else
370 sort_fields = argv[0].s;
371 accumulate = 1;
372 }
373
// Handler for "no-sort" (takes no arguments): clear sort_fields.  The
// accumulate flag is left as it is.
static void no_sort_command(int, argument *)
{
  sort_fields.clear();
}
378
articles_command(int argc,argument * argv)379 static void articles_command(int argc, argument *argv)
380 {
381 articles.clear();
382 int i;
383 for (i = 0; i < argc; i++) {
384 articles += argv[i].s;
385 articles += '\0';
386 }
387 int len = articles.length();
388 for (i = 0; i < len; i++)
389 articles[i] = cmlower(articles[i]);
390 }
391
database_command(int argc,argument * argv)392 static void database_command(int argc, argument *argv)
393 {
394 for (int i = 0; i < argc; i++)
395 database_list.add_file(argv[i].s);
396 }
397
// Handler for "default-database" (takes no arguments): set the
// search_default flag.
static void default_database_command(int, argument *)
{
  search_default = 1;
}
402
// Handler for "no-default-database" (takes no arguments): clear the
// search_default flag.
static void no_default_database_command(int, argument *)
{
  search_default = 0;
}
407
bibliography_command(int argc,argument * argv)408 static void bibliography_command(int argc, argument *argv)
409 {
410 have_bibliography = 1;
411 const char *saved_filename = current_filename;
412 int saved_lineno = current_lineno;
413 int saved_label_in_text = label_in_text;
414 label_in_text = 0;
415 if (!accumulate)
416 fputs(".]<\n", stdout);
417 for (int i = 0; i < argc; i++)
418 do_bib(argv[i].s);
419 if (accumulate)
420 output_references();
421 else
422 fputs(".]>\n", stdout);
423 current_filename = saved_filename;
424 current_lineno = saved_lineno;
425 label_in_text = saved_label_in_text;
426 }
427
annotate_command(int argc,argument * argv)428 static void annotate_command(int argc, argument *argv)
429 {
430 if (argc > 0)
431 annotation_field = argv[0].s[0];
432 else
433 annotation_field = 'X';
434 if (argc == 2)
435 annotation_macro = argv[1].s;
436 else
437 annotation_macro = "AP";
438 }
439
// Handler for "no-annotate" (takes no arguments): clear the annotation
// macro and set annotation_field to -1.
static void no_annotate_command(int, argument *)
{
  annotation_macro.clear();
  annotation_field = -1;
}
445
// Handler for "reverse": set reverse_fields to the argument.  The
// command table requires exactly one argument, so argv[0] is present.
static void reverse_command(int, argument *argv)
{
  reverse_fields = argv[0].s;
}
450
// Handler for "no-reverse" (takes no arguments): clear reverse_fields.
static void no_reverse_command(int, argument *)
{
  reverse_fields.clear();
}
455
abbreviate_command(int argc,argument * argv)456 static void abbreviate_command(int argc, argument *argv)
457 {
458 abbreviate_fields = argv[0].s;
459 period_before_initial = argc > 1 ? argv[1].s : ". ";
460 period_before_last_name = argc > 2 ? argv[2].s : ". ";
461 period_before_other = argc > 3 ? argv[3].s : ". ";
462 period_before_hyphen = argc > 4 ? argv[4].s : ".";
463 }
464
// Handler for "no-abbreviate" (takes no arguments): clear
// abbreviate_fields.
static void no_abbreviate_command(int, argument *)
{
  abbreviate_fields.clear();
}
469
470 string search_ignore_fields;
471
search_ignore_command(int argc,argument * argv)472 static void search_ignore_command(int argc, argument *argv)
473 {
474 if (argc > 0)
475 search_ignore_fields = argv[0].s;
476 else
477 search_ignore_fields = "XYZ";
478 search_ignore_fields += '\0';
479 linear_ignore_fields = search_ignore_fields.contents();
480 }
481
// Handler for "no-search-ignore" (takes no arguments): point
// linear_ignore_fields at an empty string.
static void no_search_ignore_command(int, argument *)
{
  linear_ignore_fields = "";
}
486
search_truncate_command(int argc,argument * argv)487 static void search_truncate_command(int argc, argument *argv)
488 {
489 if (argc > 0)
490 linear_truncate_len = argv[0].n;
491 else
492 linear_truncate_len = 6;
493 }
494
// Handler for "no-search-truncate" (takes no arguments): set
// linear_truncate_len to -1.
static void no_search_truncate_command(int, argument *)
{
  linear_truncate_len = -1;
}
499
discard_command(int argc,argument * argv)500 static void discard_command(int argc, argument *argv)
501 {
502 if (argc == 0)
503 discard_fields = "XYZ";
504 else
505 discard_fields = argv[0].s;
506 accumulate = 1;
507 }
508
// Handler for "no-discard" (takes no arguments): clear discard_fields.
// The accumulate flag is left as it is.
static void no_discard_command(int, argument *)
{
  discard_fields.clear();
}
513
// Handler for "label": pass the single argument to set_label_spec().
// Any result of that call is not examined here.
static void label_command(int, argument *argv)
{
  set_label_spec(argv[0].s);
}
518
abbreviate_label_ranges_command(int argc,argument * argv)519 static void abbreviate_label_ranges_command(int argc, argument *argv)
520 {
521 abbreviate_label_ranges = 1;
522 label_range_indicator = argc > 0 ? argv[0].s : "-";
523 }
524
// Handler for "no-abbreviate-label-ranges" (takes no arguments): clear
// the abbreviate_label_ranges flag.
static void no_abbreviate_label_ranges_command(int, argument *)
{
  abbreviate_label_ranges = 0;
}
529
// Handler for "label-in-reference" (takes no arguments): set the
// label_in_reference flag.
static void label_in_reference_command(int, argument *)
{
  label_in_reference = 1;
}
534
// Handler for "no-label-in-reference" (takes no arguments): clear the
// label_in_reference flag.
static void no_label_in_reference_command(int, argument *)
{
  label_in_reference = 0;
}
539
// Handler for "label-in-text" (takes no arguments): set the
// label_in_text flag.
static void label_in_text_command(int, argument *)
{
  label_in_text = 1;
}
544
// Handler for "no-label-in-text" (takes no arguments): clear the
// label_in_text flag.
static void no_label_in_text_command(int, argument *)
{
  label_in_text = 0;
}
549
// Handler for "sort-adjacent-labels" (takes no arguments): set the
// sort_adjacent_labels flag.
static void sort_adjacent_labels_command(int, argument *)
{
  sort_adjacent_labels = 1;
}
554
// Handler for "no-sort-adjacent-labels" (takes no arguments): clear the
// sort_adjacent_labels flag.
static void no_sort_adjacent_labels_command(int, argument *)
{
  sort_adjacent_labels = 0;
}
559
date_as_label_command(int argc,argument * argv)560 static void date_as_label_command(int argc, argument *argv)
561 {
562 if (set_date_label_spec(argc > 0 ? argv[0].s : "D%a*"))
563 date_as_label = 1;
564 }
565
// Handler for "no-date-as-label" (takes no arguments): clear the
// date_as_label flag.
static void no_date_as_label_command(int, argument *)
{
  date_as_label = 0;
}
570
short_label_command(int,argument * argv)571 static void short_label_command(int, argument *argv)
572 {
573 if (set_short_label_spec(argv[0].s))
574 short_label_flag = 1;
575 }
576
// Handler for "no-short-label" (takes no arguments): clear
// short_label_flag.
static void no_short_label_command(int, argument *)
{
  short_label_flag = 0;
}
581
// Handler for "compatible" (takes no arguments): set compatible_flag,
// which push_file() consults when recognizing .R1/.R2 lines.
static void compatible_command(int, argument *)
{
  compatible_flag = 1;
}
586
// Handler for "no-compatible" (takes no arguments): clear
// compatible_flag.
static void no_compatible_command(int, argument *)
{
  compatible_flag = 0;
}
591
join_authors_command(int argc,argument * argv)592 static void join_authors_command(int argc, argument *argv)
593 {
594 join_authors_exactly_two = argv[0].s;
595 join_authors_default = argc > 1 ? argv[1].s : argv[0].s;
596 join_authors_last_two = argc == 3 ? argv[2].s : argv[0].s;
597 }
598
// Handler for "bracket-label": the three required arguments are stored,
// in order, in pre_label, post_label and sep_label.
static void bracket_label_command(int, argument *argv)
{
  pre_label = argv[0].s;
  post_label = argv[1].s;
  sep_label = argv[2].s;
}
605
// Handler for "separate-label-second-parts": store the single argument
// in separate_label_second_parts.
static void separate_label_second_parts_command(int, argument *argv)
{
  separate_label_second_parts = argv[0].s;
}
610
et_al_command(int argc,argument * argv)611 static void et_al_command(int argc, argument *argv)
612 {
613 et_al = argv[0].s;
614 et_al_min_elide = argv[1].n;
615 if (et_al_min_elide < 1)
616 et_al_min_elide = 1;
617 et_al_min_total = argc >= 3 ? argv[2].n : 0;
618 }
619
// Handler for "no-et-al" (takes no arguments): clear the et_al string
// and set et_al_min_elide to 0.  et_al_min_total is left as it is.
static void no_et_al_command(int, argument *)
{
  et_al.clear();
  et_al_min_elide = 0;
}
625
626 typedef void (*command_t)(int, argument *);
627
628 /* arg_types is a string describing the numbers and types of arguments.
629 s means a string, i means an integer, f is a list of fields, F is
630 a single field,
631 ? means that the previous argument is optional, * means that the
632 previous argument can occur any number of times. */
633
634 struct S {
635 const char *name;
636 command_t func;
637 const char *arg_types;
638 } command_table[] = {
639 { "include", include_command, "s" },
640 { "echo", echo_command, "s*" },
641 { "capitalize", capitalize_command, "f?" },
642 { "accumulate", accumulate_command, "" },
643 { "no-accumulate", no_accumulate_command, "" },
644 { "move-punctuation", move_punctuation_command, "" },
645 { "no-move-punctuation", no_move_punctuation_command, "" },
646 { "sort", sort_command, "s?" },
647 { "no-sort", no_sort_command, "" },
648 { "articles", articles_command, "s*" },
649 { "database", database_command, "ss*" },
650 { "default-database", default_database_command, "" },
651 { "no-default-database", no_default_database_command, "" },
652 { "bibliography", bibliography_command, "ss*" },
653 { "annotate", annotate_command, "F?s?" },
654 { "no-annotate", no_annotate_command, "" },
655 { "reverse", reverse_command, "s" },
656 { "no-reverse", no_reverse_command, "" },
657 { "abbreviate", abbreviate_command, "ss?s?s?s?" },
658 { "no-abbreviate", no_abbreviate_command, "" },
659 { "search-ignore", search_ignore_command, "f?" },
660 { "no-search-ignore", no_search_ignore_command, "" },
661 { "search-truncate", search_truncate_command, "i?" },
662 { "no-search-truncate", no_search_truncate_command, "" },
663 { "discard", discard_command, "f?" },
664 { "no-discard", no_discard_command, "" },
665 { "label", label_command, "s" },
666 { "abbreviate-label-ranges", abbreviate_label_ranges_command, "s?" },
667 { "no-abbreviate-label-ranges", no_abbreviate_label_ranges_command, "" },
668 { "label-in-reference", label_in_reference_command, "" },
669 { "no-label-in-reference", no_label_in_reference_command, "" },
670 { "label-in-text", label_in_text_command, "" },
671 { "no-label-in-text", no_label_in_text_command, "" },
672 { "sort-adjacent-labels", sort_adjacent_labels_command, "" },
673 { "no-sort-adjacent-labels", no_sort_adjacent_labels_command, "" },
674 { "date-as-label", date_as_label_command, "s?" },
675 { "no-date-as-label", no_date_as_label_command, "" },
676 { "short-label", short_label_command, "s" },
677 { "no-short-label", no_short_label_command, "" },
678 { "compatible", compatible_command, "" },
679 { "no-compatible", no_compatible_command, "" },
680 { "join-authors", join_authors_command, "sss?" },
681 { "bracket-label", bracket_label_command, "sss" },
682 { "separate-label-second-parts", separate_label_second_parts_command, "s" },
683 { "et-al", et_al_command, "sii?" },
684 { "no-et-al", no_et_al_command, "" },
685 };
686
check_args(const char * types,const char * name,int argc,argument * argv)687 static int check_args(const char *types, const char *name,
688 int argc, argument *argv)
689 {
690 int argno = 0;
691 while (*types) {
692 if (argc == 0) {
693 if (types[1] == '?')
694 break;
695 else if (types[1] == '*') {
696 assert(types[2] == '\0');
697 break;
698 }
699 else {
700 input_stack::error("missing argument for command '%1'", name);
701 return 0;
702 }
703 }
704 switch (*types) {
705 case 's':
706 break;
707 case 'i':
708 {
709 char *ptr;
710 long n = strtol(argv->s, &ptr, 10);
711 if ((n == 0 && ptr == argv->s)
712 || *ptr != '\0') {
713 input_stack::error("argument %1 for command '%2' must be an integer",
714 argno + 1, name);
715 return 0;
716 }
717 argv->n = (int)n;
718 break;
719 }
720 case 'f':
721 {
722 for (const char *ptr = argv->s; *ptr != '\0'; ptr++)
723 if (!cs_field_name(*ptr)) {
724 input_stack::error("argument %1 for command '%2' must be a list of fields",
725 argno + 1, name);
726 return 0;
727 }
728 break;
729 }
730 case 'F':
731 if (argv->s[0] == '\0' || argv->s[1] != '\0'
732 || !cs_field_name(argv->s[0])) {
733 input_stack::error("argument %1 for command '%2' must be a field name",
734 argno + 1, name);
735 return 0;
736 }
737 break;
738 default:
739 assert(0);
740 }
741 if (types[1] == '?')
742 types += 2;
743 else if (types[1] != '*')
744 types += 1;
745 --argc;
746 ++argv;
747 ++argno;
748 }
749 if (argc > 0) {
750 input_stack::error("too many arguments for command '%1'", name);
751 return 0;
752 }
753 return 1;
754 }
755
execute_command(const char * name,int argc,argument * argv)756 static void execute_command(const char *name, int argc, argument *argv)
757 {
758 for (unsigned int i = 0;
759 i < sizeof(command_table)/sizeof(command_table[0]); i++)
760 if (strcmp(name, command_table[i].name) == 0) {
761 if (check_args(command_table[i].arg_types, name, argc, argv))
762 (*command_table[i].func)(argc, argv);
763 return;
764 }
765 input_stack::error("unknown command '%1'", name);
766 }
767
command_loop()768 static void command_loop()
769 {
770 string command;
771 for (;;) {
772 command.clear();
773 int res = get_word(command);
774 if (res != 1) {
775 if (res == 0)
776 continue;
777 break;
778 }
779 int argc = 0;
780 command += '\0';
781 while ((res = get_word(command)) == 1) {
782 argc++;
783 command += '\0';
784 }
785 argument *argv = new argument[argc];
786 const char *ptr = command.contents();
787 for (int i = 0; i < argc; i++)
788 argv[i].s = ptr = strchr(ptr, '\0') + 1;
789 execute_command(command.contents(), argc, argv);
790 a_delete argv;
791 if (res == -1)
792 break;
793 }
794 }
795
// Execute the commands in FILE.  Anything still on the input stack is
// discarded first.
void process_commands(const char *file)
{
  input_stack::init();
  input_stack::push_file(file);
  command_loop();
}
802
// Execute the commands in S.  FILE and LINENO give the location of the
// first line of S for diagnostics.  Anything still on the input stack is
// discarded first.
void process_commands(string &s, const char *file, int lineno)
{
  input_stack::init();
  input_stack::push_string(s, file, lineno);
  command_loop();
}
809