1 // -*- C++ -*-
2 /* Copyright (C) 1989-2018 Free Software Foundation, Inc.
3      Written by James Clark (jjc@jclark.com)
4 
5 This file is part of groff.
6 
7 groff is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation, either version 3 of the License, or
10 (at your option) any later version.
11 
12 groff is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with this program.  If not, see <http://www.gnu.org/licenses/>. */
19 
#include "table.h"

#include <limits.h>
21 
// Largest value accepted by the 'p' (point size) column modifier;
// process_format() reports anything larger as unreasonable.
22 #define MAX_POINT_SIZE 99
// Largest value accepted by the 'v' (vertical spacing) column modifier;
// process_format() reports anything larger as unreasonable.
23 #define MAX_VERTICAL_SPACING 72
24 
// Defined elsewhere with C linkage.  Not referenced in this part of the
// file; presumably the program's version text -- TODO confirm.
25 extern "C" const char *Version_string;
26 
// Non-zero selects compatibility behaviour in this file: ".TS", ".TE" and
// ".lf" are recognised without checking the character that follows the
// request name, and "\a" inside a table is rewritten by table_input::get().
27 int compatible_flag = 0;
28 
29 class table_input {
30   FILE *fp;
31   enum { START, MIDDLE,
32 	 REREAD_T, REREAD_TE, REREAD_E,
33 	 LEADER_1, LEADER_2, LEADER_3, LEADER_4,
34 	 END, ERROR } state;
35   string unget_stack;
36 public:
37   table_input(FILE *);
38   int get();
ended()39   int ended() { return unget_stack.empty() && state == END; }
40   void unget(char);
41 };
42 
table_input(FILE * p)43 table_input::table_input(FILE *p)
44 : fp(p), state(START)
45 {
46 }
47 
unget(char c)48 void table_input::unget(char c)
49 {
50   assert(c != '\0');
51   unget_stack += c;
52   if (c == '\n')
53     current_lineno--;
54 }
55 
get()56 int table_input::get()
57 {
58   int len = unget_stack.length();
59   if (len != 0) {
60     unsigned char c = unget_stack[len - 1];
61     unget_stack.set_length(len - 1);
62     if (c == '\n')
63       current_lineno++;
64     return c;
65   }
66   int c;
67   for (;;) {
68     switch (state) {
69     case START:
70       if ((c = getc(fp)) == '.') {
71 	if ((c = getc(fp)) == 'T') {
72 	  if ((c = getc(fp)) == 'E') {
73 	    if (compatible_flag) {
74 	      state = END;
75 	      return EOF;
76 	    }
77 	    else {
78 	      c = getc(fp);
79 	      if (c != EOF)
80 		ungetc(c, fp);
81 	      if (c == EOF || c == ' ' || c == '\n') {
82 		state = END;
83 		return EOF;
84 	      }
85 	      state = REREAD_TE;
86 	      return '.';
87 	    }
88 	  }
89 	  else {
90 	    if (c != EOF)
91 	      ungetc(c, fp);
92 	    state = REREAD_T;
93 	    return '.';
94 	  }
95 	}
96 	else {
97 	  if (c != EOF)
98 	    ungetc(c, fp);
99 	  state = MIDDLE;
100 	  return '.';
101 	}
102       }
103       else if (c == EOF) {
104 	state = ERROR;
105 	return EOF;
106       }
107       else {
108 	if (c == '\n')
109 	  current_lineno++;
110 	else {
111 	  state = MIDDLE;
112 	  if (c == '\0') {
113 	    error("invalid input character code 0");
114 	    break;
115 	  }
116 	}
117 	return c;
118       }
119       break;
120     case MIDDLE:
121       // handle line continuation and uninterpreted leader character
122       if ((c = getc(fp)) == '\\') {
123 	c = getc(fp);
124 	if (c == '\n')
125 	  c = getc(fp);		// perhaps state ought to be START now
126 	else if (c == 'a' && compatible_flag) {
127 	  state = LEADER_1;
128 	  return '\\';
129 	}
130 	else {
131 	  if (c != EOF)
132 	    ungetc(c, fp);
133 	  c = '\\';
134 	}
135       }
136       if (c == EOF) {
137 	state = ERROR;
138 	return EOF;
139       }
140       else {
141 	if (c == '\n') {
142 	  state = START;
143 	  current_lineno++;
144 	}
145 	else if (c == '\0') {
146 	  error("invalid input character code 0");
147 	  break;
148 	}
149 	return c;
150       }
151     case REREAD_T:
152       state = MIDDLE;
153       return 'T';
154     case REREAD_TE:
155       state = REREAD_E;
156       return 'T';
157     case REREAD_E:
158       state = MIDDLE;
159       return 'E';
160     case LEADER_1:
161       state = LEADER_2;
162       return '*';
163     case LEADER_2:
164       state = LEADER_3;
165       return '(';
166     case LEADER_3:
167       state = LEADER_4;
168       return PREFIX_CHAR;
169     case LEADER_4:
170       state = MIDDLE;
171       return LEADER_CHAR;
172     case END:
173     case ERROR:
174       return EOF;
175     }
176   }
177 }
178 
179 void process_input_file(FILE *);
180 void process_table(table_input &in);
181 
process_input_file(FILE * fp)182 void process_input_file(FILE *fp)
183 {
184   enum { START, MIDDLE, HAD_DOT, HAD_T, HAD_TS, HAD_l, HAD_lf } state;
185   state = START;
186   int c;
187   while ((c = getc(fp)) != EOF)
188     switch (state) {
189     case START:
190       if (c == '.')
191 	state = HAD_DOT;
192       else {
193 	if (c == '\n')
194 	  current_lineno++;
195 	else
196 	  state = MIDDLE;
197 	putchar(c);
198       }
199       break;
200     case MIDDLE:
201       if (c == '\n') {
202 	current_lineno++;
203 	state = START;
204       }
205       putchar(c);
206       break;
207     case HAD_DOT:
208       if (c == 'T')
209 	state = HAD_T;
210       else if (c == 'l')
211 	state = HAD_l;
212       else {
213 	putchar('.');
214 	putchar(c);
215 	if (c == '\n') {
216 	  current_lineno++;
217 	  state = START;
218 	}
219 	else
220 	  state = MIDDLE;
221       }
222       break;
223     case HAD_T:
224       if (c == 'S')
225 	state = HAD_TS;
226       else {
227 	putchar('.');
228 	putchar('T');
229 	putchar(c);
230 	if (c == '\n') {
231  	  current_lineno++;
232 	  state = START;
233 	}
234 	else
235 	  state = MIDDLE;
236       }
237       break;
238     case HAD_TS:
239       if (c == ' ' || c == '\n' || compatible_flag) {
240 	putchar('.');
241 	putchar('T');
242 	putchar('S');
243 	while (c != '\n') {
244 	  if (c == EOF) {
245 	    error("end of file at beginning of table");
246 	    return;
247 	  }
248 	  putchar(c);
249 	  c = getc(fp);
250 	}
251 	putchar('\n');
252 	current_lineno++;
253 	{
254 	  table_input input(fp);
255 	  process_table(input);
256 	  set_troff_location(current_filename, current_lineno);
257 	  if (input.ended()) {
258 	    fputs(".TE", stdout);
259 	    while ((c = getc(fp)) != '\n') {
260 	      if (c == EOF) {
261 		putchar('\n');
262 		return;
263 	      }
264 	      putchar(c);
265 	    }
266 	    putchar('\n');
267 	    current_lineno++;
268 	  }
269 	}
270 	state = START;
271       }
272       else {
273 	fputs(".TS", stdout);
274 	putchar(c);
275 	state = MIDDLE;
276       }
277       break;
278     case HAD_l:
279       if (c == 'f')
280 	state = HAD_lf;
281       else {
282 	putchar('.');
283 	putchar('l');
284 	putchar(c);
285 	if (c == '\n') {
286  	  current_lineno++;
287 	  state = START;
288 	}
289 	else
290 	  state = MIDDLE;
291       }
292       break;
293     case HAD_lf:
294       if (c == ' ' || c == '\n' || compatible_flag) {
295 	string line;
296 	while (c != EOF) {
297 	  line += c;
298 	  if (c == '\n') {
299 	    current_lineno++;
300 	    break;
301 	  }
302 	  c = getc(fp);
303 	}
304 	line += '\0';
305 	interpret_lf_args(line.contents());
306 	printf(".lf%s", line.contents());
307 	state = START;
308       }
309       else {
310 	fputs(".lf", stdout);
311 	putchar(c);
312 	state = MIDDLE;
313       }
314       break;
315     default:
316       assert(0);
317     }
318   switch(state) {
319   case START:
320     break;
321   case MIDDLE:
322     putchar('\n');
323     break;
324   case HAD_DOT:
325     fputs(".\n", stdout);
326     break;
327   case HAD_l:
328     fputs(".l\n", stdout);
329     break;
330   case HAD_T:
331     fputs(".T\n", stdout);
332     break;
333   case HAD_lf:
334     fputs(".lf\n", stdout);
335     break;
336   case HAD_TS:
337     fputs(".TS\n", stdout);
338     break;
339   }
340   if (fp != stdin)
341     fclose(fp);
342 }
343 
// Table-wide options gathered by process_options().
struct options {
  unsigned flags;               // OR of table:: option bits
  int linesize;                 // 'linesize' argument; 0 when not given
  char delim[2];                // 'delim' pair; delim[0] == '\0' when unset
  char tab_char;                // separator between data items
  char decimal_point_char;      // 'decimalpoint' character

  options();
};

// Defaults: no flags, no line size, no delimiters, tab-separated data and
// '.' as the decimal point.
options::options()
{
  flags = 0;
  linesize = 0;
  delim[0] = '\0';
  delim[1] = '\0';
  tab_char = '\t';
  decimal_point_char = '.';
}
359 
360 // Return non-zero if p and q are the same ignoring case.
361 
strieq(const char * p,const char * q)362 int strieq(const char *p, const char *q)
363 {
364   for (; cmlower(*p) == cmlower(*q); p++, q++)
365     if (*p == '\0')
366       return 1;
367   return 0;
368 }
369 
370 // return 0 if we should give up in this table
371 
process_options(table_input & in)372 options *process_options(table_input &in)
373 {
374   options *opt = new options;
375   string line;
376   int level = 0;
377   for (;;) {
378     int c = in.get();
379     if (c == EOF) {
380       int i = line.length();
381       while (--i >= 0)
382 	in.unget(line[i]);
383       return opt;
384     }
385     if (c == '\n') {
386       in.unget(c);
387       int i = line.length();
388       while (--i >= 0)
389 	in.unget(line[i]);
390       return opt;
391     }
392     else if (c == '(')
393       level++;
394     else if (c == ')')
395       level--;
396     else if (c == ';' && level == 0) {
397       line += '\0';
398       break;
399     }
400     line += c;
401   }
402   if (line.empty())
403     return opt;
404   char *p = &line[0];
405   for (;;) {
406     while (!csalpha(*p) && *p != '\0')
407       p++;
408     if (*p == '\0')
409       break;
410     char *q = p;
411     while (csalpha(*q))
412       q++;
413     char *arg = 0;
414     if (*q != '(' && *q != '\0')
415       *q++ = '\0';
416     while (csspace(*q))
417       q++;
418     if (*q == '(') {
419       *q++ = '\0';
420       arg = q;
421       while (*q != ')' && *q != '\0')
422 	q++;
423       if (*q == '\0')
424 	error("missing ')'");
425       else
426 	*q++ = '\0';
427     }
428     if (*p == '\0') {
429       if (arg)
430 	error("argument without option");
431     }
432     else if (strieq(p, "tab")) {
433       if (!arg)
434 	error("'tab' option requires argument in parentheses");
435       else {
436 	if (arg[0] == '\0' || arg[1] != '\0')
437 	  error("argument to 'tab' option must be a single character");
438 	else
439 	  opt->tab_char = arg[0];
440       }
441     }
442     else if (strieq(p, "linesize")) {
443       if (!arg)
444 	error("'linesize' option requires argument in parentheses");
445       else {
446 	if (sscanf(arg, "%d", &opt->linesize) != 1)
447 	  error("bad linesize '%s'", arg);
448 	else if (opt->linesize <= 0) {
449 	  error("linesize must be positive");
450 	  opt->linesize = 0;
451 	}
452       }
453     }
454     else if (strieq(p, "delim")) {
455       if (!arg)
456 	error("'delim' option requires argument in parentheses");
457       else if (arg[0] == '\0' || arg[1] == '\0' || arg[2] != '\0')
458 	error("argument to 'delim' option must be two characters");
459       else {
460 	opt->delim[0] = arg[0];
461 	opt->delim[1] = arg[1];
462       }
463     }
464     else if (strieq(p, "center") || strieq(p, "centre")) {
465       if (arg)
466 	error("'center' option does not take an argument");
467       opt->flags |= table::CENTER;
468     }
469     else if (strieq(p, "expand")) {
470       if (arg)
471 	error("'expand' option does not take an argument");
472       opt->flags |= table::EXPAND;
473     }
474     else if (strieq(p, "box") || strieq(p, "frame")) {
475       if (arg)
476 	error("'box' option does not take an argument");
477       opt->flags |= table::BOX;
478     }
479     else if (strieq(p, "doublebox") || strieq(p, "doubleframe")) {
480       if (arg)
481 	error("'doublebox' option does not take an argument");
482       opt->flags |= table::DOUBLEBOX;
483     }
484     else if (strieq(p, "allbox")) {
485       if (arg)
486 	error("'allbox' option does not take an argument");
487       opt->flags |= table::ALLBOX;
488     }
489     else if (strieq(p, "nokeep")) {
490       if (arg)
491 	error("'nokeep' option does not take an argument");
492       opt->flags |= table::NOKEEP;
493     }
494     else if (strieq(p, "nospaces")) {
495       if (arg)
496 	error("'nospaces' option does not take an argument");
497       opt->flags |= table::NOSPACES;
498     }
499     else if (strieq(p, "nowarn")) {
500       if (arg)
501 	error("'nowarn' option does not take an argument");
502       opt->flags |= table::NOWARN;
503     }
504     else if (strieq(p, "decimalpoint")) {
505       if (!arg)
506 	error("'decimalpoint' option requires argument in parentheses");
507       else {
508 	if (arg[0] == '\0' || arg[1] != '\0')
509 	  error("argument to 'decimalpoint' option must be a single character");
510 	else
511 	  opt->decimal_point_char = arg[0];
512       }
513     }
514     else if (strieq(p, "experimental")) {
515       opt->flags |= table::EXPERIMENTAL;
516     }
517     else {
518       error("unrecognised global option '%1'", p);
519       // delete opt;
520       // return 0;
521     }
522     p = q;
523   }
524   return opt;
525 }
526 
entry_modifier()527 entry_modifier::entry_modifier()
528 : vertical_alignment(CENTER), zero_width(0), stagger(0)
529 {
530   vertical_spacing.inc = vertical_spacing.val = 0;
531   point_size.inc = point_size.val = 0;
532 }
533 
~entry_modifier()534 entry_modifier::~entry_modifier()
535 {
536 }
537 
entry_format()538 entry_format::entry_format() : type(FORMAT_LEFT)
539 {
540 }
541 
entry_format(format_type t)542 entry_format::entry_format(format_type t) : type(t)
543 {
544 }
545 
debug_print() const546 void entry_format::debug_print() const
547 {
548   switch (type) {
549   case FORMAT_LEFT:
550     putc('l', stderr);
551     break;
552   case FORMAT_CENTER:
553     putc('c', stderr);
554     break;
555   case FORMAT_RIGHT:
556     putc('r', stderr);
557     break;
558   case FORMAT_NUMERIC:
559     putc('n', stderr);
560     break;
561   case FORMAT_ALPHABETIC:
562     putc('a', stderr);
563     break;
564   case FORMAT_SPAN:
565     putc('s', stderr);
566     break;
567   case FORMAT_VSPAN:
568     putc('^', stderr);
569     break;
570   case FORMAT_HLINE:
571     putc('_', stderr);
572     break;
573   case FORMAT_DOUBLE_HLINE:
574     putc('=', stderr);
575     break;
576   default:
577     assert(0);
578     break;
579   }
580   if (point_size.val != 0) {
581     putc('p', stderr);
582     if (point_size.inc > 0)
583       putc('+', stderr);
584     else if (point_size.inc < 0)
585       putc('-', stderr);
586     fprintf(stderr, "%d ", point_size.val);
587   }
588   if (vertical_spacing.val != 0) {
589     putc('v', stderr);
590     if (vertical_spacing.inc > 0)
591       putc('+', stderr);
592     else if (vertical_spacing.inc < 0)
593       putc('-', stderr);
594     fprintf(stderr, "%d ", vertical_spacing.val);
595   }
596   if (!font.empty()) {
597     putc('f', stderr);
598     put_string(font, stderr);
599     putc(' ', stderr);
600   }
601   if (!macro.empty()) {
602     putc('m', stderr);
603     put_string(macro, stderr);
604     putc(' ', stderr);
605   }
606   switch (vertical_alignment) {
607   case entry_modifier::CENTER:
608     break;
609   case entry_modifier::TOP:
610     putc('t', stderr);
611     break;
612   case entry_modifier::BOTTOM:
613     putc('d', stderr);
614     break;
615   }
616   if (zero_width)
617     putc('z', stderr);
618   if (stagger)
619     putc('u', stderr);
620 }
621 
622 struct format {
623   int nrows;
624   int ncolumns;
625   int *separation;
626   string *width;
627   char *equal;
628   char *expand;
629   entry_format **entry;
630   char **vline;
631 
632   format(int nr, int nc);
633   ~format();
634   void add_rows(int n);
635 };
636 
format(int nr,int nc)637 format::format(int nr, int nc) : nrows(nr), ncolumns(nc)
638 {
639   int i;
640   separation = ncolumns > 1 ? new int[ncolumns - 1] : 0;
641   for (i = 0; i < ncolumns-1; i++)
642     separation[i] = -1;
643   width = new string[ncolumns];
644   equal = new char[ncolumns];
645   expand = new char[ncolumns];
646   for (i = 0; i < ncolumns; i++) {
647     equal[i] = 0;
648     expand[i] = 0;
649   }
650   entry = new entry_format *[nrows];
651   for (i = 0; i < nrows; i++)
652     entry[i] = new entry_format[ncolumns];
653   vline = new char*[nrows];
654   for (i = 0; i < nrows; i++) {
655     vline[i] = new char[ncolumns+1];
656     for (int j = 0; j < ncolumns+1; j++)
657       vline[i][j] = 0;
658   }
659 }
660 
add_rows(int n)661 void format::add_rows(int n)
662 {
663   int i;
664   char **old_vline = vline;
665   vline = new char*[nrows + n];
666   for (i = 0; i < nrows; i++)
667     vline[i] = old_vline[i];
668   a_delete old_vline;
669   for (i = 0; i < n; i++) {
670     vline[nrows + i] = new char[ncolumns + 1];
671     for (int j = 0; j < ncolumns + 1; j++)
672       vline[nrows + i][j] = 0;
673   }
674   entry_format **old_entry = entry;
675   entry = new entry_format *[nrows + n];
676   for (i = 0; i < nrows; i++)
677     entry[i] = old_entry[i];
678   a_delete old_entry;
679   for (i = 0; i < n; i++)
680     entry[nrows + i] = new entry_format[ncolumns];
681   nrows += n;
682 }
683 
~format()684 format::~format()
685 {
686   a_delete separation;
687   ad_delete(ncolumns) width;
688   a_delete equal;
689   a_delete expand;
690   for (int i = 0; i < nrows; i++) {
691     a_delete vline[i];
692     ad_delete(ncolumns) entry[i];
693   }
694   a_delete vline;
695   a_delete entry;
696 }
697 
698 struct input_entry_format : public entry_format {
699   input_entry_format *next;
700   string width;
701   int separation;
702   int vline;
703   int pre_vline;
704   int last_column;
705   int equal;
706   int expand;
707   input_entry_format(format_type, input_entry_format * = 0);
708   ~input_entry_format();
709   void debug_print();
710 };
711 
input_entry_format(format_type t,input_entry_format * p)712 input_entry_format::input_entry_format(format_type t, input_entry_format *p)
713 : entry_format(t), next(p)
714 {
715   separation = -1;
716   last_column = 0;
717   vline = 0;
718   pre_vline = 0;
719   equal = 0;
720   expand = 0;
721 }
722 
~input_entry_format()723 input_entry_format::~input_entry_format()
724 {
725 }
726 
free_input_entry_format_list(input_entry_format * list)727 void free_input_entry_format_list(input_entry_format *list)
728 {
729   while (list) {
730     input_entry_format *tem = list;
731     list = list->next;
732     delete tem;
733   }
734 }
735 
debug_print()736 void input_entry_format::debug_print()
737 {
738   int i;
739   for (i = 0; i < pre_vline; i++)
740     putc('|', stderr);
741   entry_format::debug_print();
742   if (!width.empty()) {
743     putc('w', stderr);
744     putc('(', stderr);
745     put_string(width, stderr);
746     putc(')', stderr);
747   }
748   if (equal)
749     putc('e', stderr);
750   if (expand)
751     putc('x', stderr);
752   if (separation >= 0)
753     fprintf(stderr, "%d", separation);
754   for (i = 0; i < vline; i++)
755     putc('|', stderr);
756   if (last_column)
757     putc(',', stderr);
758 }
759 
760 // Return zero if we should give up on this table.
761 // If this is a continuation format line, current_format will be the current
762 // format line.
763 
process_format(table_input & in,options * opt,format * current_format=0)764 format *process_format(table_input &in, options *opt,
765 		       format *current_format = 0)
766 {
767   input_entry_format *list = 0;
768   int have_expand = 0;
769   int c = in.get();
770   for (;;) {
771     int pre_vline = 0;
772     int got_format = 0;
773     int got_period = 0;
774     format_type t = FORMAT_LEFT;
775     for (;;) {
776       if (c == EOF) {
777 	error("end of input while processing format");
778 	free_input_entry_format_list(list);
779 	return 0;
780       }
781       switch (c) {
782       case 'n':
783       case 'N':
784 	t = FORMAT_NUMERIC;
785 	got_format = 1;
786 	break;
787       case 'a':
788       case 'A':
789 	got_format = 1;
790 	t = FORMAT_ALPHABETIC;
791 	break;
792       case 'c':
793       case 'C':
794 	got_format = 1;
795 	t = FORMAT_CENTER;
796 	break;
797       case 'l':
798       case 'L':
799 	got_format = 1;
800 	t = FORMAT_LEFT;
801 	break;
802       case 'r':
803       case 'R':
804 	got_format = 1;
805 	t = FORMAT_RIGHT;
806 	break;
807       case 's':
808       case 'S':
809 	got_format = 1;
810 	t = FORMAT_SPAN;
811 	break;
812       case '^':
813 	got_format = 1;
814 	t = FORMAT_VSPAN;
815 	break;
816       case '_':
817       case '-':			// tbl also accepts this
818 	got_format = 1;
819 	t = FORMAT_HLINE;
820 	break;
821       case '=':
822 	got_format = 1;
823 	t = FORMAT_DOUBLE_HLINE;
824 	break;
825       case '.':
826 	got_period = 1;
827 	break;
828       case '|':
829 	pre_vline++;
830 	break;
831       case ' ':
832       case '\t':
833       case '\n':
834 	break;
835       default:
836 	if (c == opt->tab_char)
837 	  break;
838 	error("unrecognised format '%1'", char(c));
839 	free_input_entry_format_list(list);
840 	return 0;
841       }
842       if (got_period)
843 	break;
844       c = in.get();
845       if (got_format)
846 	break;
847     }
848     if (got_period)
849       break;
850     list = new input_entry_format(t, list);
851     if (pre_vline)
852       list->pre_vline = pre_vline;
853     int success = 1;
854     do {
855       switch (c) {
856       case '0':
857       case '1':
858       case '2':
859       case '3':
860       case '4':
861       case '5':
862       case '6':
863       case '7':
864       case '8':
865       case '9':
866 	{
867 	  int w = 0;
868 	  do {
869 	    w = w*10 + (c - '0');
870 	    c = in.get();
871 	  } while (c != EOF && csdigit(c));
872 	  list->separation = w;
873 	}
874 	break;
875       case 'B':
876       case 'b':
877 	c = in.get();
878 	list->font = "B";
879 	break;
880       case 'd':
881       case 'D':
882 	c = in.get();
883 	list->vertical_alignment = entry_modifier::BOTTOM;
884 	break;
885       case 'e':
886       case 'E':
887 	c = in.get();
888 	list->equal = 1;
889 	// 'e' and 'x' are mutually exclusive
890 	list->expand = 0;
891 	break;
892       case 'f':
893       case 'F':
894 	do {
895 	  c = in.get();
896 	} while (c == ' ' || c == '\t');
897 	if (c == EOF) {
898 	  error("missing font name");
899 	  break;
900 	}
901 	if (c == '(') {
902 	  for (;;) {
903 	    c = in.get();
904 	    if (c == EOF || c == ' ' || c == '\t') {
905 	      error("missing ')'");
906 	      break;
907 	    }
908 	    if (c == ')') {
909 	      c = in.get();
910 	      break;
911 	    }
912 	    list->font += char(c);
913 	  }
914 	}
915 	else {
916 	  list->font = c;
917 	  char cc = c;
918 	  c = in.get();
919 	  if (!csdigit(cc)
920 	      && c != EOF && c != ' ' && c != '\t' && c != '.' && c != '\n') {
921 	    list->font += char(c);
922 	    c = in.get();
923 	  }
924 	}
925 	break;
926       case 'I':
927       case 'i':
928 	c = in.get();
929 	list->font = "I";
930 	break;
931       case 'm':
932       case 'M':
933 	do {
934 	  c = in.get();
935 	} while (c == ' ' || c == '\t');
936 	if (c == EOF) {
937 	  error("missing macro name");
938 	  break;
939 	}
940 	if (c == '(') {
941 	  for (;;) {
942 	    c = in.get();
943 	    if (c == EOF || c == ' ' || c == '\t') {
944 	      error("missing ')'");
945 	      break;
946 	    }
947 	    if (c == ')') {
948 	      c = in.get();
949 	      break;
950 	    }
951 	    list->macro += char(c);
952 	  }
953 	}
954 	else {
955 	  list->macro = c;
956 	  char cc = c;
957 	  c = in.get();
958 	  if (!csdigit(cc)
959 	      && c != EOF && c != ' ' && c != '\t' && c != '.' && c != '\n') {
960 	    list->macro += char(c);
961 	    c = in.get();
962 	  }
963 	}
964 	break;
965       case 'p':
966       case 'P':
967 	c = in.get();
968 	list->point_size.val = 0;
969 	list->point_size.inc = 0;
970 	if (c == '+' || c == '-') {
971 	  list->point_size.inc = (c == '+' ? 1 : -1);
972 	  c = in.get();
973 	}
974 	if (c == EOF || !csdigit(c)) {
975 	  error("'p' modifier must be followed by number");
976 	  list->point_size.inc = 0;
977 	}
978 	else {
979 	  do {
980 	    list->point_size.val *= 10;
981 	    list->point_size.val += c - '0';
982 	    c = in.get();
983 	  } while (c != EOF && csdigit(c));
984 	}
985 	if (list->point_size.val > MAX_POINT_SIZE
986 	    || list->point_size.val < -MAX_POINT_SIZE) {
987 	  error("unreasonable point size");
988 	  list->point_size.val = 0;
989 	  list->point_size.inc = 0;
990 	}
991 	break;
992       case 't':
993       case 'T':
994 	c = in.get();
995 	list->vertical_alignment = entry_modifier::TOP;
996 	break;
997       case 'u':
998       case 'U':
999 	c = in.get();
1000 	list->stagger = 1;
1001 	break;
1002       case 'v':
1003       case 'V':
1004 	c = in.get();
1005 	list->vertical_spacing.val = 0;
1006 	list->vertical_spacing.inc = 0;
1007 	if (c == '+' || c == '-') {
1008 	  list->vertical_spacing.inc = (c == '+' ? 1 : -1);
1009 	  c = in.get();
1010 	}
1011 	if (c == EOF || !csdigit(c)) {
1012 	  error("'v' modifier must be followed by number");
1013 	  list->vertical_spacing.inc = 0;
1014 	}
1015 	else {
1016 	  do {
1017 	    list->vertical_spacing.val *= 10;
1018 	    list->vertical_spacing.val += c - '0';
1019 	    c = in.get();
1020 	  } while (c != EOF && csdigit(c));
1021 	}
1022 	if (list->vertical_spacing.val > MAX_VERTICAL_SPACING
1023 	    || list->vertical_spacing.val < -MAX_VERTICAL_SPACING) {
1024 	  error("unreasonable vertical spacing");
1025 	  list->vertical_spacing.val = 0;
1026 	  list->vertical_spacing.inc = 0;
1027 	}
1028 	break;
1029       case 'w':
1030       case 'W':
1031 	c = in.get();
1032 	while (c == ' ' || c == '\t')
1033 	  c = in.get();
1034 	if (c == '(') {
1035 	  list->width = "";
1036 	  c = in.get();
1037 	  while (c != ')') {
1038 	    if (c == EOF || c == '\n') {
1039 	      error("missing ')'");
1040 	      free_input_entry_format_list(list);
1041 	      return 0;
1042 	    }
1043 	    list->width += c;
1044 	    c = in.get();
1045 	  }
1046 	  c = in.get();
1047 	}
1048 	else {
1049 	  if (c == '+' || c == '-') {
1050 	    list->width = char(c);
1051 	    c = in.get();
1052 	  }
1053 	  else
1054 	    list->width = "";
1055 	  if (c == EOF || !csdigit(c))
1056 	    error("bad argument for 'w' modifier");
1057 	  else {
1058 	    do {
1059 	      list->width += char(c);
1060 	      c = in.get();
1061 	    } while (c != EOF && csdigit(c));
1062 	  }
1063 	}
1064 	// 'w' and 'x' are mutually exclusive
1065 	list->expand = 0;
1066 	break;
1067       case 'x':
1068       case 'X':
1069 	c = in.get();
1070 	list->expand = 1;
1071 	// 'x' and 'e' are mutually exclusive
1072 	list->equal = 0;
1073 	// 'x' and 'w' are mutually exclusive
1074 	list->width = "";
1075 	break;
1076       case 'z':
1077       case 'Z':
1078 	c = in.get();
1079 	list->zero_width = 1;
1080 	break;
1081       case '|':
1082 	c = in.get();
1083 	list->vline++;
1084 	break;
1085       case ' ':
1086       case '\t':
1087 	c = in.get();
1088 	break;
1089       default:
1090 	if (c == opt->tab_char)
1091 	  c = in.get();
1092 	else
1093 	  success = 0;
1094 	break;
1095       }
1096     } while (success);
1097     if (list->vline > 2) {
1098       list->vline = 2;
1099       error("more than 2 vertical bars between key letters");
1100     }
1101     if (c == '\n' || c == ',') {
1102       c = in.get();
1103       list->last_column = 1;
1104     }
1105   }
1106   if (c == '.') {
1107     do {
1108       c = in.get();
1109     } while (c == ' ' || c == '\t');
1110     if (c != '\n') {
1111       error("'.' not last character on line");
1112       free_input_entry_format_list(list);
1113       return 0;
1114     }
1115   }
1116   if (!list) {
1117     error("no format");
1118     free_input_entry_format_list(list);
1119     return 0;
1120   }
1121   list->last_column = 1;
1122   // now reverse the list so that the first row is at the beginning
1123   input_entry_format *rev = 0;
1124   while (list != 0) {
1125     input_entry_format *tem = list->next;
1126     list->next = rev;
1127     rev = list;
1128     list = tem;
1129   }
1130   list = rev;
1131   input_entry_format *tem;
1132 
1133 #if 0
1134   for (tem = list; tem; tem = tem->next)
1135     tem->debug_print();
1136   putc('\n', stderr);
1137 #endif
1138   // compute number of columns and rows
1139   int ncolumns = 0;
1140   int nrows = 0;
1141   int col = 0;
1142   for (tem = list; tem; tem = tem->next) {
1143     if (tem->last_column) {
1144       if (col >= ncolumns)
1145 	ncolumns = col + 1;
1146       col = 0;
1147       nrows++;
1148     }
1149     else
1150       col++;
1151   }
1152   int row;
1153   format *f;
1154   if (current_format) {
1155     if (ncolumns > current_format->ncolumns) {
1156       error("cannot increase the number of columns in a continued format");
1157       free_input_entry_format_list(list);
1158       return 0;
1159     }
1160     f = current_format;
1161     row = f->nrows;
1162     f->add_rows(nrows);
1163   }
1164   else {
1165     f = new format(nrows, ncolumns);
1166     row = 0;
1167   }
1168   col = 0;
1169   for (tem = list; tem; tem = tem->next) {
1170     f->entry[row][col] = *tem;
1171     if (col < ncolumns - 1) {
1172       // use the greatest separation
1173       if (tem->separation > f->separation[col]) {
1174 	if (current_format)
1175 	  error("cannot change column separation in continued format");
1176 	else
1177 	  f->separation[col] = tem->separation;
1178       }
1179     }
1180     else if (tem->separation >= 0)
1181       error("column separation specified for last column");
1182     if (tem->equal && !f->equal[col]) {
1183       if (current_format)
1184 	error("cannot change which columns are equal in continued format");
1185       else
1186 	f->equal[col] = 1;
1187     }
1188     if (tem->expand && !f->expand[col]) {
1189       if (current_format)
1190 	error("cannot change which columns are expanded in continued format");
1191       else {
1192 	f->expand[col] = 1;
1193 	have_expand = 1;
1194       }
1195     }
1196     if (!tem->width.empty()) {
1197       // use the last width
1198       if (!f->width[col].empty() && f->width[col] != tem->width)
1199 	error("multiple widths for column %1", col + 1);
1200       f->width[col] = tem->width;
1201     }
1202     if (tem->pre_vline) {
1203       assert(col == 0);
1204       f->vline[row][col] = tem->pre_vline;
1205     }
1206     f->vline[row][col + 1] = tem->vline;
1207     if (tem->last_column) {
1208       row++;
1209       col = 0;
1210     }
1211     else
1212       col++;
1213   }
1214   free_input_entry_format_list(list);
1215   for (col = 0; col < ncolumns; col++) {
1216     entry_format *e = f->entry[f->nrows - 1] + col;
1217     if (e->type != FORMAT_HLINE
1218 	&& e->type != FORMAT_DOUBLE_HLINE
1219 	&& e->type != FORMAT_SPAN)
1220       break;
1221   }
1222   if (col >= ncolumns) {
1223     error("last row of format is all lines");
1224     delete f;
1225     return 0;
1226   }
1227   if (have_expand && (opt->flags & table::EXPAND)) {
1228     error("ignoring global 'expand' option because of 'x' specifiers");
1229     opt->flags &= ~table::EXPAND;
1230   }
1231   return f;
1232 }
1233 
process_data(table_input & in,format * f,options * opt)1234 table *process_data(table_input &in, format *f, options *opt)
1235 {
1236   char tab_char = opt->tab_char;
1237   int ncolumns = f->ncolumns;
1238   int current_row = 0;
1239   int format_index = 0;
1240   int give_up = 0;
1241   enum { DATA_INPUT_LINE, TROFF_INPUT_LINE, SINGLE_HLINE, DOUBLE_HLINE } type;
1242   table *tbl = new table(ncolumns, opt->flags, opt->linesize,
1243 			 opt->decimal_point_char);
1244   if (opt->delim[0] != '\0')
1245     tbl->set_delim(opt->delim[0], opt->delim[1]);
1246   for (;;) {
1247     // first determine what type of line this is
1248     int c = in.get();
1249     if (c == EOF)
1250       break;
1251     if (c == '.') {
1252       int d = in.get();
1253       if (d != EOF && csdigit(d)) {
1254 	in.unget(d);
1255 	type = DATA_INPUT_LINE;
1256       }
1257       else {
1258 	in.unget(d);
1259 	type = TROFF_INPUT_LINE;
1260       }
1261     }
1262     else if (c == '_' || c == '=') {
1263       int d = in.get();
1264       if (d == '\n') {
1265 	if (c == '_')
1266 	  type = SINGLE_HLINE;
1267 	else
1268 	  type = DOUBLE_HLINE;
1269       }
1270       else {
1271 	in.unget(d);
1272 	type = DATA_INPUT_LINE;
1273       }
1274     }
1275     else {
1276       type = DATA_INPUT_LINE;
1277     }
1278     switch (type) {
1279     case DATA_INPUT_LINE:
1280       {
1281 	string input_entry;
1282 	if (format_index >= f->nrows)
1283 	  format_index = f->nrows - 1;
1284 	// A format row that is all lines doesn't use up a data line.
1285 	while (format_index < f->nrows - 1) {
1286 	  int cnt;
1287 	  for (cnt = 0; cnt < ncolumns; cnt++) {
1288 	    entry_format *e = f->entry[format_index] + cnt;
1289 	    if (e->type != FORMAT_HLINE
1290 		&& e->type != FORMAT_DOUBLE_HLINE
1291 		// Unfortunately tbl treats a span as needing data.
1292 		// && e->type != FORMAT_SPAN
1293 		)
1294 	      break;
1295 	  }
1296 	  if (cnt < ncolumns)
1297 	    break;
1298 	  for (cnt = 0; cnt < ncolumns; cnt++)
1299 	    tbl->add_entry(current_row, cnt, input_entry,
1300 			   f->entry[format_index] + cnt, current_filename,
1301 			   current_lineno);
1302 	  tbl->add_vlines(current_row, f->vline[format_index]);
1303 	  format_index++;
1304 	  current_row++;
1305 	}
1306 	entry_format *line_format = f->entry[format_index];
1307 	int col = 0;
1308 	int row_comment = 0;
1309 	for (;;) {
1310 	  if (c == tab_char || c == '\n') {
1311 	    int ln = current_lineno;
1312 	    if (c == '\n')
1313 	      --ln;
1314 	    if ((opt->flags & table::NOSPACES))
1315 	      input_entry.remove_spaces();
1316 	    while (col < ncolumns
1317 		   && line_format[col].type == FORMAT_SPAN) {
1318 	      tbl->add_entry(current_row, col, "", &line_format[col],
1319 			     current_filename, ln);
1320 	      col++;
1321 	    }
1322 	    if (c == '\n' && input_entry.length() == 2
1323 		&& input_entry[0] == 'T' && input_entry[1] == '{') {
1324 	      input_entry = "";
1325 	      ln++;
1326 	      enum {
1327 		START, MIDDLE, GOT_T, GOT_RIGHT_BRACE, GOT_DOT,
1328 		GOT_l, GOT_lf, END
1329 	      } state = START;
1330 	      while (state != END) {
1331 		c = in.get();
1332 		if (c == EOF)
1333 		  break;
1334 		switch (state) {
1335 		case START:
1336 		  if (c == 'T')
1337 		    state = GOT_T;
1338 		  else if (c == '.')
1339 		    state = GOT_DOT;
1340 		  else {
1341 		    input_entry += c;
1342 		    if (c != '\n')
1343 		      state = MIDDLE;
1344 		  }
1345 		  break;
1346 		case GOT_T:
1347 		  if (c == '}')
1348 		    state = GOT_RIGHT_BRACE;
1349 		  else {
1350 		    input_entry += 'T';
1351 		    input_entry += c;
1352 		    state = c == '\n' ? START : MIDDLE;
1353 		  }
1354 		  break;
1355 		case GOT_DOT:
1356 		  if (c == 'l')
1357 		    state = GOT_l;
1358 		  else {
1359 		    input_entry += '.';
1360 		    input_entry += c;
1361 		    state = c == '\n' ? START : MIDDLE;
1362 		  }
1363 		  break;
1364 		case GOT_l:
1365 		  if (c == 'f')
1366 		    state = GOT_lf;
1367 		  else {
1368 		    input_entry += ".l";
1369 		    input_entry += c;
1370 		    state = c == '\n' ? START : MIDDLE;
1371 		  }
1372 		  break;
1373 		case GOT_lf:
1374 		  if (c == ' ' || c == '\n' || compatible_flag) {
1375 		    string args;
1376 		    input_entry += ".lf";
1377 		    while (c != EOF) {
1378 		      args += c;
1379 		      if (c == '\n')
1380 			break;
1381 		      c = in.get();
1382 		    }
1383 		    args += '\0';
1384 		    interpret_lf_args(args.contents());
1385 		    // remove the '\0'
1386 		    args.set_length(args.length() - 1);
1387 		    input_entry += args;
1388 		    state = START;
1389 		  }
1390 		  else {
1391 		    input_entry += ".lf";
1392 		    input_entry += c;
1393 		    state = MIDDLE;
1394 		  }
1395 		  break;
1396 		case GOT_RIGHT_BRACE:
1397 		  if ((opt->flags & table::NOSPACES)) {
1398 		    while (c == ' ')
1399 		      c = in.get();
1400 		    if (c == EOF)
1401 		      break;
1402 		  }
1403 		  if (c == '\n' || c == tab_char)
1404 		    state = END;
1405 		  else {
1406 		    input_entry += 'T';
1407 		    input_entry += '}';
1408 		    input_entry += c;
1409 		    state = MIDDLE;
1410 		  }
1411 		  break;
1412 		case MIDDLE:
1413 		  if (c == '\n')
1414 		    state = START;
1415 		  input_entry += c;
1416 		  break;
1417 		case END:
1418 		default:
1419 		  assert(0);
1420 		}
1421 	      }
1422 	      if (c == EOF) {
1423 		error("end of data in middle of text block");
1424 		give_up = 1;
1425 		break;
1426 	      }
1427 	    }
1428 	    if (col >= ncolumns) {
1429 	      if (!input_entry.empty()) {
1430 		if (input_entry.length() >= 2
1431 		    && input_entry[0] == '\\'
1432 		    && input_entry[1] == '"')
1433 		  row_comment = 1;
1434 		else if (!row_comment) {
1435 		  if (c == '\n')
1436 		    in.unget(c);
1437 		  input_entry += '\0';
1438 		  error("excess data entry '%1' discarded",
1439 			input_entry.contents());
1440 		  if (c == '\n')
1441 		    (void)in.get();
1442 		}
1443 	      }
1444 	    }
1445 	    else
1446 	      tbl->add_entry(current_row, col, input_entry,
1447 			     &line_format[col], current_filename, ln);
1448 	    col++;
1449 	    if (c == '\n')
1450 	      break;
1451 	    input_entry = "";
1452 	  }
1453 	  else
1454 	    input_entry += c;
1455 	  c = in.get();
1456 	  if (c == EOF)
1457 	    break;
1458 	}
1459 	if (give_up)
1460 	  break;
1461 	input_entry = "";
1462 	for (; col < ncolumns; col++)
1463 	  tbl->add_entry(current_row, col, input_entry, &line_format[col],
1464 			 current_filename, current_lineno - 1);
1465 	tbl->add_vlines(current_row, f->vline[format_index]);
1466 	current_row++;
1467 	format_index++;
1468       }
1469       break;
1470     case TROFF_INPUT_LINE:
1471       {
1472 	string line;
1473 	int ln = current_lineno;
1474 	for (;;) {
1475 	  line += c;
1476 	  if (c == '\n')
1477 	    break;
1478 	  c = in.get();
1479 	  if (c == EOF) {
1480 	    break;
1481 	  }
1482 	}
1483 	tbl->add_text_line(current_row, line, current_filename, ln);
1484 	if (line.length() >= 4
1485 	    && line[0] == '.' && line[1] == 'T' && line[2] == '&') {
1486 	  format *newf = process_format(in, opt, f);
1487 	  if (newf == 0)
1488 	    give_up = 1;
1489 	  else
1490 	    f = newf;
1491 	}
1492 	if (line.length() >= 3
1493 	    && line[0] == '.' && line[1] == 'l' && line[2] == 'f') {
1494 	  line += '\0';
1495 	  interpret_lf_args(line.contents() + 3);
1496 	}
1497       }
1498       break;
1499     case SINGLE_HLINE:
1500       tbl->add_single_hline(current_row);
1501       break;
1502     case DOUBLE_HLINE:
1503       tbl->add_double_hline(current_row);
1504       break;
1505     default:
1506       assert(0);
1507     }
1508     if (give_up)
1509       break;
1510   }
1511   if (!give_up && current_row == 0) {
1512     error("no real data");
1513     give_up = 1;
1514   }
1515   if (give_up) {
1516     delete tbl;
1517     return 0;
1518   }
1519   // Do this here rather than at the beginning in case continued formats
1520   // change it.
1521   int i;
1522   for (i = 0; i < ncolumns - 1; i++)
1523     if (f->separation[i] >= 0)
1524       tbl->set_column_separation(i, f->separation[i]);
1525   for (i = 0; i < ncolumns; i++)
1526     if (!f->width[i].empty())
1527       tbl->set_minimum_width(i, f->width[i]);
1528   for (i = 0; i < ncolumns; i++)
1529     if (f->equal[i])
1530       tbl->set_equal_column(i);
1531   for (i = 0; i < ncolumns; i++)
1532     if (f->expand[i])
1533       tbl->set_expand_column(i);
1534   return tbl;
1535 }
1536 
process_table(table_input & in)1537 void process_table(table_input &in)
1538 {
1539   options *opt = 0;
1540   format *form = 0;
1541   table *tbl = 0;
1542   if ((opt = process_options(in)) != 0
1543       && (form = process_format(in, opt)) != 0
1544       && (tbl = process_data(in, form, opt)) != 0) {
1545     tbl->print();
1546     delete tbl;
1547   }
1548   else {
1549     error("giving up on this table");
1550     while (in.get() != EOF)
1551       ;
1552   }
1553   delete opt;
1554   delete form;
1555   if (!in.ended())
1556     error("premature end of file");
1557 }
1558 
usage(FILE * stream)1559 static void usage(FILE *stream)
1560 {
1561   fprintf(stream, "usage: %s [ -vC ] [ files... ]\n", program_name);
1562 }
1563 
main(int argc,char ** argv)1564 int main(int argc, char **argv)
1565 {
1566   program_name = argv[0];
1567   static char stderr_buf[BUFSIZ];
1568   setbuf(stderr, stderr_buf);
1569   int opt;
1570   static const struct option long_options[] = {
1571     { "help", no_argument, 0, CHAR_MAX + 1 },
1572     { "version", no_argument, 0, 'v' },
1573     { NULL, 0, 0, 0 }
1574   };
1575   while ((opt = getopt_long(argc, argv, "vCT:", long_options, NULL)) != EOF)
1576     switch (opt) {
1577     case 'C':
1578       compatible_flag = 1;
1579       break;
1580     case 'v':
1581       {
1582 	printf("GNU tbl (groff) version %s\n", Version_string);
1583 	exit(0);
1584 	break;
1585       }
1586     case 'T':
1587       // I'm sick of getting bug reports from IRIX users
1588       break;
1589     case CHAR_MAX + 1: // --help
1590       usage(stdout);
1591       exit(0);
1592       break;
1593     case '?':
1594       usage(stderr);
1595       exit(1);
1596       break;
1597     default:
1598       assert(0);
1599     }
1600   printf(".if !\\n(.g .ab GNU tbl requires GNU troff.\n"
1601 	 ".if !dTS .ds TS\n"
1602 	 ".if !dTE .ds TE\n");
1603   if (argc > optind) {
1604     for (int i = optind; i < argc; i++)
1605       if (argv[i][0] == '-' && argv[i][1] == '\0') {
1606 	current_filename = "-";
1607 	current_lineno = 1;
1608 	printf(".lf 1 -\n");
1609 	process_input_file(stdin);
1610       }
1611       else {
1612 	errno = 0;
1613 	FILE *fp = fopen(argv[i], "r");
1614 	if (fp == 0)
1615 	  fatal("can't open '%1': %2", argv[i], strerror(errno));
1616 	else {
1617 	  current_lineno = 1;
1618 	  string fn(argv[i]);
1619 	  fn += '\0';
1620 	  normalize_for_lf(fn);
1621 	  current_filename = fn.contents();
1622 	  printf(".lf 1 %s\n", current_filename);
1623 	  process_input_file(fp);
1624 	}
1625       }
1626   }
1627   else {
1628     current_filename = "-";
1629     current_lineno = 1;
1630     printf(".lf 1 -\n");
1631     process_input_file(stdin);
1632   }
1633   if (ferror(stdout) || fflush(stdout) < 0)
1634     fatal("output error");
1635   return 0;
1636 }
1637 
1638