1 /* Copyright 2010-2019 Free Software Foundation, Inc.
2 
3    This program is free software: you can redistribute it and/or modify
4    it under the terms of the GNU General Public License as published by
5    the Free Software Foundation, either version 3 of the License, or
6    (at your option) any later version.
7 
8    This program is distributed in the hope that it will be useful,
9    but WITHOUT ANY WARRANTY; without even the implied warranty of
10    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11    GNU General Public License for more details.
12 
13    You should have received a copy of the GNU General Public License
14    along with this program.  If not, see <http://www.gnu.org/licenses/>. */
15 
16 #include <config.h>
17 #include <string.h>
18 #include <stdlib.h>
19 #include <stdio.h>
20 #include <ctype.h>
21 
22 #include "parser.h"
23 #include "text.h"
24 #include "input.h"
25 
26 
/* Every byte the parser treats as whitespace, line terminators included. */
const char *whitespace_chars = " \t\f\r\n";

/* The same set without the line terminators; this corresponds to the
   Perl character class [^\S\r\n]. */
const char *whitespace_chars_except_newline = " \t\f";

/* The ASCII decimal digits. */
const char *digit_chars = "0123456789";
32 
/* Return nonzero if the string S1 starts with the whole of S2, and zero
   otherwise.  An empty S2 matches any S1. */
int
looking_at (char *s1, char *s2)
{
  size_t prefix_len = strlen (s2);

  return strncmp (s1, s2, prefix_len) == 0;
}
39 
/* Read a name made of alphanumeric characters, hyphens and underscores,
   where the first character must be alphanumeric.  This is the format of
   (non-single-character) Texinfo commands, but is also used elsewhere.

   On success, return a newly allocated copy of the name, to be freed by
   the caller, and advance *PTR past the name.  Return 0 and leave *PTR
   untouched if there is no valid name at *PTR. */
char *
read_command_name (char **ptr)
{
  char *start = *ptr;
  char *end = start;
  char *name;

  /* The <ctype.h> classifiers take an int holding an unsigned char value
     (or EOF).  Passing a plain char is undefined where char is signed and
     the byte is above 0x7F, as happens with UTF-8 input, so convert
     first. */
  if (!isalnum ((unsigned char) *end))
    return 0; /* Invalid. */

  while (isalnum ((unsigned char) *end) || *end == '-' || *end == '_')
    end++;

  name = strndup (start, end - start);
  *ptr = end;
  return name;
}
62 
63 /* Read a name used for @set and @value. */
64 char *
read_flag_name(char ** ptr)65 read_flag_name (char **ptr)
66 {
67   char *p = *ptr, *q;
68   char *ret = 0;
69 
70   q = p;
71   if (!isalnum (*q) && *q != '-' && *q != '_')
72     return 0; /* Invalid. */
73 
74   while (!strchr (whitespace_chars, *q)
75          && !strchr ("{\\}~`^+\"<>|@", *q))
76     q++;
77   ret = strndup (p, q - p);
78   p = q;
79 
80   *ptr = p;
81   return ret;
82 }
83 
84 char *
element_type_name(ELEMENT * e)85 element_type_name (ELEMENT *e)
86 {
87   return element_type_names[(e)->type];
88 }
89 
90 
91 
92 /* Current node, section and part. */
93 
94 ELEMENT *current_node = 0;
95 ELEMENT *current_section = 0;
96 ELEMENT *current_part = 0;
97 
98 
/* Conditional stack: a growable array of command ids, managed by
   push_conditional_stack and pop_conditional_stack. */

/* Storage for the stack entries. */
enum command_id *conditional_stack;
/* Number of entries the storage has room for. */
size_t conditional_space;
/* Number of entries currently on the stack. */
size_t conditional_number;
104 
105 void
push_conditional_stack(enum command_id cond)106 push_conditional_stack (enum command_id cond)
107 {
108   if (conditional_number == conditional_space)
109     {
110       conditional_stack = realloc (conditional_stack,
111                                    (conditional_space += 5)
112                                    * sizeof (enum command_id));
113       if (!conditional_stack)
114         fatal ("realloc failed");
115     }
116   conditional_stack[conditional_number++] = cond;
117 }
118 
119 enum command_id
pop_conditional_stack(void)120 pop_conditional_stack (void)
121 {
122   if (conditional_number == 0)
123     return CM_NONE;
124   return conditional_stack[--conditional_number];
125 }
126 
127 
128 /* Counters */
129 COUNTER count_remaining_args;
130 COUNTER count_items;
131 COUNTER count_cells;
132 
133 
134 /* Information that is not local to where it is set in the Texinfo input,
135    for example document language and encoding. */
136 GLOBAL_INFO global_info;
137 char *global_clickstyle = 0;
138 char *global_documentlanguage = 0;
139 
140 enum kbd_enum global_kbdinputstyle = kbd_distinct;
141 
142 void
set_documentlanguage(char * value)143 set_documentlanguage (char *value)
144 {
145   free (global_documentlanguage);
146   global_documentlanguage = strdup (value);
147 }
148 
149 void
set_novalidate(int value)150 set_novalidate (int value)
151 {
152   global_info.novalidate = value;
153 }
154 
155 /* Record the information from a command of global effect. */
156 int
register_global_command(ELEMENT * current)157 register_global_command (ELEMENT *current)
158 {
159   enum command_id cmd = current->cmd;
160   if (cmd == CM_summarycontents)
161     cmd = CM_shortcontents;
162 
163   if (command_data(cmd).flags & CF_global)
164     {
165       if (!current->line_nr.line_nr)
166         current->line_nr = line_nr;
167       switch (cmd)
168         {
169 #define GLOBAL_CASE(cmx) \
170         case CM_##cmx:   \
171           add_to_contents_as_array (&global_info.cmx, current); \
172           break
173 
174         case CM_footnote:
175           add_to_contents_as_array (&global_info.footnotes, current);
176           break;
177 
178         GLOBAL_CASE(hyphenation);
179         GLOBAL_CASE(insertcopying);
180         GLOBAL_CASE(printindex);
181         GLOBAL_CASE(subtitle);
182         GLOBAL_CASE(titlefont);
183         GLOBAL_CASE(listoffloats);
184         GLOBAL_CASE(detailmenu);
185         GLOBAL_CASE(part);
186 
187         /* from Common.pm %document_settable_at_commands */
188         GLOBAL_CASE(allowcodebreaks);
189         GLOBAL_CASE(clickstyle);
190         GLOBAL_CASE(codequotebacktick);
191         GLOBAL_CASE(codequoteundirected);
192         GLOBAL_CASE(contents);
193         GLOBAL_CASE(deftypefnnewline);
194         GLOBAL_CASE(documentencoding);
195         GLOBAL_CASE(documentlanguage);
196         GLOBAL_CASE(exampleindent);
197         GLOBAL_CASE(firstparagraphindent);
198         GLOBAL_CASE(frenchspacing);
199         GLOBAL_CASE(headings);
200         GLOBAL_CASE(kbdinputstyle);
201         GLOBAL_CASE(paragraphindent);
202         GLOBAL_CASE(shortcontents);
203         GLOBAL_CASE(urefbreakstyle);
204         GLOBAL_CASE(xrefautomaticsectiontitle);
205 #undef GLOBAL_CASE
206         default:
207           /* do nothing; just silence -Wswitch about lots of un-covered cases */
208           break;
209         }
210       /* TODO: Check if all of these are necessary. */
211       return 1;
212     }
213   else if ((command_data(cmd).flags & CF_global_unique))
214     {
215       ELEMENT **where = 0;
216 
217       if (!current->line_nr.line_nr)
218         current->line_nr = line_nr;
219       switch (cmd)
220         {
221         case CM_setfilename:
222           /* Check if we are inside an @include, and if so, do nothing. */
223           if (top_file_index () > 0)
224             break;
225           where = &global_info.setfilename;
226           break;
227 
228 #define GLOBAL_UNIQUE_CASE(cmd) \
229         case CM_##cmd: \
230           where = &global_info.cmd; \
231           break
232 
233         GLOBAL_UNIQUE_CASE(settitle);
234         GLOBAL_UNIQUE_CASE(copying);
235         GLOBAL_UNIQUE_CASE(titlepage);
236         GLOBAL_UNIQUE_CASE(top);
237         GLOBAL_UNIQUE_CASE(documentdescription);
238         GLOBAL_UNIQUE_CASE(pagesizes);
239         GLOBAL_UNIQUE_CASE(fonttextsize);
240         GLOBAL_UNIQUE_CASE(footnotestyle);
241         GLOBAL_UNIQUE_CASE(setchapternewpage);
242         GLOBAL_UNIQUE_CASE(everyheading);
243         GLOBAL_UNIQUE_CASE(everyfooting);
244         GLOBAL_UNIQUE_CASE(evenheading);
245         GLOBAL_UNIQUE_CASE(evenfooting);
246         GLOBAL_UNIQUE_CASE(oddheading);
247         GLOBAL_UNIQUE_CASE(oddfooting);
248         GLOBAL_UNIQUE_CASE(everyheadingmarks);
249         GLOBAL_UNIQUE_CASE(everyfootingmarks);
250         GLOBAL_UNIQUE_CASE(evenheadingmarks);
251         GLOBAL_UNIQUE_CASE(oddheadingmarks);
252         GLOBAL_UNIQUE_CASE(evenfootingmarks);
253         GLOBAL_UNIQUE_CASE(oddfootingmarks);
254         GLOBAL_UNIQUE_CASE(shorttitlepage);
255         GLOBAL_UNIQUE_CASE(title);
256 #undef GLOBAL_UNIQUE_CASE
257         /* NOTE: Same list in api.c:build_global_info2 and wipe_global_info. */
258         default:
259           /* do nothing; just silence -Wswitch about lots of un-covered cases */
260           break;
261         }
262       if (where)
263         {
264           if (*where)
265             line_warn ("multiple @%s", command_name(cmd));
266           else
267             *where = current;
268         }
269       return 1;
270     }
271 
272   return 0;
273 }
274 
275 
276 void
wipe_global_info(void)277 wipe_global_info (void)
278 {
279   free (global_clickstyle);
280   free (global_documentlanguage);
281   global_clickstyle = strdup ("arrow");
282   global_documentlanguage = strdup ("");
283   global_kbdinputstyle = kbd_distinct;
284 
285   free (global_info.input_perl_encoding);
286   free (global_info.input_encoding_name);
287 
288   free (global_info.dircategory_direntry.contents.list);
289   free (global_info.footnotes.contents.list);
290 
291 #define GLOBAL_CASE(cmx) \
292   free (global_info.cmx.contents.list)
293 
294   GLOBAL_CASE(hyphenation);
295   GLOBAL_CASE(insertcopying);
296   GLOBAL_CASE(printindex);
297   GLOBAL_CASE(subtitle);
298   GLOBAL_CASE(titlefont);
299   GLOBAL_CASE(listoffloats);
300   GLOBAL_CASE(detailmenu);
301   GLOBAL_CASE(part);
302   GLOBAL_CASE(allowcodebreaks);
303   GLOBAL_CASE(clickstyle);
304   GLOBAL_CASE(codequotebacktick);
305   GLOBAL_CASE(codequoteundirected);
306   GLOBAL_CASE(contents);
307   GLOBAL_CASE(deftypefnnewline);
308   GLOBAL_CASE(documentencoding);
309   GLOBAL_CASE(documentlanguage);
310   GLOBAL_CASE(exampleindent);
311   GLOBAL_CASE(firstparagraphindent);
312   GLOBAL_CASE(frenchspacing);
313   GLOBAL_CASE(headings);
314   GLOBAL_CASE(kbdinputstyle);
315   GLOBAL_CASE(paragraphindent);
316   GLOBAL_CASE(shortcontents);
317   GLOBAL_CASE(urefbreakstyle);
318   GLOBAL_CASE(xrefautomaticsectiontitle);
319 
320 #undef GLOBAL_CASE
321   memset (&global_info, 0, sizeof (global_info));
322 
323   global_info.input_perl_encoding = strdup ("utf-8");
324   global_info.input_encoding_name = strdup ("utf-8");
325 }
326 
327 
328 ELEMENT *
parse_texi_file(char * filename)329 parse_texi_file (char *filename)
330 {
331   char *p, *q;
332   char *linep, *line = 0;
333   ELEMENT *root = new_element (ET_text_root);
334   ELEMENT *preamble = 0;
335   char c;
336 
337   int status;
338 
339   status = input_push_file (filename);
340   if (status)
341     return 0;
342 
343   /* Strip off a leading directory path, by looking for the last
344      '/' in filename. */
345   p = 0;
346   q = strchr (filename, '/');
347   while (q)
348     {
349       p = q;
350       q = strchr (q + 1, '/');
351     }
352 
353   if (p)
354     {
355       c = *p;
356       *p = '\0';
357       add_include_directory (filename);
358       *p = c;
359     }
360 
361   /* Put all lines up to a line starting "\input" inside a "preamble"
362      element. */
363   while (1)
364     {
365       ELEMENT *l;
366 
367       free (line);
368       line = next_text ();
369       if (!line)
370         break;
371 
372       linep = line;
373       linep += strspn (linep, whitespace_chars);
374       if (*linep && !looking_at (linep, "\\input"))
375         {
376           /* This line is not part of the preamble.  Shove back
377              into input stream. */
378           input_push (line, 0, line_nr.file_name, line_nr.line_nr);
379           break;
380         }
381 
382       if (!preamble)
383         preamble = new_element (ET_preamble);
384 
385       l = new_element (ET_preamble_text);
386       text_append (&l->text, line);
387       add_to_element_contents (preamble, l);
388     }
389 
390   if (preamble)
391     add_to_element_contents (root, preamble);
392 
393   return parse_texi (root);
394 }
395 
396 
397 int
begin_paragraph_p(ELEMENT * current)398 begin_paragraph_p (ELEMENT *current)
399 {
400   return (current->type == ET_NONE /* "True for @-commands" */
401            || current->type == ET_before_item
402            || current->type == ET_text_root
403            || current->type == ET_document_root
404            || current->type == ET_brace_command_context)
405          && in_paragraph_context (current_context ());
406 }
407 
408 /* If in a context where paragraphs are to be started, start a new
409    paragraph.  */
410 ELEMENT *
begin_paragraph(ELEMENT * current)411 begin_paragraph (ELEMENT *current)
412 {
413   if (begin_paragraph_p (current))
414     {
415       ELEMENT *e;
416       enum command_id indent = 0;
417 
418       /* Check if an @indent precedes the paragraph (to record it
419          in the 'extra' key). */
420       if (current->contents.number > 0)
421         {
422           int i = current->contents.number - 1;
423           while (i >= 0)
424             {
425               ELEMENT *child = contents_child_by_index (current, i);
426               if (child->type == ET_empty_line
427                   || child->type == ET_paragraph)
428                 break;
429               if (close_paragraph_command(child->cmd))
430                 break;
431               if (child->cmd == CM_indent
432                   || child->cmd == CM_noindent)
433                 {
434                   indent = child->cmd;
435                   break;
436                 }
437               i--;
438             }
439 
440         }
441 
442       e = new_element (ET_paragraph);
443       if (indent)
444         add_extra_integer (e, indent == CM_indent ? "indent" : "noindent",
445                               1);
446       add_to_element_contents (current, e);
447       current = e;
448 
449       debug ("PARAGRAPH");
450     }
451   return current;
452 }
453 
454 /* Begin a preformatted element if in a preformatted context. */
455 ELEMENT *
begin_preformatted(ELEMENT * current)456 begin_preformatted (ELEMENT *current)
457 {
458   if (current_context() == ct_preformatted
459       || current_context() == ct_rawpreformatted)
460     {
461       ELEMENT *e;
462       enum element_type et;
463 
464       if (current_context() == ct_preformatted)
465         et = ET_preformatted;
466       else
467         et = ET_rawpreformatted;
468       e = new_element (et);
469       add_to_element_contents (current, e);
470       current = e;
471       debug ("PREFORMATTED %s", et == ET_preformatted ? "preformatted"
472                                                       : "rawpreformatted");
473     }
474   return current;
475 }
476 
477 ELEMENT *
end_paragraph(ELEMENT * current,enum command_id closed_command,enum command_id interrupting_command)478 end_paragraph (ELEMENT *current,
479                enum command_id closed_command,
480                enum command_id interrupting_command)
481 {
482   current = close_all_style_commands (current,
483                                       closed_command, interrupting_command);
484   if (current->type == ET_paragraph)
485     {
486       debug ("CLOSE PARA");
487       current = current->parent;
488     }
489 
490   return current;
491 }
492 
493 ELEMENT *
end_preformatted(ELEMENT * current,enum command_id closed_command,enum command_id interrupting_command)494 end_preformatted (ELEMENT *current,
495                   enum command_id closed_command,
496                   enum command_id interrupting_command)
497 {
498   current = close_all_style_commands (current,
499                                       closed_command, interrupting_command);
500   if (current->type == ET_preformatted
501       || current->type == ET_rawpreformatted)
502     {
503       debug ("CLOSE PREFORMATTED %s",
504              current->type == ET_preformatted ? "preformatted"
505                                               : "rawpreformatted");
506       if (current->contents.number == 0)
507         {
508           current = current->parent;
509           destroy_element (pop_element_from_contents (current));
510           debug ("popping");
511         }
512       else
513         current = current->parent;
514     }
515   return current;
516 }
517 
518 /* Add TEXT to the contents of CURRENT, maybe starting a new paragraph. */
519 ELEMENT *
merge_text(ELEMENT * current,char * text)520 merge_text (ELEMENT *current, char *text)
521 {
522   int no_merge_with_following_text = 0;
523   int leading_spaces = strspn (text, whitespace_chars);
524   ELEMENT *last_child = last_contents_child (current);
525 
526   /* Is there a non-whitespace character in the line? */
527   if (text[leading_spaces])
528     {
529       char *additional = 0;
530 
531       if (last_child
532           && (last_child->type == ET_empty_line_after_command
533               || last_child->type == ET_empty_spaces_after_command
534               || last_child->type == ET_empty_spaces_before_argument
535               || last_child->type == ET_empty_spaces_after_close_brace))
536         {
537           no_merge_with_following_text = 1;
538         }
539 
540       if (leading_spaces)
541         {
542           additional = malloc (leading_spaces + 1);
543           if (!additional)
544             fatal ("malloc failed");
545           memcpy (additional, text, leading_spaces);
546           additional[leading_spaces] = '\0';
547         }
548 
549       if (abort_empty_line (&current, additional))
550         text += leading_spaces;
551 
552       free (additional);
553 
554       current = begin_paragraph (current);
555     }
556 
557   last_child = last_contents_child (current);
558   if (last_child
559       /* There is a difference between the text being defined and empty,
560          and not defined at all.  The latter is true for 'brace_command_arg'
561          elements.  We need either to make sure that we initialize all elements
562          with text_append (&e->text, "") where we want merging with following
563          text, or treat as a special case here.
564          Unfortunately we can't make a special case for
565          ET_empty_spaces_before_argument, because abort_empty_line above
566          produces such an element that shouldn't be merged with. */
567       && (last_child->text.space > 0
568             && !strchr (last_child->text.text, '\n')
569              ) /* || last_child->type == ET_empty_spaces_before_argument) */
570       && last_child->cmd != CM_value
571       && !no_merge_with_following_text)
572     {
573       /* Append text to contents */
574       text_append (&last_child->text, text);
575       debug ("MERGED TEXT: %s|||", text);
576     }
577   else
578     {
579       ELEMENT *e = new_element (ET_NONE);
580       text_append (&e->text, text);
581       add_to_element_contents (current, e);
582       debug ("NEW TEXT: %s|||", text);
583     }
584 
585   return current;
586 }
587 
588 /* If last contents child of CURRENT is an empty line element, remove
589    or merge text, and return true. */
590 int
abort_empty_line(ELEMENT ** current_inout,char * additional_spaces)591 abort_empty_line (ELEMENT **current_inout, char *additional_spaces)
592 {
593   ELEMENT *current = *current_inout;
594   int retval;
595 
596   ELEMENT *last_child = last_contents_child (current);
597 
598   if (!additional_spaces)
599     additional_spaces = "";
600 
601   if (last_child
602       && (last_child->type == ET_empty_line
603           || last_child->type == ET_empty_line_after_command
604           || last_child->type == ET_empty_spaces_before_argument
605           || last_child->type == ET_empty_spaces_after_close_brace))
606     {
607       ELEMENT *owning_element = 0, *e;
608       KEY_PAIR *k;
609 
610       retval = 1;
611 
612       k = lookup_extra (last_child, "command");
613       if (k)
614         owning_element = (ELEMENT *) k->value;
615 
616       debug ("ABORT EMPTY %s additional text |%s| "
617              "current |%s|",
618              element_type_name(last_child),
619              additional_spaces,
620              last_child->text.text);
621       text_append (&last_child->text, additional_spaces);
622 
623       /* Remove element altogether if it's empty. */
624       if (last_child->text.end == 0)
625         {
626           e = pop_element_from_contents (current);
627           destroy_element (e);
628         }
629       else if (last_child->type == ET_empty_line)
630         {
631           last_child->type = begin_paragraph_p (current)
632                              ? ET_empty_spaces_before_paragraph : ET_NONE;
633         }
634       else if (last_child->type == ET_empty_line_after_command
635                || last_child->type == ET_empty_spaces_before_argument)
636         {
637           if (owning_element)
638             {
639               /* Remove element from main tree. */
640               ELEMENT *e = pop_element_from_contents (current);
641               add_extra_string_dup (owning_element, "spaces_before_argument",
642                                     e->text.text);
643               destroy_element (e);
644             }
645           else
646             {
647               last_child->type = ET_empty_spaces_after_command;
648             }
649         }
650     }
651   else
652     retval = 0;
653 
654   *current_inout = current;
655   return retval;
656 }
657 
658 static void
isolate_last_space_internal(ELEMENT * current)659 isolate_last_space_internal (ELEMENT *current)
660 {
661   ELEMENT *last_elt;
662 
663   last_elt = last_contents_child (current);
664   char *text = element_text (last_elt);
665 
666   int text_len = last_elt->text.end;
667 
668   /* If text all whitespace */
669   if (text[strspn (text, whitespace_chars)] == '\0')
670     {
671       add_extra_string_dup (current, "spaces_after_argument",
672                             last_elt->text.text);
673       destroy_element (pop_element_from_contents (current));
674     }
675   else
676     {
677       int i, trailing_spaces;
678       static TEXT t;
679 
680       text_reset (&t);
681 
682       trailing_spaces = 0;
683       for (i = strlen (text) - 1;
684            i > 0 && strchr (whitespace_chars, text[i]);
685            i--)
686         trailing_spaces++;
687 
688       text_append_n (&t,
689                      text + text_len - trailing_spaces,
690                      trailing_spaces);
691 
692       text[text_len - trailing_spaces] = '\0';
693       last_elt->text.end -= trailing_spaces;
694 
695       add_extra_string_dup (current, "spaces_after_argument",
696                             t.text);
697     }
698 }
699 
700 static void
isolate_trailing_space(ELEMENT * current,enum element_type spaces_type)701 isolate_trailing_space (ELEMENT *current, enum element_type spaces_type)
702 {
703   ELEMENT *last_elt;
704   char *text;
705   int text_len;
706 
707   last_elt = last_contents_child (current);
708   text = element_text (last_elt);
709 
710   text_len = last_elt->text.end;
711 
712   /* If text all whitespace */
713   if (text[strspn (text, whitespace_chars)] == '\0')
714     {
715       last_elt->type = spaces_type;
716     }
717   else
718     {
719       ELEMENT *new_spaces;
720       int i, trailing_spaces;
721 
722       trailing_spaces = 0;
723       for (i = strlen (text) - 1;
724            i > 0 && strchr (whitespace_chars, text[i]);
725            i--)
726         trailing_spaces++;
727 
728       new_spaces = new_element (spaces_type);
729       text_append_n (&new_spaces->text,
730                      text + text_len - trailing_spaces,
731                      trailing_spaces);
732       text[text_len - trailing_spaces] = '\0';
733       last_elt->text.end -= trailing_spaces;
734 
735       add_to_element_contents (current, new_spaces);
736     }
737 }
738 
739 void
isolate_last_space(ELEMENT * current)740 isolate_last_space (ELEMENT *current)
741 {
742   char *text;
743   ELEMENT *last_elt;
744   int text_len;
745 
746   if (current->contents.number == 0)
747     return;
748 
749   if (last_contents_child(current)->cmd == CM_c
750       || last_contents_child(current)->cmd == CM_comment)
751     {
752       add_extra_element_oot (current, "comment_at_end",
753                              pop_element_from_contents (current));
754     }
755 
756   if (current->contents.number == 0)
757     return;
758 
759   last_elt = last_contents_child (current);
760   text = element_text (last_elt);
761   if (!text || !*text
762       || (last_elt->type && (!current->type
763                              || current->type != ET_line_arg)))
764     return;
765 
766   text_len = last_elt->text.end;
767   /* Does the text end in whitespace? */
768   if (!strchr (whitespace_chars, text[text_len - 1]))
769     return;
770 
771   if (current->type == ET_menu_entry_node)
772     isolate_trailing_space (current, ET_space_at_end_menu_node);
773   else
774     isolate_last_space_internal (current);
775 }
776 
777 
778 /* Add an "ET_empty_line_after_command" element containing the whitespace at
779    the beginning of the rest of the line.  This element can be later changed to
780    a "ET_empty_spaces_after_command" element in 'abort_empty_line' if more
781    text follows on the line.  Used after line commands or commands starting
782    a block. */
783 void
start_empty_line_after_command(ELEMENT * current,char ** line_inout,ELEMENT * command)784 start_empty_line_after_command (ELEMENT *current, char **line_inout,
785                                 ELEMENT *command)
786 {
787   char *line = *line_inout;
788   ELEMENT *e;
789   int len;
790 
791   len = strspn (line, whitespace_chars_except_newline);
792   e = new_element (ET_empty_line_after_command);
793   add_to_element_contents (current, e);
794   text_append_n (&e->text, line, len);
795   line += len;
796 
797   if (command)
798     add_extra_element (e, "command", command);
799 
800   *line_inout = line;
801 }
802 
803 
804 /* If the parent element takes a command as an argument, like
805    @itemize @bullet. */
806 int
command_with_command_as_argument(ELEMENT * current)807 command_with_command_as_argument (ELEMENT *current)
808 {
809   return current->type == ET_block_line_arg
810     && (current->parent->cmd == CM_itemize
811         || item_line_command (current->parent->cmd))
812     && (current->contents.number == 1);
813 }
814 
815 /* Check if line is "@end ..." for current command.  If so, advance LINE. */
816 int
is_end_current_command(ELEMENT * current,char ** line,enum command_id * end_cmd)817 is_end_current_command (ELEMENT *current, char **line,
818                         enum command_id *end_cmd)
819 {
820   char *linep;
821   char *cmdname;
822 
823   linep = *line;
824 
825   linep += strspn (linep, whitespace_chars);
826   if (!looking_at (linep, "@end"))
827     return 0;
828 
829   linep += 4;
830   if (!strchr (whitespace_chars, *linep))
831     return 0;
832 
833   linep += strspn (linep, whitespace_chars);
834   if (!*linep)
835     return 0;
836 
837   cmdname = read_command_name (&linep);
838   if (!cmdname)
839     return 0;
840 
841   *end_cmd = lookup_command (cmdname);
842   free (cmdname);
843   if (*end_cmd != current->cmd)
844     return 0;
845 
846   *line = linep;
847   return 1;
848 }
849 
850 void
check_valid_nesting(ELEMENT * current,enum command_id cmd)851 check_valid_nesting (ELEMENT *current, enum command_id cmd)
852 {
853   enum command_id invalid_parent = 0;
854 
855   /* Check whether outer command can contain cmd.  Commands are
856      classified according to what commands they can contain:
857 
858      accents
859      full text
860      simple text
861      full line
862      full line no refs
863 
864    */
865 
866   int ok = 0; /* Whether nesting is allowed. */
867 
868   /* Whether command is a "simple text" command.  Use a variable
869      to avoid repeating a complex conditional. */
870   int simple_text_command = 0;
871 
872   enum command_id outer = current->parent->cmd;
873   unsigned long outer_flags = command_data(outer).flags;
874   unsigned long cmd_flags = command_data(cmd).flags;
875 
876   // much TODO here.
877 
878   if ((outer_flags & CF_line
879             && (command_data(outer).data >= 0
880                 || (command_data(outer).data == LINE_line
881                     && !(outer_flags & (CF_def | CF_sectioning)))
882                 || command_data(outer).data == LINE_text)
883             && outer != CM_center
884             && outer != CM_exdent)
885       || ((outer_flags & CF_brace)
886            && !(outer_flags & CF_inline)
887            && command_data(outer).data > 0)
888       || outer == CM_shortcaption
889       || outer == CM_math
890       || (outer_flags & CF_index_entry_command)
891       || (outer_flags & CF_block
892           && !(outer_flags & CF_def)
893           && command_data(outer).data != BLOCK_raw
894           && command_data(outer).data != BLOCK_conditional))
895     {
896       simple_text_command = 1;
897     }
898 
899   if (outer_flags & CF_root && current->type != ET_line_arg)
900     ok = 1;
901   else if (outer_flags & CF_block
902            && current->type != ET_block_line_arg)
903     ok = 1;
904   else if ((outer == CM_item
905            || outer == CM_itemx)
906            && current->type != ET_line_arg)
907     ok = 1;
908   else if (outer_flags & CF_accent)
909     {
910       if (cmd_flags & (CF_nobrace | CF_accent))
911         ok = 1;
912       else if (cmd_flags & CF_brace
913                && command_data(cmd).data == 0)
914         ok = 1; /* glyph command */
915       if (cmd == CM_c || cmd == CM_comment)
916         ok = 1;
917     }
918   else if (simple_text_command
919            /* "full text commands" */
920            || (outer_flags & CF_brace)
921                  && command_data(outer).data == BRACE_style
922            /* "full line commands" */
923            || outer == CM_center
924            || outer == CM_exdent
925            || outer == CM_item
926            || outer == CM_itemx
927 
928            || (!current->parent->cmd && current_context () == ct_def)
929 
930            /* "full line no refs commands" */
931            || (outer_flags & (CF_sectioning | CF_def))
932            || (!current->parent->cmd && current_context () == ct_def))
933     {
934       /* Start by checking if the command is allowed inside a "full text
935          command" - this is the most permissive. */
936       if (cmd_flags & CF_nobrace)
937         ok = 1;
938       if (cmd_flags & CF_brace && !(cmd_flags & CF_INFOENCLOSE))
939         ok = 1;
940       else if (cmd == CM_c
941                || cmd == CM_comment
942                || cmd == CM_refill
943                || cmd == CM_subentry
944                || cmd == CM_columnfractions
945                || cmd == CM_set
946                || cmd == CM_clear
947                || cmd == CM_end)
948         ok = 1;
949       else if (cmd_flags & CF_format_raw)
950         ok = 1;
951       if (cmd == CM_caption || cmd == CM_shortcaption)
952         ok = 0;
953       if (cmd_flags & CF_block
954           && command_data(cmd).data == BLOCK_conditional)
955         ok = 1;
956 
957       /* Now add more restrictions for "full line no refs" commands and "simple
958          text" commands. */
959       if (outer_flags & (CF_sectioning | CF_def)
960           || (!current->parent->cmd && current_context () == ct_def)
961           || simple_text_command)
962         {
963           if (cmd == CM_titlefont
964               || cmd == CM_anchor
965               || cmd == CM_footnote
966               || cmd == CM_verb
967               || cmd == CM_indent || cmd == CM_noindent)
968             ok = 0;
969         }
970 
971       /* Exceptions for "simple text commands" only. */
972       if (simple_text_command)
973         {
974           if (cmd == CM_xref
975               || cmd == CM_ref
976               || cmd == CM_pxref
977               || cmd == CM_inforef)
978             ok = 0;
979         }
980     }
981   else
982     {
983       /* Default to valid nesting, for example for commands for which
984          it is not defined which commands can occur within them (e.g.
985          @tab?). */
986       ok = 1;
987     }
988 
989   if (!ok)
990     {
991       invalid_parent = current->parent->cmd;
992       if (!invalid_parent)
993         {
994           /* current_context () == ct_def.  Find def block containing
995              command. */
996           ELEMENT *d = current;
997           while (d->parent
998                  && d->parent->type != ET_def_line)
999             d = d->parent;
1000           invalid_parent = d->parent->parent->cmd;
1001         }
1002 
1003       line_warn ("@%s should not appear in @%s",
1004                  command_name(cmd),
1005                  command_name(invalid_parent));
1006     }
1007 }
1008 
1009 /* *LINEP is a pointer into the line being processed.  It is advanced past any
1010    bytes processed.  Return 0 when we need to read a new line. */
1011 int
process_remaining_on_line(ELEMENT ** current_inout,char ** line_inout)1012 process_remaining_on_line (ELEMENT **current_inout, char **line_inout)
1013 {
1014   ELEMENT *current = *current_inout;
1015   char *line = *line_inout;
1016   char *line_after_command;
1017   int retval = STILL_MORE_TO_PROCESS;
1018   enum command_id end_cmd;
1019   char *p;
1020 
1021   enum command_id cmd = CM_NONE;
1022 
1023   /********* BLOCK_raw or (ignored) BLOCK_conditional ******************/
1024   /* If in raw block, or ignored conditional block. */
1025   if (command_flags(current) & CF_block
1026       && (command_data(current->cmd).data == BLOCK_raw
1027           || command_data(current->cmd).data == BLOCK_conditional))
1028     {
1029       /* Check if we are using a macro within a macro. */
1030       if (current->cmd == CM_macro || current->cmd == CM_rmacro)
1031         {
1032           enum command_id cmd = 0;
1033           char *p = line;
1034           p += strspn (p, whitespace_chars);
1035           if (!strncmp (p, "@macro", strlen ("@macro")))
1036             {
1037               p += strlen ("@macro");
1038               cmd = CM_macro;
1039             }
1040           else if (!strncmp (p, "@rmacro", strlen ("@rmacro")))
1041             {
1042               p += strlen ("@rmacro");
1043               cmd = CM_rmacro;
1044             }
1045           if (cmd)
1046             {
1047               ELEMENT *e = new_element (ET_NONE);
1048               e->cmd = cmd;
1049               line = p;
1050               add_to_element_contents (current, e);
1051               add_extra_string (e, "arg_line", strdup (line));
1052               current = e;
1053               retval = GET_A_NEW_LINE;
1054               goto funexit;
1055             }
1056         }
1057 
1058       /* Else check for nested @ifset (so that @end ifset doesn't
1059          end the outermost @ifset). */
1060       if (current->cmd == CM_ifclear || current->cmd == CM_ifset
1061           || current->cmd == CM_ifcommanddefined
1062           || current->cmd == CM_ifcommandnotdefined)
1063         {
1064           ELEMENT *e;
1065           char *p = line;
1066           p += strspn (p, whitespace_chars);
1067           if (*p == '@'
1068               && !strncmp (p + 1, command_name(current->cmd),
1069                            strlen (command_name(current->cmd))))
1070             {
1071               line = p + 1;
1072               p += strlen (command_name(current->cmd));
1073               e = new_element (ET_NONE);
1074               e->cmd = current->cmd;
1075               add_extra_string (e, "line", strdup (line));
1076               add_to_element_contents (current, e);
1077               current = e;
1078               retval = GET_A_NEW_LINE;
1079               goto funexit;
1080             }
1081         }
1082 
1083       /* Else check if line is "@end ..." for current command. */
1084       p = line;
1085       if (is_end_current_command (current, &line, &end_cmd))
1086         {
1087           ELEMENT *last_child;
1088           char *tmp = 0;
1089 
1090           last_child = last_contents_child (current);
1091 
1092           if (strchr (whitespace_chars, *p))
1093             {
1094               ELEMENT *e;
1095               int n = strspn (line, whitespace_chars);
1096               e = new_element (ET_raw);
1097               text_append_n (&e->text, line, n);
1098               add_to_element_contents (current, e);
1099               line += n;
1100               line_warn ("@end %s should only appear at the "
1101                          "beginning of a line", command_name(end_cmd));
1102             }
1103           else if (last_child
1104                    && last_child->type == ET_raw
1105                    && current->cmd != CM_verbatim)
1106             {
1107               if (last_child->text.end > 0
1108                   && last_child->text.text[last_child->text.end - 1] == '\n')
1109                 {
1110                   ELEMENT *lrn;
1111                   last_child->text.text[--last_child->text.end] = '\0';
1112                   lrn = new_element (ET_last_raw_newline);
1113                   text_append (&lrn->text, "\n");
1114                   add_to_element_contents (current, lrn);
1115                 }
1116             }
1117 
1118           /* 'line' is now advanced past the "@end ...".  Check if
1119              there's anything after it. */
1120           p = line + strspn (line, whitespace_chars);
1121           if (*p && *p != '@')
1122             goto superfluous_arg;
1123           if (*p)
1124             {
1125               p++;
1126               tmp = read_command_name (&p);
1127               if (tmp && (!strcmp (tmp, "c") || !strcmp (tmp, "comment")))
1128                 {
1129                 }
1130               else if (*p && p[strspn (p, whitespace_chars)])
1131                 {
1132 superfluous_arg:
1133                   line_warn ("superfluous argument to @end %s: %s",
1134                              command_name (current->cmd), line);
1135                 }
1136               free (tmp);
1137             }
1138 
1139 
1140           /* For macros, define a new macro (unless we are in a nested
1141              macro definition). */
1142           if ((end_cmd == CM_macro || end_cmd == CM_rmacro)
1143               && (!current->parent
1144                   || (current->parent->cmd != CM_macro
1145                       && current->parent->cmd != CM_rmacro)))
1146             {
1147               char *name;
1148               enum command_id existing;
1149               if (current->args.number > 0)
1150                 {
1151                   name = element_text (args_child_by_index (current, 0));
1152 
1153                   existing = lookup_command (name);
1154                   if (existing)
1155                     {
1156                       MACRO *macro;
1157                       macro = lookup_macro (existing);
1158                       if (macro)
1159                         {
1160                           line_error_ext (1, &current->line_nr,
1161                              "macro `%s' previously defined", name);
1162                           line_error_ext (1, &macro->element->line_nr,
1163                              "here is the previous definition of `%s'", name);
1164                         }
1165                       else if (!(existing & USER_COMMAND_BIT))
1166                         {
1167                           line_error_ext (1, &current->line_nr,
1168                             "redefining Texinfo language command: @%s",
1169                             name);
1170                         }
1171                     }
1172                   if (!lookup_extra (current, "invalid_syntax"))
1173                     {
1174                       new_macro (name, current);
1175                     }
1176                 }
1177             }
1178 
1179           current = current->parent;
1180 
1181           /* Check for conditionals. */
1182           if (command_data(end_cmd).flags & CF_block
1183               && command_data(end_cmd).data == BLOCK_conditional)
1184             {
1185               /* Remove an ignored block. */
1186               ELEMENT *popped;
1187               popped = pop_element_from_contents (current);
1188               if (popped->cmd != end_cmd)
1189                 fatal ("command mismatch for ignored block");
1190 
1191               /* Ignore until end of line */
1192               if (!strchr (line, '\n'))
1193                 {
1194                   line = new_line ();
1195                   debug ("IGNORE CLOSE LINE");
1196                 }
1197               destroy_element_and_children (popped);
1198 
1199               debug ("CLOSED conditional %s", command_name(end_cmd));
1200               retval = GET_A_NEW_LINE;
1201               goto funexit;
1202             }
1203           else
1204             {
1205               ELEMENT *e;
1206               int n;
1207 
1208               debug ("CLOSED raw %s", command_name(end_cmd));
1209               e = new_element (ET_empty_line_after_command);
1210               n = strspn (line, whitespace_chars_except_newline);
1211               text_append_n (&e->text, line, n);
1212               line += n;
1213               add_to_element_contents (current, e);
1214             }
1215         }
1216       else /* save the line verbatim */
1217         {
1218           ELEMENT *last = last_contents_child (current);
1219           /* Append to existing element only if the text is all
1220              whitespace.  */
1221           if (last && last->type == ET_empty_line_after_command
1222               && line[strspn (line, whitespace_chars)] == '\0'
1223               && !strchr (last->text.text, '\n'))
1224             {
1225               text_append (&last->text, line);
1226             }
1227           else
1228             {
1229               ELEMENT *e;
1230               e = new_element (ET_raw);
1231               text_append (&e->text, line);
1232               add_to_element_contents (current, e);
1233             }
1234 
1235           retval = GET_A_NEW_LINE;
1236           goto funexit;
1237         }
1238     } /********* BLOCK_raw or (ignored) BLOCK_conditional *************/
1239 
1240   /* Check if parent element is 'verb' */
1241   else if (current->parent && current->parent->cmd == CM_verb)
1242     {
1243       char c;
1244       char *q;
1245       KEY_PAIR *k;
1246 
1247       k = lookup_extra (current->parent, "delimiter");
1248 
1249       c = *(char *)k->value;
1250       if (c)
1251         {
1252           /* Look forward for the delimiter character followed by a close
1253              brace. */
1254           q = line;
1255           while (1)
1256             {
1257               q = strchr (q, c);
1258               if (!q || q[1] == '}')
1259                 break;
1260               q++;
1261             }
1262         }
1263       else
1264         {
1265           /* Look forward for a close brace. */
1266           q = strchr (line, '}');
1267         }
1268 
1269       if (q)
1270         {
1271           /* Save up to the delimiter character. */
1272           if (q != line)
1273             {
1274               ELEMENT *e = new_element (ET_raw);
1275               text_append_n (&e->text, line, q - line);
1276               add_to_element_contents (current, e);
1277             }
1278           debug ("END VERB");
1279           if (c)
1280             line = q + 1;
1281           else
1282             line = q;
1283           /* The '}' will close the @verb command in handle_separator below. */
1284         }
1285       else
1286         {
1287           /* Save the rest of line. */
1288           ELEMENT *e = new_element (ET_raw);
1289           text_append (&e->text, line);
1290           add_to_element_contents (current, e);
1291 
1292           debug_nonl ("LINE VERB: %s", line);
1293 
1294           retval = GET_A_NEW_LINE; goto funexit;  /* Get next line. */
1295         }
1296     } /* CM_verb */
1297 
1298   /* Skip empty lines.  If we reach the end of input, continue in case there
1299      is an @include. */
1300 
1301   /* There are cases when we need more input, but we don't want to
1302      get it in the top-level loop in parse_texi - this is mostly
1303      (always?) when we don't want to start a new, empty line, and
1304      need to get more from the current, incomplete line of input. */
1305   while (*line == '\0')
1306     {
1307       static char *allocated_text;
1308       debug ("EMPTY TEXT");
1309 
1310       /* Each place we supply Texinfo input we store the supplied
1311          input in a static variable like allocated_text, to prevent
1312          memory leaks.  */
1313       free (allocated_text);
1314       line = allocated_text = next_text ();
1315 
1316       if (!line)
1317         {
1318           /* TODO: Can this only happen at end of file? */
1319           current = end_line (current);
1320           retval = GET_A_NEW_LINE;
1321           goto funexit;
1322         }
1323     }
1324 
1325   if (*line == '@')
1326     {
1327       line_after_command = line + 1;
1328 
1329       /* List of single character Texinfo commands. */
1330       if (strchr ("([\"'~@&}{,.!?"
1331                   " \f\n\r\t"
1332                   "*-^`=:|/\\",
1333               *line_after_command))
1334         {
1335           char single_char[2];
1336           single_char[0] = *line_after_command++;
1337           single_char[1] = '\0';
1338           cmd = lookup_command (single_char);
1339         }
1340       else
1341         {
1342           char *command = read_command_name (&line_after_command);
1343 
1344           cmd = 0;
1345           if (command)
1346             {
1347               ELEMENT *paragraph;
1348 
1349               cmd = lookup_command (command);
1350               if (!cmd)
1351                 {
1352                   line_error ("unknown command `%s'", command);
1353                   debug ("COMMAND (UNKNOWN) %s", command);
1354                   free (command);
1355 
1356                   abort_empty_line (&current, 0);
1357                   paragraph = begin_paragraph (current);
1358                   if (paragraph)
1359                     current = paragraph;
1360 
1361                   line = line_after_command;
1362                   retval = STILL_MORE_TO_PROCESS;
1363                   goto funexit;
1364                 }
1365               free (command);
1366             }
1367           else
1368             {
1369               /* @ was followed by gibberish.  "unexpected @" is printed
1370                  below. */
1371             }
1372         }
1373       if (cmd && (command_data(cmd).flags & CF_ALIAS))
1374         cmd = command_data(cmd).data;
1375     }
1376 
1377   /* Handle user-defined macros before anything else because their expansion
1378      may lead to changes in the line. */
1379   if (cmd && (command_data(cmd).flags & CF_MACRO))
1380     {
1381       static char *allocated_line;
1382       line = line_after_command;
1383       current = handle_macro (current, &line, cmd);
1384       free (allocated_line);
1385       allocated_line = next_text ();
1386       line = allocated_line;
1387     }
1388 
1389   /* Cases that may "lead to command closing": brace commands that don't
1390      need a brace: accent commands.
1391      @definfoenclose. */
1392   /* This condition is only checked immediately after the command opening,
1393      otherwise the current element is in the 'args' and not right in the
1394      command container. */
1395   else if (command_flags(current) & CF_brace && *line != '{')
1396     {
1397       if (command_with_command_as_argument (current->parent))
1398         {
1399           debug ("FOR PARENT @%s command_as_argument @%s",
1400                  command_name(current->parent->parent->cmd),
1401                  command_name(current->cmd));
1402           if (!current->type)
1403             current->type = ET_command_as_argument;
1404           add_extra_element (current->parent->parent,
1405                                  "command_as_argument", current);
1406           current = current->parent;
1407         }
1408       else if (command_flags(current) & CF_accent)
1409         {
1410           if (strchr (whitespace_chars_except_newline, *line))
1411             {
1412               if (isalpha (command_name(current->cmd)[0]))
1413               /* e.g. @dotaccent */
1414                 {
1415                   char *p; char *s;
1416                   KEY_PAIR *k;
1417                   p = line + strspn (line, whitespace_chars_except_newline);
1418                   k = lookup_extra (current, "spaces");
1419                   if (!k)
1420                     {
1421                       asprintf (&s, "%.*s", (int) (p - line), line);
1422                       add_extra_string (current, "spaces", s);
1423                     }
1424                   else
1425                     {
1426                       asprintf (&s, "%s%.*s",
1427                                 (char *) k->value,
1428                                 (int) (p - line), p);
1429                       free (k->value);
1430                       k->value = (ELEMENT *) s;
1431                     }
1432                   line = p;
1433                 }
1434               else
1435                 {
1436                   line_warn ("accent command `@%s' must not be followed "
1437                              "by whitespace", command_name(current->cmd));
1438                   current = current->parent;
1439                 }
1440             }
1441           else if (*line == '@')
1442             {
1443               line_error ("use braces to give a command as an argument "
1444                           "to @%s", command_name(current->cmd));
1445               current = current->parent;
1446             }
1447           else if (*line != '\0' && *line != '\n' && *line != '\r')
1448             {
1449               ELEMENT *e, *e2;
1450               debug ("ACCENT");
1451               e = new_element (ET_following_arg);
1452               add_to_element_args (current, e);
1453               e2 = new_element (ET_NONE);
1454               text_append_n (&e2->text, line, 1);
1455               add_to_element_contents (e, e2);
1456 
1457               if (current->cmd == CM_dotless
1458                   && *line != 'i' && *line != 'j')
1459                 {
1460                   line_error ("@dotless expects `i' or `j' as argument, "
1461                               "not `%c'", *line);
1462                 }
1463               if (isalpha (command_name(current->cmd)[0]))
1464                 e->type = ET_space_command_arg;
1465               while (current->contents.number > 0)
1466                 destroy_element (pop_element_from_contents (current));
1467               line++;
1468               current = current->parent;
1469             }
1470           else
1471             {
1472               debug ("STRANGE ACC");
1473               line_warn ("accent command `@%s' must not be followed by "
1474                          "new line", command_name(current->cmd));
1475               current = current->parent;
1476             }
1477           goto funexit;
1478         }
1479       else
1480         {
1481           if (conf.ignore_space_after_braced_command_name)
1482             {
1483               char *p;
1484               p = line + strspn (line, whitespace_chars);
1485               if (p != line)
1486                 {
1487                   line = p;
1488                   goto funexit;
1489                 }
1490             }
1491           line_error ("@%s expected braces",
1492                        command_name(current->cmd));
1493           current = current->parent;
1494         }
1495     }
1496   else if (handle_menu (&current, &line))
1497     {
1498       ; /* Nothing - everything was done in handle_menu. */
1499     }
1500   /* Any other @-command. */
1501   else if (cmd)
1502     {
1503       int def_line_continuation;
1504 
1505       line = line_after_command;
1506       debug ("COMMAND %s", command_name(cmd));
1507 
1508       /* @value */
1509       if (cmd == CM_value)
1510         {
1511           char *arg_start;
1512           char *flag;
1513           line += strspn (line, whitespace_chars);
1514           if (*line != '{')
1515             goto value_invalid;
1516 
1517           line++;
1518           arg_start = line;
1519           flag = read_flag_name (&line);
1520           if (!flag)
1521             goto value_invalid;
1522 
1523           if (*line != '}')
1524             {
1525               line = arg_start - 1;
1526               goto value_invalid;
1527             }
1528 
1529           if (1) /* @value syntax is valid */
1530             {
1531               char *value;
1532 value_valid:
1533               value = fetch_value (flag);
1534               if (!value)
1535                 {
1536                   /* Add element for unexpanded @value.
1537                      This is not necessarily an error - in
1538                      Texinfo::Report::gdt we deliberately pass
1539                      in undefined values. */
1540                   ELEMENT *value_elt;
1541 
1542                   line_warn ("undefined flag: %s", flag);
1543                   /* Note: In the Perl code, this warning is conditional on
1544                      in_gdt setting, but the only effect that this possibly has
1545                      is on speed, as these warnings would not be printed to the
1546                      user. */
1547 
1548                   abort_empty_line (&current, NULL);
1549                   value_elt = new_element (ET_NONE);
1550                   value_elt->cmd = CM_value;
1551                   text_append (&value_elt->text, flag);
1552 
1553                   /* In the Perl code, the name of the flag is stored in
1554                      the "type" field.  We need to store in 'text' instead
1555                      and then output it as the type in
1556                      dump_perl.c / api.c. */
1557 
1558                   add_to_element_contents (current, value_elt);
1559 
1560                   line++; /* past '}' */
1561                   retval = STILL_MORE_TO_PROCESS;
1562                 }
1563               else
1564                 {
1565                   line++; /* past '}' */
1566                   input_push_text (strdup (line), line_nr.macro);
1567                   input_push_text (strdup (value), line_nr.macro);
1568                   line += strlen (line);
1569                   retval = STILL_MORE_TO_PROCESS;
1570                 }
1571               free (flag);
1572               goto funexit;
1573             }
1574           else
1575             {
1576 value_invalid:
1577               line_error ("bad syntax for @value");
1578               retval = STILL_MORE_TO_PROCESS;
1579               goto funexit;
1580             }
1581         }
1582 
1583       /* Warn on deprecated command */
1584       if (command_data(cmd).flags & CF_deprecated)
1585         {
1586           char *msg = 0;
1587           switch (cmd)
1588             {
1589               /* messages for commands could go here. */
1590             default:
1591               break;
1592             }
1593           if (!msg)
1594             line_warn ("@%s is obsolete.", command_name(cmd));
1595           else
1596             line_warn ("@%s is obsolete; %s", command_name(cmd), msg);
1597           /* note: will have to translate msg if string translation with
1598              gettext is implemented */
1599         }
1600 
1601       def_line_continuation = (current_context() == ct_def
1602                                && cmd == CM_NEWLINE);
1603       /* warn on not appearing at line beginning */
1604       /* TODO maybe have a command flag for "begin line commands" */
1605       if (!def_line_continuation
1606           && !abort_empty_line (&current, NULL)
1607           && ((cmd == CM_node || cmd == CM_bye)
1608               || (command_data(cmd).flags & CF_block)
1609               || ((command_data(cmd).flags & CF_line)
1610                   && cmd != CM_comment
1611                   && cmd != CM_c
1612                   && cmd != CM_sp
1613                   && cmd != CM_columnfractions
1614                   && cmd != CM_item
1615                   && cmd != CM_verbatiminclude
1616                   && cmd != CM_set
1617                   && cmd != CM_clear
1618                   && cmd != CM_vskip)
1619                   && cmd != CM_subentry))
1620         {
1621           line_warn ("@%s should only appear at the beginning of a line",
1622                      command_name(cmd));
1623         }
1624 
1625       if (current->parent)
1626         check_valid_nesting (current, cmd);
1627 
1628       if (def_line_continuation)
1629         {
1630           retval = GET_A_NEW_LINE;
1631           goto funexit;
1632         }
1633 
1634       /* check command doesn't start a paragraph */
1635       /* TODO store this in cmd->flags. */
1636       if (!(command_data(cmd).flags & (CF_line | CF_other | CF_block)
1637             || cmd == CM_titlefont
1638             || cmd == CM_caption
1639             || cmd == CM_shortcaption
1640             || cmd == CM_image
1641             || cmd == CM_ASTERISK /* @* */
1642             || cmd == CM_hyphenation
1643             || cmd == CM_anchor
1644             || cmd == CM_errormsg
1645             || (command_data(cmd).flags & CF_index_entry_command)))
1646         {
1647           ELEMENT *paragraph;
1648           paragraph = begin_paragraph (current);
1649           if (paragraph)
1650             current = paragraph;
1651         }
1652 
1653       if (cmd)
1654         {
1655           if (close_paragraph_command (cmd))
1656             current = end_paragraph (current, 0, 0);
1657           if (close_preformatted_command (cmd))
1658             current = end_preformatted (current, 0, 0);
1659         }
1660 
1661       if ((cmd == CM_sortas
1662            || cmd == CM_seeentry
1663            || cmd == CM_seealso
1664            || cmd == CM_subentry)
1665           && current->contents.number > 0
1666           && last_contents_child(current)->text.end > 0)
1667         {
1668           isolate_trailing_space (current, ET_empty_spaces_before_argument);
1669         }
1670 
1671       if (cmd == CM_item && item_line_parent (current))
1672         cmd = CM_item_LINE;
1673       /* We could possibly have done this before check_valid_nesting. */
1674 
1675       if (command_data(cmd).flags & CF_other)
1676         {
1677           int status;
1678           current = handle_other_command (current, &line, cmd, &status);
1679           if (status == GET_A_NEW_LINE || status == FINISHED_TOTALLY)
1680             {
1681               retval = status;
1682               goto funexit;
1683             }
1684         }
1685       else if (command_data(cmd).flags & CF_line)
1686         {
1687           int status;
1688           current = handle_line_command (current, &line, cmd, &status);
1689           if (status == GET_A_NEW_LINE || status == FINISHED_TOTALLY)
1690             {
1691               retval = status;
1692               goto funexit;
1693             }
1694         }
1695       else if (command_data(cmd).flags & CF_block)
1696         {
1697           int new_line = 0;
1698           current = handle_block_command (current, &line, cmd, &new_line);
1699           if (new_line)
1700             {
1701               /* For @macro, to get a new line.  This is done instead of
1702                  doing the EMPTY TEXT code on the next time round. */
1703               retval = GET_A_NEW_LINE;
1704               goto funexit;
1705             }
1706         }
1707       else if (command_data(cmd).flags & (CF_brace | CF_accent))
1708         {
1709           current = handle_brace_command (current, &line, cmd);
1710         }
1711       /* No-brace command */
1712       else if (command_data(cmd).flags & CF_nobrace)
1713         {
1714           ELEMENT *nobrace;
1715 
1716           nobrace = new_element (ET_NONE);
1717           nobrace->cmd = cmd;
1718           add_to_element_contents (current, nobrace);
1719 
1720           if (cmd == CM_BACKSLASH && current_context () != ct_math)
1721             {
1722               line_warn ("@\\ should only appear in math context");
1723             }
1724           if (cmd == CM_NEWLINE)
1725             {
1726               current = end_line (current);
1727               retval = GET_A_NEW_LINE;
1728               goto funexit;
1729             }
1730         }
1731     }
1732   /* "Separator" character */
1733   else if (*line != '\0' && strchr ("{}@,:\t.\f", *line))
1734     {
1735       char separator = *line++;
1736       debug ("SEPARATOR: %c", separator);
1737       if (separator == '@')
1738         line_error ("unexpected @");
1739       else
1740         current = handle_separator (current, separator, &line);
1741     }
1742   /* "Misc text except end of line." */
1743   else if (*line && *line != '\n')
1744     {
1745       size_t len;
1746 
1747       /* Output until next command, separator or newline. */
1748       {
1749         char saved; /* TODO: Have a length argument to merge_text? */
1750         len = strcspn (line, "{}@,:\t.\n\f");
1751         saved = line[len];
1752         line[len] = '\0';
1753         current = merge_text (current, line);
1754         line += len;
1755         *line = saved;
1756       }
1757 
1758       retval = STILL_MORE_TO_PROCESS;
1759       goto funexit;
1760     }
1761   else /*  End of line */
1762     {
1763       if (current->type)
1764         debug ("END LINE (%s)", element_type_names[current->type]);
1765       else if (current->cmd)
1766         debug ("END LINE (@%s)", command_name(current->cmd));
1767       else
1768         debug ("END LINE");
1769       if (current->parent)
1770         {
1771           debug_nonl (" <- ");
1772           if (current->parent->cmd)
1773             debug_nonl("@%s", command_name(current->parent->cmd));
1774           if (current->parent->type)
1775             debug_nonl("(%s)", element_type_names[current->parent->type]);
1776           debug ("");
1777           debug ("");
1778         }
1779 
1780       if (*line == '\n')
1781         {
1782           current = merge_text (current, "\n");
1783           line++;
1784         }
1785       else
1786         {
1787           if (input_number > 0)
1788             bug_message ("Text remaining without normal text but `%s'", line);
1789         }
1790 
1791       /* '@end' is processed in here. */
1792       current = end_line (current);
1793       retval = GET_A_NEW_LINE;
1794     }
1795 
1796 funexit:
1797   *current_inout = current;
1798   *line_inout = line;
1799   return retval;
1800 }
1801 
1802 /* Check for a #line directive. */
1803 static int
check_line_directive(char * line)1804 check_line_directive (char *line)
1805 {
1806   char *p = line, *q;
1807   int line_no = 0;
1808   char *filename = 0;
1809 
1810   if (!conf.cpp_line_directives)
1811     return 0;
1812 
1813   /* Check input is coming directly from a file. */
1814   if (!line_nr.file_name || !line_nr.file_name
1815       || (line_nr.macro && *line_nr.macro))
1816     return 0;
1817 
1818   p += strspn (p, " \t");
1819   if (*p != '#')
1820     return 0;
1821   p++;
1822 
1823   q = p + strspn (p, " \t");
1824   if (!memcmp (q, "line", strlen ("line")))
1825     p = q + strlen ("line");
1826 
1827   if (!strchr (" \t", *p))
1828     return 0;
1829   p += strspn (p, " \t");
1830 
1831   /* p should now be at the line number */
1832   if (!strchr ("0123456789", *p))
1833     return 0;
1834   line_no = strtoul (p, &p, 10);
1835 
1836   p += strspn (p, " \t");
1837   if (*p == '"')
1838     {
1839       char c;
1840       p++;
1841       q = strchr (p, '"');
1842       if (!q)
1843         return 0;
1844       c = *q;
1845       *q = 0;
1846       filename = save_string (p);
1847       *q = c;
1848       p = q + 1;
1849       p += strspn (p, " \t");
1850 
1851       p += strspn (p, "0123456789");
1852       p += strspn (p, " \t");
1853     }
1854   if (*p && *p != '\n')
1855     return 0; /* trailing text on line */
1856 
1857   save_line_directive (line_no, filename);
1858 
1859   return 1;
1860 }
1861 
1862 /* Pass in and return root of a "Texinfo tree". */
1863 ELEMENT *
parse_texi(ELEMENT * root_elt)1864 parse_texi (ELEMENT *root_elt)
1865 {
1866   ELEMENT *current = root_elt;
1867   static char *allocated_line;
1868   char *line;
1869 
1870   /* Read input file line-by-line. */
1871   while (1)
1872     {
1873       free (allocated_line);
1874       line = allocated_line = next_text ();
1875       if (!allocated_line)
1876         break; /* Out of input. */
1877 
1878       debug_nonl ("NEW LINE %s", line);
1879 
1880       /* If not in 'raw' or 'conditional' and parent isn't a 'verb', collect
1881          leading whitespace and save as an "ET_empty_line" element.  This
1882          element type can be changed in 'abort_empty_line' when more text is
1883          read. */
1884       if (!((command_flags(current) & CF_block)
1885              && (command_data(current->cmd).data == BLOCK_raw
1886                  || command_data(current->cmd).data == BLOCK_conditional)
1887             || current->parent && current->parent->cmd == CM_verb)
1888           && current_context () != ct_def)
1889         {
1890           ELEMENT *e;
1891           int n;
1892 
1893           if (check_line_directive (line))
1894             continue;
1895 
1896           debug ("BEGIN LINE");
1897 
1898           if (current->contents.number > 0
1899               && last_contents_child(current)->type
1900                  == ET_empty_spaces_before_argument)
1901             {
1902               /* Remove this element and update 'extra' values. */
1903               abort_empty_line (&current, 0);
1904             }
1905 
1906           e = new_element (ET_empty_line);
1907           add_to_element_contents (current, e);
1908 
1909           n = strspn (line, whitespace_chars_except_newline);
1910           text_append_n (&e->text, line, n);
1911           line += n;
1912         }
1913 
1914       /* Process from start of remaining line, advancing it until we run out
1915          of line. */
1916       while (1)
1917         {
1918           int status = process_remaining_on_line (&current, &line);
1919           if (status == GET_A_NEW_LINE)
1920             break;
1921           if (status == FINISHED_TOTALLY)
1922             goto finished_totally;
1923           if (!line)
1924             break;
1925         }
1926     }
1927 finished_totally:
1928 
1929   /* Check for unclosed conditionals */
1930   while (conditional_number > 0)
1931     {
1932       line_error ("expected @end %s",
1933                   command_name(conditional_stack[conditional_number - 1]));
1934       conditional_number--;
1935     }
1936 
1937     {
1938       ELEMENT *dummy;
1939       current = close_commands (current, CM_NONE, &dummy, CM_NONE);
1940 
1941       /* Make sure we are at the very top - we could have stopped at the "top"
1942          element, with "document_root" still to go.  (This happens if the file
1943          didn't end with "@bye".) */
1944       while (current->parent)
1945         current = current->parent;
1946     }
1947 
1948   input_reset_input_stack (); /* to avoid a memory leak if @bye is given */
1949 
1950   /* TODO: Check for "unclosed stacks". */
1951 
1952   return current;
1953 }
1954