1 /* Copyright 2010-2020 Free Software Foundation, Inc.
2 
3    This program is free software: you can redistribute it and/or modify
4    it under the terms of the GNU General Public License as published by
5    the Free Software Foundation, either version 3 of the License, or
6    (at your option) any later version.
7 
8    This program is distributed in the hope that it will be useful,
9    but WITHOUT ANY WARRANTY; without even the implied warranty of
10    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11    GNU General Public License for more details.
12 
13    You should have received a copy of the GNU General Public License
14    along with this program.  If not, see <http://www.gnu.org/licenses/>. */
15 
16 #define _GNU_SOURCE
17 
18 #include <config.h>
19 
20 #include <stdlib.h>
21 #include <stdio.h>
22 #include <string.h>
23 #include <iconv.h>
24 #include <errno.h>
25 #include <sys/stat.h>
26 
27 #include "errors.h"
28 #include "input.h"
29 #include "text.h"
30 #include "commands.h"
31 
/* Kind of an entry on the input stack: an open file (IN_file) or a string
   held in memory (IN_text). */
enum input_type
{
  IN_file,
  IN_text
};
33 
/* Input encodings that can be converted to UTF-8.  The names in the
   comments are the ones accepted by set_input_encoding. */
enum character_encoding
{
  ce_latin1,    /* "iso-8859-1" and "us-ascii" */
  ce_latin2,    /* "iso-8859-2" */
  ce_latin15,   /* "iso-8859-15" */
  ce_utf8,      /* "utf-8" */
  ce_shiftjis,  /* "shift_jis" */
  ce_koi8r,     /* "koi8-r" */
  ce_koi8u      /* "koi8-u" */
};
43 
44 typedef struct {
45     enum input_type type;
46 
47     FILE *file;
48     LINE_NR line_nr;
49 
50     char *text;  /* Input text to be parsed as Texinfo. */
51     char *ptext; /* How far we are through 'text'.  Used to split 'text'
52                     into lines. */
53 } INPUT;
54 
55 enum character_encoding input_encoding;
56 
57 void
set_input_encoding(char * encoding)58 set_input_encoding (char *encoding)
59 {
60   if (!strcasecmp (encoding, "utf-8"))
61     input_encoding = ce_utf8;
62   else if (!strcmp (encoding, "iso-8859-1")
63           || !strcmp (encoding, "us-ascii"))
64     input_encoding = ce_latin1;
65   else if (!strcmp (encoding, "iso-8859-2"))
66     input_encoding = ce_latin2;
67   else if (!strcmp (encoding, "iso-8859-15"))
68     input_encoding = ce_latin15;
69   else if (!strcmp (encoding, "shift_jis"))
70     input_encoding = ce_shiftjis;
71   else if (!strcmp (encoding, "koi8-r"))
72     input_encoding = ce_koi8r;
73   else if (!strcmp (encoding, "koi8-u"))
74     input_encoding = ce_koi8u;
75   else
76     fprintf (stderr, "warning: unhandled encoding %s\n", encoding);
77 }
78 
79 
80 static INPUT *input_stack = 0;
81 int input_number = 0;
82 int input_space = 0;
83 
84 /* Current filename and line number.  Used for reporting. */
85 LINE_NR line_nr;
86 
87 /* Change the line number of filename of the top input source.  Used to
88    record a #line directive.  If FILENAME is non-null, it should hbae
89    been returned from save_string. */
90 void
save_line_directive(int line_nr,char * filename)91 save_line_directive (int line_nr, char *filename)
92 {
93   INPUT *top = &input_stack[input_number - 1];
94   if (line_nr)
95     top->line_nr.line_nr = line_nr;
96   if (filename)
97     top->line_nr.file_name = filename;
98 }
99 
100 /* Collect text from the input sources until a newline is found.  This is used
101    instead of next_text when we need to be sure we get an entire line of
102    Texinfo input (for example as a line argument to a command), which might not
103    be the case if the input is the result of a macro expansion.
104 
105    Return value should not be freed by caller, and becomes invalid after
106    a subsequent call. */
107 char *
new_line(void)108 new_line (void)
109 {
110   static TEXT t;
111   char *new = 0;
112 
113   t.end = 0;
114 
115   while (1)
116     {
117       new = next_text ();
118       if (!new)
119         break;
120       text_append (&t, new);
121       free (new);
122 
123       if (t.text[t.end - 1] == '\n')
124         break;
125     }
126 
127   if (t.end > 0)
128     return t.text;
129   else
130     return 0;
131 }
132 
133 
/* Conversion descriptors used by convert_to_utf8, which opens each of them
   on first use.  A zero value means "not opened yet"; (iconv_t) -1 is what
   iconv_open leaves behind when the conversion is unavailable. */
static iconv_t iconv_validate_utf8;   /* UTF-8 to UTF-8 */
static iconv_t iconv_from_latin1;     /* ISO-8859-1 to UTF-8 */
static iconv_t iconv_from_latin2;     /* ISO-8859-2 to UTF-8 */
static iconv_t iconv_from_latin15;    /* ISO-8859-15 to UTF-8 */
static iconv_t iconv_from_shiftjis;   /* SHIFT-JIS to UTF-8 */
static iconv_t iconv_from_koi8r;      /* KOI8-R to UTF-8 */
static iconv_t iconv_from_koi8u;      /* KOI8-U to UTF-8 */
141 
142 /* Run iconv using text buffer as output buffer. */
143 size_t
text_buffer_iconv(TEXT * buf,iconv_t iconv_state,ICONV_CONST char ** inbuf,size_t * inbytesleft)144 text_buffer_iconv (TEXT *buf, iconv_t iconv_state,
145                    ICONV_CONST char **inbuf, size_t *inbytesleft)
146 {
147   size_t out_bytes_left;
148   char *outptr;
149   size_t iconv_ret;
150 
151   outptr = buf->text + buf->end;
152   if (buf->end == buf->space - 1)
153     {
154       errno = E2BIG;
155       return (size_t) -1;
156     }
157   out_bytes_left = buf->space - buf->end - 1;
158   iconv_ret = iconv (iconv_state, inbuf, inbytesleft,
159                      &outptr, &out_bytes_left);
160 
161   buf->end = outptr - buf->text;
162 
163   return iconv_ret;
164 }
165 
166 
167 /* Return conversion of S according to input_encoding.  This function
168    frees S. */
169 static char *
convert_to_utf8(char * s)170 convert_to_utf8 (char *s)
171 {
172   iconv_t our_iconv = (iconv_t) -1;
173   static TEXT t;
174   ICONV_CONST char *inptr; size_t bytes_left;
175   size_t iconv_ret;
176   enum character_encoding enc;
177 
178   /* Convert from @documentencoding to UTF-8.
179      It might be possible not to convert to UTF-8 and use an 8-bit encoding
180      throughout, but then we'd have to not set the UTF-8 flag on the Perl
181      strings in api.c.  If multiple character encodings were used in a single
182      file, then we'd have to keep track of which strings needed the UTF-8 flag
183      and which didn't. */
184 
185   /* Initialize conversions for the first time. */
186   if (iconv_validate_utf8 == (iconv_t) 0)
187     iconv_validate_utf8 = iconv_open ("UTF-8", "UTF-8");
188   if (iconv_from_latin1 == (iconv_t) 0)
189     iconv_from_latin1 = iconv_open ("UTF-8", "ISO-8859-1");
190   if (iconv_from_latin2 == (iconv_t) 0)
191     iconv_from_latin2 = iconv_open ("UTF-8", "ISO-8859-2");
192   if (iconv_from_latin15 == (iconv_t) 0)
193     iconv_from_latin15 = iconv_open ("UTF-8", "ISO-8859-15");
194   if (iconv_from_shiftjis == (iconv_t) 0)
195     iconv_from_shiftjis = iconv_open ("UTF-8", "SHIFT-JIS");
196   if (iconv_from_koi8r == (iconv_t) 0)
197     iconv_from_koi8r = iconv_open ("UTF-8", "KOI8-R");
198   if (iconv_from_koi8u == (iconv_t) 0)
199     iconv_from_koi8u = iconv_open ("UTF-8", "KOI8-U");
200 
201   switch (input_encoding)
202     {
203     case ce_utf8:
204       our_iconv = iconv_validate_utf8;
205       break;
206     case ce_latin1:
207       our_iconv = iconv_from_latin1;
208       break;
209     case ce_latin2:
210       our_iconv = iconv_from_latin2;
211       break;
212     case ce_latin15:
213       our_iconv = iconv_from_latin15;
214       break;
215     case ce_shiftjis:
216       our_iconv = iconv_from_shiftjis;
217       break;
218     case ce_koi8r:
219       our_iconv = iconv_from_koi8r;
220       break;
221     case ce_koi8u:
222       our_iconv = iconv_from_koi8u;
223       break;
224     }
225 
226   if (our_iconv == (iconv_t) -1)
227     {
228       /* In case the converter couldn't be initialised.
229          Danger: this will cause problems if the input is not in UTF-8 as
230          the Perl strings that are created are flagged as being UTF-8. */
231       return s;
232     }
233 
234   t.end = 0;
235   inptr = s;
236   bytes_left = strlen (s);
237   text_alloc (&t, 10);
238 
239   while (1)
240     {
241       iconv_ret = text_buffer_iconv (&t, our_iconv,
242                                      &inptr, &bytes_left);
243 
244       /* Make sure libiconv flushes out the last converted character.
245          This is required when the conversion is stateful, in which
246          case libiconv might not output the last character, waiting to
247          see whether it should be combined with the next one.  */
248       if (iconv_ret != (size_t) -1
249           && text_buffer_iconv (&t, our_iconv, 0, 0) != (size_t) -1)
250         /* Success: all of input converted. */
251         break;
252 
253       if (bytes_left == 0)
254         break;
255 
256       switch (errno)
257         {
258         case E2BIG:
259           text_alloc (&t, t.space + 20);
260           break;
261         case EILSEQ:
262         default:
263           fprintf(stderr, "%s:%d: encoding error at byte 0x%2x\n",
264             line_nr.file_name, line_nr.line_nr, *(unsigned char *)inptr);
265           inptr++; bytes_left--;
266           break;
267         }
268     }
269 
270   free (s);
271   t.text[t.end] = '\0';
272   return strdup (t.text);
273 }
274 
275 int
expanding_macro(char * macro)276 expanding_macro (char *macro)
277 {
278   int i;
279   for (i = 0; i < input_number; i++)
280     {
281       if (input_stack[i].line_nr.macro
282           && !strcmp (input_stack[i].line_nr.macro, macro))
283         {
284           return 1;
285         }
286     }
287   return 0;
288 }
289 
290 char *save_string (char *string);
291 
292 /* Return value to be freed by caller.  Return null if we are out of input. */
293 char *
next_text(void)294 next_text (void)
295 {
296   ssize_t status;
297   char *line = 0;
298   size_t n;
299   FILE *input_file;
300 
301   while (input_number > 0)
302     {
303       /* Check for pending input. */
304       INPUT *i = &input_stack[input_number - 1];
305 
306       switch (i->type)
307         {
308           char *p, *new;
309         case IN_text:
310           if (!*i->ptext)
311             {
312               /* End of text reached. */
313               free (i->text);
314               break;
315             }
316           /* Split off a line of input. */
317           p = strchrnul (i->ptext, '\n');
318           new = strndup (i->ptext, p - i->ptext + 1);
319           if (*p)
320             i->ptext = p + 1;
321           else
322             i->ptext = p; /* The next time, we will pop the input source. */
323 
324           if (!i->line_nr.macro)
325             i->line_nr.line_nr++;
326 
327           line_nr = i->line_nr;
328 
329           return new;
330 
331           break;
332         case IN_file:
333           input_file = input_stack[input_number - 1].file;
334           status = getline (&line, &n, input_file);
335           if (status != -1)
336             {
337               char *comment;
338               if (feof (input_file))
339                 {
340                   /* Add a newline at the end of the file if one is missing. */
341                   char *line2;
342                   asprintf (&line2, "%s\n", line);
343                   free (line);
344                   line = line2;
345                 }
346 
347               /* Strip off a comment. */
348               comment = strchr (line, '\x7F');
349               if (comment)
350                 *comment = '\0';
351 
352               i->line_nr.line_nr++;
353               line_nr = i->line_nr;
354 
355               return convert_to_utf8 (line);
356             }
357           free (line); line = 0;
358           break;
359         default:
360           fatal ("unknown input source type");
361         }
362 
363       /* Top input source failed.  Pop it and try the next one. */
364 
365       if (input_stack[input_number - 1].type == IN_file)
366         {
367           FILE *file = input_stack[input_number - 1].file;
368 
369           if (file != stdin)
370             {
371               if (fclose (input_stack[input_number - 1].file) == EOF)
372                 fprintf (stderr, "error on closing %s: %s",
373                         input_stack[input_number - 1].line_nr.file_name,
374                         strerror (errno));
375             }
376         }
377 
378       input_number--;
379     }
380   return 0;
381 }
382 
383 void
input_push(char * text,char * macro,char * filename,int line_number)384 input_push (char *text, char *macro, char *filename, int line_number)
385 {
386   if (input_number == input_space)
387     {
388       input_space++; input_space *= 1.5;
389       input_stack = realloc (input_stack, input_space * sizeof (INPUT));
390       if (!input_stack)
391         fatal ("realloc failed");
392     }
393 
394   input_stack[input_number].type = IN_text;
395   input_stack[input_number].file = 0;
396   input_stack[input_number].text = text;
397   input_stack[input_number].ptext = text;
398 
399   if (!macro)
400     line_number--;
401   input_stack[input_number].line_nr.line_nr = line_number;
402   input_stack[input_number].line_nr.file_name = save_string (filename);
403   input_stack[input_number].line_nr.macro = save_string (macro);
404   input_number++;
405 }
406 
/* For filenames and macro names, it is possible that they won't be referenced
   in the line number of any element.  It would be too much work to keep track,
   so just keep them all here, and free them all together at the end. */
static char **small_strings;        /* The saved copies. */
static size_t small_strings_num;    /* Number of copies stored. */
static size_t small_strings_space;  /* Number of slots allocated. */

/* Return a copy of STRING that is remembered in small_strings and released
   by free_small_strings; the caller must not free it.  Returns null if
   STRING is null or could not be copied. */
char *
save_string (char *string)
{
  char *copy;

  if (!string)
    return 0;

  copy = strdup (string);
  if (!copy)
    return 0;

  if (small_strings_num == small_strings_space)
    {
      /* Grow by one slot plus a quarter of the new size. */
      size_t wanted = small_strings_space + 1;

      small_strings_space = wanted + wanted / 4;
      small_strings = realloc (small_strings,
                               small_strings_space * sizeof (char *));
      if (!small_strings)
        fatal ("realloc failed");
    }

  small_strings[small_strings_num] = copy;
  small_strings_num++;

  return copy;
}
433 
434 /* Called in reset_parser. */
435 void
free_small_strings(void)436 free_small_strings (void)
437 {
438   size_t i;
439   for (i = 0; i < small_strings_num; i++)
440     {
441       free (small_strings[i]);
442     }
443   small_strings_num = 0;
444 }
445 
446 
447 /* Store TEXT as a source for Texinfo content.  TEXT should be a UTF-8
448    string.  TEXT will be later free'd and must be allocated on the heap.
449    MACRO is the name of a macro that the text came from. */
450 void
input_push_text(char * text,char * macro)451 input_push_text (char *text, char *macro)
452 {
453   if (text)
454     {
455       char *filename = 0;
456       if (input_number > 0)
457         {
458           filename = input_stack[input_number - 1].line_nr.file_name;
459         }
460       input_push (text, macro, filename, line_nr.line_nr);
461     }
462 }
463 
464 /* Used in tests - like input_push_text, but the lines from the text have
465    line numbers. */
466 void
input_push_text_with_line_nos(char * text,int starting)467 input_push_text_with_line_nos (char *text, int starting)
468 {
469   input_push (text, 0, 0, starting);
470   input_stack[input_number - 1].type = IN_text;
471 }
472 
473 void
input_reset_input_stack(void)474 input_reset_input_stack (void)
475 {
476   int i;
477   for (i = 0; i < input_number; i++)
478     {
479       switch (input_stack[i].type)
480         {
481         case IN_file:
482           if (input_stack[i].file != stdin)
483             fclose (input_stack[i].file);
484           break;
485         case IN_text:
486           free (input_stack[i].text);
487           break;
488         }
489     }
490   input_number = 0;
491 }
492 
493 int
top_file_index(void)494 top_file_index (void)
495 {
496   int i = input_number - 1;
497   while (i >= 0 && input_stack[i].type != IN_file)
498     i--;
499   return i;
500 }
501 
502 
/* Directories searched by locate_include_file, in the order they were
   added. */
static char **include_dirs;
static size_t include_dirs_number;  /* Number of directories stored. */
static size_t include_dirs_space;   /* Number of slots allocated. */

/* Append a copy of FILENAME to the list of include directories.  One
   trailing '/' is removed from the copy, since locate_include_file adds
   its own separator.  Allocation failures are reported through fatal,
   like the other allocations in this file. */
void
add_include_directory (char *filename)
{
  char *dir;
  size_t len;

  if (include_dirs_number == include_dirs_space)
    {
      char **grown = realloc (include_dirs,
                              sizeof (char *) * (include_dirs_space + 5));
      if (!grown)
        fatal ("realloc failed");
      include_dirs = grown;
      include_dirs_space += 5;
    }

  dir = strdup (filename);
  if (!dir)
    fatal ("strdup failed");

  len = strlen (dir);
  if (len > 0 && dir[len - 1] == '/')
    dir[len - 1] = '\0';

  include_dirs[include_dirs_number++] = dir;
}
522 
523 /* Return value to be freed by caller. */
524 char *
locate_include_file(char * filename)525 locate_include_file (char *filename)
526 {
527   char *fullpath;
528   struct stat dummy;
529   int i, status;
530 
531   /* Checks if filename is absolute or relative to current directory.
532      TODO: Could use macros in top-level config.h for this. */
533   /* TODO: The Perl code (in Common.pm, 'locate_include_file') handles
534      a volume in a path (like "A:"), possibly more general treatment
535      with File::Spec module. */
536   if (!memcmp (filename, "/", 1)
537       || !memcmp (filename, "../", 3)
538       || !memcmp (filename, "./", 2))
539     {
540       status = stat (filename, &dummy);
541       if (status == 0)
542         return strdup (filename);
543     }
544   else
545     {
546       for (i = 0; i < include_dirs_number; i++)
547         {
548           asprintf (&fullpath, "%s/%s", include_dirs[i], filename);
549           status = stat (fullpath, &dummy);
550           if (status == 0)
551             return fullpath;
552           free (fullpath);
553         }
554     }
555   return 0;
556 }
557 
558 /* Try to open a file called FILENAME, looking for it in the list of include
559    directories. */
560 int
input_push_file(char * filename)561 input_push_file (char *filename)
562 {
563   FILE *stream;
564 
565   stream = fopen (filename, "r");
566   if (!stream)
567     return errno;
568 
569   if (input_number == input_space)
570     {
571       input_stack = realloc (input_stack, (input_space += 5) * sizeof (INPUT));
572       if (!input_stack)
573         fatal ("realloc failed");
574     }
575 
576   /* Strip off a leading directory path. */
577   char *p, *q;
578   p = 0;
579   q = strchr (filename, '/');
580   while (q)
581     {
582       p = q;
583       q = strchr (q + 1, '/');
584     }
585   if (p)
586     filename = save_string (p+1);
587   else
588     filename = save_string (filename);
589 
590   input_stack[input_number].type = IN_file;
591   input_stack[input_number].file = stream;
592   input_stack[input_number].line_nr.file_name = filename;
593   input_stack[input_number].line_nr.line_nr = 0;
594   input_stack[input_number].line_nr.macro = 0;
595   input_stack[input_number].text = 0;
596   input_stack[input_number].ptext = 0;
597   input_number++;
598 
599   return 0;
600 }
601 
602