/* info-utils.c -- miscellaneous.
2 
3    Copyright 1993-2020 Free Software Foundation, Inc.
4 
5    This program is free software: you can redistribute it and/or modify
6    it under the terms of the GNU General Public License as published by
7    the Free Software Foundation, either version 3 of the License, or
8    (at your option) any later version.
9 
10    This program is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13    GNU General Public License for more details.
14 
15    You should have received a copy of the GNU General Public License
16    along with this program.  If not, see <http://www.gnu.org/licenses/>.
17 
18    Originally written by Brian Fox. */
19 
20 #include "info.h"
21 #include "session.h"
22 #include "info-utils.h"
23 #include "tag.h"
24 
25 #include <langinfo.h>
26 #if HAVE_ICONV
27 # include <iconv.h>
28 #endif
29 #include <wchar.h>
30 #ifdef __MINGW32__
31 /* MinGW uses a replacement nl_langinfo, see pcterm.c.  */
32 # define nl_langinfo rpl_nl_langinfo
33 extern char * rpl_nl_langinfo (nl_item);
34 /* MinGW uses its own replacement wcwidth, see pcterm.c for the
35    reasons.  Since Gnulib's wchar.h might redirect wcwidth to
36    rpl_wcwidth, we explicitly undo that here.  */
37 #undef wcwidth
38 #endif
39 
40 #ifdef __hpux
41 #define va_copy(ap1,ap2) memcpy((&ap1),(&ap2),sizeof(va_list))
42 #endif
43 
/* Filename most recently extracted by info_parse_node (), or a null
   pointer.  The storage is released and replaced by the next call to
   info_parse_node (), so callers must not free it. */
char *info_parsed_filename = NULL;

/* Node name most recently extracted by info_parse_node (), or a null
   pointer.  The storage is released and replaced by the next call to
   info_parse_node (), so callers must not free it. */
char *info_parsed_nodename = NULL;
51 
/* Read a filename surrounded by "(" and ")" from the start of STRING,
   allowing for nested, balanced brackets inside the name.  If FILENAME
   is not null, store a newly allocated copy of the name (without the
   outer brackets) in *FILENAME.  Return the number of bytes consumed,
   including the outer "(" and ")".

   If the string ends before the brackets balance, the first ")" seen is
   taken as the end of the filename.

   On error -- STRING does not begin with "(", or it contains no ")" at
   all -- store a null pointer in *FILENAME (if FILENAME is not null) and
   return 0. */
int
read_bracketed_filename (char *string, char **filename)
{
  int i;
  int depth = 1;         /* Nesting level; the opening "(" counts as 1. */
  int first_close = -1;  /* Offset of the first ")" seen, or -1. */

  if (*string != '(')
    {
      /* Honour the documented contract on this path too, so that callers
         never look at a stale or uninitialized pointer. */
      if (filename)
        *filename = 0;
      return 0;
    }

  string++; /* Offsets below are relative to the byte after "(". */
  for (i = 0; string[i]; i++)
    {
      if (string[i] == '(')
        depth++;
      else if (string[i] == ')')
        {
          if (first_close == -1)
            first_close = i;

          if (--depth == 0)
            break;
        }
    }

  if (depth > 0)
    {
      /* The string ended before the brackets were balanced. */
      if (first_close == -1)
        {
          if (filename)
            *filename = 0;
          return 0;
        }
      i = first_close;
    }

  if (filename)
    {
      /* xcalloc zero-fills, which supplies the terminating null byte. */
      *filename = xcalloc (1, i + 1);
      memcpy (*filename, string, i);
    }

  return i + 2; /* Length of filename plus "(" and ")". */
}
103 
104 /* Parse the filename and nodename out of STRING, saving in
105    INFO_PARSED_FILENAME and INFO_PARSED_NODENAME.  These variables should not
106    be freed by calling code.  If either is missing, the relevant variable is
107    set to a null pointer. */
108 void
info_parse_node(char * string)109 info_parse_node (char *string)
110 {
111   int nodename_len;
112 
113   free (info_parsed_filename);
114   free (info_parsed_nodename);
115   info_parsed_filename = 0;
116   info_parsed_nodename = 0;
117 
118   /* Special case of nothing passed.  Return nothing. */
119   if (!string || !*string)
120     return;
121 
122   string += skip_whitespace_and_newlines (string);
123 
124   string += read_bracketed_filename (string, &info_parsed_filename);
125 
126   /* Parse out nodename. */
127   string += skip_whitespace_and_newlines (string);
128   nodename_len = read_quoted_string (string, "", 0, &info_parsed_nodename);
129 
130   if (nodename_len != 0)
131     {
132       canonicalize_whitespace (info_parsed_nodename);
133     }
134 }
135 
/* Set *OUTPUT to a newly allocated copy of the string starting at START
   and finishing at a character in TERMINATOR, unless START[0] is the
   quoting character '\177' (INFO_QUOTE), in which case copy the string
   from START+1 until the next occurrence of '\177'.  If TERMINATOR is an
   empty string, finish at a null character.

   LINES is the number of lines that the string can span.  If LINES is
   zero, there is no limit.  To enforce the limit the byte following the
   last permitted line is temporarily overwritten with a null byte, so
   START must point to writable memory; the byte is restored before
   returning.

   Return the length of the string including any quoting characters that
   are actually present.  If TERMINATOR is not empty and no terminator (or
   closing quote) is found, set *OUTPUT to null and return 0. */
long
read_quoted_string (char *start, char *terminator, int lines, char **output)
{
  long len;
  char *nl = 0, saved_char = '\0';

  if (lines)
    {
      int i;

      /* Find the byte just after the LINES-th newline, if there is one. */
      nl = start;
      for (i = 0; i < lines; i++)
        {
          nl = strchr (nl, '\n');
          if (!nl)
            break; /* End of input string reached. */
          nl++;
        }
      if (nl)
        {
          saved_char = *nl;
          *nl = '\0';
        }
    }

  if (start[0] != '\177')
    {
      len = strcspn (start, terminator);

      if (*terminator && !start[len])
        {
          /* Reached end of input without seeing a terminator. */
          len = 0;
          *output = 0;
        }
      else
        {
          *output = xmalloc (len + 1);
          memcpy (*output, start, len);
          (*output)[len] = '\0';
        }
    }
  else
    {
      char *body = start + 1;
      int closed;

      len = strcspn (body, "\177");
      closed = (body[len] == '\177');

      if (*terminator && !closed)
        {
          /* No closing 177 byte. */
          len = 0;
          *output = 0;
        }
      else
        {
          *output = xmalloc (len + 1);
          memcpy (*output, body, len);
          (*output)[len] = '\0';

          /* Count the opening 177 byte, and the closing one only if it
             exists, so that START + return value never points past the
             terminating null byte. */
          len += closed ? 2 : 1;
        }
    }

  if (nl)
    *nl = saved_char;
  return len;
}
207 
208 
209 /* **************************************************************** */
210 /*                                                                  */
211 /*                  Finding and Building Menus                      */
212 /*                                                                  */
213 /* **************************************************************** */
214 
215 /* Get the entry associated with LABEL in the menu of NODE.  Return a
216    pointer to the ENTRY if found, or null.  Return value should not
217    be freed by caller.  If SLOPPY, allow initial matches, like
218    "Buffers" for a LABEL "buffer". */
219 REFERENCE *
info_get_menu_entry_by_label(NODE * node,char * label,int sloppy)220 info_get_menu_entry_by_label (NODE *node, char *label, int sloppy)
221 {
222   register int i;
223   int best_guess = -1;
224   REFERENCE *entry;
225   REFERENCE **references = node->references;
226 
227   if (!references)
228     return 0;
229 
230   for (i = 0; (entry = references[i]); i++)
231     {
232       if (entry->type != REFERENCE_MENU_ITEM)
233         continue;
234       if (mbscasecmp (label, entry->label) == 0)
235         return entry; /* Exact, case-insensitive match. */
236       else if (sloppy && best_guess == -1
237                && (mbsncasecmp (entry->label, label, strlen (label)) == 0))
238         best_guess = i;
239     }
240 
241   if (sloppy && best_guess != -1)
242     return references[best_guess];
243 
244   return 0;
245 }
246 
247 /* A utility function for concatenating REFERENCE **.  Returns a new
248    REFERENCE ** which is the concatenation of REF1 and REF2.  */
249 REFERENCE **
info_concatenate_references(REFERENCE ** ref1,REFERENCE ** ref2)250 info_concatenate_references (REFERENCE **ref1, REFERENCE **ref2)
251 {
252   register int i, j;
253   REFERENCE **result;
254   int size = 0;
255 
256   /* Get the total size of the slots that we will need. */
257   if (ref1)
258     {
259       for (i = 0; ref1[i]; i++);
260       size += i;
261     }
262 
263   if (ref2)
264     {
265       for (i = 0; ref2[i]; i++);
266       size += i;
267     }
268 
269   result = xmalloc ((1 + size) * sizeof (REFERENCE *));
270 
271   /* Copy the contents over. */
272 
273   j = 0;
274   if (ref1)
275     {
276       for (i = 0; ref1[i]; i++)
277         result[j++] = ref1[i];
278     }
279 
280   if (ref2)
281     {
282       for (i = 0; ref2[i]; i++)
283         result[j++] = ref2[i];
284     }
285 
286   result[j] = NULL;
287   return result;
288 }
289 
290 /* Copy a reference structure.  Copy each field into new memory.  */
291 REFERENCE *
info_copy_reference(REFERENCE * src)292 info_copy_reference (REFERENCE *src)
293 {
294   REFERENCE *dest = xmalloc (sizeof (REFERENCE));
295   dest->label = src->label ? xstrdup (src->label) : NULL;
296   dest->filename = src->filename ? xstrdup (src->filename) : NULL;
297   dest->nodename = src->nodename ? xstrdup (src->nodename) : NULL;
298   dest->start = src->start;
299   dest->end = src->end;
300   dest->line_number = src->line_number;
301   dest->type = src->type;
302 
303   return dest;
304 }
305 
306 /* Copy a list of references, copying in reference in turn with
307    info_copy_reference. */
308 REFERENCE **
info_copy_references(REFERENCE ** ref1)309 info_copy_references (REFERENCE **ref1)
310 {
311   int i;
312   REFERENCE **result;
313   int size;
314 
315   if (!ref1)
316     return 0;
317 
318   /* Get the total size of the slots that we will need. */
319   for (i = 0; ref1[i]; i++);
320   size = i;
321 
322   result = xmalloc ((1 + size) * sizeof (REFERENCE *));
323 
324   /* Copy the contents over. */
325   for (i = 0; ref1[i]; i++)
326     result[i] = info_copy_reference (ref1[i]);
327   result[i] = NULL;
328 
329   return result;
330 }
331 
332 void
info_reference_free(REFERENCE * ref)333 info_reference_free (REFERENCE *ref)
334 {
335   if (ref)
336     {
337       free (ref->label);
338       free (ref->filename);
339       free (ref->nodename);
340       free (ref);
341     }
342 }
343 
344 /* Free the data associated with REFERENCES. */
345 void
info_free_references(REFERENCE ** references)346 info_free_references (REFERENCE **references)
347 {
348   register int i;
349   REFERENCE *entry;
350 
351   if (references)
352     {
353       for (i = 0; references && (entry = references[i]); i++)
354         info_reference_free (entry);
355 
356       free (references);
357     }
358 }
359 
360 /* Return new REFERENCE with filename and nodename fields set. */
361 REFERENCE *
info_new_reference(char * filename,char * nodename)362 info_new_reference (char *filename, char *nodename)
363 {
364   REFERENCE *r = xmalloc (sizeof (REFERENCE));
365   r->label = 0;
366   r->filename = filename ? xstrdup (filename) : 0;
367   r->nodename = nodename ? xstrdup (nodename) : 0;
368   r->start = 0;
369   r->end = 0;
370   r->line_number = 0;
371   r->type = 0;
372   return r;
373 }
374 
375 
/* Rewrite STRING in place so that every run of characters for which
   whitespace_or_newline () is true becomes a single space, and so that
   such runs at the start and end of the string are removed altogether.
   A null STRING is ignored.

   The result is never longer than the input, so the string is compacted
   where it stands and no temporary buffer is needed. */
void
canonicalize_whitespace (char *string)
{
  char *in;
  char *out;
  int pending_space = 0;  /* Whitespace seen since the last byte kept. */

  if (!string)
    return;

  out = string;
  for (in = string; *in; in++)
    {
      if (whitespace_or_newline (*in))
        {
          pending_space = 1;
          continue;
        }

      if (pending_space)
        {
          pending_space = 0;

          /* Suppress whitespace at start of string. */
          if (out != string)
            *out++ = ' ';
        }

      /* OUT never overtakes IN, so this cannot clobber unread input. */
      *out++ = *in;
    }

  /* A pending space at this point is trailing whitespace, which is simply
     never written out. */
  *out = '\0';
}
427 
428 /* If ITER points to an ANSI escape sequence, process it, set PLEN to its
429    length in bytes, and return 1.
430    Otherwise, return 0.
431  */
432 int
ansi_escape(mbi_iterator_t iter,size_t * plen)433 ansi_escape (mbi_iterator_t iter, size_t *plen)
434 {
435   if (raw_escapes_p && *mbi_cur_ptr (iter) == '\033' && mbi_avail (iter))
436     {
437       mbi_advance (iter);
438       if (*mbi_cur_ptr (iter) == '[' &&  mbi_avail (iter))
439         {
440           ITER_SETBYTES (iter, 1);
441           mbi_advance (iter);
442           if (isdigit (*mbi_cur_ptr (iter)) && mbi_avail (iter))
443             {
444               ITER_SETBYTES (iter, 1);
445               mbi_advance (iter);
446               if (*mbi_cur_ptr (iter) == 'm')
447                 {
448                   *plen = 4;
449                   return 1;
450                 }
451               else if (isdigit (*mbi_cur_ptr (iter)) && mbi_avail (iter))
452                 {
453                   ITER_SETBYTES (iter, 1);
454                   mbi_advance (iter);
455                   if (*mbi_cur_ptr (iter) == 'm')
456                     {
457                       *plen = 5;
458                       return 1;
459                     }
460                 }
461             }
462         }
463     }
464 
465   return 0;
466 }
467 
468 static struct text_buffer printed_rep = { 0 };
469 
470 /* Return pointer to string that is the printed representation of character
471    (or other logical unit) at ITER if it were printed at screen column
472    PL_CHARS.  Use ITER_SETBYTES (info-utils.h) on ITER if we need to advance
473    past a unit that the multibyte iteractor doesn't know about (like an ANSI
474    escape sequence).  If ITER points at an end-of-line character, set *DELIM to
475    this character.  *PCHARS gets the number of screen columns taken up by
476    outputting the return value, and *PBYTES the number of bytes in returned
477    string.  Return value is not null-terminated.  Return value must not be
478    freed by caller. */
479 char *
printed_representation(mbi_iterator_t * iter,int * delim,size_t pl_chars,size_t * pchars,size_t * pbytes)480 printed_representation (mbi_iterator_t *iter, int *delim, size_t pl_chars,
481                         size_t *pchars, size_t *pbytes)
482 {
483   struct text_buffer *rep = &printed_rep;
484 
485   char *cur_ptr = (char *) mbi_cur_ptr (*iter);
486   size_t cur_len = mb_len (mbi_cur (*iter));
487 
488   text_buffer_reset (&printed_rep);
489 
490   if (mb_isprint (mbi_cur (*iter)))
491     {
492       /* cur.wc gives a wchar_t object.  See mbiter.h in the
493          gnulib/lib directory. */
494       *pchars = wcwidth ((*iter).cur.wc);
495       *pbytes = cur_len;
496       return cur_ptr;
497     }
498   else if (cur_len == 1)
499     {
500       if (*cur_ptr == '\n' || *cur_ptr == '\r')
501         {
502           /* If this is a CRLF line ending, ignore this character. */
503           if (*cur_ptr == '\r' && cur_ptr[1] == '\n')
504             {
505               *pchars = 0;
506               *pbytes = 0;
507               return cur_ptr;
508             }
509 
510           *pchars = 1;
511           *pbytes = cur_len;
512           *delim = *cur_ptr;
513           text_buffer_add_char (rep, ' ');
514           return cur_ptr;
515         }
516       else if (ansi_escape (*iter, &cur_len))
517         {
518           *pchars = 0;
519           *pbytes = cur_len;
520           ITER_SETBYTES (*iter, cur_len);
521 
522           return cur_ptr;
523         }
524       else if (*cur_ptr == '\t')
525         {
526           int i = 0;
527 
528           *pchars = ((pl_chars + 8) & 0xf8) - pl_chars;
529           *pbytes = *pchars;
530 
531           /* We must output spaces instead of the tab because a tab may
532              not clear characters already on the screen. */
533           for (i = 0; i < *pbytes; i++)
534             text_buffer_add_char (rep, ' ');
535           return text_buffer_base (rep);
536         }
537     }
538 
539   /* Show CTRL-x as "^X".  */
540   if (iscntrl (*cur_ptr) && *(unsigned char *)cur_ptr < 127)
541     {
542       *pchars = 2;
543       *pbytes = 2;
544       text_buffer_add_char (rep, '^');
545       text_buffer_add_char (rep, *cur_ptr | 0x40);
546       return text_buffer_base (rep);
547     }
548   else if (*cur_ptr == DEL)
549     {
550       *pchars = 0;
551       *pbytes = 0;
552       return text_buffer_base (rep);
553     }
554   else
555     {
556       /* Original byte was not recognized as anything.  Display its octal
557          value.  This could happen in the C locale for bytes above 128,
558          or for bytes 128-159 in an ISO-8859-1 locale.  Don't output the bytes
559          as they are, because they could have special meaning to the
560          terminal. */
561       *pchars = 4;
562       *pbytes = 4;
563       text_buffer_printf (rep, "\\%o", *(unsigned char *)cur_ptr);
564       return text_buffer_base (rep);
565     }
566 }
567 
568 
569 /* **************************************************************** */
570 /*                                                                  */
571 /*                          Scanning node                           */
572 /*                                                                  */
573 /* **************************************************************** */
574 
575 /* Whether to strip syntax from the text of nodes. */
576 int preprocess_nodes_p;
577 
578 /* Whether contents of nodes should be rewritten. */
579 static int rewrite_p;
580 
581 /* inptr is moved forward through the body of a node. */
582 static char *inptr;
583 
584 /* Pointer to first byte of node (after node separator). */
585 static char *input_start;
586 
587 /* Number of bytes in node contents. */
588 static size_t input_length;
589 
590 struct text_buffer output_buf;
591 
592 /* Pointer into a tags table for the file to the anchor we need to adjust as
593    a result of byte counts changing due to character encoding conversion or
594    inserted/deleted text. */
595 static TAG **anchor_to_adjust;
596 /* Offset within file buffer of first byte of node, used for anchor
597    adjustment. */
598 static int node_offset;
599 
600 /* Difference so far between the number of bytes input in the file and
601    bytes output.  Used to adjust the values of anchors in nodes. */
602 static long int output_bytes_difference;
603 
604 /* Whether we are converting the character encoding of the file. */
605 static int convert_encoding_p;
606 
607 #if HAVE_ICONV
608 
609 /* Whether text in file is encoded in UTF-8. */
610 static int file_is_in_utf8;
611 
612 /* Used for conversion from file encoding to output encoding. */
613 static iconv_t iconv_to_output;
614 
615 /* Conversion from file encoding to UTF-8. */
616 static iconv_t iconv_to_utf8;
617 
618 #endif /* HAVE_ICONV */
619 
620 void
init_conversion(FILE_BUFFER * fb)621 init_conversion (FILE_BUFFER *fb)
622 {
623   char *target_encoding;
624 
625   convert_encoding_p = 0;
626 
627   /* Node being processed does not come from an Info file. */
628   if (!fb)
629     return;
630 
631 #if !HAVE_ICONV
632   return;
633 #else
634   file_is_in_utf8 = 0;
635 
636   /* Don't process file if encoding is unknown. */
637   if (!fb->encoding)
638     return;
639 
640   /* Read name of character encoding from environment locale */
641   target_encoding = nl_langinfo (CODESET);
642 
643   /* Don't convert the contents if the locale
644      uses the same character encoding as the file */
645   if (!strcasecmp(target_encoding, fb->encoding))
646     return;
647 
648   /* Check if an iconv conversion from file locale to system
649      locale exists */
650   iconv_to_output = iconv_open (target_encoding, fb->encoding);
651   if (iconv_to_output == (iconv_t) -1)
652     return; /* Return if no conversion function implemented */
653 
654   if (   !strcasecmp ("UTF8",  fb->encoding)
655       || !strcasecmp ("UTF-8", fb->encoding))
656     file_is_in_utf8 = 1;
657 
658   if (!file_is_in_utf8)
659     {
660       iconv_to_utf8 = iconv_open ("UTF-8", fb->encoding);
661       if (iconv_to_utf8 == (iconv_t) -1)
662         {
663           /* Return if no conversion function implemented */
664           iconv_close (iconv_to_output);
665           return;
666         }
667     }
668 
669   convert_encoding_p = 1;
670   rewrite_p = 1;
671 #endif /* HAVE_ICONV */
672 }
673 
/* Close the iconv descriptors opened by init_conversion (), if encoding
   conversion is active.  Does nothing when iconv support is not compiled
   in. */
void
close_conversion (void)
{
#if HAVE_ICONV
  if (!convert_encoding_p)
    return;

  iconv_close (iconv_to_output);

  /* The UTF-8 descriptor was only opened for files not already in
     UTF-8. */
  if (!file_is_in_utf8)
    iconv_close (iconv_to_utf8);
#endif
}
684 
685 static void
init_output_stream(FILE_BUFFER * fb)686 init_output_stream (FILE_BUFFER *fb)
687 {
688   init_conversion (fb);
689   output_bytes_difference = 0;
690 
691   if (rewrite_p)
692     text_buffer_init (&output_buf);
693 }
694 
695 static size_t saved_offset;
696 static char *saved_inptr;
697 static long saved_difference;
698 
699 void
save_conversion_state(void)700 save_conversion_state (void)
701 {
702   saved_offset = text_buffer_off (&output_buf);
703   saved_inptr = inptr;
704   saved_difference = output_bytes_difference;
705 }
706 
707 /* Go back to the saved state of the output stream. */
708 void
reset_conversion(void)709 reset_conversion (void)
710 {
711   text_buffer_off (&output_buf) = saved_offset;
712   inptr = saved_inptr;
713   output_bytes_difference = saved_difference;
714 }
715 
716 /* Copy bytes from input to output with no encoding conversion. */
717 static void
copy_direct(long n)718 copy_direct (long n)
719 {
720   text_buffer_add_string (&output_buf, inptr, n);
721   inptr += n;
722 }
723 
724 /* Read one character at *FROM and write out a sequence
725    of bytes representing that character in ASCII.  *FROM
726    is advanced past the read character. */
727 static int
degrade_utf8(char ** from,size_t * from_left)728 degrade_utf8 (char **from, size_t *from_left)
729 {
730   static struct encoding_replacement
731   {
732     char *from_string;
733     char *to_string;
734   } er[] = {
735     {"\xE2\x80\x98","'"}, /* Opening single quote */
736     {"\xE2\x80\x99","'"}, /* Closing single quote */
737     {"\xE2\x80\x9C","\""},/* Opening double quote */
738     {"\xE2\x80\x9D","\""},/* Closing double quote */
739     {"\xC2\xA9","(C)"},   /* Copyright symbol */
740     {"\xC2\xBB",">>"},    /* Closing double angle brackets */
741 
742     {"\xE2\x86\x92","->"},/* Right arrow */
743     {"\xE2\x87\x92","=>"},/* Right double arrow */
744     {"\xE2\x8A\xA3","-|"},/* Print symbol */
745     {"\xE2\x98\x85","-!-"}, /* Point symbol */
746     {"\xE2\x86\xA6","==>"}, /* Expansion symbol */
747 
748     {"\xE2\x80\x90","-"},  /* Hyphen */
749     {"\xE2\x80\x91","-"},  /* Non-breaking hyphen */
750     {"\xE2\x80\x92","-"},  /* Figure dash */
751     {"\xE2\x80\x93","-"},  /* En dash */
752     {"\xE2\x80\x94","--"},  /* Em dash */
753     {"\xE2\x88\x92","-"},  /* Minus sign */
754     {"\xE2\x80\xA6","..."},  /* Ellipsis */
755     {"\xE2\x80\xA2","*"},  /* Bullet */
756 
757     {"\xC3\xA0","a`"},   /* Lower case letter a with grave accent */
758     {"\xC3\xA2","a^"},   /* Lower case letter a with circumflex */
759     {"\xC3\xA4","a\""},  /* Lower case letter a with diaeresis */
760     {"\xC3\xA6","ae"},   /* Lower case letter ae ligature */
761     {"\xC3\xA9","e'"},   /* Lower case letter e with acute accent */
762     {"\xC3\xA8","e`"},   /* Lower case letter e with grave accent */
763     {"\xC3\xAA","e^"},   /* Lower case letter e with circumflex */
764     {"\xC3\xAB","e\""},  /* Lower case letter e with diaeresis */
765     {"\xC3\xB6","o\""},  /* Lower case letter o with diaeresis */
766     {"\xC3\xBC","u\""},  /* Lower case letter u with diaeresis */
767     {"\xC3\x84", "A\""},  /* Upper case letter A with diaeresis. */
768     {"\xC3\x96", "O\""},  /* Upper case letter O with diaeresis. */
769     {"\xC3\x9c", "U\""},  /* Upper case letter U with diaeresis. */
770 
771     {"\xC3\xB1","n~"},  /* Lower case letter n with tilde */
772     {"\xC3\x87","C,"},  /* Upper case letter C with cedilla */
773     {"\xC3\xA7","c,"},  /* Lower case letter c with cedilla */
774     {"\xC3\x9f","ss"},  /* Lower case letter sharp s */
775 
776     {0, 0}
777   };
778 
779   struct encoding_replacement *erp;
780 
781   for (erp = er; erp->from_string != 0; erp++)
782     {
783       /* Avoid reading past end of input. */
784       int width = strlen (erp->from_string);
785       if (width > *from_left)
786         continue;
787 
788       if (!strncmp (erp->from_string, *from, width))
789         {
790           text_buffer_add_string (&output_buf, erp->to_string,
791                                   strlen(erp->to_string));
792           *from += width;
793           *from_left -= width;
794           return 1;
795         }
796     }
797 
798   /* Failing this, just print a question mark.  Maybe we should use SUB
799      (^Z) (ASCII substitute character code) instead, or pass through the
800      original bytes. */
801   text_buffer_add_string (&output_buf, "?", 1);
802 
803   /* Ideally we would advance one UTF-8 character.  This would
804      require knowing its length in bytes. */
805   (*from)++;
806   (*from_left)--;
807 
808   return 0;
809 }
810 
/* Convert N bytes from input to output encoding and write to
   output buffer, advancing the input position.  Bytes that cannot be
   converted are degraded to ASCII with degrade_utf8, or copied through
   unchanged as a last resort.  output_bytes_difference is updated with
   the difference between N and the number of bytes written.

   If the N bytes end in the middle of a multibyte sequence, further
   input bytes are consumed to complete it.  Return the number of such
   extra bytes consumed beyond N.  Return 0 without doing anything if
   iconv support is not compiled in. */
static int
copy_converting (long n)
{
#if !HAVE_ICONV
  return 0;
#else
  size_t bytes_left, orig_bytes_left;
  int extra_at_end;
  size_t iconv_ret;
  long output_start;

  size_t utf8_char_free;
  char utf8_char[4]; /* Maximum 4 bytes in a UTF-8 character */
  char *utf8_char_ptr, *orig_inptr;
  size_t i;

  /* Use n as an estimate of how many bytes will be required
     in target encoding. */
  text_buffer_alloc (&output_buf, (size_t) n);

  output_start = text_buffer_off (&output_buf);
  bytes_left = n;
  extra_at_end = 0;
  while (1)
    {
      iconv_ret = text_buffer_iconv (&output_buf, iconv_to_output,
                                     (ICONV_CONST char **)&inptr, &bytes_left);

      /* Make sure libiconv flushes out the last converted character.
         This is required when the conversion is stateful, in which
         case libiconv might not output the last character, waiting to
         see whether it should be combined with the next one.  */
      if (iconv_ret != (size_t) -1
          && text_buffer_iconv (&output_buf, iconv_to_output,
                                NULL, NULL) != (size_t) -1)
        /* Success: all of input converted. */
        break;

      /* There's been an error while converting. */
      switch (errno)
        {
        case E2BIG:
          /* Ran out of space in output buffer.  Allocate more
             and try again. */
          text_buffer_alloc (&output_buf, n);
          continue;
        case EINVAL:
          /* Incomplete byte sequence at end of input buffer.  Try to read
             more. */

          /* input_length - 2 is offset of last-but-one byte within input.
             This checks if there is at least one more byte within node
             contents. */
          if (inptr - input_start + (bytes_left - 1) <= input_length - 2)
            {
              bytes_left++;
              extra_at_end++;
            }
          else
            {
              /* Nothing more to read: pass the incomplete sequence
                 through as it is. */
              copy_direct (bytes_left);
              bytes_left = 0;
            }
          continue;
        default: /* Unknown error */
          info_error (_("Error converting file character encoding"));

          /* Skip past current input and hope we don't get an
             error next time. */
          inptr += bytes_left;
          return 0;
        case EILSEQ:
          /* Byte sequence in input not recognized.  Degrade to ASCII.  */
          break;
        }

      /* Flush any waiting input in iconv_to_output and enter the
         default shift state. */
      text_buffer_iconv (&output_buf, iconv_to_output, NULL, NULL);

      if (file_is_in_utf8)
        {
          degrade_utf8 (&inptr, &bytes_left);
          continue;
        }

      /* If file is not in UTF-8, we degrade to ASCII in two steps:
         first convert the character to UTF-8, then look up a replacement
         string.  Note that mixing iconv_to_output and iconv_to_utf8
         on the same input may not work well if the input encoding
         is stateful.  We could deal with this by always converting to
         UTF-8 first; then we could mix conversions on the UTF-8 stream. */

      /* We want to read exactly one character.  Do this by
         restricting size of output buffer. */
      utf8_char_ptr = utf8_char;
      orig_inptr = inptr;
      orig_bytes_left = bytes_left;
      for (i = 1; i <= 4; i++)
        {
          utf8_char_free = i;
          errno = 0;
          iconv_ret = iconv (iconv_to_utf8, (ICONV_CONST char **)&inptr,
                             &bytes_left, &utf8_char_ptr, &utf8_char_free);
          if ((iconv_ret == (size_t) -1 && errno != E2BIG)
              /* If we managed to convert a character: */
              || utf8_char_ptr > utf8_char)
            break;
        }

      /* errno == E2BIG if iconv ran out of output buffer,
         which is expected. */
      if (iconv_ret == (size_t) -1 && errno != E2BIG)
        {
          /* Character is not recognized.  Copy a single byte.  */
          inptr = orig_inptr;   /* iconv might have incremented inptr  */
          copy_direct (1);
          bytes_left = orig_bytes_left - 1;
        }
      else
        {
          utf8_char_ptr = utf8_char;
          /* i is width of UTF-8 character */
          degrade_utf8 (&utf8_char_ptr, &i);
          /* If we are done, make sure iconv flushes the last character.  */
          if (bytes_left <= 0)
            {
              /* Offer the whole of utf8_char to the flush.  The space
                 counter left over from the loop above is normally zero,
                 which would stop iconv from writing anything. */
              utf8_char_ptr = utf8_char;
              utf8_char_free = sizeof utf8_char;
              iconv (iconv_to_utf8, NULL, NULL,
                     &utf8_char_ptr, &utf8_char_free);
              if (utf8_char_ptr > utf8_char)
                {
                  /* Only look at the bytes the flush actually wrote, not
                     at stale bytes from the previous character. */
                  i = utf8_char_ptr - utf8_char;
                  utf8_char_ptr = utf8_char;
                  degrade_utf8 (&utf8_char_ptr, &i);
                }
            }
        }
    }

  /* Must cast because the difference between unsigned size_t is always
     positive. */
  output_bytes_difference +=
    n - ((signed long) text_buffer_off (&output_buf) - output_start);

  return extra_at_end;
#endif /* HAVE_ICONV */
}
961 
962 /* Functions below are named from the perspective of the preprocess_nodes_p
963    flag being on. */
964 
/* Copy N bytes of text from the input node contents (starting at INPTR)
   to the output, possibly converting the character encoding and adjusting
   anchor offsets at the same time.

   When rewrite_p is clear no output is produced and INPTR is simply
   advanced by N.  Otherwise the bytes are handed to copy_direct or, when
   convert_encoding_p is set, to copy_converting.  After each chunk, every
   anchor at or before the current input position gets its
   nodestart_adjusted field set from output_bytes_difference. */
static void
copy_input_to_output (long n)
{
  if (rewrite_p)
    {
      long bytes_left;

      bytes_left = n;
      while (bytes_left > 0)
        {
          if (!convert_encoding_p)
            {
              /* No conversion requested: hand the whole remainder to
                 copy_direct in a single call. */
              copy_direct (bytes_left);
              bytes_left = 0;
            }
          else
            {
              long bytes_to_convert;
              long extra_written;

              /* By default convert everything that is left; this is
                 shortened below if an anchor lies inside the range. */
              bytes_to_convert = bytes_left;

              if (anchor_to_adjust)
                {
                  /* Check there is an anchor in the input. */
                  /* Offset of the next anchor relative to the start of
                     the node contents. */
                  long first_anchor =
                    (*anchor_to_adjust)->nodestart - node_offset;

                  if (first_anchor < 0)
                    anchor_to_adjust = 0; /* error in input file */
                  else if (first_anchor < (inptr-input_start) + bytes_left)
                    {
                      /* Convert enough to pass the first anchor in input. */
                      bytes_to_convert = first_anchor - (inptr-input_start)+1;
                      if (bytes_to_convert < 0)
                        {
                          /* The anchor lies behind the current position:
                             stop tracking anchors and convert the whole
                             remainder instead. */
                          bytes_to_convert = bytes_left;
                          anchor_to_adjust = 0;
                        }
                    }
                }

              /* copy_converting may read more than bytes_to_convert
                 bytes if its input ends in an incomplete byte sequence. */
              extra_written = copy_converting (bytes_to_convert);

              /* Account for the extra bytes as well as the requested
                 ones. */
              bytes_left -= bytes_to_convert + extra_written;
            }

          /* Check if we have gone past any anchors and
             adjust with output_bytes_difference. */
          if (anchor_to_adjust)
            while ((*anchor_to_adjust)->nodestart - node_offset
                   <= inptr - input_start)
              {
                (*anchor_to_adjust)->nodestart_adjusted
                   = (*anchor_to_adjust)->nodestart - output_bytes_difference;

                /* Move on to the next tag.  Stop tracking at the end of
                   the tag array or at the first tag with a non-zero
                   cache.nodelen (presumably a node rather than an
                   anchor). */
                anchor_to_adjust++;
                if (!*anchor_to_adjust
                    || (*anchor_to_adjust)->cache.nodelen != 0)
                  {
                    anchor_to_adjust = 0;
                    break;
                  }
              }
        }
    }
  else
    /* Not rewriting: just move the input cursor. */
    inptr += n;
}
1038 
1039 static void
skip_input(long n)1040 skip_input (long n)
1041 {
1042   if (preprocess_nodes_p)
1043     {
1044       inptr += n;
1045       output_bytes_difference += n;
1046     }
1047   else if (rewrite_p)
1048     {
1049       /* We are expanding tags only.  Do not skip input. */
1050       copy_input_to_output (n);
1051     }
1052   else
1053     {
1054       inptr += n;
1055     }
1056 }
1057 
1058 static void
write_extra_bytes_to_output(char * input,long n)1059 write_extra_bytes_to_output (char *input, long n)
1060 {
1061   if (preprocess_nodes_p)
1062     {
1063       text_buffer_add_string (&output_buf, input, n);
1064       output_bytes_difference -= n;
1065     }
1066 }
1067 
1068 /* Like write_extra_bytes_to_output, but writes bytes even when
1069    preprocess_nodes=Off. */
1070 static void
write_tag_contents(char * input,long n)1071 write_tag_contents (char *input, long n)
1072 {
1073   if (rewrite_p)
1074     {
1075       text_buffer_add_string (&output_buf, input, n);
1076       output_bytes_difference -= n;
1077     }
1078 }
1079 
1080 /* Like skip_input, but skip even when !preprocess_nodes_p. */
1081 static void
skip_tag_contents(long n)1082 skip_tag_contents (long n)
1083 {
1084   if (rewrite_p)
1085     {
1086       inptr += n;
1087       output_bytes_difference += n;
1088     }
1089 }
1090 
1091 /* Read first line of node and set next, prev and up. */
1092 static void
parse_top_node_line(NODE * node)1093 parse_top_node_line (NODE *node)
1094 {
1095   char **store_in = 0;
1096   char *nodename;
1097   char *ptr;
1098   int value_length;
1099 
1100   /* If the first line is empty, leave it in.  This is the case
1101      in the index-apropos window. */
1102   if (*node->contents == '\n')
1103     return;
1104 
1105   node->next = node->prev = node->up = 0;
1106   ptr = node->contents;
1107 
1108   while (1)
1109     {
1110       store_in = 0;
1111 
1112       ptr += skip_whitespace (ptr);
1113 
1114       /* Check what field we are looking at */
1115       if (!strncasecmp (ptr, INFO_FILE_LABEL, strlen(INFO_FILE_LABEL)))
1116         {
1117           ptr += strlen (INFO_FILE_LABEL);
1118         }
1119       else if (!strncasecmp (ptr, INFO_NODE_LABEL, strlen(INFO_NODE_LABEL)))
1120         {
1121           ptr += strlen (INFO_NODE_LABEL);
1122         }
1123       else if (!strncasecmp (ptr, INFO_PREV_LABEL, strlen(INFO_PREV_LABEL)))
1124         {
1125           ptr += strlen (INFO_PREV_LABEL);
1126           store_in = &node->prev;
1127         }
1128       else if (!strncasecmp (ptr, INFO_ALTPREV_LABEL,
1129                              strlen(INFO_ALTPREV_LABEL)))
1130         {
1131           ptr += strlen (INFO_ALTPREV_LABEL);
1132           store_in = &node->prev;
1133         }
1134       else if (!strncasecmp (ptr, INFO_NEXT_LABEL, strlen(INFO_NEXT_LABEL)))
1135         {
1136           ptr += strlen (INFO_NEXT_LABEL);
1137           store_in = &node->next;
1138         }
1139       else if (!strncasecmp (ptr, INFO_UP_LABEL, strlen(INFO_UP_LABEL)))
1140         {
1141           ptr += strlen (INFO_UP_LABEL);
1142           store_in = &node->up;
1143         }
1144       else
1145         {
1146           store_in = 0;
1147           /* Not recognized - code below will skip to next comma */
1148         }
1149       ptr += skip_whitespace (ptr);
1150 
1151       /* Get length of a bracketed filename component. */
1152       if (*ptr != '(')
1153         value_length = 0;
1154       else
1155         value_length = read_bracketed_filename (ptr, 0);
1156 
1157       /* Get length of node name, or filename if following "File:".  Note
1158          that .  is not included in the second argument here in order to
1159          support this character in file names. */
1160       value_length += read_quoted_string (ptr + value_length,
1161                                           "\n\r\t,", 1, &nodename);
1162       if (store_in)
1163         {
1164           *store_in = xmalloc (value_length + 1);
1165           strncpy (*store_in, ptr, value_length);
1166           (*store_in)[value_length] = '\0';
1167         }
1168 
1169       free (nodename);
1170       ptr += value_length;
1171 
1172       if (*ptr == '\n' || !*ptr)
1173         break;
1174 
1175       ptr += 1; /* Point after field terminator */
1176     }
1177 }
1178 
1179 /* Output, replace or hide text introducing a reference.  INPTR starts on
1180    the first byte of a sequence introducing a reference and finishes on the
1181    first (non-whitespace) byte of the reference label. */
1182 static int
scan_reference_marker(REFERENCE * entry,int in_parentheses)1183 scan_reference_marker (REFERENCE *entry, int in_parentheses)
1184 {
1185   /* When preprocess_nodes is Off, we position the cursor on
1186      the "*" when moving between references. */
1187   if (!preprocess_nodes_p)
1188     {
1189       if (rewrite_p)
1190         entry->start = text_buffer_off(&output_buf);
1191       else
1192         entry->start = inptr - input_start;
1193     }
1194 
1195   /* Check what we found based on first character of match */
1196   if (inptr[0] == '\n')
1197     {
1198       entry->type = REFERENCE_MENU_ITEM;
1199       if (!preprocess_nodes_p)
1200         entry->start++;
1201     }
1202   else
1203     entry->type = REFERENCE_XREF;
1204 
1205   if (entry->type == REFERENCE_MENU_ITEM)
1206     copy_input_to_output (strlen ("\n* "));
1207   else
1208     {
1209       /* Only match "*Note" if it is followed by a whitespace character so that
1210          it will not be recognized if, e.g., it is surrounded in inverted
1211          commas. */
1212       if (!strchr (" \t\r\n", inptr[strlen ("*Note")]))
1213         {
1214           copy_input_to_output (strlen ("*Note:"));
1215           return 0;
1216         }
1217 
1218       /* Cross-references can be generated by four different Texinfo
1219          commands.  @inforef and @xref output "*Note " in Info format,
1220          and "See" in HTML and print.  @ref and @pxref output "*note "
1221          in Info format, and either nothing at all or "see" in HTML
1222          and print.  Unfortunately, there is no easy way to distinguish
1223          between these latter two cases. */
1224       /* TODO: Internationalize these strings, but only if we know the
1225          language of the document. */
1226       if (inptr[1] == 'N')
1227         {
1228           write_extra_bytes_to_output ("See", 3);
1229           in_parentheses = 1;
1230         }
1231       else if (in_parentheses)
1232         {
1233           write_extra_bytes_to_output ("see", 3);
1234           /* Only output the "see" for input like "(*note ...)", which
1235              would have come from a use of @pxref.  We used to output "see" for
1236              "*note" in more circumstances, with a list of words where to
1237              suppress it (to avoid "see *note" turning into "see see"), but
1238              such a list can't be complete or reliable.  It's better to remove
1239              it with more enthusiasm, then if the document writer wants a "see"
1240              to appear, they can add one themselves. */
1241         }
1242 
1243       skip_input (strlen ("*Note"));
1244       if (!in_parentheses)
1245         skip_input (skip_whitespace (inptr));
1246     }
1247 
1248   /* Copy any white space before label. */
1249   copy_input_to_output (skip_whitespace_and_newlines (inptr));
1250 
1251   return 1;
1252 }
1253 
1254 /* Output reference label and update ENTRY.  INPTR should be on the first
1255    non-whitespace byte of label when this function is called.  It is left
1256    at the first character after the colon terminating the label.  Return 0 if
1257    invalid syntax is encountered. */
1258 static int
scan_reference_label(REFERENCE * entry,int in_index)1259 scan_reference_label (REFERENCE *entry, int in_index)
1260 {
1261   int max_lines;
1262   int len, label_len = 0;
1263 
1264   /* Handle case of cross-reference like (FILE)NODE::. */
1265   if (inptr[0] == '(')
1266     label_len = read_bracketed_filename (inptr, &entry->filename);
1267 
1268   /* Search forward to ":" to get label name.  Cross-references may have
1269      a newline in the middle. */
1270   if (entry->type == REFERENCE_MENU_ITEM)
1271     max_lines = 1;
1272   else
1273     max_lines = 2;
1274   if (!in_index || inptr[label_len] == '\177')
1275     {
1276       len = read_quoted_string (inptr + label_len, ":", max_lines,
1277                                 &entry->nodename);
1278       canonicalize_whitespace (entry->nodename);
1279       if (!len)
1280         return 0; /* Input invalid. */
1281       label_len += len;
1282     }
1283   else
1284     {
1285       /* If in an index node, go forward to the last colon on the line
1286          (not preceded by a newline, NUL or DEL).  This is in order to
1287          support index entries containing colons.  This should work fine
1288          as long as the node name does not contain a colon as well. */
1289 
1290       char *p;
1291       int n, m = 0;
1292       p = inptr + label_len;
1293 
1294       while (1)
1295         {
1296           n = strcspn (p, ":\n\177");
1297           if (p[n] == ':')
1298             {
1299               m += n + 1;
1300               p += n + 1;
1301               continue;
1302             }
1303           break;
1304         }
1305       if (m == 0)
1306         return 0; /* no : found */
1307       label_len += m - 1;
1308     }
1309 
1310   entry->label = xmalloc (label_len + 1);
1311   memcpy (entry->label, inptr, label_len);
1312   entry->label[label_len] = '\0';
1313   canonicalize_whitespace (entry->label);
1314 
1315   if (preprocess_nodes_p)
1316     entry->start = text_buffer_off (&output_buf);
1317 
1318   /* Write text of label. */
1319   copy_input_to_output (label_len);
1320 
1321   if (rewrite_p)
1322     entry->end = text_buffer_off (&output_buf);
1323   else
1324     entry->end = inptr - input_start;
1325 
1326   /* Colon after label. */
1327   if (*inptr)
1328     skip_input (1);
1329   /* Don't mess up the margin of a menu description. */
1330   if (entry->type == REFERENCE_MENU_ITEM)
1331     write_extra_bytes_to_output (" ", 1);
1332 
1333   return 1;
1334 }
1335 
/* Parse the explicit target that follows a reference label, storing the
   file name and node name in ENTRY->filename and ENTRY->nodename, and
   hide or rewrite the target text in the output.  INPTR should be at the
   first character after the colon terminating the label.

   For a cross-reference, "(FILE manual)" is written in place of the
   hidden target when a file name is present, and a line break that
   occurred inside the target is re-created.  For a menu item, the target
   is hidden (or copied through in an index node) and any "(line N)"
   suffix is parsed into ENTRY->line_number.  NODE supplies the N_IsDir
   and N_IsIndex flags; IN_PARENTHESES says whether the reference was
   introduced as "(*note".

   Return 0 on syntax error.
   NOTE(review): every path visible here returns 1 -- confirm whether an
   error return is still intended. */
static int
scan_reference_target (REFERENCE *entry, NODE *node, int in_parentheses)
{
  int i;

  /* This entry continues with a specific target.  Parse the
     file name and node name from the specification. */

  if (entry->type == REFERENCE_XREF)
    {
      int length = 0; /* Length of specification */
      char *target_start = inptr;
      char *nl_off = 0;
      int space_at_start_of_line = 0;

      length += skip_whitespace_and_newlines (inptr);

      length += read_bracketed_filename (inptr + length, &entry->filename);

      length += skip_whitespace_and_newlines (inptr + length);

      /* Get the node name. */
      length += read_quoted_string (inptr + length, ",.", 2, &entry->nodename);

      skip_input (length);

      /* Check if there is a newline in the target. */
      nl_off = strchr (target_start, '\n');
      if (nl_off)
        {
          /* Only a newline before the current input position counts; in
             that case remember how far the continuation line was
             indented. */
          if (nl_off < inptr)
            space_at_start_of_line = skip_whitespace (nl_off + 1);
          else
            nl_off = 0;
        }
      canonicalize_whitespace (entry->nodename);

      if (entry->filename)
        {
          /* Heuristic of whether it's worth outputting a newline before the
             filename.  This checks whether the newline appears more
             than half way through the text, and therefore which side is
             longer. */
          if (nl_off
              && nl_off < target_start + (length - space_at_start_of_line) / 2)
            {
              int i;
              write_extra_bytes_to_output ("\n", 1);

              /* Reproduce the indentation of the continuation line. */
              for (i = 0; i < space_at_start_of_line; i++)
                write_extra_bytes_to_output (" ", 1);
              skip_input (strspn (inptr, " "));
              /* The line break has been emitted; do not emit it again
                 below. */
              nl_off = 0;
            }
          else

          /* This "if" is the body of the "else" above: separate the
             file name from the label with a space unless a newline
             follows immediately. */
          if (*inptr != '\n')
            {
              write_extra_bytes_to_output (" ", 1);
            }
          write_extra_bytes_to_output ("(", 1);
          write_extra_bytes_to_output (entry->filename,
                                       strlen (entry->filename));
          write_extra_bytes_to_output (" manual)",
                                       strlen (" manual)"));
        }

      /* Hide terminating punctuation if we are in a reference
         like "(*note Label:(file)node.)". */
      if (in_parentheses && inptr[0] == '.')
        skip_input (1);

      /* Copy any terminating punctuation before the optional newline. */
      copy_input_to_output (strspn (inptr, ".),"));

      /* Output a newline if one is needed.  Don't do it at the end of
         a paragraph. */
      if (nl_off && *inptr != '\n')
        {
          int i;

          write_extra_bytes_to_output ("\n", 1);
          for (i = 0; i < space_at_start_of_line; i++)
            write_extra_bytes_to_output (" ", 1);
          skip_input (strspn (inptr, " "));
        }
    }
  else /* entry->type == REFERENCE_MENU_ITEM */
    {
      /* line_len is only assigned, and only read, when NODE has the
         N_IsDir flag. */
      int line_len;
      int length = 0; /* Length of specification */

      length = skip_whitespace (inptr);
      length += read_bracketed_filename (inptr + length, &entry->filename);
      length += strspn (inptr + length, " ");

      /* Get the node name. */
      length += read_quoted_string (inptr + length, ",.\t\n", 2,
                                    &entry->nodename);
      if (inptr[length] == '.') /* A '.' terminating the entry. */
        length++;

      if (node->flags & N_IsDir)
        {
          /* Set line_len to length of line so far. */

          char *linestart;
          linestart = memrchr (input_start, '\n', inptr - input_start);
          if (!linestart)
            linestart = input_start;
          else
            linestart++; /* Point to first character after newline. */
          line_len = inptr - linestart;
        }

      if (node->flags & N_IsIndex)
        /* Show the name of the node the index entry refers to. */
        copy_input_to_output (length);
      else
        {
          skip_input (length);

          if ((node->flags & N_IsDir) && inptr[strspn (inptr, " ")] == '\n')
            {
              /* For a dir node, if there is no more text in this line,
                 check if there is a menu entry description in the next
                 line to the right of the end of the label, and display it
                 in this line. */
              skip_input (strspn (inptr, " "));
              /* Skip the newline plus line_len columns of indentation
                 when the next line is indented at least that far. */
              if (line_len <= strspn (inptr + 1, " "))
                skip_input (1 + line_len);
            }
          else
            {
              /* Pad with as many spaces as bytes were hidden, which keeps
                 the text that follows in the same column. */
              for (i = 0; i < length; i++)
                write_extra_bytes_to_output (" ", 1);
            }
        }

      /* Parse "(line ...)" part of menus, if any.  */
      {
        char *lineptr = inptr;
        /* Skip any whitespace first, and then a newline in case the item
           was so long to contain the ``(line ...)'' string in the same
           physical line.  */
        lineptr += skip_whitespace (inptr);
        if (*lineptr == '\n')
          lineptr += 1 + skip_whitespace (lineptr + 1);

        if (!strncmp (lineptr, "(line ", strlen ("(line ")))
          {
            lineptr += strlen ("(line ");
            entry->line_number = strtol (lineptr, 0, 0);
          }
        else
          entry->line_number = 0;
      }
    }

  return 1;
}
1499 
/* Bounds-checked read of PTR[INDEX].  BASE is the start of the allocated
   block that PTR points into, and the block holds at least LEN bytes
   starting at BASE.  If PTR + INDEX falls before BASE or at/after
   BASE + LEN, return 0 instead of touching memory. */
static char
safe_string_index (char *ptr, long index, char *base, long len)
{
  /* Position of the requested byte relative to BASE. */
  long pos = (ptr - base) + index;

  if (pos >= 0 && pos < len)
    return ptr[index];

  return 0;
}
1514 
1515 /* Process an in index marker ("^@^H[index^@^H]") or an image marker
1516    ("^@^H[image ...^@^H]"). */
1517 static void
scan_info_tag(NODE * node,int * in_index,FILE_BUFFER * fb)1518 scan_info_tag (NODE *node, int *in_index, FILE_BUFFER *fb)
1519 {
1520   char *p, *p1;
1521   struct text_buffer *expansion = xmalloc (sizeof (struct text_buffer));
1522 
1523   p = inptr;
1524   p1 = p;
1525 
1526   text_buffer_init (expansion);
1527 
1528   if (tag_expand (&p1, input_start + input_length, expansion, in_index))
1529     {
1530       if (*in_index)
1531         node->flags |= N_IsIndex;
1532 
1533       if (!rewrite_p)
1534         {
1535           rewrite_p = 1;
1536           init_output_stream (fb);
1537 
1538           /* Put inptr back to start so that
1539              copy_input_to_output below gets all
1540              preceding contents. */
1541           inptr = node->contents;
1542         }
1543 
1544       /* Write out up to tag. */
1545       copy_input_to_output (p - inptr);
1546 
1547       write_tag_contents (text_buffer_base (expansion),
1548                           text_buffer_off (expansion));
1549       /* Skip past body of tag. */
1550       skip_tag_contents (p1 - inptr);
1551     }
1552   else
1553     {
1554       /* It was not a valid tag. */
1555       copy_input_to_output (p - inptr + 1);
1556     }
1557 
1558   text_buffer_free (expansion);
1559   free (expansion);
1560 }
1561 
/* Non-zero if the text at CONTENTS begins with STRING, ignoring case.
   STRING is evaluated twice, so it should not have side effects. */
#define looking_at_string(contents, string) \
  (!strncasecmp (contents, string, strlen (string)))
1564 
1565 static char *
forward_to_info_syntax(char * contents)1566 forward_to_info_syntax (char *contents)
1567 {
1568   /* Loop until just before the end of the input.  The '- 3' prevents us
1569      accessing memory after the end of the input, and none of the strings we
1570      are looking for are shorter than 3 bytes. */
1571   while (contents < input_start + input_length - 3)
1572     {
1573       /* Menu entry comes first to optimize for the case of looking through a
1574          long index node. */
1575       if (looking_at_string (contents, INFO_MENU_ENTRY_LABEL)
1576           || looking_at_string (contents, INFO_XREF_LABEL)
1577           || !memcmp (contents, "\0\b[", 3))
1578         return contents;
1579       contents++;
1580     }
1581   return 0;
1582 }
1583 
/* Scan contents of NODE, recording cross-references and similar.

   Convert character encoding of node contents to that of the user if the two
   are known to be different.  If PREPROCESS_NODES_P == 1, remove Info syntax
   in contents.

   If FB is non-null, it is the file containing the node, and TAG_PTR is an
   offset into FB->tags.  If the node contents are rewritten, adjust anchors
   that occur in the node and store adjusted value as TAG->nodestart_adjusted,
   otherwise simply copy TAG->nodestart to TAG->nodestart_adjusted for each
   anchor in the node.

   On return NODE->references holds the null-terminated array of references
   found.  If the contents were rewritten, NODE->contents and NODE->nodelen
   describe the new text and N_WasRewritten is set in NODE->flags. */
void
scan_node_contents (NODE *node, FILE_BUFFER *fb, TAG **tag_ptr)
{
  int in_menu = 0;
  char *match;

  REFERENCE **refs = NULL;
  size_t refs_index = 0, refs_slots = 0;

  /* Whether an index tag was seen. */
  int in_index = 0;

  /* Rewriting starts out enabled only if Info syntax is to be hidden;
     scan_info_tag may switch it on later. */
  rewrite_p = preprocess_nodes_p;

  init_output_stream (fb);

  if (fb)
    {
      char *file_contents;

      /* Set anchor_to_adjust to first anchor in node, if any. */
      /* The tag after the node's own tag is an anchor of this node only
         if it exists and its cache.nodelen is zero. */
      anchor_to_adjust = tag_ptr + 1;
      if (!*anchor_to_adjust)
        anchor_to_adjust = 0;
      else if (*anchor_to_adjust
               && (*anchor_to_adjust)->cache.nodelen != 0)
        anchor_to_adjust = 0;

      if (!node->subfile)
        file_contents = fb->contents;
      else
        {
          FILE_BUFFER *f = info_find_subfile (node->subfile);
          /* NOTE(review): this early return happens after
             init_output_stream and before NODE->references is set --
             confirm that callers cope with that. */
          if (!f)
            return; /* This shouldn't happen. */
          file_contents = f->contents;
        }
      /* Offset in the file at which the node contents start, i.e. just
         after the node separator. */
      node_offset = (*tag_ptr)->nodestart
        + skip_node_separator (file_contents + (*tag_ptr)->nodestart);
    }
  else
    anchor_to_adjust = 0;

  /* Initialize refs to point to array of one null pointer in case
     there are no results.  This way we know if refs has been initialized
     even if it is empty. */
  /* NOTE(review): the result of calloc is not checked, unlike the
     xmalloc-style allocations used elsewhere in this file. */
  refs = calloc (1, sizeof *refs);
  refs_slots = 1;

  parse_top_node_line (node);

  /* This should be the only time we assign to inptr in this function -
     all other assignment should be done with the helper functions above. */
  inptr = node->contents;
  input_start = node->contents;
  input_length = node->nodelen;


  while ((match = forward_to_info_syntax (inptr))
          && match < node->contents + node->nodelen)
    {
      int in_parentheses = 0;
      REFERENCE *entry;

      /* Write out up to match */
      copy_input_to_output (match - inptr);

      /* A newline match is only a menu entry once "* Menu:" has been
         seen; a "*" match is a cross-reference candidate. */
      if ((in_menu && match[0] == '\n') || match[0] == '*')
        {
          /* Menu entry or cross reference. */
          /* Create REFERENCE entity. */
          entry = info_new_reference (0, 0);

          /* Detect "(*n...", i.e. a lower-case "*note" directly after an
             opening parenthesis. */
          if (safe_string_index (inptr, -1, input_start, input_length) == '('
             && safe_string_index (inptr, 1, input_start, input_length) == 'n')
            in_parentheses = 1;

          /* Remember the current state so that the not_a_reference path
             can undo whatever the scan_reference_* calls did. */
          save_conversion_state ();

          if (!scan_reference_marker (entry, in_parentheses))
            goto not_a_reference;

          if (!scan_reference_label (entry, in_index))
            goto not_a_reference;

          /* If this reference entry continues with another ':' then the target
             of the reference is given by the label. */
          if (*inptr == ':')
            {
              int label_len;
              skip_input (1);
              if (entry->type == REFERENCE_MENU_ITEM)
                write_extra_bytes_to_output (" ", 1);

              /* Remove the DEL bytes from a label like "(FOO)^?BAR^?::". */
              /* NOTE(review): this assumes two DEL bytes.  With only a
                 trailing DEL the memmove moves nothing and the store
                 below also drops the last real character -- confirm
                 whether such input can occur. */
              label_len = strlen (entry->label);
              if (label_len >= 2 && entry->label[label_len - 1] == 0177)
                {
                  char *p = strchr (entry->label, '\177');
                  memmove (p, p + 1, label_len - (p - entry->label) - 1);
                  entry->label[label_len - 2] = '\0';
                }
            }
          else
            {
              /* Proceed to read the rest of the reference. */
              /* TODO: we should probably not allow references of the form
                 "(file)node1:node2." or "(file1)node1:(file2)node2", so
                 bail out here if entry->filename is non-null. */

              /* Whatever the label scan stored is replaced by the
                 explicit target parsed next. */
              free (entry->filename); entry->filename = 0;
              free (entry->nodename); entry->nodename = 0;
              if (!scan_reference_target (entry, node, in_parentheses))
                goto not_a_reference;
            }

          /* This block is only ever entered through the gotos above. */
          if (0)
            {
              char *cur_inptr;

not_a_reference:
              /* This is not a menu entry or reference.  Do not add to our
                 list. */
              /* Go back to the saved state, then copy through unchanged
                 everything the failed scan had consumed. */
              cur_inptr = inptr;
              reset_conversion ();
              copy_input_to_output (cur_inptr - inptr);

              info_reference_free (entry);
              continue;
            }

          add_pointer_to_array (entry, refs_index, refs, refs_slots, 50);
        }
      /* Was "* Menu:" seen?  If so, search for menu entries hereafter. */
      else if (!in_menu && !strncmp (match, INFO_MENU_LABEL,
                               strlen (INFO_MENU_LABEL)))
        {
          in_menu = 1;
          skip_input (strlen ("\n* Menu:"));
          if (*inptr == '\n')
            skip_input (strspn (inptr, "\n") - 1); /* Keep one newline. */

        }
      else if (match[0] == '\0') /* Info tag */
        {
          scan_info_tag (node, &in_index, fb);
        }
      else
        /* A newline match outside a menu: copy one byte and carry on. */
        copy_input_to_output (1);
    }

  /* If we haven't accidentally gone past the end of the node, write
     out the rest of it. */
  if (inptr < node->contents + node->nodelen)
    copy_input_to_output ((node->contents + node->nodelen) - inptr);

  /* Null to terminate buffer. */
  if (rewrite_p)
    text_buffer_add_string (&output_buf, "\0", 1);

  /* Free resources used in character encoding conversion. */
  close_conversion ();

  node->references = refs;

  if (rewrite_p)
    {
      /* Hand the rewritten text over to the node. */
      node->contents = text_buffer_base (&output_buf);
      node->flags |= N_WasRewritten;

      /* output_buf.off is the offset of the next character to be
         written.  Subtracting 1 gives the offset of our terminating
         null, that is, the length. */
      node->nodelen = text_buffer_off (&output_buf) - 1;
    }
  else if (fb && tag_ptr)
    {
      /* Set nodestart_adjusted for all of the anchors in this node. */
      tag_ptr++;
      while (*tag_ptr && (*tag_ptr)->cache.nodelen == 0)
        {
          (*tag_ptr)->nodestart_adjusted = (*tag_ptr)->nodestart
                                             - output_bytes_difference;
          tag_ptr++;
        }
    }
}
1782 
1783 
1784 /* Various utility functions */
1785 
1786 /* Return the file buffer which belongs to WINDOW's node. */
1787 FILE_BUFFER *
file_buffer_of_window(WINDOW * window)1788 file_buffer_of_window (WINDOW *window)
1789 {
1790   /* If this window has no node, then it has no file buffer. */
1791   if (!window->node)
1792     return NULL;
1793 
1794   if (window->node->fullpath)
1795     return info_find_file (window->node->fullpath);
1796 
1797   return NULL;
1798 }
1799 
1800 /* Return "(FILENAME)NODENAME" for NODE, or just "NODENAME" if NODE's
1801    filename is not set.  Return value should not be freed. */
1802 char *
node_printed_rep(NODE * node)1803 node_printed_rep (NODE *node)
1804 {
1805   static char *rep;
1806 
1807   if (node->fullpath)
1808     {
1809       char *filename = filename_non_directory (node->fullpath);
1810       rep = xrealloc (rep, 1 + strlen (filename) + 1 + strlen (node->nodename) + 1);
1811       sprintf (rep, "(%s)%s", filename, node->nodename);
1812       return rep;
1813     }
1814   else
1815     return node->nodename;
1816 }
1817 
1818 
/* Return a pointer to the final component of PATHNAME, i.e. the part
   after the last directory separator.  The pointer is into PATHNAME
   itself.  A leading drive specifier (as detected by HAVE_DRIVE) is never
   returned as part of the file name. */
char *
filename_non_directory (char *pathname)
{
  char *floor = pathname;   /* Earliest position the result may take. */
  char *tail = pathname + strlen (pathname);

  if (HAVE_DRIVE (pathname))
    floor += 2;

  /* Walk back from the end until just after a separator. */
  for (; tail > floor; tail--)
    if (IS_SLASH (tail[-1]))
      break;

  return tail;
}
1833 
1834 /* Return non-zero if NODE is one especially created by Info. */
1835 int
internal_info_node_p(NODE * node)1836 internal_info_node_p (NODE *node)
1837 {
1838   return (node != NULL) && (node->flags & N_IsInternal);
1839 }
1840 
1841 /* Make NODE appear to be one especially created by Info. */
1842 void
name_internal_node(NODE * node,char * name)1843 name_internal_node (NODE *node, char *name)
1844 {
1845   if (!node)
1846     return;
1847 
1848   node->fullpath = "";
1849   node->subfile = 0;
1850   node->nodename = name;
1851   node->flags |= N_IsInternal;
1852 }
1853 
1854 /* Return the window displaying NAME, the name of an internally created
1855    Info window. */
1856 WINDOW *
get_internal_info_window(char * name)1857 get_internal_info_window (char *name)
1858 {
1859   WINDOW *win;
1860 
1861   for (win = windows; win; win = win->next)
1862     if (internal_info_node_p (win->node) &&
1863         (strcmp (win->node->nodename, name) == 0))
1864       break;
1865 
1866   return win;
1867 }
1868 
1869 /* Flexible Text Buffer */
1870 
1871 void
text_buffer_init(struct text_buffer * buf)1872 text_buffer_init (struct text_buffer *buf)
1873 {
1874   memset (buf, 0, sizeof *buf);
1875 }
1876 
1877 void
text_buffer_free(struct text_buffer * buf)1878 text_buffer_free (struct text_buffer *buf)
1879 {
1880   free (buf->base);
1881 }
1882 
1883 size_t
text_buffer_vprintf(struct text_buffer * buf,const char * format,va_list ap)1884 text_buffer_vprintf (struct text_buffer *buf, const char *format, va_list ap)
1885 {
1886   ssize_t n;
1887   va_list ap_copy;
1888 
1889   if (!buf->base)
1890     {
1891       if (buf->size == 0)
1892 	buf->size = MIN_TEXT_BUF_ALLOC; /* Initial allocation */
1893 
1894       buf->base = xmalloc (buf->size);
1895     }
1896 
1897   for (;;)
1898     {
1899       va_copy (ap_copy, ap);
1900       n = vsnprintf (buf->base + buf->off, buf->size - buf->off,
1901 		     format, ap_copy);
1902       va_end (ap_copy);
1903       if (n < 0 || buf->off + n >= buf->size ||
1904 	  !memchr (buf->base + buf->off, '\0', buf->size - buf->off + 1))
1905 	{
1906 	  size_t newlen = buf->size * 2;
1907 	  if (newlen < buf->size)
1908 	    xalloc_die ();
1909 	  buf->size = newlen;
1910 	  buf->base = xrealloc (buf->base, buf->size);
1911 	}
1912       else
1913 	{
1914 	  buf->off += n;
1915 	  break;
1916 	}
1917     }
1918   return n;
1919 }
1920 
1921 /* Make sure there are LEN free bytes at end of BUF. */
1922 void
text_buffer_alloc(struct text_buffer * buf,size_t len)1923 text_buffer_alloc (struct text_buffer *buf, size_t len)
1924 {
1925   if (buf->off + len > buf->size)
1926     {
1927       buf->size = buf->off + len;
1928       if (buf->size < MIN_TEXT_BUF_ALLOC)
1929 	buf->size = MIN_TEXT_BUF_ALLOC;
1930       buf->base = xrealloc (buf->base, buf->size);
1931     }
1932 }
1933 
1934 /* Return number of bytes that can be written to text buffer without
1935    reallocating the text buffer. */
1936 size_t
text_buffer_space_left(struct text_buffer * buf)1937 text_buffer_space_left (struct text_buffer *buf)
1938 {
1939   /* buf->size is the offset of the first byte after the allocated space.
1940      buf->off is the offset of the first byte to be written to. */
1941   return buf->size - buf->off;
1942 }
1943 
1944 #if HAVE_ICONV
1945 
1946 /* Run iconv using text buffer as output buffer. */
1947 size_t
text_buffer_iconv(struct text_buffer * buf,iconv_t iconv_state,ICONV_CONST char ** inbuf,size_t * inbytesleft)1948 text_buffer_iconv (struct text_buffer *buf, iconv_t iconv_state,
1949                    ICONV_CONST char **inbuf, size_t *inbytesleft)
1950 {
1951   size_t out_bytes_left;
1952   char *outptr;
1953   size_t iconv_ret;
1954 
1955   outptr = text_buffer_base (buf) + text_buffer_off (buf);
1956   out_bytes_left = text_buffer_space_left (buf);
1957   iconv_ret = iconv (iconv_state, inbuf, inbytesleft,
1958                      &outptr, &out_bytes_left);
1959 
1960   text_buffer_off (buf) = outptr - text_buffer_base (buf);
1961 
1962   return iconv_ret;
1963 }
1964 
1965 #endif /* HAVE_ICONV */
1966 
1967 size_t
text_buffer_add_string(struct text_buffer * buf,const char * str,size_t len)1968 text_buffer_add_string (struct text_buffer *buf, const char *str, size_t len)
1969 {
1970   text_buffer_alloc (buf, len);
1971   memcpy (buf->base + buf->off, str, len);
1972   buf->off += len;
1973   return len;
1974 }
1975 
1976 size_t
text_buffer_fill(struct text_buffer * buf,int c,size_t len)1977 text_buffer_fill (struct text_buffer *buf, int c, size_t len)
1978 {
1979   char *p;
1980   int i;
1981 
1982   text_buffer_alloc (buf, len);
1983 
1984   for (i = 0, p = buf->base + buf->off; i < len; i++)
1985     *p++ = c;
1986   buf->off += len;
1987 
1988   return len;
1989 }
1990 
/* Append the single byte C, converted to char, to BUF. */
void
text_buffer_add_char (struct text_buffer *buf, int c)
{
  char byte[1];

  byte[0] = c;
  text_buffer_add_string (buf, byte, 1);
}
1997 
/* Append formatted output to BUF, printf-style.  This is the variadic
   front end to text_buffer_vprintf and returns whatever that returns. */
size_t
text_buffer_printf (struct text_buffer *buf, const char *format, ...)
{
  va_list args;
  size_t written;

  va_start (args, format);
  written = text_buffer_vprintf (buf, format, args);
  va_end (args);

  return written;
}
2009 
2010 #if defined(__MSDOS__) || defined(__MINGW32__)
/* Compare the file names FN1 and FN2, ignoring letter case and treating
   any two characters for which IS_SLASH is true as equal.  Return zero
   if the names match under those rules; otherwise return the difference
   between the lower-cased values of the first pair of bytes that differ.

   Cannot use FILENAME_CMP here, since that does not consider forward-
   and back-slash characters equal.  */
int
fncmp (const char *fn1, const char *fn2)
{
  /* Read the names as unsigned char: passing a negative char value
     (a byte above 0x7f where char is signed) to tolower is undefined
     behavior.  */
  const unsigned char *s1 = (const unsigned char *) fn1;
  const unsigned char *s2 = (const unsigned char *) fn2;

  while (tolower (*s1) == tolower (*s2)
         || (IS_SLASH (*s1) && IS_SLASH (*s2)))
    {
      /* Both names ended together without a difference.  */
      if (*s1 == 0)
        return 0;
      s1++;
      s2++;
    }

  return tolower (*s1) - tolower (*s2);
}
2029 #endif
2030 
/* One element of a singly linked list of names. */
struct info_namelist_entry
{
  /* The following entry in the list, or null for the last one.  */
  struct info_namelist_entry *next;

  /* The name, as a null-terminated string.  Declared with a single
     element; info_namelist_add allocates each entry with enough extra
     room after the structure to hold the whole string.  */
  char name[1];
};
2036 
2037 int
info_namelist_add(struct info_namelist_entry ** ptop,const char * name)2038 info_namelist_add (struct info_namelist_entry **ptop, const char *name)
2039 {
2040   struct info_namelist_entry *p;
2041 
2042   for (p = *ptop; p; p = p->next)
2043     if (fncmp (p->name, name) == 0)
2044       return 1;
2045 
2046   p = xmalloc (sizeof (*p) + strlen (name));
2047   strcpy (p->name, name);
2048   p->next = *ptop;
2049   *ptop = p;
2050   return 0;
2051 }
2052 
2053 void
info_namelist_free(struct info_namelist_entry * top)2054 info_namelist_free (struct info_namelist_entry *top)
2055 {
2056   while (top)
2057     {
2058       struct info_namelist_entry *next = top->next;
2059       free (top);
2060       top = next;
2061     }
2062 }
2063 
2064