/* info-utils.c -- miscellaneous.
2
3 Copyright 1993-2020 Free Software Foundation, Inc.
4
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation, either version 3 of the License, or
8 (at your option) any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with this program. If not, see <http://www.gnu.org/licenses/>.
17
18 Originally written by Brian Fox. */
19
20 #include "info.h"
21 #include "session.h"
22 #include "info-utils.h"
23 #include "tag.h"
24
25 #include <langinfo.h>
26 #if HAVE_ICONV
27 # include <iconv.h>
28 #endif
29 #include <wchar.h>
30 #ifdef __MINGW32__
31 /* MinGW uses a replacement nl_langinfo, see pcterm.c. */
32 # define nl_langinfo rpl_nl_langinfo
33 extern char * rpl_nl_langinfo (nl_item);
34 /* MinGW uses its own replacement wcwidth, see pcterm.c for the
35 reasons. Since Gnulib's wchar.h might redirect wcwidth to
36 rpl_wcwidth, we explicitly undo that here. */
37 #undef wcwidth
38 #endif
39
40 #ifdef __hpux
41 #define va_copy(ap1,ap2) memcpy((&ap1),(&ap2),sizeof(va_list))
42 #endif
43
/* Variable which holds the most recent filename parsed as a result of
   calling info_parse_xxx ().  The string is owned by this file:
   info_parse_node frees the previous value and resets the variable on
   every call, so callers must neither free it nor rely on it surviving
   the next parse. */
char *info_parsed_filename = NULL;

/* Variable which holds the most recent nodename parsed as a result of
   calling info_parse_xxx ().  The same ownership rules apply as for
   info_parsed_filename. */
char *info_parsed_nodename = NULL;
51
/* Scan a parenthesised file name at the start of STRING.  Nested
   parentheses are tracked, so "(a(b)c)" yields the name "a(b)c".  If the
   string runs out while parentheses are still open, the first ")" that
   was seen is taken as the end of the name instead.

   When FILENAME is non-null, *FILENAME receives a newly allocated copy of
   the name, without the enclosing parentheses.  The return value is the
   number of bytes consumed, counting both parentheses.

   If STRING does not begin with "(", return 0 and leave *FILENAME
   untouched.  If there is no ")" at all, set *FILENAME to null (when
   FILENAME is non-null) and return 0. */
int
read_bracketed_filename (char *string, char **filename)
{
  char *body;
  int depth = 1;          /* Number of "(" still waiting for a ")". */
  int first_close = -1;   /* Offset in BODY of the first ")" seen. */
  int pos;

  if (string[0] != '(')
    return 0;

  body = string + 1;

  for (pos = 0; body[pos] != '\0'; pos++)
    {
      char c = body[pos];

      if (c == '(')
        {
          depth++;
          continue;
        }
      if (c != ')')
        continue;

      if (first_close < 0)
        first_close = pos;

      if (--depth == 0)
        break;
    }

  if (depth > 0)
    {
      /* Unbalanced input: fall back on the first ")", if there was one. */
      if (first_close < 0)
        {
          if (filename)
            *filename = 0;
          return 0;
        }
      pos = first_close;
    }

  if (filename)
    {
      /* xcalloc zero-fills, which supplies the terminating null byte. */
      char *copy = xcalloc (1, pos + 1);
      memcpy (copy, body, pos);
      *filename = copy;
    }

  /* Length of the name plus "(" and ")". */
  return pos + 2;
}
103
104 /* Parse the filename and nodename out of STRING, saving in
105 INFO_PARSED_FILENAME and INFO_PARSED_NODENAME. These variables should not
106 be freed by calling code. If either is missing, the relevant variable is
107 set to a null pointer. */
108 void
info_parse_node(char * string)109 info_parse_node (char *string)
110 {
111 int nodename_len;
112
113 free (info_parsed_filename);
114 free (info_parsed_nodename);
115 info_parsed_filename = 0;
116 info_parsed_nodename = 0;
117
118 /* Special case of nothing passed. Return nothing. */
119 if (!string || !*string)
120 return;
121
122 string += skip_whitespace_and_newlines (string);
123
124 string += read_bracketed_filename (string, &info_parsed_filename);
125
126 /* Parse out nodename. */
127 string += skip_whitespace_and_newlines (string);
128 nodename_len = read_quoted_string (string, "", 0, &info_parsed_nodename);
129
130 if (nodename_len != 0)
131 {
132 canonicalize_whitespace (info_parsed_nodename);
133 }
134 }
135
/* Set *OUTPUT to a newly allocated copy of the string starting at START and
   finishing at a character in TERMINATOR, unless START[0] == INFO_QUOTE
   (the byte '\177'), in which case copy the string from START+1 until the
   next occurrence of INFO_QUOTE.  If TERMINATOR is an empty string, finish
   at a null character.  LINES is the number of lines that the string can
   span.  If LINES is zero, there is no limit.

   Return the number of bytes of START that make up the string, including
   any quoting characters that are actually present.  If TERMINATOR is
   non-empty and the input ends (or the line limit is reached) before a
   terminator or closing quote is found, the input is invalid: set *OUTPUT
   to null and return 0.

   START is modified temporarily to enforce the line limit and is restored
   before returning, so it must point to writable memory. */
long
read_quoted_string (char *start, char *terminator, int lines, char **output)
{
  long len;
  char *nl = 0;
  char saved_char = '\0';
  char *text;
  int quoted;

  if (lines)
    {
      int i;
      nl = start;
      for (i = 0; i < lines; i++)
        {
          nl = strchr (nl, '\n');
          if (!nl)
            break; /* End of input string reached. */
          nl++;
        }
      if (nl)
        {
          /* Cut the input off after LINES lines for the duration of the
             scan. */
          saved_char = *nl;
          *nl = '\0';
        }
    }

  quoted = (start[0] == '\177');
  text = quoted ? start + 1 : start;
  len = strcspn (text, quoted ? "\177" : terminator);

  if (*terminator && !text[len])
    {
      /* No terminator, or no closing 177 byte. */
      len = 0;
      *output = 0;
    }
  else
    {
      int closed = (text[len] != '\0');

      *output = xmalloc (len + 1);
      memcpy (*output, text, len);
      (*output)[len] = '\0';

      if (quoted)
        {
          /* Count the opening 177 byte, and the closing one only if it is
             really there.  With an empty TERMINATOR the string may end
             without a closing quote; counting a second quote then would
             report a length that reaches past the terminating null. */
          len += closed ? 2 : 1;
        }
    }

  if (nl)
    *nl = saved_char;
  return len;
}
207
208
209 /* **************************************************************** */
210 /* */
211 /* Finding and Building Menus */
212 /* */
213 /* **************************************************************** */
214
215 /* Get the entry associated with LABEL in the menu of NODE. Return a
216 pointer to the ENTRY if found, or null. Return value should not
217 be freed by caller. If SLOPPY, allow initial matches, like
218 "Buffers" for a LABEL "buffer". */
219 REFERENCE *
info_get_menu_entry_by_label(NODE * node,char * label,int sloppy)220 info_get_menu_entry_by_label (NODE *node, char *label, int sloppy)
221 {
222 register int i;
223 int best_guess = -1;
224 REFERENCE *entry;
225 REFERENCE **references = node->references;
226
227 if (!references)
228 return 0;
229
230 for (i = 0; (entry = references[i]); i++)
231 {
232 if (entry->type != REFERENCE_MENU_ITEM)
233 continue;
234 if (mbscasecmp (label, entry->label) == 0)
235 return entry; /* Exact, case-insensitive match. */
236 else if (sloppy && best_guess == -1
237 && (mbsncasecmp (entry->label, label, strlen (label)) == 0))
238 best_guess = i;
239 }
240
241 if (sloppy && best_guess != -1)
242 return references[best_guess];
243
244 return 0;
245 }
246
247 /* A utility function for concatenating REFERENCE **. Returns a new
248 REFERENCE ** which is the concatenation of REF1 and REF2. */
249 REFERENCE **
info_concatenate_references(REFERENCE ** ref1,REFERENCE ** ref2)250 info_concatenate_references (REFERENCE **ref1, REFERENCE **ref2)
251 {
252 register int i, j;
253 REFERENCE **result;
254 int size = 0;
255
256 /* Get the total size of the slots that we will need. */
257 if (ref1)
258 {
259 for (i = 0; ref1[i]; i++);
260 size += i;
261 }
262
263 if (ref2)
264 {
265 for (i = 0; ref2[i]; i++);
266 size += i;
267 }
268
269 result = xmalloc ((1 + size) * sizeof (REFERENCE *));
270
271 /* Copy the contents over. */
272
273 j = 0;
274 if (ref1)
275 {
276 for (i = 0; ref1[i]; i++)
277 result[j++] = ref1[i];
278 }
279
280 if (ref2)
281 {
282 for (i = 0; ref2[i]; i++)
283 result[j++] = ref2[i];
284 }
285
286 result[j] = NULL;
287 return result;
288 }
289
290 /* Copy a reference structure. Copy each field into new memory. */
291 REFERENCE *
info_copy_reference(REFERENCE * src)292 info_copy_reference (REFERENCE *src)
293 {
294 REFERENCE *dest = xmalloc (sizeof (REFERENCE));
295 dest->label = src->label ? xstrdup (src->label) : NULL;
296 dest->filename = src->filename ? xstrdup (src->filename) : NULL;
297 dest->nodename = src->nodename ? xstrdup (src->nodename) : NULL;
298 dest->start = src->start;
299 dest->end = src->end;
300 dest->line_number = src->line_number;
301 dest->type = src->type;
302
303 return dest;
304 }
305
306 /* Copy a list of references, copying in reference in turn with
307 info_copy_reference. */
308 REFERENCE **
info_copy_references(REFERENCE ** ref1)309 info_copy_references (REFERENCE **ref1)
310 {
311 int i;
312 REFERENCE **result;
313 int size;
314
315 if (!ref1)
316 return 0;
317
318 /* Get the total size of the slots that we will need. */
319 for (i = 0; ref1[i]; i++);
320 size = i;
321
322 result = xmalloc ((1 + size) * sizeof (REFERENCE *));
323
324 /* Copy the contents over. */
325 for (i = 0; ref1[i]; i++)
326 result[i] = info_copy_reference (ref1[i]);
327 result[i] = NULL;
328
329 return result;
330 }
331
332 void
info_reference_free(REFERENCE * ref)333 info_reference_free (REFERENCE *ref)
334 {
335 if (ref)
336 {
337 free (ref->label);
338 free (ref->filename);
339 free (ref->nodename);
340 free (ref);
341 }
342 }
343
344 /* Free the data associated with REFERENCES. */
345 void
info_free_references(REFERENCE ** references)346 info_free_references (REFERENCE **references)
347 {
348 register int i;
349 REFERENCE *entry;
350
351 if (references)
352 {
353 for (i = 0; references && (entry = references[i]); i++)
354 info_reference_free (entry);
355
356 free (references);
357 }
358 }
359
360 /* Return new REFERENCE with filename and nodename fields set. */
361 REFERENCE *
info_new_reference(char * filename,char * nodename)362 info_new_reference (char *filename, char *nodename)
363 {
364 REFERENCE *r = xmalloc (sizeof (REFERENCE));
365 r->label = 0;
366 r->filename = filename ? xstrdup (filename) : 0;
367 r->nodename = nodename ? xstrdup (nodename) : 0;
368 r->start = 0;
369 r->end = 0;
370 r->line_number = 0;
371 r->type = 0;
372 return r;
373 }
374
375
/* Rewrite STRING in place so that every run of whitespace or newlines
   becomes a single space, and so that it neither starts nor ends with
   whitespace.  The result is never longer than the input.  Do nothing if
   STRING is null. */
void
canonicalize_whitespace (char *string)
{
  char *squeezed;
  char *src;
  int out = 0;          /* Number of bytes written to SQUEEZED. */
  int gap_pending = 0;  /* Whitespace was seen since the last byte output. */

  if (!string)
    return;

  squeezed = xmalloc (1 + strlen (string));

  for (src = string; *src; src++)
    {
      char c = *src;

      if (whitespace_or_newline (c))
        {
          /* Do not output anything yet: a space is only wanted if more
             text follows. */
          gap_pending = 1;
          continue;
        }

      if (gap_pending)
        {
          gap_pending = 0;

          /* Suppress whitespace at start of string. */
          if (out > 0)
            squeezed[out++] = ' ';
        }

      squeezed[out++] = c;
    }

  /* Kill trailing whitespace. */
  if (out > 0 && whitespace (squeezed[out - 1]))
    out--;

  squeezed[out] = '\0';
  strcpy (string, squeezed);
  free (squeezed);
}
427
428 /* If ITER points to an ANSI escape sequence, process it, set PLEN to its
429 length in bytes, and return 1.
430 Otherwise, return 0.
431 */
432 int
ansi_escape(mbi_iterator_t iter,size_t * plen)433 ansi_escape (mbi_iterator_t iter, size_t *plen)
434 {
435 if (raw_escapes_p && *mbi_cur_ptr (iter) == '\033' && mbi_avail (iter))
436 {
437 mbi_advance (iter);
438 if (*mbi_cur_ptr (iter) == '[' && mbi_avail (iter))
439 {
440 ITER_SETBYTES (iter, 1);
441 mbi_advance (iter);
442 if (isdigit (*mbi_cur_ptr (iter)) && mbi_avail (iter))
443 {
444 ITER_SETBYTES (iter, 1);
445 mbi_advance (iter);
446 if (*mbi_cur_ptr (iter) == 'm')
447 {
448 *plen = 4;
449 return 1;
450 }
451 else if (isdigit (*mbi_cur_ptr (iter)) && mbi_avail (iter))
452 {
453 ITER_SETBYTES (iter, 1);
454 mbi_advance (iter);
455 if (*mbi_cur_ptr (iter) == 'm')
456 {
457 *plen = 5;
458 return 1;
459 }
460 }
461 }
462 }
463 }
464
465 return 0;
466 }
467
468 static struct text_buffer printed_rep = { 0 };
469
470 /* Return pointer to string that is the printed representation of character
471 (or other logical unit) at ITER if it were printed at screen column
472 PL_CHARS. Use ITER_SETBYTES (info-utils.h) on ITER if we need to advance
473 past a unit that the multibyte iteractor doesn't know about (like an ANSI
474 escape sequence). If ITER points at an end-of-line character, set *DELIM to
475 this character. *PCHARS gets the number of screen columns taken up by
476 outputting the return value, and *PBYTES the number of bytes in returned
477 string. Return value is not null-terminated. Return value must not be
478 freed by caller. */
479 char *
printed_representation(mbi_iterator_t * iter,int * delim,size_t pl_chars,size_t * pchars,size_t * pbytes)480 printed_representation (mbi_iterator_t *iter, int *delim, size_t pl_chars,
481 size_t *pchars, size_t *pbytes)
482 {
483 struct text_buffer *rep = &printed_rep;
484
485 char *cur_ptr = (char *) mbi_cur_ptr (*iter);
486 size_t cur_len = mb_len (mbi_cur (*iter));
487
488 text_buffer_reset (&printed_rep);
489
490 if (mb_isprint (mbi_cur (*iter)))
491 {
492 /* cur.wc gives a wchar_t object. See mbiter.h in the
493 gnulib/lib directory. */
494 *pchars = wcwidth ((*iter).cur.wc);
495 *pbytes = cur_len;
496 return cur_ptr;
497 }
498 else if (cur_len == 1)
499 {
500 if (*cur_ptr == '\n' || *cur_ptr == '\r')
501 {
502 /* If this is a CRLF line ending, ignore this character. */
503 if (*cur_ptr == '\r' && cur_ptr[1] == '\n')
504 {
505 *pchars = 0;
506 *pbytes = 0;
507 return cur_ptr;
508 }
509
510 *pchars = 1;
511 *pbytes = cur_len;
512 *delim = *cur_ptr;
513 text_buffer_add_char (rep, ' ');
514 return cur_ptr;
515 }
516 else if (ansi_escape (*iter, &cur_len))
517 {
518 *pchars = 0;
519 *pbytes = cur_len;
520 ITER_SETBYTES (*iter, cur_len);
521
522 return cur_ptr;
523 }
524 else if (*cur_ptr == '\t')
525 {
526 int i = 0;
527
528 *pchars = ((pl_chars + 8) & 0xf8) - pl_chars;
529 *pbytes = *pchars;
530
531 /* We must output spaces instead of the tab because a tab may
532 not clear characters already on the screen. */
533 for (i = 0; i < *pbytes; i++)
534 text_buffer_add_char (rep, ' ');
535 return text_buffer_base (rep);
536 }
537 }
538
539 /* Show CTRL-x as "^X". */
540 if (iscntrl (*cur_ptr) && *(unsigned char *)cur_ptr < 127)
541 {
542 *pchars = 2;
543 *pbytes = 2;
544 text_buffer_add_char (rep, '^');
545 text_buffer_add_char (rep, *cur_ptr | 0x40);
546 return text_buffer_base (rep);
547 }
548 else if (*cur_ptr == DEL)
549 {
550 *pchars = 0;
551 *pbytes = 0;
552 return text_buffer_base (rep);
553 }
554 else
555 {
556 /* Original byte was not recognized as anything. Display its octal
557 value. This could happen in the C locale for bytes above 128,
558 or for bytes 128-159 in an ISO-8859-1 locale. Don't output the bytes
559 as they are, because they could have special meaning to the
560 terminal. */
561 *pchars = 4;
562 *pbytes = 4;
563 text_buffer_printf (rep, "\\%o", *(unsigned char *)cur_ptr);
564 return text_buffer_base (rep);
565 }
566 }
567
568
569 /* **************************************************************** */
570 /* */
571 /* Scanning node */
572 /* */
573 /* **************************************************************** */
574
/* Whether to strip syntax from the text of nodes. */
int preprocess_nodes_p;

/* Whether contents of nodes should be rewritten.  When this is off,
   nothing is written to output_buf and the input pointer is merely
   advanced. */
static int rewrite_p;

/* inptr is moved forward through the body of a node. */
static char *inptr;

/* Pointer to first byte of node (after node separator). */
static char *input_start;

/* Number of bytes in node contents. */
static size_t input_length;

/* Buffer that the rewritten node contents are written to.  It is
   initialized by init_output_stream, and only when rewrite_p is set. */
struct text_buffer output_buf;

/* Pointer into a tags table for the file to the anchor we need to adjust as
   a result of byte counts changing due to character encoding conversion or
   inserted/deleted text.  Null when there is no anchor left to adjust. */
static TAG **anchor_to_adjust;
/* Offset within file buffer of first byte of node, used for anchor
   adjustment. */
static int node_offset;

/* Difference so far between the number of bytes input in the file and
   bytes output.  Used to adjust the values of anchors in nodes.  Positive
   when fewer bytes have been output than were read. */
static long int output_bytes_difference;

/* Whether we are converting the character encoding of the file.  Set by
   init_conversion. */
static int convert_encoding_p;

#if HAVE_ICONV

/* Whether text in file is encoded in UTF-8. */
static int file_is_in_utf8;

/* Used for conversion from file encoding to output encoding. */
static iconv_t iconv_to_output;

/* Conversion from file encoding to UTF-8.  Only opened when the file is
   not already in UTF-8. */
static iconv_t iconv_to_utf8;

#endif /* HAVE_ICONV */
619
620 void
init_conversion(FILE_BUFFER * fb)621 init_conversion (FILE_BUFFER *fb)
622 {
623 char *target_encoding;
624
625 convert_encoding_p = 0;
626
627 /* Node being processed does not come from an Info file. */
628 if (!fb)
629 return;
630
631 #if !HAVE_ICONV
632 return;
633 #else
634 file_is_in_utf8 = 0;
635
636 /* Don't process file if encoding is unknown. */
637 if (!fb->encoding)
638 return;
639
640 /* Read name of character encoding from environment locale */
641 target_encoding = nl_langinfo (CODESET);
642
643 /* Don't convert the contents if the locale
644 uses the same character encoding as the file */
645 if (!strcasecmp(target_encoding, fb->encoding))
646 return;
647
648 /* Check if an iconv conversion from file locale to system
649 locale exists */
650 iconv_to_output = iconv_open (target_encoding, fb->encoding);
651 if (iconv_to_output == (iconv_t) -1)
652 return; /* Return if no conversion function implemented */
653
654 if ( !strcasecmp ("UTF8", fb->encoding)
655 || !strcasecmp ("UTF-8", fb->encoding))
656 file_is_in_utf8 = 1;
657
658 if (!file_is_in_utf8)
659 {
660 iconv_to_utf8 = iconv_open ("UTF-8", fb->encoding);
661 if (iconv_to_utf8 == (iconv_t) -1)
662 {
663 /* Return if no conversion function implemented */
664 iconv_close (iconv_to_output);
665 return;
666 }
667 }
668
669 convert_encoding_p = 1;
670 rewrite_p = 1;
671 #endif /* HAVE_ICONV */
672 }
673
/* Close the iconv conversion descriptors that init_conversion opened.
   Does nothing if conversion is not in effect. */
void
close_conversion (void)
{
#if HAVE_ICONV
  if (!convert_encoding_p)
    return;

  iconv_close (iconv_to_output);

  /* The UTF-8 descriptor is only opened for files not in UTF-8. */
  if (!file_is_in_utf8)
    iconv_close (iconv_to_utf8);
#endif
}
684
685 static void
init_output_stream(FILE_BUFFER * fb)686 init_output_stream (FILE_BUFFER *fb)
687 {
688 init_conversion (fb);
689 output_bytes_difference = 0;
690
691 if (rewrite_p)
692 text_buffer_init (&output_buf);
693 }
694
695 static size_t saved_offset;
696 static char *saved_inptr;
697 static long saved_difference;
698
699 void
save_conversion_state(void)700 save_conversion_state (void)
701 {
702 saved_offset = text_buffer_off (&output_buf);
703 saved_inptr = inptr;
704 saved_difference = output_bytes_difference;
705 }
706
707 /* Go back to the saved state of the output stream. */
708 void
reset_conversion(void)709 reset_conversion (void)
710 {
711 text_buffer_off (&output_buf) = saved_offset;
712 inptr = saved_inptr;
713 output_bytes_difference = saved_difference;
714 }
715
716 /* Copy bytes from input to output with no encoding conversion. */
717 static void
copy_direct(long n)718 copy_direct (long n)
719 {
720 text_buffer_add_string (&output_buf, inptr, n);
721 inptr += n;
722 }
723
724 /* Read one character at *FROM and write out a sequence
725 of bytes representing that character in ASCII. *FROM
726 is advanced past the read character. */
727 static int
degrade_utf8(char ** from,size_t * from_left)728 degrade_utf8 (char **from, size_t *from_left)
729 {
730 static struct encoding_replacement
731 {
732 char *from_string;
733 char *to_string;
734 } er[] = {
735 {"\xE2\x80\x98","'"}, /* Opening single quote */
736 {"\xE2\x80\x99","'"}, /* Closing single quote */
737 {"\xE2\x80\x9C","\""},/* Opening double quote */
738 {"\xE2\x80\x9D","\""},/* Closing double quote */
739 {"\xC2\xA9","(C)"}, /* Copyright symbol */
740 {"\xC2\xBB",">>"}, /* Closing double angle brackets */
741
742 {"\xE2\x86\x92","->"},/* Right arrow */
743 {"\xE2\x87\x92","=>"},/* Right double arrow */
744 {"\xE2\x8A\xA3","-|"},/* Print symbol */
745 {"\xE2\x98\x85","-!-"}, /* Point symbol */
746 {"\xE2\x86\xA6","==>"}, /* Expansion symbol */
747
748 {"\xE2\x80\x90","-"}, /* Hyphen */
749 {"\xE2\x80\x91","-"}, /* Non-breaking hyphen */
750 {"\xE2\x80\x92","-"}, /* Figure dash */
751 {"\xE2\x80\x93","-"}, /* En dash */
752 {"\xE2\x80\x94","--"}, /* Em dash */
753 {"\xE2\x88\x92","-"}, /* Minus sign */
754 {"\xE2\x80\xA6","..."}, /* Ellipsis */
755 {"\xE2\x80\xA2","*"}, /* Bullet */
756
757 {"\xC3\xA0","a`"}, /* Lower case letter a with grave accent */
758 {"\xC3\xA2","a^"}, /* Lower case letter a with circumflex */
759 {"\xC3\xA4","a\""}, /* Lower case letter a with diaeresis */
760 {"\xC3\xA6","ae"}, /* Lower case letter ae ligature */
761 {"\xC3\xA9","e'"}, /* Lower case letter e with acute accent */
762 {"\xC3\xA8","e`"}, /* Lower case letter e with grave accent */
763 {"\xC3\xAA","e^"}, /* Lower case letter e with circumflex */
764 {"\xC3\xAB","e\""}, /* Lower case letter e with diaeresis */
765 {"\xC3\xB6","o\""}, /* Lower case letter o with diaeresis */
766 {"\xC3\xBC","u\""}, /* Lower case letter u with diaeresis */
767 {"\xC3\x84", "A\""}, /* Upper case letter A with diaeresis. */
768 {"\xC3\x96", "O\""}, /* Upper case letter O with diaeresis. */
769 {"\xC3\x9c", "U\""}, /* Upper case letter U with diaeresis. */
770
771 {"\xC3\xB1","n~"}, /* Lower case letter n with tilde */
772 {"\xC3\x87","C,"}, /* Upper case letter C with cedilla */
773 {"\xC3\xA7","c,"}, /* Lower case letter c with cedilla */
774 {"\xC3\x9f","ss"}, /* Lower case letter sharp s */
775
776 {0, 0}
777 };
778
779 struct encoding_replacement *erp;
780
781 for (erp = er; erp->from_string != 0; erp++)
782 {
783 /* Avoid reading past end of input. */
784 int width = strlen (erp->from_string);
785 if (width > *from_left)
786 continue;
787
788 if (!strncmp (erp->from_string, *from, width))
789 {
790 text_buffer_add_string (&output_buf, erp->to_string,
791 strlen(erp->to_string));
792 *from += width;
793 *from_left -= width;
794 return 1;
795 }
796 }
797
798 /* Failing this, just print a question mark. Maybe we should use SUB
799 (^Z) (ASCII substitute character code) instead, or pass through the
800 original bytes. */
801 text_buffer_add_string (&output_buf, "?", 1);
802
803 /* Ideally we would advance one UTF-8 character. This would
804 require knowing its length in bytes. */
805 (*from)++;
806 (*from_left)--;
807
808 return 0;
809 }
810
/* Convert N bytes from input to output encoding and write to
   output buffer.  Characters that the output encoding cannot represent
   are degraded to an ASCII approximation with degrade_utf8.

   If the N bytes end in the middle of a multibyte sequence, further input
   bytes are consumed to complete it, as long as they lie within the node.
   Return the number of such input bytes consumed over N.

   output_bytes_difference is updated by the difference between N and the
   number of bytes written.  If iconv reports an error that is not handled
   here, an error message is shown, the rest of the input is skipped, and
   0 is returned without updating output_bytes_difference.

   Always return 0 without doing anything if iconv is not available. */
static int
copy_converting (long n)
{
#if !HAVE_ICONV
  return 0;
#else
  size_t bytes_left, orig_bytes_left;
  int extra_at_end;
  size_t iconv_ret;
  long output_start;

  size_t utf8_char_free;
  char utf8_char[4]; /* Maximum 4 bytes in a UTF-8 character */
  char *utf8_char_ptr, *orig_inptr;
  size_t i;

  /* Use n as an estimate of how many bytes will be required
     in target encoding. */
  text_buffer_alloc (&output_buf, (size_t) n);

  /* Remember where our output begins, to work out afterwards how many
     bytes were written. */
  output_start = text_buffer_off (&output_buf);
  bytes_left = n;
  extra_at_end = 0;
  while (1)
    {
      /* This advances inptr and decreases bytes_left by however much
         was converted. */
      iconv_ret = text_buffer_iconv (&output_buf, iconv_to_output,
                                     (ICONV_CONST char **)&inptr, &bytes_left);

      /* Make sure libiconv flushes out the last converted character.
         This is required when the conversion is stateful, in which
         case libiconv might not output the last character, waiting to
         see whether it should be combined with the next one. */
      if (iconv_ret != (size_t) -1
          && text_buffer_iconv (&output_buf, iconv_to_output,
                                NULL, NULL) != (size_t) -1)
        /* Success: all of input converted. */
        break;

      /* There's been an error while converting. */
      switch (errno)
        {
        case E2BIG:
          /* Ran out of space in output buffer.  Allocate more
             and try again. */
          text_buffer_alloc (&output_buf, n);
          continue;
        case EINVAL:
          /* Incomplete byte sequence at end of input buffer.  Try to read
             more. */

          /* input_length - 2 is offset of last-but-one byte within input.
             This checks if there is at least one more byte within node
             contents. */
          if (inptr - input_start + (bytes_left - 1) <= input_length - 2)
            {
              bytes_left++;
              extra_at_end++;
            }
          else
            {
              /* No more input to complete the sequence with: pass the
                 remaining bytes through unconverted. */
              copy_direct (bytes_left);
              bytes_left = 0;
            }
          continue;
        default: /* Unknown error */
          info_error (_("Error converting file character encoding"));

          /* Skip past current input and hope we don't get an
             error next time. */
          inptr += bytes_left;
          return 0;
        case EILSEQ:
          /* Byte sequence in input not recognized.  Degrade to ASCII. */
          break;
        }

      /* Only the EILSEQ case gets here. */

      /* Flush any waiting input in iconv_to_output and enter the
         default shift state. */
      text_buffer_iconv (&output_buf, iconv_to_output, NULL, NULL);

      if (file_is_in_utf8)
        {
          degrade_utf8 (&inptr, &bytes_left);
          continue;
        }

      /* If file is not in UTF-8, we degrade to ASCII in two steps:
         first convert the character to UTF-8, then look up a replacement
         string.  Note that mixing iconv_to_output and iconv_to_utf8
         on the same input may not work well if the input encoding
         is stateful.  We could deal with this by always converting to
         UTF-8 first; then we could mix conversions on the UTF-8 stream. */

      /* We want to read exactly one character.  Do this by
         restricting size of output buffer. */
      utf8_char_ptr = utf8_char;
      orig_inptr = inptr;
      orig_bytes_left = bytes_left;
      /* Offer iconv 1, 2, 3 and then 4 bytes of room, stopping as soon as
         something has been converted or a real error occurs. */
      for (i = 1; i <= 4; i++)
        {
          utf8_char_free = i;
          errno = 0;
          iconv_ret = iconv (iconv_to_utf8, (ICONV_CONST char **)&inptr,
                             &bytes_left, &utf8_char_ptr, &utf8_char_free);
          if ((iconv_ret == (size_t) -1 && errno != E2BIG)
              /* If we managed to convert a character: */
              || utf8_char_ptr > utf8_char)
            break;
        }

      /* errno == E2BIG if iconv ran out of output buffer,
         which is expected. */
      if (iconv_ret == (size_t) -1 && errno != E2BIG)
        {
          /* Character is not recognized.  Copy a single byte. */
          inptr = orig_inptr; /* iconv might have incremented inptr */
          copy_direct (1);
          bytes_left = orig_bytes_left - 1;
        }
      else
        {
          utf8_char_ptr = utf8_char;
          /* i is width of UTF-8 character */
          degrade_utf8 (&utf8_char_ptr, &i);
          /* If we are done, make sure iconv flushes the last character. */
          if (bytes_left <= 0)
            {
              utf8_char_ptr = utf8_char;
              i = 4;
              iconv (iconv_to_utf8, NULL, NULL,
                     &utf8_char_ptr, &utf8_char_free);
              if (utf8_char_ptr > utf8_char)
                {
                  utf8_char_ptr = utf8_char;
                  degrade_utf8 (&utf8_char_ptr, &i);
                }
            }
        }
    }

  /* Must cast because the difference between unsigned size_t is always
     positive. */
  output_bytes_difference +=
    n - ((signed long) text_buffer_off (&output_buf) - output_start);

  return extra_at_end;
#endif /* HAVE_ICONV */
}
961
962 /* Functions below are named from the perspective of the preprocess_nodes_p
963 flag being on. */
964
965 /* Copy text from input node contents, possibly converting the
966 character encoding and adjusting anchor offsets at the same time. */
967 static void
copy_input_to_output(long n)968 copy_input_to_output (long n)
969 {
970 if (rewrite_p)
971 {
972 long bytes_left;
973
974 bytes_left = n;
975 while (bytes_left > 0)
976 {
977 if (!convert_encoding_p)
978 {
979 copy_direct (bytes_left);
980 bytes_left = 0;
981 }
982 else
983 {
984 long bytes_to_convert;
985 long extra_written;
986
987 bytes_to_convert = bytes_left;
988
989 if (anchor_to_adjust)
990 {
991 /* Check there is an anchor in the input. */
992 long first_anchor =
993 (*anchor_to_adjust)->nodestart - node_offset;
994
995 if (first_anchor < 0)
996 anchor_to_adjust = 0; /* error in input file */
997 else if (first_anchor < (inptr-input_start) + bytes_left)
998 {
999 /* Convert enough to pass the first anchor in input. */
1000 bytes_to_convert = first_anchor - (inptr-input_start)+1;
1001 if (bytes_to_convert < 0)
1002 {
1003 bytes_to_convert = bytes_left;
1004 anchor_to_adjust = 0;
1005 }
1006 }
1007 }
1008
1009 /* copy_converting may read more than bytes_to_convert
1010 bytes if its input ends in an incomplete byte sequence. */
1011 extra_written = copy_converting (bytes_to_convert);
1012
1013 bytes_left -= bytes_to_convert + extra_written;
1014 }
1015
1016 /* Check if we have gone past any anchors and
1017 adjust with output_bytes_difference. */
1018 if (anchor_to_adjust)
1019 while ((*anchor_to_adjust)->nodestart - node_offset
1020 <= inptr - input_start)
1021 {
1022 (*anchor_to_adjust)->nodestart_adjusted
1023 = (*anchor_to_adjust)->nodestart - output_bytes_difference;
1024
1025 anchor_to_adjust++;
1026 if (!*anchor_to_adjust
1027 || (*anchor_to_adjust)->cache.nodelen != 0)
1028 {
1029 anchor_to_adjust = 0;
1030 break;
1031 }
1032 }
1033 }
1034 }
1035 else
1036 inptr += n;
1037 }
1038
1039 static void
skip_input(long n)1040 skip_input (long n)
1041 {
1042 if (preprocess_nodes_p)
1043 {
1044 inptr += n;
1045 output_bytes_difference += n;
1046 }
1047 else if (rewrite_p)
1048 {
1049 /* We are expanding tags only. Do not skip input. */
1050 copy_input_to_output (n);
1051 }
1052 else
1053 {
1054 inptr += n;
1055 }
1056 }
1057
1058 static void
write_extra_bytes_to_output(char * input,long n)1059 write_extra_bytes_to_output (char *input, long n)
1060 {
1061 if (preprocess_nodes_p)
1062 {
1063 text_buffer_add_string (&output_buf, input, n);
1064 output_bytes_difference -= n;
1065 }
1066 }
1067
1068 /* Like write_extra_bytes_to_output, but writes bytes even when
1069 preprocess_nodes=Off. */
1070 static void
write_tag_contents(char * input,long n)1071 write_tag_contents (char *input, long n)
1072 {
1073 if (rewrite_p)
1074 {
1075 text_buffer_add_string (&output_buf, input, n);
1076 output_bytes_difference -= n;
1077 }
1078 }
1079
1080 /* Like skip_input, but skip even when !preprocess_nodes_p. */
1081 static void
skip_tag_contents(long n)1082 skip_tag_contents (long n)
1083 {
1084 if (rewrite_p)
1085 {
1086 inptr += n;
1087 output_bytes_difference += n;
1088 }
1089 }
1090
1091 /* Read first line of node and set next, prev and up. */
1092 static void
parse_top_node_line(NODE * node)1093 parse_top_node_line (NODE *node)
1094 {
1095 char **store_in = 0;
1096 char *nodename;
1097 char *ptr;
1098 int value_length;
1099
1100 /* If the first line is empty, leave it in. This is the case
1101 in the index-apropos window. */
1102 if (*node->contents == '\n')
1103 return;
1104
1105 node->next = node->prev = node->up = 0;
1106 ptr = node->contents;
1107
1108 while (1)
1109 {
1110 store_in = 0;
1111
1112 ptr += skip_whitespace (ptr);
1113
1114 /* Check what field we are looking at */
1115 if (!strncasecmp (ptr, INFO_FILE_LABEL, strlen(INFO_FILE_LABEL)))
1116 {
1117 ptr += strlen (INFO_FILE_LABEL);
1118 }
1119 else if (!strncasecmp (ptr, INFO_NODE_LABEL, strlen(INFO_NODE_LABEL)))
1120 {
1121 ptr += strlen (INFO_NODE_LABEL);
1122 }
1123 else if (!strncasecmp (ptr, INFO_PREV_LABEL, strlen(INFO_PREV_LABEL)))
1124 {
1125 ptr += strlen (INFO_PREV_LABEL);
1126 store_in = &node->prev;
1127 }
1128 else if (!strncasecmp (ptr, INFO_ALTPREV_LABEL,
1129 strlen(INFO_ALTPREV_LABEL)))
1130 {
1131 ptr += strlen (INFO_ALTPREV_LABEL);
1132 store_in = &node->prev;
1133 }
1134 else if (!strncasecmp (ptr, INFO_NEXT_LABEL, strlen(INFO_NEXT_LABEL)))
1135 {
1136 ptr += strlen (INFO_NEXT_LABEL);
1137 store_in = &node->next;
1138 }
1139 else if (!strncasecmp (ptr, INFO_UP_LABEL, strlen(INFO_UP_LABEL)))
1140 {
1141 ptr += strlen (INFO_UP_LABEL);
1142 store_in = &node->up;
1143 }
1144 else
1145 {
1146 store_in = 0;
1147 /* Not recognized - code below will skip to next comma */
1148 }
1149 ptr += skip_whitespace (ptr);
1150
1151 /* Get length of a bracketed filename component. */
1152 if (*ptr != '(')
1153 value_length = 0;
1154 else
1155 value_length = read_bracketed_filename (ptr, 0);
1156
1157 /* Get length of node name, or filename if following "File:". Note
1158 that . is not included in the second argument here in order to
1159 support this character in file names. */
1160 value_length += read_quoted_string (ptr + value_length,
1161 "\n\r\t,", 1, &nodename);
1162 if (store_in)
1163 {
1164 *store_in = xmalloc (value_length + 1);
1165 strncpy (*store_in, ptr, value_length);
1166 (*store_in)[value_length] = '\0';
1167 }
1168
1169 free (nodename);
1170 ptr += value_length;
1171
1172 if (*ptr == '\n' || !*ptr)
1173 break;
1174
1175 ptr += 1; /* Point after field terminator */
1176 }
1177 }
1178
1179 /* Output, replace or hide text introducing a reference. INPTR starts on
1180 the first byte of a sequence introducing a reference and finishes on the
1181 first (non-whitespace) byte of the reference label. */
1182 static int
scan_reference_marker(REFERENCE * entry,int in_parentheses)1183 scan_reference_marker (REFERENCE *entry, int in_parentheses)
1184 {
1185 /* When preprocess_nodes is Off, we position the cursor on
1186 the "*" when moving between references. */
1187 if (!preprocess_nodes_p)
1188 {
1189 if (rewrite_p)
1190 entry->start = text_buffer_off(&output_buf);
1191 else
1192 entry->start = inptr - input_start;
1193 }
1194
1195 /* Check what we found based on first character of match */
1196 if (inptr[0] == '\n')
1197 {
1198 entry->type = REFERENCE_MENU_ITEM;
1199 if (!preprocess_nodes_p)
1200 entry->start++;
1201 }
1202 else
1203 entry->type = REFERENCE_XREF;
1204
1205 if (entry->type == REFERENCE_MENU_ITEM)
1206 copy_input_to_output (strlen ("\n* "));
1207 else
1208 {
1209 /* Only match "*Note" if it is followed by a whitespace character so that
1210 it will not be recognized if, e.g., it is surrounded in inverted
1211 commas. */
1212 if (!strchr (" \t\r\n", inptr[strlen ("*Note")]))
1213 {
1214 copy_input_to_output (strlen ("*Note:"));
1215 return 0;
1216 }
1217
1218 /* Cross-references can be generated by four different Texinfo
1219 commands. @inforef and @xref output "*Note " in Info format,
1220 and "See" in HTML and print. @ref and @pxref output "*note "
1221 in Info format, and either nothing at all or "see" in HTML
1222 and print. Unfortunately, there is no easy way to distinguish
1223 between these latter two cases. */
1224 /* TODO: Internationalize these strings, but only if we know the
1225 language of the document. */
1226 if (inptr[1] == 'N')
1227 {
1228 write_extra_bytes_to_output ("See", 3);
1229 in_parentheses = 1;
1230 }
1231 else if (in_parentheses)
1232 {
1233 write_extra_bytes_to_output ("see", 3);
1234 /* Only output the "see" for input like "(*note ...)", which
1235 would have come from a use of @pxref. We used to output "see" for
1236 "*note" in more circumstances, with a list of words where to
1237 suppress it (to avoid "see *note" turning into "see see"), but
1238 such a list can't be complete or reliable. It's better to remove
1239 it with more enthusiasm, then if the document writer wants a "see"
1240 to appear, they can add one themselves. */
1241 }
1242
1243 skip_input (strlen ("*Note"));
1244 if (!in_parentheses)
1245 skip_input (skip_whitespace (inptr));
1246 }
1247
1248 /* Copy any white space before label. */
1249 copy_input_to_output (skip_whitespace_and_newlines (inptr));
1250
1251 return 1;
1252 }
1253
1254 /* Output reference label and update ENTRY. INPTR should be on the first
1255 non-whitespace byte of label when this function is called. It is left
1256 at the first character after the colon terminating the label. Return 0 if
1257 invalid syntax is encountered. */
1258 static int
scan_reference_label(REFERENCE * entry,int in_index)1259 scan_reference_label (REFERENCE *entry, int in_index)
1260 {
1261 int max_lines;
1262 int len, label_len = 0;
1263
1264 /* Handle case of cross-reference like (FILE)NODE::. */
1265 if (inptr[0] == '(')
1266 label_len = read_bracketed_filename (inptr, &entry->filename);
1267
1268 /* Search forward to ":" to get label name. Cross-references may have
1269 a newline in the middle. */
1270 if (entry->type == REFERENCE_MENU_ITEM)
1271 max_lines = 1;
1272 else
1273 max_lines = 2;
1274 if (!in_index || inptr[label_len] == '\177')
1275 {
1276 len = read_quoted_string (inptr + label_len, ":", max_lines,
1277 &entry->nodename);
1278 canonicalize_whitespace (entry->nodename);
1279 if (!len)
1280 return 0; /* Input invalid. */
1281 label_len += len;
1282 }
1283 else
1284 {
1285 /* If in an index node, go forward to the last colon on the line
1286 (not preceded by a newline, NUL or DEL). This is in order to
1287 support index entries containing colons. This should work fine
1288 as long as the node name does not contain a colon as well. */
1289
1290 char *p;
1291 int n, m = 0;
1292 p = inptr + label_len;
1293
1294 while (1)
1295 {
1296 n = strcspn (p, ":\n\177");
1297 if (p[n] == ':')
1298 {
1299 m += n + 1;
1300 p += n + 1;
1301 continue;
1302 }
1303 break;
1304 }
1305 if (m == 0)
1306 return 0; /* no : found */
1307 label_len += m - 1;
1308 }
1309
1310 entry->label = xmalloc (label_len + 1);
1311 memcpy (entry->label, inptr, label_len);
1312 entry->label[label_len] = '\0';
1313 canonicalize_whitespace (entry->label);
1314
1315 if (preprocess_nodes_p)
1316 entry->start = text_buffer_off (&output_buf);
1317
1318 /* Write text of label. */
1319 copy_input_to_output (label_len);
1320
1321 if (rewrite_p)
1322 entry->end = text_buffer_off (&output_buf);
1323 else
1324 entry->end = inptr - input_start;
1325
1326 /* Colon after label. */
1327 if (*inptr)
1328 skip_input (1);
1329 /* Don't mess up the margin of a menu description. */
1330 if (entry->type == REFERENCE_MENU_ITEM)
1331 write_extra_bytes_to_output (" ", 1);
1332
1333 return 1;
1334 }
1335
1336 /* INPTR should be at the first character after the colon
1337 terminating the label. Return 0 on syntax error. */
1338 static int
scan_reference_target(REFERENCE * entry,NODE * node,int in_parentheses)1339 scan_reference_target (REFERENCE *entry, NODE *node, int in_parentheses)
1340 {
1341 int i;
1342
1343 /* This entry continues with a specific target. Parse the
1344 file name and node name from the specification. */
1345
1346 if (entry->type == REFERENCE_XREF)
1347 {
1348 int length = 0; /* Length of specification */
1349 char *target_start = inptr;
1350 char *nl_off = 0;
1351 int space_at_start_of_line = 0;
1352
1353 length += skip_whitespace_and_newlines (inptr);
1354
1355 length += read_bracketed_filename (inptr + length, &entry->filename);
1356
1357 length += skip_whitespace_and_newlines (inptr + length);
1358
1359 /* Get the node name. */
1360 length += read_quoted_string (inptr + length, ",.", 2, &entry->nodename);
1361
1362 skip_input (length);
1363
1364 /* Check if there is a newline in the target. */
1365 nl_off = strchr (target_start, '\n');
1366 if (nl_off)
1367 {
1368 if (nl_off < inptr)
1369 space_at_start_of_line = skip_whitespace (nl_off + 1);
1370 else
1371 nl_off = 0;
1372 }
1373 canonicalize_whitespace (entry->nodename);
1374
1375 if (entry->filename)
1376 {
1377 /* Heuristic of whether it's worth outputing a newline before the
1378 filename. This checks whether the newline appears more
1379 than half way through the text, and therefore which side is
1380 longer. */
1381 if (nl_off
1382 && nl_off < target_start + (length - space_at_start_of_line) / 2)
1383 {
1384 int i;
1385 write_extra_bytes_to_output ("\n", 1);
1386
1387 for (i = 0; i < space_at_start_of_line; i++)
1388 write_extra_bytes_to_output (" ", 1);
1389 skip_input (strspn (inptr, " "));
1390 nl_off = 0;
1391 }
1392 else
1393
1394 if (*inptr != '\n')
1395 {
1396 write_extra_bytes_to_output (" ", 1);
1397 }
1398 write_extra_bytes_to_output ("(", 1);
1399 write_extra_bytes_to_output (entry->filename,
1400 strlen (entry->filename));
1401 write_extra_bytes_to_output (" manual)",
1402 strlen (" manual)"));
1403 }
1404
1405 /* Hide terminating punctuation if we are in a reference
1406 like "(*note Label:(file)node.)". */
1407 if (in_parentheses && inptr[0] == '.')
1408 skip_input (1);
1409
1410 /* Copy any terminating punctuation before the optional newline. */
1411 copy_input_to_output (strspn (inptr, ".),"));
1412
1413 /* Output a newline if one is needed. Don't do it at the end of
1414 a paragraph. */
1415 if (nl_off && *inptr != '\n')
1416 {
1417 int i;
1418
1419 write_extra_bytes_to_output ("\n", 1);
1420 for (i = 0; i < space_at_start_of_line; i++)
1421 write_extra_bytes_to_output (" ", 1);
1422 skip_input (strspn (inptr, " "));
1423 }
1424 }
1425 else /* entry->type == REFERENCE_MENU_ITEM */
1426 {
1427 int line_len;
1428 int length = 0; /* Length of specification */
1429
1430 length = skip_whitespace (inptr);
1431 length += read_bracketed_filename (inptr + length, &entry->filename);
1432 length += strspn (inptr + length, " ");
1433
1434 /* Get the node name. */
1435 length += read_quoted_string (inptr + length, ",.\t\n", 2,
1436 &entry->nodename);
1437 if (inptr[length] == '.') /* A '.' terminating the entry. */
1438 length++;
1439
1440 if (node->flags & N_IsDir)
1441 {
1442 /* Set line_len to length of line so far. */
1443
1444 char *linestart;
1445 linestart = memrchr (input_start, '\n', inptr - input_start);
1446 if (!linestart)
1447 linestart = input_start;
1448 else
1449 linestart++; /* Point to first character after newline. */
1450 line_len = inptr - linestart;
1451 }
1452
1453 if (node->flags & N_IsIndex)
1454 /* Show the name of the node the index entry refers to. */
1455 copy_input_to_output (length);
1456 else
1457 {
1458 skip_input (length);
1459
1460 if ((node->flags & N_IsDir) && inptr[strspn (inptr, " ")] == '\n')
1461 {
1462 /* For a dir node, if there is no more text in this line,
1463 check if there is a menu entry description in the next
1464 line to the right of the end of the label, and display it
1465 in this line. */
1466 skip_input (strspn (inptr, " "));
1467 if (line_len <= strspn (inptr + 1, " "))
1468 skip_input (1 + line_len);
1469 }
1470 else
1471 {
1472 for (i = 0; i < length; i++)
1473 write_extra_bytes_to_output (" ", 1);
1474 }
1475 }
1476
1477 /* Parse "(line ...)" part of menus, if any. */
1478 {
1479 char *lineptr = inptr;
1480 /* Skip any whitespace first, and then a newline in case the item
1481 was so long to contain the ``(line ...)'' string in the same
1482 physical line. */
1483 lineptr += skip_whitespace (inptr);
1484 if (*lineptr == '\n')
1485 lineptr += 1 + skip_whitespace (lineptr + 1);
1486
1487 if (!strncmp (lineptr, "(line ", strlen ("(line ")))
1488 {
1489 lineptr += strlen ("(line ");
1490 entry->line_number = strtol (lineptr, 0, 0);
1491 }
1492 else
1493 entry->line_number = 0;
1494 }
1495 }
1496
1497 return 1;
1498 }
1499
/* BASE is earlier in a block of allocated memory than PTR, and the block
   extends until at least BASE + LEN - 1.  Return PTR[INDEX] when that byte
   lies inside BASE[0 .. LEN - 1]; otherwise return 0 without reading
   anything. */
static char
safe_string_index (char *ptr, long index, char *base, long len)
{
  /* Position of the requested byte, counted from BASE. */
  long position = (ptr - base) + index;

  if (position >= 0 && position < len)
    return ptr[index];

  return 0;
}
1514
1515 /* Process an in index marker ("^@^H[index^@^H]") or an image marker
1516 ("^@^H[image ...^@^H]"). */
1517 static void
scan_info_tag(NODE * node,int * in_index,FILE_BUFFER * fb)1518 scan_info_tag (NODE *node, int *in_index, FILE_BUFFER *fb)
1519 {
1520 char *p, *p1;
1521 struct text_buffer *expansion = xmalloc (sizeof (struct text_buffer));
1522
1523 p = inptr;
1524 p1 = p;
1525
1526 text_buffer_init (expansion);
1527
1528 if (tag_expand (&p1, input_start + input_length, expansion, in_index))
1529 {
1530 if (*in_index)
1531 node->flags |= N_IsIndex;
1532
1533 if (!rewrite_p)
1534 {
1535 rewrite_p = 1;
1536 init_output_stream (fb);
1537
1538 /* Put inptr back to start so that
1539 copy_input_to_output below gets all
1540 preceding contents. */
1541 inptr = node->contents;
1542 }
1543
1544 /* Write out up to tag. */
1545 copy_input_to_output (p - inptr);
1546
1547 write_tag_contents (text_buffer_base (expansion),
1548 text_buffer_off (expansion));
1549 /* Skip past body of tag. */
1550 skip_tag_contents (p1 - inptr);
1551 }
1552 else
1553 {
1554 /* It was not a valid tag. */
1555 copy_input_to_output (p - inptr + 1);
1556 }
1557
1558 text_buffer_free (expansion);
1559 free (expansion);
1560 }
1561
/* Evaluate to 1 if CONTENTS begins with STRING, compared without regard to
   case, and to 0 otherwise.  STRING is expanded twice, so it must not have
   side effects. */
#define looking_at_string(contents, string) \
  (strncasecmp ((contents), (string), strlen (string)) == 0)
1564
1565 static char *
forward_to_info_syntax(char * contents)1566 forward_to_info_syntax (char *contents)
1567 {
1568 /* Loop until just before the end of the input. The '- 3' prevents us
1569 accessing memory after the end of the input, and none of the strings we
1570 are looking for are shorter than 3 bytes. */
1571 while (contents < input_start + input_length - 3)
1572 {
1573 /* Menu entry comes first to optimize for the case of looking through a
1574 long index node. */
1575 if (looking_at_string (contents, INFO_MENU_ENTRY_LABEL)
1576 || looking_at_string (contents, INFO_XREF_LABEL)
1577 || !memcmp (contents, "\0\b[", 3))
1578 return contents;
1579 contents++;
1580 }
1581 return 0;
1582 }
1583
1584 /* Scan contents of NODE, recording cross-references and similar.
1585
1586 Convert character encoding of node contents to that of the user if the two
1587 are known to be different. If PREPROCESS_NODES_P == 1, remove Info syntax
1588 in contents.
1589
1590 If FB is non-null, it is the file containing the node, and TAG_PTR is an
1591 offset into FB->tags. If the node contents are rewritten, adjust anchors
1592 that occur in the node and store adjusted value as TAG->nodestart_adjusted,
1593 otherwise simply copy TAG->nodestart to TAG->nodestart_adjusted for each
1594 anchor in the node. */
1595 void
scan_node_contents(NODE * node,FILE_BUFFER * fb,TAG ** tag_ptr)1596 scan_node_contents (NODE *node, FILE_BUFFER *fb, TAG **tag_ptr)
1597 {
1598 int in_menu = 0;
1599 char *match;
1600
1601 REFERENCE **refs = NULL;
1602 size_t refs_index = 0, refs_slots = 0;
1603
1604 /* Whether an index tag was seen. */
1605 int in_index = 0;
1606
1607 rewrite_p = preprocess_nodes_p;
1608
1609 init_output_stream (fb);
1610
1611 if (fb)
1612 {
1613 char *file_contents;
1614
1615 /* Set anchor_to_adjust to first anchor in node, if any. */
1616 anchor_to_adjust = tag_ptr + 1;
1617 if (!*anchor_to_adjust)
1618 anchor_to_adjust = 0;
1619 else if (*anchor_to_adjust
1620 && (*anchor_to_adjust)->cache.nodelen != 0)
1621 anchor_to_adjust = 0;
1622
1623 if (!node->subfile)
1624 file_contents = fb->contents;
1625 else
1626 {
1627 FILE_BUFFER *f = info_find_subfile (node->subfile);
1628 if (!f)
1629 return; /* This shouldn't happen. */
1630 file_contents = f->contents;
1631 }
1632 node_offset = (*tag_ptr)->nodestart
1633 + skip_node_separator (file_contents + (*tag_ptr)->nodestart);
1634 }
1635 else
1636 anchor_to_adjust = 0;
1637
1638 /* Initialize refs to point to array of one null pointer in case
1639 there are no results. This way we know if refs has been initialized
1640 even if it is empty. */
1641 refs = calloc (1, sizeof *refs);
1642 refs_slots = 1;
1643
1644 parse_top_node_line (node);
1645
1646 /* This should be the only time we assign to inptr in this function -
1647 all other assignment should be done with the helper functions above. */
1648 inptr = node->contents;
1649 input_start = node->contents;
1650 input_length = node->nodelen;
1651
1652
1653 while ((match = forward_to_info_syntax (inptr))
1654 && match < node->contents + node->nodelen)
1655 {
1656 int in_parentheses = 0;
1657 REFERENCE *entry;
1658
1659 /* Write out up to match */
1660 copy_input_to_output (match - inptr);
1661
1662 if ((in_menu && match[0] == '\n') || match[0] == '*')
1663 {
1664 /* Menu entry or cross reference. */
1665 /* Create REFERENCE entity. */
1666 entry = info_new_reference (0, 0);
1667
1668 if (safe_string_index (inptr, -1, input_start, input_length) == '('
1669 && safe_string_index (inptr, 1, input_start, input_length) == 'n')
1670 in_parentheses = 1;
1671
1672 save_conversion_state ();
1673
1674 if (!scan_reference_marker (entry, in_parentheses))
1675 goto not_a_reference;
1676
1677 if (!scan_reference_label (entry, in_index))
1678 goto not_a_reference;
1679
1680 /* If this reference entry continues with another ':' then the target
1681 of the reference is given by the label. */
1682 if (*inptr == ':')
1683 {
1684 int label_len;
1685 skip_input (1);
1686 if (entry->type == REFERENCE_MENU_ITEM)
1687 write_extra_bytes_to_output (" ", 1);
1688
1689 /* Remove the DEL bytes from a label like "(FOO)^?BAR^?::". */
1690 label_len = strlen (entry->label);
1691 if (label_len >= 2 && entry->label[label_len - 1] == 0177)
1692 {
1693 char *p = strchr (entry->label, '\177');
1694 memmove (p, p + 1, label_len - (p - entry->label) - 1);
1695 entry->label[label_len - 2] = '\0';
1696 }
1697 }
1698 else
1699 {
1700 /* Proceed to read the rest of the reference. */
1701 /* TODO: we should probably not allow references of the form
1702 "(file)node1:node2." or "(file1)node1:(file2)node2", so
1703 bail out here if entry->filename is non-null. */
1704
1705 free (entry->filename); entry->filename = 0;
1706 free (entry->nodename); entry->nodename = 0;
1707 if (!scan_reference_target (entry, node, in_parentheses))
1708 goto not_a_reference;
1709 }
1710
1711 if (0)
1712 {
1713 char *cur_inptr;
1714
1715 not_a_reference:
1716 /* This is not a menu entry or reference. Do not add to our
1717 list. */
1718 cur_inptr = inptr;
1719 reset_conversion ();
1720 copy_input_to_output (cur_inptr - inptr);
1721
1722 info_reference_free (entry);
1723 continue;
1724 }
1725
1726 add_pointer_to_array (entry, refs_index, refs, refs_slots, 50);
1727 }
1728 /* Was "* Menu:" seen? If so, search for menu entries hereafter. */
1729 else if (!in_menu && !strncmp (match, INFO_MENU_LABEL,
1730 strlen (INFO_MENU_LABEL)))
1731 {
1732 in_menu = 1;
1733 skip_input (strlen ("\n* Menu:"));
1734 if (*inptr == '\n')
1735 skip_input (strspn (inptr, "\n") - 1); /* Keep one newline. */
1736
1737 }
1738 else if (match[0] == '\0') /* Info tag */
1739 {
1740 scan_info_tag (node, &in_index, fb);
1741 }
1742 else
1743 copy_input_to_output (1);
1744 }
1745
1746 /* If we haven't accidentally gone past the end of the node, write
1747 out the rest of it. */
1748 if (inptr < node->contents + node->nodelen)
1749 copy_input_to_output ((node->contents + node->nodelen) - inptr);
1750
1751 /* Null to terminate buffer. */
1752 if (rewrite_p)
1753 text_buffer_add_string (&output_buf, "\0", 1);
1754
1755 /* Free resources used in character encoding conversion. */
1756 close_conversion ();
1757
1758 node->references = refs;
1759
1760 if (rewrite_p)
1761 {
1762 node->contents = text_buffer_base (&output_buf);
1763 node->flags |= N_WasRewritten;
1764
1765 /* output_buf.off is the offset of the next character to be
1766 written. Subtracting 1 gives the offset of our terminating
1767 null, that is, the length. */
1768 node->nodelen = text_buffer_off (&output_buf) - 1;
1769 }
1770 else if (fb && tag_ptr)
1771 {
1772 /* Set nodestart_adjusted for all of the anchors in this node. */
1773 tag_ptr++;
1774 while (*tag_ptr && (*tag_ptr)->cache.nodelen == 0)
1775 {
1776 (*tag_ptr)->nodestart_adjusted = (*tag_ptr)->nodestart
1777 - output_bytes_difference;
1778 tag_ptr++;
1779 }
1780 }
1781 }
1782
1783
1784 /* Various utility functions */
1785
1786 /* Return the file buffer which belongs to WINDOW's node. */
1787 FILE_BUFFER *
file_buffer_of_window(WINDOW * window)1788 file_buffer_of_window (WINDOW *window)
1789 {
1790 /* If this window has no node, then it has no file buffer. */
1791 if (!window->node)
1792 return NULL;
1793
1794 if (window->node->fullpath)
1795 return info_find_file (window->node->fullpath);
1796
1797 return NULL;
1798 }
1799
1800 /* Return "(FILENAME)NODENAME" for NODE, or just "NODENAME" if NODE's
1801 filename is not set. Return value should not be freed. */
1802 char *
node_printed_rep(NODE * node)1803 node_printed_rep (NODE *node)
1804 {
1805 static char *rep;
1806
1807 if (node->fullpath)
1808 {
1809 char *filename = filename_non_directory (node->fullpath);
1810 rep = xrealloc (rep, 1 + strlen (filename) + 1 + strlen (node->nodename) + 1);
1811 sprintf (rep, "(%s)%s", filename, node->nodename);
1812 return rep;
1813 }
1814 else
1815 return node->nodename;
1816 }
1817
1818
/* Return a pointer to the part of PATHNAME that simply defines the file:
   the text after the last character for which IS_SLASH is true.  If
   HAVE_DRIVE (PATHNAME) is true, the search does not go back into the
   first two characters.  The result points into PATHNAME itself. */
char *
filename_non_directory (char *pathname)
{
  char *floor = pathname;                      /* Earliest possible result. */
  char *base = pathname + strlen (pathname);   /* Starts on the final null. */

  if (HAVE_DRIVE (pathname))
    floor += 2;

  /* Walk backwards until just after a slash, or until the floor. */
  while (base > floor && !IS_SLASH (base[-1]))
    base--;

  return base;
}
1833
1834 /* Return non-zero if NODE is one especially created by Info. */
1835 int
internal_info_node_p(NODE * node)1836 internal_info_node_p (NODE *node)
1837 {
1838 return (node != NULL) && (node->flags & N_IsInternal);
1839 }
1840
1841 /* Make NODE appear to be one especially created by Info. */
1842 void
name_internal_node(NODE * node,char * name)1843 name_internal_node (NODE *node, char *name)
1844 {
1845 if (!node)
1846 return;
1847
1848 node->fullpath = "";
1849 node->subfile = 0;
1850 node->nodename = name;
1851 node->flags |= N_IsInternal;
1852 }
1853
1854 /* Return the window displaying NAME, the name of an internally created
1855 Info window. */
1856 WINDOW *
get_internal_info_window(char * name)1857 get_internal_info_window (char *name)
1858 {
1859 WINDOW *win;
1860
1861 for (win = windows; win; win = win->next)
1862 if (internal_info_node_p (win->node) &&
1863 (strcmp (win->node->nodename, name) == 0))
1864 break;
1865
1866 return win;
1867 }
1868
1869 /* Flexible Text Buffer */
1870
1871 void
text_buffer_init(struct text_buffer * buf)1872 text_buffer_init (struct text_buffer *buf)
1873 {
1874 memset (buf, 0, sizeof *buf);
1875 }
1876
1877 void
text_buffer_free(struct text_buffer * buf)1878 text_buffer_free (struct text_buffer *buf)
1879 {
1880 free (buf->base);
1881 }
1882
1883 size_t
text_buffer_vprintf(struct text_buffer * buf,const char * format,va_list ap)1884 text_buffer_vprintf (struct text_buffer *buf, const char *format, va_list ap)
1885 {
1886 ssize_t n;
1887 va_list ap_copy;
1888
1889 if (!buf->base)
1890 {
1891 if (buf->size == 0)
1892 buf->size = MIN_TEXT_BUF_ALLOC; /* Initial allocation */
1893
1894 buf->base = xmalloc (buf->size);
1895 }
1896
1897 for (;;)
1898 {
1899 va_copy (ap_copy, ap);
1900 n = vsnprintf (buf->base + buf->off, buf->size - buf->off,
1901 format, ap_copy);
1902 va_end (ap_copy);
1903 if (n < 0 || buf->off + n >= buf->size ||
1904 !memchr (buf->base + buf->off, '\0', buf->size - buf->off + 1))
1905 {
1906 size_t newlen = buf->size * 2;
1907 if (newlen < buf->size)
1908 xalloc_die ();
1909 buf->size = newlen;
1910 buf->base = xrealloc (buf->base, buf->size);
1911 }
1912 else
1913 {
1914 buf->off += n;
1915 break;
1916 }
1917 }
1918 return n;
1919 }
1920
1921 /* Make sure there are LEN free bytes at end of BUF. */
1922 void
text_buffer_alloc(struct text_buffer * buf,size_t len)1923 text_buffer_alloc (struct text_buffer *buf, size_t len)
1924 {
1925 if (buf->off + len > buf->size)
1926 {
1927 buf->size = buf->off + len;
1928 if (buf->size < MIN_TEXT_BUF_ALLOC)
1929 buf->size = MIN_TEXT_BUF_ALLOC;
1930 buf->base = xrealloc (buf->base, buf->size);
1931 }
1932 }
1933
1934 /* Return number of bytes that can be written to text buffer without
1935 reallocating the text buffer. */
1936 size_t
text_buffer_space_left(struct text_buffer * buf)1937 text_buffer_space_left (struct text_buffer *buf)
1938 {
1939 /* buf->size is the offset of the first byte after the allocated space.
1940 buf->off is the offset of the first byte to be written to. */
1941 return buf->size - buf->off;
1942 }
1943
1944 #if HAVE_ICONV
1945
1946 /* Run iconv using text buffer as output buffer. */
1947 size_t
text_buffer_iconv(struct text_buffer * buf,iconv_t iconv_state,ICONV_CONST char ** inbuf,size_t * inbytesleft)1948 text_buffer_iconv (struct text_buffer *buf, iconv_t iconv_state,
1949 ICONV_CONST char **inbuf, size_t *inbytesleft)
1950 {
1951 size_t out_bytes_left;
1952 char *outptr;
1953 size_t iconv_ret;
1954
1955 outptr = text_buffer_base (buf) + text_buffer_off (buf);
1956 out_bytes_left = text_buffer_space_left (buf);
1957 iconv_ret = iconv (iconv_state, inbuf, inbytesleft,
1958 &outptr, &out_bytes_left);
1959
1960 text_buffer_off (buf) = outptr - text_buffer_base (buf);
1961
1962 return iconv_ret;
1963 }
1964
1965 #endif /* HAVE_ICONV */
1966
1967 size_t
text_buffer_add_string(struct text_buffer * buf,const char * str,size_t len)1968 text_buffer_add_string (struct text_buffer *buf, const char *str, size_t len)
1969 {
1970 text_buffer_alloc (buf, len);
1971 memcpy (buf->base + buf->off, str, len);
1972 buf->off += len;
1973 return len;
1974 }
1975
1976 size_t
text_buffer_fill(struct text_buffer * buf,int c,size_t len)1977 text_buffer_fill (struct text_buffer *buf, int c, size_t len)
1978 {
1979 char *p;
1980 int i;
1981
1982 text_buffer_alloc (buf, len);
1983
1984 for (i = 0, p = buf->base + buf->off; i < len; i++)
1985 *p++ = c;
1986 buf->off += len;
1987
1988 return len;
1989 }
1990
/* Append the single byte C, converted to char, to BUF. */
void
text_buffer_add_char (struct text_buffer *buf, int c)
{
  char byte[1];

  byte[0] = c;
  text_buffer_add_string (buf, byte, 1);
}
1997
/* Append text formatted according to FORMAT and the arguments following
   it to BUF.  This is the variadic front end of text_buffer_vprintf, whose
   result is returned. */
size_t
text_buffer_printf (struct text_buffer *buf, const char *format, ...)
{
  va_list args;
  size_t written;

  va_start (args, format);
  written = text_buffer_vprintf (buf, format, args);
  va_end (args);

  return written;
}
2009
2010 #if defined(__MSDOS__) || defined(__MINGW32__)
/* Compare the file names FN1 and FN2 without regard to case, treating any
   two characters for which IS_SLASH is true as equal.  Return 0 if the
   names match, otherwise the difference between the lower-cased values of
   the first characters that differ.

   Cannot use FILENAME_CMP here, since that does not consider forward-
   and back-slash characters equal. */
int
fncmp (const char *fn1, const char *fn2)
{
  /* Characters are read as unsigned char: passing a negative char value
     to tolower is undefined. */
  const unsigned char *s1 = (const unsigned char *) fn1;
  const unsigned char *s2 = (const unsigned char *) fn2;

  while (tolower (*s1) == tolower (*s2)
         || (IS_SLASH (*s1) && IS_SLASH (*s2)))
    {
      /* Both strings ended together. */
      if (*s1 == 0)
        return 0;
      s1++;
      s2++;
    }

  return tolower (*s1) - tolower (*s2);
}
2029 #endif
2030
/* One entry in a singly linked list of names, as built by
   info_namelist_add. */
struct info_namelist_entry
{
  /* The entry that follows this one, or a null pointer. */
  struct info_namelist_entry *next;

  /* The name, as a null-terminated string.  Only one byte is declared;
     info_namelist_add allocates each entry with enough additional space
     after the structure to hold the whole name. */
  char name[1];
};
2036
2037 int
info_namelist_add(struct info_namelist_entry ** ptop,const char * name)2038 info_namelist_add (struct info_namelist_entry **ptop, const char *name)
2039 {
2040 struct info_namelist_entry *p;
2041
2042 for (p = *ptop; p; p = p->next)
2043 if (fncmp (p->name, name) == 0)
2044 return 1;
2045
2046 p = xmalloc (sizeof (*p) + strlen (name));
2047 strcpy (p->name, name);
2048 p->next = *ptop;
2049 *ptop = p;
2050 return 0;
2051 }
2052
2053 void
info_namelist_free(struct info_namelist_entry * top)2054 info_namelist_free (struct info_namelist_entry *top)
2055 {
2056 while (top)
2057 {
2058 struct info_namelist_entry *next = top->next;
2059 free (top);
2060 top = next;
2061 }
2062 }
2063
2064