1 /* xgettext Tcl backend.
2    Copyright (C) 2002-2003, 2005-2006 Free Software Foundation, Inc.
3 
4    This file was written by Bruno Haible <haible@clisp.cons.org>, 2002.
5 
6    This program is free software; you can redistribute it and/or modify
7    it under the terms of the GNU General Public License as published by
8    the Free Software Foundation; either version 2, or (at your option)
9    any later version.
10 
11    This program is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14    GNU General Public License for more details.
15 
16    You should have received a copy of the GNU General Public License
17    along with this program; if not, write to the Free Software Foundation,
18    Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
19 
20 #ifdef HAVE_CONFIG_H
21 # include "config.h"
22 #endif
23 
24 #include <assert.h>
25 #include <errno.h>
26 #include <limits.h>
27 #include <stdbool.h>
28 #include <stdio.h>
29 #include <stdlib.h>
30 #include <string.h>
31 
32 #include "message.h"
33 #include "xgettext.h"
34 #include "x-tcl.h"
35 #include "error.h"
36 #include "xalloc.h"
37 #include "exit.h"
38 #include "hash.h"
39 #include "c-ctype.h"
40 #include "po-charset.h"
41 #include "ucs4-utf8.h"
42 #include "gettext.h"
43 
44 #define _(s) gettext(s)
45 
46 #define SIZEOF(a) (sizeof(a) / sizeof(a[0]))
47 
48 
49 /* The Tcl syntax is defined in the Tcl.n manual page.
50    Summary of Tcl syntax:
51    Like sh syntax, except that `...` is replaced with [...]. In detail:
52    - In a preprocessing pass, backslash-newline-anywhitespace is replaced
53      with single space.
54    - Input is broken into words, which are then subject to command
55      substitution [...] , variable substitution $var, backslash substitution
56      \escape.
57    - Strings are enclosed in "..."; command substitution, variable
58      substitution and backslash substitutions are performed here as well.
59    - {...} is a string without substitutions.
60    - The list of resulting words is split into commands by semicolon and
61      newline.
62    - '#' at the beginning of a command introduces a comment until end of line.
63    The parser is implemented in tcl8.3.3/generic/tclParse.c.  */
64 
65 
66 /* ====================== Keyword set customization.  ====================== */
67 
68 /* If true extract all strings.  */
69 static bool extract_all = false;
70 
71 static hash_table keywords;
72 static bool default_keywords = true;
73 
74 
75 void
x_tcl_extract_all()76 x_tcl_extract_all ()
77 {
78   extract_all = true;
79 }
80 
81 
82 void
x_tcl_keyword(const char * name)83 x_tcl_keyword (const char *name)
84 {
85   if (name == NULL)
86     default_keywords = false;
87   else
88     {
89       const char *end;
90       struct callshape shape;
91 
92       if (keywords.table == NULL)
93 	hash_init (&keywords, 100);
94 
95       split_keywordspec (name, &end, &shape);
96 
97       /* The characters between name and end should form a valid Tcl
98 	 function name.  A leading "::" is redundant.  */
99       if (end - name >= 2 && name[0] == ':' && name[1] == ':')
100 	name += 2;
101 
102       insert_keyword_callshape (&keywords, name, end - name, &shape);
103     }
104 }
105 
106 /* Finish initializing the keywords hash table.
107    Called after argument processing, before each file is processed.  */
108 static void
init_keywords()109 init_keywords ()
110 {
111   if (default_keywords)
112     {
113       /* When adding new keywords here, also update the documentation in
114 	 xgettext.texi!  */
115       x_tcl_keyword ("::msgcat::mc");
116       default_keywords = false;
117     }
118 }
119 
120 void
init_flag_table_tcl()121 init_flag_table_tcl ()
122 {
123   xgettext_record_flag ("::msgcat::mc:1:pass-tcl-format");
124   xgettext_record_flag ("format:1:tcl-format");
125 }
126 
127 
128 /* ======================== Reading of characters.  ======================== */
129 
130 /* Real filename, used in error messages about the input file.  */
131 static const char *real_file_name;
132 
133 /* Logical filename and line number, used to label the extracted messages.  */
134 static char *logical_file_name;
135 static int line_number;
136 
137 /* The input file stream.  */
138 static FILE *fp;
139 
140 
141 /* Fetch the next character from the input file.  */
142 static int
do_getc()143 do_getc ()
144 {
145   int c = getc (fp);
146 
147   if (c == EOF)
148     {
149       if (ferror (fp))
150 	error (EXIT_FAILURE, errno, _("\
151 error while reading \"%s\""), real_file_name);
152     }
153   else if (c == '\n')
154    line_number++;
155 
156   return c;
157 }
158 
159 /* Put back the last fetched character, not EOF.  */
160 static void
do_ungetc(int c)161 do_ungetc (int c)
162 {
163   if (c == '\n')
164     line_number--;
165   ungetc (c, fp);
166 }
167 
168 
169 /* Combine backslash followed by newline and additional whitespace to
170    a single space.  */
171 
172 /* An int that becomes a space when casted to 'unsigned char'.  */
173 #define BS_NL (UCHAR_MAX + 1 + ' ')
174 
175 static int phase1_pushback[1];
176 static int phase1_pushback_length;
177 
178 static int
phase1_getc()179 phase1_getc ()
180 {
181   int c;
182 
183   if (phase1_pushback_length)
184     {
185       c = phase1_pushback[--phase1_pushback_length];
186       if (c == '\n' || c == BS_NL)
187 	++line_number;
188       return c;
189     }
190   c = do_getc ();
191   if (c != '\\')
192     return c;
193   c = do_getc ();
194   if (c != '\n')
195     {
196       if (c != EOF)
197 	do_ungetc (c);
198       return '\\';
199     }
200   for (;;)
201     {
202       c = do_getc ();
203       if (!(c == ' ' || c == '\t'))
204 	break;
205     }
206   if (c != EOF)
207     do_ungetc (c);
208   return BS_NL;
209 }
210 
211 /* Supports only one pushback character.  */
212 static void
phase1_ungetc(int c)213 phase1_ungetc (int c)
214 {
215   switch (c)
216     {
217     case EOF:
218       break;
219 
220     case '\n':
221     case BS_NL:
222       --line_number;
223       /* FALLTHROUGH */
224 
225     default:
226       if (phase1_pushback_length == SIZEOF (phase1_pushback))
227 	abort ();
228       phase1_pushback[phase1_pushback_length++] = c;
229       break;
230     }
231 }
232 
233 
234 /* Keep track of brace nesting depth.
235    When a word starts with an opening brace, a character group begins that
236    ends with the corresponding closing brace.  In theory these character
237    groups are string literals, but they are used by so many Tcl primitives
238    (proc, if, ...) as representing command lists, that we treat them as
239    command lists.  */
240 
241 /* An int that becomes a closing brace when casted to 'unsigned char'.  */
242 #define CL_BRACE (UCHAR_MAX + 1 + '}')
243 
244 static int phase2_pushback[2];
245 static int phase2_pushback_length;
246 
247 /* Brace nesting depth inside the current character group.  */
248 static int brace_depth;
249 
250 static int
phase2_push()251 phase2_push ()
252 {
253   int previous_depth = brace_depth;
254   brace_depth = 1;
255   return previous_depth;
256 }
257 
258 static void
phase2_pop(int previous_depth)259 phase2_pop (int previous_depth)
260 {
261   brace_depth = previous_depth;
262 }
263 
264 static int
phase2_getc()265 phase2_getc ()
266 {
267   int c;
268 
269   if (phase2_pushback_length)
270     {
271       c = phase2_pushback[--phase2_pushback_length];
272       if (c == '\n' || c == BS_NL)
273 	++line_number;
274       else if (c == '{')
275 	++brace_depth;
276       else if (c == '}')
277 	--brace_depth;
278       return c;
279     }
280   c = phase1_getc ();
281   if (c == '{')
282     ++brace_depth;
283   else if (c == '}')
284     {
285       if (--brace_depth == 0)
286 	c = CL_BRACE;
287     }
288   return c;
289 }
290 
291 /* Supports 2 characters of pushback.  */
292 static void
phase2_ungetc(int c)293 phase2_ungetc (int c)
294 {
295   if (c != EOF)
296     {
297       switch (c)
298 	{
299 	case '\n':
300 	case BS_NL:
301 	  --line_number;
302 	  break;
303 
304 	case '{':
305 	  --brace_depth;
306 	  break;
307 
308 	case '}':
309 	  ++brace_depth;
310 	  break;
311 	}
312       if (phase2_pushback_length == SIZEOF (phase2_pushback))
313 	abort ();
314       phase2_pushback[phase2_pushback_length++] = c;
315     }
316 }
317 
318 
319 /* ========================== Reading of tokens.  ========================== */
320 
321 
322 /* A token consists of a sequence of characters.  */
323 struct token
324 {
325   int allocated;		/* number of allocated 'token_char's */
326   int charcount;		/* number of used 'token_char's */
327   char *chars;			/* the token's constituents */
328 };
329 
330 /* Initialize a 'struct token'.  */
331 static inline void
init_token(struct token * tp)332 init_token (struct token *tp)
333 {
334   tp->allocated = 10;
335   tp->chars = (char *) xmalloc (tp->allocated * sizeof (char));
336   tp->charcount = 0;
337 }
338 
339 /* Free the memory pointed to by a 'struct token'.  */
340 static inline void
free_token(struct token * tp)341 free_token (struct token *tp)
342 {
343   free (tp->chars);
344 }
345 
346 /* Ensure there is enough room in the token for one more character.  */
347 static inline void
grow_token(struct token * tp)348 grow_token (struct token *tp)
349 {
350   if (tp->charcount == tp->allocated)
351     {
352       tp->allocated *= 2;
353       tp->chars = (char *) xrealloc (tp->chars, tp->allocated * sizeof (char));
354     }
355 }
356 
357 
358 /* ========================= Accumulating comments ========================= */
359 
360 
361 static char *buffer;
362 static size_t bufmax;
363 static size_t buflen;
364 
365 static inline void
comment_start()366 comment_start ()
367 {
368   buflen = 0;
369 }
370 
371 static inline void
comment_add(int c)372 comment_add (int c)
373 {
374   if (buflen >= bufmax)
375     {
376       bufmax = 2 * bufmax + 10;
377       buffer = xrealloc (buffer, bufmax);
378     }
379   buffer[buflen++] = c;
380 }
381 
382 static inline void
comment_line_end()383 comment_line_end ()
384 {
385   while (buflen >= 1
386 	 && (buffer[buflen - 1] == ' ' || buffer[buflen - 1] == '\t'))
387     --buflen;
388   if (buflen >= bufmax)
389     {
390       bufmax = 2 * bufmax + 10;
391       buffer = xrealloc (buffer, bufmax);
392     }
393   buffer[buflen] = '\0';
394   savable_comment_add (buffer);
395 }
396 
397 
398 /* These are for tracking whether comments count as immediately before
399    keyword.  */
400 static int last_comment_line;
401 static int last_non_comment_line;
402 
403 
404 /* ========================= Accumulating messages ========================= */
405 
406 
407 static message_list_ty *mlp;
408 
409 
410 /* ========================== Reading of commands ========================== */
411 
412 
413 /* We are only interested in constant strings (e.g. "msgcat::mc" or other
414    string literals).  Other words need not to be represented precisely.  */
415 enum word_type
416 {
417   t_string,	/* constant string */
418   t_other,	/* other string */
419   t_separator,	/* command separator: semicolon or newline */
420   t_bracket,	/* ']' pseudo word */
421   t_brace,	/* '}' pseudo word */
422   t_eof		/* EOF marker */
423 };
424 
425 struct word
426 {
427   enum word_type type;
428   struct token *token;		/* for t_string */
429   int line_number_at_start;	/* for t_string */
430 };
431 
432 /* Free the memory pointed to by a 'struct word'.  */
433 static inline void
free_word(struct word * wp)434 free_word (struct word *wp)
435 {
436   if (wp->type == t_string)
437     {
438       free_token (wp->token);
439       free (wp->token);
440     }
441 }
442 
443 /* Convert a t_string token to a char*.  */
444 static char *
string_of_word(const struct word * wp)445 string_of_word (const struct word *wp)
446 {
447   char *str;
448   int n;
449 
450   if (!(wp->type == t_string))
451     abort ();
452   n = wp->token->charcount;
453   str = (char *) xmalloc (n + 1);
454   memcpy (str, wp->token->chars, n);
455   str[n] = '\0';
456   return str;
457 }
458 
459 
460 /* Context lookup table.  */
461 static flag_context_list_table_ty *flag_context_list_table;
462 
463 
464 /* Read an escape sequence.  The value is an ISO-8859-1 character (in the
465    range 0x00..0xff) or a Unicode character (in the range 0x0000..0xffff).  */
466 static int
do_getc_escaped()467 do_getc_escaped ()
468 {
469   int c;
470 
471   c = phase1_getc ();
472   switch (c)
473     {
474     case EOF:
475       return '\\';
476     case 'a':
477       return '\a';
478     case 'b':
479       return '\b';
480     case 'f':
481       return '\f';
482     case 'n':
483       return '\n';
484     case 'r':
485       return '\r';
486     case 't':
487       return '\t';
488     case 'v':
489       return '\v';
490     case 'x':
491       {
492 	int n = 0;
493 	unsigned int i;
494 
495 	for (i = 0;; i++)
496 	  {
497 	    c = phase1_getc ();
498 	    if (c == EOF || !c_isxdigit ((unsigned char) c))
499 	      break;
500 
501 	    if (c >= '0' && c <= '9')
502 	      n = (n << 4) + (c - '0');
503 	    else if (c >= 'A' && c <= 'F')
504 	      n = (n << 4) + (c - 'A' + 10);
505 	    else if (c >= 'a' && c <= 'f')
506 	      n = (n << 4) + (c - 'a' + 10);
507 	  }
508 	phase1_ungetc (c);
509 	return (i > 0 ? (unsigned char) n : 'x');
510       }
511     case 'u':
512       {
513 	int n = 0;
514 	unsigned int i;
515 
516 	for (i = 0; i < 4; i++)
517 	  {
518 	    c = phase1_getc ();
519 	    if (c == EOF || !c_isxdigit ((unsigned char) c))
520 	      break;
521 
522 	    if (c >= '0' && c <= '9')
523 	      n = (n << 4) + (c - '0');
524 	    else if (c >= 'A' && c <= 'F')
525 	      n = (n << 4) + (c - 'A' + 10);
526 	    else if (c >= 'a' && c <= 'f')
527 	      n = (n << 4) + (c - 'a' + 10);
528 	  }
529 	phase1_ungetc (c);
530 	return (i > 0 ? n : 'u');
531       }
532     case '0': case '1': case '2': case '3': case '4':
533     case '5': case '6': case '7':
534       {
535 	int n = c - '0';
536 
537 	c = phase1_getc ();
538 	if (c != EOF)
539 	  {
540 	    if (c >= '0' && c <= '7')
541 	      {
542 		n = (n << 3) + (c - '0');
543 		c = phase1_getc ();
544 		if (c != EOF)
545 		  {
546 		    if (c >= '0' && c <= '7')
547 		      n = (n << 3) + (c - '0');
548 		    else
549 		      phase1_ungetc (c);
550 		  }
551 	      }
552 	    else
553 	      phase1_ungetc (c);
554 	  }
555 	return (unsigned char) n;
556       }
557     default:
558       /* Note: If c is non-ASCII, Tcl's behaviour is undefined here.  */
559       return (unsigned char) c;
560     }
561 }
562 
563 
564 enum terminator
565 {
566   te_space_separator,		/* looking for space semicolon newline */
567   te_space_separator_bracket,	/* looking for space semicolon newline ']' */
568   te_paren,			/* looking for ')' */
569   te_quote			/* looking for '"' */
570 };
571 
572 /* Forward declaration of local functions.  */
573 static enum word_type read_command_list (int looking_for,
574 					 flag_context_ty outer_context);
575 
576 /* Accumulate tokens into the given word.
577    'looking_for' denotes a parse terminator combination.
578    Return the first character past the token.  */
579 static int
accumulate_word(struct word * wp,enum terminator looking_for,flag_context_ty context)580 accumulate_word (struct word *wp, enum terminator looking_for,
581 		 flag_context_ty context)
582 {
583   int c;
584 
585   for (;;)
586     {
587       c = phase2_getc ();
588 
589       if (c == EOF || c == CL_BRACE)
590 	return c;
591       if ((looking_for == te_space_separator
592 	   || looking_for == te_space_separator_bracket)
593 	  && (c == ' ' || c == BS_NL
594 	      || c == '\t' || c == '\v' || c == '\f' || c == '\r'
595 	      || c == ';' || c == '\n'))
596 	return c;
597       if (looking_for == te_space_separator_bracket && c == ']')
598 	return c;
599       if (looking_for == te_paren && c == ')')
600 	return c;
601       if (looking_for == te_quote && c == '"')
602 	return c;
603 
604       if (c == '$')
605 	{
606 	  /* Distinguish $varname, ${varname} and lone $.  */
607 	  c = phase2_getc ();
608 	  if (c == '{')
609 	    {
610 	      /* ${varname} */
611 	      do
612 		c = phase2_getc ();
613 	      while (c != EOF && c != '}');
614 	      wp->type = t_other;
615 	    }
616 	  else
617 	    {
618 	      bool nonempty = false;
619 
620 	      for (; c != EOF && c != CL_BRACE; c = phase2_getc ())
621 		{
622 		  if (c_isalnum ((unsigned char) c) || (c == '_'))
623 		    {
624 		      nonempty = true;
625 		      continue;
626 		    }
627 		  if (c == ':')
628 		    {
629 		      c = phase2_getc ();
630 		      if (c == ':')
631 			{
632 			  do
633 			    c = phase2_getc ();
634 			  while (c == ':');
635 
636 			  phase2_ungetc (c);
637 			  nonempty = true;
638 			  continue;
639 			}
640 		      phase2_ungetc (c);
641 		      c = ':';
642 		    }
643 		  break;
644 		}
645 	      if (c == '(')
646 		{
647 		  /* $varname(index) */
648 		  struct word index_word;
649 
650 		  index_word.type = t_other;
651 		  c = accumulate_word (&index_word, te_paren, null_context);
652 		  if (c != EOF && c != ')')
653 		    phase2_ungetc (c);
654 		  wp->type = t_other;
655 		}
656 	      else
657 		{
658 		  phase2_ungetc (c);
659 		  if (nonempty)
660 		    {
661 		      /* $varname */
662 		      wp->type = t_other;
663 		    }
664 		  else
665 		    {
666 		      /* lone $ */
667 		      if (wp->type == t_string)
668 			{
669 			  grow_token (wp->token);
670 			  wp->token->chars[wp->token->charcount++] = '$';
671 			}
672 		    }
673 		}
674 	    }
675 	}
676       else if (c == '[')
677 	{
678 	  read_command_list (']', context);
679 	  wp->type = t_other;
680 	}
681       else if (c == '\\')
682 	{
683 	  unsigned int uc;
684 	  unsigned char utf8buf[6];
685 	  int count;
686 	  int i;
687 
688 	  uc = do_getc_escaped ();
689 	  assert (uc < 0x10000);
690 	  count = u8_uctomb (utf8buf, uc, 6);
691 	  assert (count > 0);
692 	  if (wp->type == t_string)
693 	    for (i = 0; i < count; i++)
694 	      {
695 		grow_token (wp->token);
696 		wp->token->chars[wp->token->charcount++] = utf8buf[i];
697 	      }
698 	}
699       else
700 	{
701 	  if (wp->type == t_string)
702 	    {
703 	      grow_token (wp->token);
704 	      wp->token->chars[wp->token->charcount++] = (unsigned char) c;
705 	    }
706 	}
707     }
708 }
709 
710 
711 /* Read the next word.
712    'looking_for' denotes a parse terminator, either ']' or '\0'.  */
713 static void
read_word(struct word * wp,int looking_for,flag_context_ty context)714 read_word (struct word *wp, int looking_for, flag_context_ty context)
715 {
716   int c;
717 
718   do
719     c = phase2_getc ();
720   while (c == ' ' || c == BS_NL
721 	 || c == '\t' || c == '\v' || c == '\f' || c == '\r');
722 
723   if (c == EOF)
724     {
725       wp->type = t_eof;
726       return;
727     }
728 
729   if (c == CL_BRACE)
730     {
731       wp->type = t_brace;
732       last_non_comment_line = line_number;
733       return;
734     }
735 
736   if (c == '\n')
737     {
738       /* Comments assumed to be grouped with a message must immediately
739 	 precede it, with no non-whitespace token on a line between both.  */
740       if (last_non_comment_line > last_comment_line)
741 	savable_comment_reset ();
742       wp->type = t_separator;
743       return;
744     }
745 
746   if (c == ';')
747     {
748       wp->type = t_separator;
749       last_non_comment_line = line_number;
750       return;
751     }
752 
753   if (looking_for == ']' && c == ']')
754     {
755       wp->type = t_bracket;
756       last_non_comment_line = line_number;
757       return;
758     }
759 
760   if (c == '{')
761     {
762       int previous_depth;
763       enum word_type terminator;
764 
765       /* Start a new nested character group, which lasts until the next
766 	 balanced '}' (ignoring \} things).  */
767       previous_depth = phase2_push () - 1;
768 
769       /* Interpret it as a command list.  */
770       terminator = read_command_list ('\0', null_context);
771 
772       if (terminator == t_brace)
773 	phase2_pop (previous_depth);
774 
775       wp->type = t_other;
776       last_non_comment_line = line_number;
777       return;
778     }
779 
780   wp->type = t_string;
781   wp->token = (struct token *) xmalloc (sizeof (struct token));
782   init_token (wp->token);
783   wp->line_number_at_start = line_number;
784 
785   if (c == '"')
786     {
787       c = accumulate_word (wp, te_quote, context);
788       if (c != EOF && c != '"')
789 	phase2_ungetc (c);
790     }
791   else
792     {
793       phase2_ungetc (c);
794       c = accumulate_word (wp,
795 			   looking_for == ']'
796 			   ? te_space_separator_bracket
797 			   : te_space_separator,
798 			   context);
799       if (c != EOF)
800 	phase2_ungetc (c);
801     }
802 
803   if (wp->type != t_string)
804     {
805       free_token (wp->token);
806       free (wp->token);
807     }
808   last_non_comment_line = line_number;
809 }
810 
811 
812 /* Read the next command.
813    'looking_for' denotes a parse terminator, either ']' or '\0'.
814    Returns the type of the word that terminated the command: t_separator or
815    t_bracket (only if looking_for is ']') or t_brace or t_eof.  */
816 static enum word_type
read_command(int looking_for,flag_context_ty outer_context)817 read_command (int looking_for, flag_context_ty outer_context)
818 {
819   int c;
820 
821   /* Skip whitespace and comments.  */
822   for (;;)
823     {
824       c = phase2_getc ();
825 
826       if (c == ' ' || c == BS_NL
827 	  || c == '\t' || c == '\v' || c == '\f' || c == '\r')
828 	continue;
829       if (c == '#')
830 	{
831 	  /* Skip a comment up to end of line.  */
832 	  last_comment_line = line_number;
833 	  comment_start ();
834 	  for (;;)
835 	    {
836 	      c = phase2_getc ();
837 	      if (c == EOF || c == CL_BRACE || c == '\n')
838 		break;
839 	      /* We skip all leading white space, but not EOLs.  */
840 	      if (!(buflen == 0 && (c == ' ' || c == '\t')))
841 		comment_add (c);
842 	    }
843 	  comment_line_end ();
844 	  continue;
845 	}
846       break;
847     }
848   phase2_ungetc (c);
849 
850   /* Read the words that make up the command.  */
851   {
852     int arg = 0;		/* Current argument number.  */
853     flag_context_list_iterator_ty context_iter;
854     const struct callshapes *shapes = NULL;
855     struct arglist_parser *argparser = NULL;
856 
857     for (;; arg++)
858       {
859 	struct word inner;
860 	flag_context_ty inner_context;
861 
862 	if (arg == 0)
863 	  inner_context = null_context;
864 	else
865 	  inner_context =
866 	    inherited_context (outer_context,
867 			       flag_context_list_iterator_advance (
868 				 &context_iter));
869 
870 	read_word (&inner, looking_for, inner_context);
871 
872 	/* Recognize end of command.  */
873 	if (inner.type == t_separator || inner.type == t_bracket
874 	    || inner.type == t_brace || inner.type == t_eof)
875 	  {
876 	    if (argparser != NULL)
877 	      arglist_parser_done (argparser, arg);
878 	    return inner.type;
879 	  }
880 
881 	if (extract_all)
882 	  {
883 	    if (inner.type == t_string)
884 	      {
885 		lex_pos_ty pos;
886 
887 		pos.file_name = logical_file_name;
888 		pos.line_number = inner.line_number_at_start;
889 		remember_a_message (mlp, NULL, string_of_word (&inner),
890 				    inner_context, &pos, savable_comment);
891 	      }
892 	  }
893 
894 	if (arg == 0)
895 	  {
896 	    /* This is the function position.  */
897 	    if (inner.type == t_string)
898 	      {
899 		char *function_name = string_of_word (&inner);
900 		char *stripped_name;
901 		void *keyword_value;
902 
903 		/* A leading "::" is redundant.  */
904 		stripped_name = function_name;
905 		if (function_name[0] == ':' && function_name[1] == ':')
906 		  stripped_name += 2;
907 
908 		if (hash_find_entry (&keywords,
909 				     stripped_name, strlen (stripped_name),
910 				     &keyword_value)
911 		    == 0)
912 		  shapes = (const struct callshapes *) keyword_value;
913 
914 		argparser = arglist_parser_alloc (mlp, shapes);
915 
916 		context_iter =
917 		  flag_context_list_iterator (
918 		    flag_context_list_table_lookup (
919 		      flag_context_list_table,
920 		      stripped_name, strlen (stripped_name)));
921 
922 		free (function_name);
923 	      }
924 	    else
925 	      context_iter = null_context_list_iterator;
926 	  }
927 	else
928 	  {
929 	    /* These are the argument positions.  */
930 	    if (argparser != NULL && inner.type == t_string)
931 	      arglist_parser_remember (argparser, arg,
932 				       string_of_word (&inner),
933 				       inner_context,
934 				       logical_file_name,
935 				       inner.line_number_at_start,
936 				       savable_comment);
937 	  }
938 
939 	free_word (&inner);
940       }
941   }
942 }
943 
944 
945 /* Read a list of commands.
946    'looking_for' denotes a parse terminator, either ']' or '\0'.
947    Returns the type of the word that terminated the command list:
948    t_bracket (only if looking_for is ']') or t_brace or t_eof.  */
949 static enum word_type
read_command_list(int looking_for,flag_context_ty outer_context)950 read_command_list (int looking_for, flag_context_ty outer_context)
951 {
952   for (;;)
953     {
954       enum word_type terminator;
955 
956       terminator = read_command (looking_for, outer_context);
957       if (terminator != t_separator)
958 	return terminator;
959     }
960 }
961 
962 
963 void
extract_tcl(FILE * f,const char * real_filename,const char * logical_filename,flag_context_list_table_ty * flag_table,msgdomain_list_ty * mdlp)964 extract_tcl (FILE *f,
965 	     const char *real_filename, const char *logical_filename,
966 	     flag_context_list_table_ty *flag_table,
967 	     msgdomain_list_ty *mdlp)
968 {
969   mlp = mdlp->item[0]->messages;
970 
971   /* We convert our strings to UTF-8 encoding.  */
972   xgettext_current_source_encoding = po_charset_utf8;
973 
974   fp = f;
975   real_file_name = real_filename;
976   logical_file_name = xstrdup (logical_filename);
977   line_number = 1;
978 
979   /* Initially, no brace is open.  */
980   brace_depth = 1000000;
981 
982   last_comment_line = -1;
983   last_non_comment_line = -1;
984 
985   flag_context_list_table = flag_table;
986 
987   init_keywords ();
988 
989   /* Eat tokens until eof is seen.  */
990   read_command_list ('\0', null_context);
991 
992   fp = NULL;
993   real_file_name = NULL;
994   logical_file_name = NULL;
995   line_number = 0;
996 }
997