1 /* xgettext JavaScript backend.
2 Copyright (C) 2002-2003, 2005-2009, 2013-2014, 2018-2020 Free Software Foundation, Inc.
3
4 This file was written by Andreas Stricker <andy@knitter.ch>, 2010
5 It's based on x-python from Bruno Haible.
6
7 This program is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program. If not, see <https://www.gnu.org/licenses/>. */
19
20 #ifdef HAVE_CONFIG_H
21 # include "config.h"
22 #endif
23
24 /* Specification. */
25 #include "x-javascript.h"
26
27 #include <assert.h>
28 #include <errno.h>
29 #include <stdbool.h>
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33
34 #include "message.h"
35 #include "rc-str-list.h"
36 #include "xgettext.h"
37 #include "xg-pos.h"
38 #include "xg-encoding.h"
39 #include "xg-mixed-string.h"
40 #include "xg-arglist-context.h"
41 #include "xg-arglist-callshape.h"
42 #include "xg-arglist-parser.h"
43 #include "xg-message.h"
44 #include "error.h"
45 #include "error-progname.h"
46 #include "progname.h"
47 #include "xerror.h"
48 #include "xvasprintf.h"
49 #include "xalloc.h"
50 #include "c-strstr.h"
51 #include "c-ctype.h"
52 #include "po-charset.h"
53 #include "unistr.h"
54 #include "gettext.h"
55
/* Shorthand for looking up the translation of a user-visible message.  */
#define _(s) gettext (s)

/* The larger of A and B.  Each argument may be evaluated twice, so do not
   pass expressions with side effects.  */
#define max(a,b) ((a) > (b) ? (a) : (b))

/* Number of elements of the array A.  Only meaningful for true arrays, not
   for pointers.  The argument is parenthesized so that an expression
   argument, not just a plain identifier, expands correctly.  */
#define SIZEOF(a) (sizeof (a) / sizeof ((a)[0]))
61
62 /* The JavaScript aka ECMA-Script syntax is defined in ECMA-262
63 specification:
64 <https://www.ecma-international.org/publications/standards/Ecma-262.htm>
65
66 Regarding the XML element support:
67 The earlier standard E4X
68 <https://en.wikipedia.org/wiki/ECMAScript_for_XML>
69 <https://web.archive.org/web/20131104082608/http://www.ecma-international.org/publications/standards/Ecma-357.htm>
70 is no longer widely supported.
71 Instead, nowadays, JSX is widely used.
72 <https://facebook.github.io/jsx/>
73 */
74
75 /* ====================== Keyword set customization. ====================== */
76
/* If true extract all strings.  Set by x_javascript_extract_all ().  */
static bool extract_all = false;

/* Table of keywords registered through x_javascript_keyword ().  It is
   initialized there on first use, while keywords.table is still NULL.  */
static hash_table keywords;
/* True as long as the built-in keywords still have to be registered by
   init_keywords ().  Cleared by x_javascript_keyword (NULL) and by
   init_keywords () itself.  */
static bool default_keywords = true;
82
83
84 void
x_javascript_extract_all()85 x_javascript_extract_all ()
86 {
87 extract_all = true;
88 }
89
90
91 void
x_javascript_keyword(const char * name)92 x_javascript_keyword (const char *name)
93 {
94 if (name == NULL)
95 default_keywords = false;
96 else
97 {
98 const char *end;
99 struct callshape shape;
100 const char *colon;
101
102 if (keywords.table == NULL)
103 hash_init (&keywords, 100);
104
105 split_keywordspec (name, &end, &shape);
106
107 /* The characters between name and end should form a valid C identifier.
108 A colon means an invalid parse in split_keywordspec(). */
109 colon = strchr (name, ':');
110 if (colon == NULL || colon >= end)
111 insert_keyword_callshape (&keywords, name, end - name, &shape);
112 }
113 }
114
115 /* Finish initializing the keywords hash table.
116 Called after argument processing, before each file is processed. */
117 static void
init_keywords()118 init_keywords ()
119 {
120 if (default_keywords)
121 {
122 /* When adding new keywords here, also update the documentation in
123 xgettext.texi! */
124 x_javascript_keyword ("gettext");
125 x_javascript_keyword ("dgettext:2");
126 x_javascript_keyword ("dcgettext:2");
127 x_javascript_keyword ("ngettext:1,2");
128 x_javascript_keyword ("dngettext:2,3");
129 x_javascript_keyword ("pgettext:1c,2");
130 x_javascript_keyword ("dpgettext:2c,3");
131 x_javascript_keyword ("_");
132 default_keywords = false;
133 }
134 }
135
/* Record the "pass-javascript-format" flag for the message arguments of
   the gettext-like functions, one xgettext_record_flag () call per
   function/argument pair.  */
void
init_flag_table_javascript ()
{
  static const char *const flag_specs[] =
    {
      "gettext:1:pass-javascript-format",
      "dgettext:2:pass-javascript-format",
      "dcgettext:2:pass-javascript-format",
      "ngettext:1:pass-javascript-format",
      "ngettext:2:pass-javascript-format",
      "dngettext:2:pass-javascript-format",
      "dngettext:3:pass-javascript-format",
      "pgettext:2:pass-javascript-format",
      "dpgettext:3:pass-javascript-format",
      "_:1:pass-javascript-format"
    };
  size_t i;

  for (i = 0; i < sizeof (flag_specs) / sizeof (flag_specs[0]); i++)
    xgettext_record_flag (flag_specs[i]);
}
150
151
152 /* ======================== Reading of characters. ======================== */
153
/* The input file stream.  Read by phase1_getc ().  */
static FILE *fp;


/* 1. line_number handling.  */

/* Maximum used, roughly a safer MB_LEN_MAX.  */
#define MAX_PHASE1_PUSHBACK 16
/* Stack of bytes pushed back by phase1_ungetc (); the byte pushed last is
   the first one returned by phase1_getc ().  */
static unsigned char phase1_pushback[MAX_PHASE1_PUSHBACK];
/* Number of bytes currently stored in phase1_pushback.  */
static int phase1_pushback_length;
164
165 /* Read the next single byte from the input file. */
166 static int
phase1_getc()167 phase1_getc ()
168 {
169 int c;
170
171 if (phase1_pushback_length)
172 c = phase1_pushback[--phase1_pushback_length];
173 else
174 {
175 c = getc (fp);
176
177 if (c == EOF)
178 {
179 if (ferror (fp))
180 error (EXIT_FAILURE, errno, _("error while reading \"%s\""),
181 real_file_name);
182 return EOF;
183 }
184 }
185
186 if (c == '\n')
187 ++line_number;
188
189 return c;
190 }
191
192 /* Supports MAX_PHASE1_PUSHBACK characters of pushback. */
193 static void
phase1_ungetc(int c)194 phase1_ungetc (int c)
195 {
196 if (c != EOF)
197 {
198 if (c == '\n')
199 --line_number;
200
201 if (phase1_pushback_length == SIZEOF (phase1_pushback))
202 abort ();
203 phase1_pushback[phase1_pushback_length++] = c;
204 }
205 }
206
207
/* Phase 2: Conversion to Unicode.
   For now, we expect JavaScript files to be encoded as UTF-8.
   phase2_getc () additionally handles ASCII input and, when iconv is
   available, other source encodings.  */

/* End-of-file indicator for functions returning an UCS-4 character.  */
#define UEOF -1

/* Kind of construct currently being scanned; passed to
   non_ascii_error_message () when a non-ASCII byte is found in ASCII
   input.  */
static lexical_context_ty lexical_context;

/* Maximum used, length of "<![CDATA[" tag minus one.  */
/* Stack of characters pushed back by phase2_ungetc (); the character pushed
   last is the first one returned by phase2_getc ().  */
static int phase2_pushback[8];
/* Number of characters currently stored in phase2_pushback.  */
static int phase2_pushback_length;
219
220 /* Read the next Unicode UCS-4 character from the input file. */
221 static int
phase2_getc()222 phase2_getc ()
223 {
224 if (phase2_pushback_length)
225 return phase2_pushback[--phase2_pushback_length];
226
227 if (xgettext_current_source_encoding == po_charset_ascii)
228 {
229 int c = phase1_getc ();
230 if (c == EOF)
231 return UEOF;
232 if (!c_isascii (c))
233 {
234 multiline_error (xstrdup (""),
235 xasprintf ("%s\n%s\n",
236 non_ascii_error_message (lexical_context,
237 real_file_name,
238 line_number),
239 _("Please specify the source encoding through --from-code\n")));
240 exit (EXIT_FAILURE);
241 }
242 return c;
243 }
244 else if (xgettext_current_source_encoding != po_charset_utf8)
245 {
246 #if HAVE_ICONV
247 /* Use iconv on an increasing number of bytes. Read only as many bytes
248 through phase1_getc as needed. This is needed to give reasonable
249 interactive behaviour when fp is connected to an interactive tty. */
250 unsigned char buf[MAX_PHASE1_PUSHBACK];
251 size_t bufcount;
252 int c = phase1_getc ();
253 if (c == EOF)
254 return UEOF;
255 buf[0] = (unsigned char) c;
256 bufcount = 1;
257
258 for (;;)
259 {
260 unsigned char scratchbuf[6];
261 const char *inptr = (const char *) &buf[0];
262 size_t insize = bufcount;
263 char *outptr = (char *) &scratchbuf[0];
264 size_t outsize = sizeof (scratchbuf);
265
266 size_t res = iconv (xgettext_current_source_iconv,
267 (ICONV_CONST char **) &inptr, &insize,
268 &outptr, &outsize);
269 /* We expect that a character has been produced if and only if
270 some input bytes have been consumed. */
271 if ((insize < bufcount) != (outsize < sizeof (scratchbuf)))
272 abort ();
273 if (outsize == sizeof (scratchbuf))
274 {
275 /* No character has been produced. Must be an error. */
276 if (res != (size_t)(-1))
277 abort ();
278
279 if (errno == EILSEQ)
280 {
281 /* An invalid multibyte sequence was encountered. */
282 multiline_error (xstrdup (""),
283 xasprintf (_("\
284 %s:%d: Invalid multibyte sequence.\n\
285 Please specify the correct source encoding through --from-code\n"),
286 real_file_name, line_number));
287 exit (EXIT_FAILURE);
288 }
289 else if (errno == EINVAL)
290 {
291 /* An incomplete multibyte character. */
292 int c;
293
294 if (bufcount == MAX_PHASE1_PUSHBACK)
295 {
296 /* An overlong incomplete multibyte sequence was
297 encountered. */
298 multiline_error (xstrdup (""),
299 xasprintf (_("\
300 %s:%d: Long incomplete multibyte sequence.\n\
301 Please specify the correct source encoding through --from-code\n"),
302 real_file_name, line_number));
303 exit (EXIT_FAILURE);
304 }
305
306 /* Read one more byte and retry iconv. */
307 c = phase1_getc ();
308 if (c == EOF)
309 {
310 multiline_error (xstrdup (""),
311 xasprintf (_("\
312 %s:%d: Incomplete multibyte sequence at end of file.\n\
313 Please specify the correct source encoding through --from-code\n"),
314 real_file_name, line_number));
315 exit (EXIT_FAILURE);
316 }
317 if (c == '\n')
318 {
319 multiline_error (xstrdup (""),
320 xasprintf (_("\
321 %s:%d: Incomplete multibyte sequence at end of line.\n\
322 Please specify the correct source encoding through --from-code\n"),
323 real_file_name, line_number - 1));
324 exit (EXIT_FAILURE);
325 }
326 buf[bufcount++] = (unsigned char) c;
327 }
328 else
329 error (EXIT_FAILURE, errno, _("%s:%d: iconv failure"),
330 real_file_name, line_number);
331 }
332 else
333 {
334 size_t outbytes = sizeof (scratchbuf) - outsize;
335 size_t bytes = bufcount - insize;
336 ucs4_t uc;
337
338 /* We expect that one character has been produced. */
339 if (bytes == 0)
340 abort ();
341 if (outbytes == 0)
342 abort ();
343 /* Push back the unused bytes. */
344 while (insize > 0)
345 phase1_ungetc (buf[--insize]);
346 /* Convert the character from UTF-8 to UCS-4. */
347 if (u8_mbtoucr (&uc, scratchbuf, outbytes) < (int) outbytes)
348 {
349 /* scratchbuf contains an out-of-range Unicode character
350 (> 0x10ffff). */
351 multiline_error (xstrdup (""),
352 xasprintf (_("\
353 %s:%d: Invalid multibyte sequence.\n\
354 Please specify the source encoding through --from-code\n"),
355 real_file_name, line_number));
356 exit (EXIT_FAILURE);
357 }
358 return uc;
359 }
360 }
361 #else
362 /* If we don't have iconv(), the only supported values for
363 xgettext_global_source_encoding and thus also for
364 xgettext_current_source_encoding are ASCII and UTF-8. */
365 abort ();
366 #endif
367 }
368 else
369 {
370 /* Read an UTF-8 encoded character. */
371 unsigned char buf[6];
372 unsigned int count;
373 int c;
374 ucs4_t uc;
375
376 c = phase1_getc ();
377 if (c == EOF)
378 return UEOF;
379 buf[0] = c;
380 count = 1;
381
382 if (buf[0] >= 0xc0)
383 {
384 c = phase1_getc ();
385 if (c == EOF)
386 return UEOF;
387 buf[1] = c;
388 count = 2;
389 }
390
391 if (buf[0] >= 0xe0
392 && ((buf[1] ^ 0x80) < 0x40))
393 {
394 c = phase1_getc ();
395 if (c == EOF)
396 return UEOF;
397 buf[2] = c;
398 count = 3;
399 }
400
401 if (buf[0] >= 0xf0
402 && ((buf[1] ^ 0x80) < 0x40)
403 && ((buf[2] ^ 0x80) < 0x40))
404 {
405 c = phase1_getc ();
406 if (c == EOF)
407 return UEOF;
408 buf[3] = c;
409 count = 4;
410 }
411
412 if (buf[0] >= 0xf8
413 && ((buf[1] ^ 0x80) < 0x40)
414 && ((buf[2] ^ 0x80) < 0x40)
415 && ((buf[3] ^ 0x80) < 0x40))
416 {
417 c = phase1_getc ();
418 if (c == EOF)
419 return UEOF;
420 buf[4] = c;
421 count = 5;
422 }
423
424 if (buf[0] >= 0xfc
425 && ((buf[1] ^ 0x80) < 0x40)
426 && ((buf[2] ^ 0x80) < 0x40)
427 && ((buf[3] ^ 0x80) < 0x40)
428 && ((buf[4] ^ 0x80) < 0x40))
429 {
430 c = phase1_getc ();
431 if (c == EOF)
432 return UEOF;
433 buf[5] = c;
434 count = 6;
435 }
436
437 u8_mbtouc (&uc, buf, count);
438 return uc;
439 }
440 }
441
442 /* Supports max (9, UNINAME_MAX + 3) pushback characters. */
443 static void
phase2_ungetc(int c)444 phase2_ungetc (int c)
445 {
446 if (c != UEOF)
447 {
448 if (phase2_pushback_length == SIZEOF (phase2_pushback))
449 abort ();
450 phase2_pushback[phase2_pushback_length++] = c;
451 }
452 }
453
454
455 /* ========================= Accumulating strings. ======================== */
456
457 /* See xg-mixed-string.h for the API. */
458
459
460 /* ======================== Accumulating comments. ======================== */
461
462
/* Accumulating a single comment line.  */

/* Buffer for the comment line currently being read.  It is (re)initialized
   by comment_start (), filled by comment_add () and turned into a string by
   comment_line_end ().  */
static struct mixed_string_buffer comment_buffer;
466
467 static inline void
comment_start()468 comment_start ()
469 {
470 mixed_string_buffer_init (&comment_buffer, lc_comment,
471 logical_file_name, line_number);
472 }
473
474 static inline bool
comment_at_start()475 comment_at_start ()
476 {
477 return mixed_string_buffer_is_empty (&comment_buffer);
478 }
479
480 static inline void
comment_add(int c)481 comment_add (int c)
482 {
483 mixed_string_buffer_append_unicode (&comment_buffer, c);
484 }
485
486 static inline const char *
comment_line_end(size_t chars_to_remove)487 comment_line_end (size_t chars_to_remove)
488 {
489 char *buffer =
490 mixed_string_contents_free1 (mixed_string_buffer_result (&comment_buffer));
491 size_t buflen = strlen (buffer) - chars_to_remove;
492
493 while (buflen >= 1
494 && (buffer[buflen - 1] == ' ' || buffer[buflen - 1] == '\t'))
495 --buflen;
496 buffer[buflen] = '\0';
497 savable_comment_add (buffer);
498 lexical_context = lc_outside;
499 return buffer;
500 }
501
502
/* These are for tracking whether comments count as immediately before
   keyword.  */
/* Line number at which the most recent comment started; set by
   phase3_getc ().  */
static int last_comment_line;
/* Line number of the most recent token that is neither whitespace nor a
   comment; set by phase5_get ().  */
static int last_non_comment_line;
507
508
509 /* ======================== Recognizing comments. ======================== */
510
511
/* Canonicalized encoding name for the current input file.  */
static const char *xgettext_current_file_source_encoding;

#if HAVE_ICONV
/* Converter from xgettext_current_file_source_encoding to UTF-8 (except from
   ASCII or UTF-8, when this conversion is a no-op).  */
static iconv_t xgettext_current_file_source_iconv;
#endif

/* Tracking whether the current line is a continuation line or contains a
   non-blank character.
   Maintained by phase3_getc (): set after a backslash-newline and after any
   character other than space, tab, form feed or newline; cleared at a
   newline and after a comment.  */
static bool continuation_or_nonblank_line;
524
525
526 /* Phase 3: Outside strings, replace backslash-newline with nothing and a
527 comment with nothing. */
528
529 static int
phase3_getc()530 phase3_getc ()
531 {
532 int c;
533
534 for (;;)
535 {
536 c = phase2_getc ();
537 if (c == '\\')
538 {
539 c = phase2_getc ();
540 if (c != '\n')
541 {
542 phase2_ungetc (c);
543 /* This shouldn't happen usually, because "A backslash is
544 illegal elsewhere on a line outside a string literal." */
545 return '\\';
546 }
547 /* Eat backslash-newline. */
548 continuation_or_nonblank_line = true;
549 }
550 else if (c == '/')
551 {
552 c = phase2_getc ();
553 if (c == '/')
554 {
555 /* C++ style comment. */
556 last_comment_line = line_number;
557 comment_start ();
558 for (;;)
559 {
560 c = phase2_getc ();
561 if (c == UEOF || c == '\n')
562 {
563 comment_line_end (0);
564 break;
565 }
566 /* We skip all leading white space, but not EOLs. */
567 if (!(comment_at_start () && (c == ' ' || c == '\t')))
568 comment_add (c);
569 }
570 continuation_or_nonblank_line = false;
571 return c;
572 }
573 else if (c == '*')
574 {
575 /* C style comment. */
576 bool last_was_star = false;
577 last_comment_line = line_number;
578 comment_start ();
579 for (;;)
580 {
581 c = phase2_getc ();
582 if (c == UEOF)
583 break;
584 /* We skip all leading white space, but not EOLs. */
585 if (!(comment_at_start () && (c == ' ' || c == '\t')))
586 comment_add (c);
587 switch (c)
588 {
589 case '\n':
590 comment_line_end (1);
591 comment_start ();
592 last_was_star = false;
593 continue;
594
595 case '*':
596 last_was_star = true;
597 continue;
598 case '/':
599 if (last_was_star)
600 {
601 comment_line_end (2);
602 break;
603 }
604 /* FALLTHROUGH */
605
606 default:
607 last_was_star = false;
608 continue;
609 }
610 break;
611 }
612 continuation_or_nonblank_line = false;
613 }
614 else
615 {
616 phase2_ungetc (c);
617 return '/';
618 }
619 }
620 else
621 {
622 if (c == '\n')
623 continuation_or_nonblank_line = false;
624 else if (!(c == ' ' || c == '\t' || c == '\f'))
625 continuation_or_nonblank_line = true;
626 return c;
627 }
628 }
629 }
630
/* Push the character C back for the next phase3_getc () call.
   Supports only one pushback character.  The character is stored in the
   phase 2 pushback buffer.  */
static void
phase3_ungetc (int c)
{
  phase2_ungetc (c);
}
637
638
639 /* ========================= Accumulating strings. ======================== */
640
/* Return value of phase7_getuc when EOF is reached.  */
#define P7_EOF (-1)
/* Return value of phase7_getuc when the closing quote character is reached,
   or when a non-template string is cut off by a newline.  */
#define P7_STRING_END (-2)
/* Return value of phase7_getuc when, inside a template literal, the start
   of an embedded expression is reached.  */
#define P7_TEMPLATE_START_OF_EXPRESSION (-3) /* ${ */

/* Convert an UTF-16 or UTF-32 code point to a return value that can be
   distinguished from a single-byte return value.  */
#define UNICODE(code) (0x100 + (code))

/* Test a return value of phase7_getuc whether it designates an UTF-16 or
   UTF-32 code point.  */
#define IS_UNICODE(p7_result) ((p7_result) >= 0x100)

/* Extract the UTF-16 or UTF-32 code of a return value that satisfies
   IS_UNICODE.  */
#define UNICODE_VALUE(p7_result) ((p7_result) - 0x100)
657
658
659 /* ========================== Reading of tokens. ========================== */
660
661
/* The kinds of tokens produced by phase 5.
   is_after_expression () classifies every value of this type, so it must
   be extended whenever a value is added here.  */
enum token_type_ty
{
  token_type_eof,
  token_type_start,
  token_type_lparen,            /* ( */
  token_type_rparen,            /* ) */
  token_type_lbrace,            /* { */
  token_type_rbrace,            /* } */
  token_type_comma,             /* , */
  token_type_dot,               /* . */
  token_type_lbracket,          /* [ */
  token_type_rbracket,          /* ] */
  token_type_plus,              /* + */
  token_type_regexp,            /* /.../ */
  token_type_operator,          /* - * / % . < > = ~ ! | & ? : ^ */
  token_type_equal,             /* = */
  token_type_string,            /* "abc", 'abc' */
  token_type_template,          /* `abc` */
  token_type_ltemplate,         /* left part of template: `abc${ */
  token_type_mtemplate,         /* middle part of template: }abc${ */
  token_type_rtemplate,         /* right part of template: }abc` */
  token_type_xml_tag,           /* < or </ */
  token_type_xml_element_start, /* last token of < ... > */
  token_type_xml_element_end,   /* last token of </ ... > */
  token_type_xml_empty_element, /* last token of < ... /> */
  token_type_keyword,           /* return, else */
  token_type_symbol,            /* symbol, number */
  token_type_other              /* misc. operator */
};
typedef enum token_type_ty token_type_ty;
692
/* A token, as returned by phase5_get ().
   Which of the pointer fields are valid depends on the type; free_token ()
   releases exactly those.  */
typedef struct token_ty token_ty;
struct token_ty
{
  token_type_ty type;
  char *string;                  /* for token_type_symbol, token_type_keyword */
  mixed_string_ty *mixed_string; /* for token_type_string, token_type_template */
  refcounted_string_list_ty *comment; /* for token_type_string, token_type_template */
  int line_number;               /* line on which the token was read */
};
702
703
704 /* Free the memory pointed to by a 'struct token_ty'. */
705 static inline void
free_token(token_ty * tp)706 free_token (token_ty *tp)
707 {
708 if (tp->type == token_type_symbol || tp->type == token_type_keyword)
709 free (tp->string);
710 if (tp->type == token_type_string || tp->type == token_type_template)
711 {
712 mixed_string_free (tp->mixed_string);
713 drop_reference (tp->comment);
714 }
715 }
716
717
718 /* JavaScript provides strings with either double or single quotes:
719 "abc" or 'abc' or `abc`
720 Both may contain special sequences after a backslash:
721 \', \", \\, \b, \f, \n, \r, \t, \v
722 Special characters can be entered using hexadecimal escape
723 sequences or deprecated octal escape sequences:
724 \xXX, \OOO
725 Any unicode point can be entered using Unicode escape sequences:
726 \uNNNN
727 If a sequence after a backslash is not a legitimate character
728 escape sequence, the character value is the sequence itself without
729 a backslash. For example, \xxx is treated as xxx. */
730
731 static int
phase7_getuc(int quote_char)732 phase7_getuc (int quote_char)
733 {
734 int c;
735
736 for (;;)
737 {
738 /* Use phase 2, because phase 3 elides comments. */
739 c = phase2_getc ();
740
741 if (c == UEOF)
742 return P7_EOF;
743
744 if (c == quote_char)
745 return P7_STRING_END;
746
747 if (c == '$' && quote_char == '`')
748 {
749 int c1 = phase2_getc ();
750
751 if (c1 == '{')
752 return P7_TEMPLATE_START_OF_EXPRESSION;
753 phase2_ungetc (c1);
754 }
755
756 if (c == '\n')
757 {
758 if (quote_char == '`')
759 return UNICODE ('\n');
760 else
761 {
762 phase2_ungetc (c);
763 error_with_progname = false;
764 error (0, 0, _("%s:%d: warning: unterminated string"),
765 logical_file_name, line_number);
766 error_with_progname = true;
767 return P7_STRING_END;
768 }
769 }
770
771 if (c == '\r' && quote_char == '`')
772 {
773 /* Line terminators inside template literals are normalized to \n,
774 says <http://exploringjs.com/es6/ch_template-literals.html>. */
775 int c1 = phase2_getc ();
776
777 if (c1 == '\n')
778 return UNICODE ('\n');
779 phase2_ungetc (c1);
780 }
781
782 if (c != '\\')
783 return UNICODE (c);
784
785 /* Dispatch according to the character following the backslash. */
786 c = phase2_getc ();
787 if (c == UEOF)
788 return P7_EOF;
789
790 switch (c)
791 {
792 case '\n':
793 continue;
794 case 'b':
795 return UNICODE ('\b');
796 case 'f':
797 return UNICODE ('\f');
798 case 'n':
799 return UNICODE ('\n');
800 case 'r':
801 return UNICODE ('\r');
802 case 't':
803 return UNICODE ('\t');
804 case 'v':
805 return UNICODE ('\v');
806 case '0': case '1': case '2': case '3': case '4':
807 case '5': case '6': case '7':
808 {
809 int n = c - '0';
810
811 c = phase2_getc ();
812 if (c != UEOF)
813 {
814 if (c >= '0' && c <= '7')
815 {
816 n = (n << 3) + (c - '0');
817 c = phase2_getc ();
818 if (c != UEOF)
819 {
820 if (c >= '0' && c <= '7')
821 n = (n << 3) + (c - '0');
822 else
823 phase2_ungetc (c);
824 }
825 }
826 else
827 phase2_ungetc (c);
828 }
829 return UNICODE (n);
830 }
831 case 'x':
832 {
833 int c1 = phase2_getc ();
834 int n1;
835
836 if (c1 >= '0' && c1 <= '9')
837 n1 = c1 - '0';
838 else if (c1 >= 'A' && c1 <= 'F')
839 n1 = c1 - 'A' + 10;
840 else if (c1 >= 'a' && c1 <= 'f')
841 n1 = c1 - 'a' + 10;
842 else
843 n1 = -1;
844
845 if (n1 >= 0)
846 {
847 int c2 = phase2_getc ();
848 int n2;
849
850 if (c2 >= '0' && c2 <= '9')
851 n2 = c2 - '0';
852 else if (c2 >= 'A' && c2 <= 'F')
853 n2 = c2 - 'A' + 10;
854 else if (c2 >= 'a' && c2 <= 'f')
855 n2 = c2 - 'a' + 10;
856 else
857 n2 = -1;
858
859 if (n2 >= 0)
860 {
861 int n = (n1 << 4) + n2;
862 return UNICODE (n);
863 }
864
865 phase2_ungetc (c2);
866 }
867 phase2_ungetc (c1);
868 return UNICODE (c);
869 }
870 case 'u':
871 {
872 unsigned char buf[4];
873 unsigned int n = 0;
874 int i;
875
876 for (i = 0; i < 4; i++)
877 {
878 int c1 = phase2_getc ();
879
880 if (c1 >= '0' && c1 <= '9')
881 n = (n << 4) + (c1 - '0');
882 else if (c1 >= 'A' && c1 <= 'F')
883 n = (n << 4) + (c1 - 'A' + 10);
884 else if (c1 >= 'a' && c1 <= 'f')
885 n = (n << 4) + (c1 - 'a' + 10);
886 else
887 {
888 phase2_ungetc (c1);
889 while (--i >= 0)
890 phase2_ungetc (buf[i]);
891 return UNICODE (c);
892 }
893
894 buf[i] = c1;
895 }
896 return UNICODE (n);
897 }
898 default:
899 return UNICODE (c);
900 }
901 }
902 }
903
904
/* Combine characters into tokens.  Discard whitespace except newlines at
   the end of logical lines.  */

/* Stack of pushed-back tokens; phase5_get () returns the token pushed last
   before reading new input.  */
static token_ty phase5_pushback[2];
/* Number of tokens currently stored in phase5_pushback.  */
static int phase5_pushback_length;

/* Type of the token most recently returned by phase5_get ().  */
static token_type_ty last_token_type;
912
913 /* Returns true if last_token_type indicates that we have just seen the
914 possibly last token of an expression. In this case, '<', '>', and '/'
915 need to be interpreted as operators, rather than as XML markup or start
916 of a regular expression. */
917 static bool
is_after_expression(void)918 is_after_expression (void)
919 {
920 switch (last_token_type)
921 {
922 case token_type_rparen:
923 case token_type_rbrace:
924 case token_type_rbracket:
925 case token_type_regexp:
926 case token_type_string:
927 case token_type_template:
928 case token_type_rtemplate:
929 case token_type_xml_element_end:
930 case token_type_xml_empty_element:
931 case token_type_symbol:
932 return true;
933
934 case token_type_eof:
935 case token_type_start:
936 case token_type_lparen:
937 case token_type_lbrace:
938 case token_type_comma:
939 case token_type_dot:
940 case token_type_lbracket:
941 case token_type_plus:
942 case token_type_operator:
943 case token_type_equal:
944 case token_type_ltemplate:
945 case token_type_mtemplate:
946 case token_type_xml_tag:
947 case token_type_xml_element_start:
948 case token_type_keyword:
949 case token_type_other:
950 return false;
951
952 default:
953 abort ();
954 }
955 }
956
957 static void
phase5_scan_regexp(void)958 phase5_scan_regexp (void)
959 {
960 int c;
961
962 /* Scan for end of RegExp literal ('/'). */
963 for (;;)
964 {
965 /* Must use phase2 as there can't be comments. */
966 c = phase2_getc ();
967 if (c == '/')
968 break;
969 if (c == '\\')
970 {
971 c = phase2_getc ();
972 if (c != UEOF)
973 continue;
974 }
975 if (c == UEOF)
976 {
977 error_with_progname = false;
978 error (0, 0,
979 _("%s:%d: warning: RegExp literal terminated too early"),
980 logical_file_name, line_number);
981 error_with_progname = true;
982 return;
983 }
984 }
985
986 /* Scan for modifier flags (ECMA-262 5th section 15.10.4.1). */
987 c = phase2_getc ();
988 if (!(c == 'g' || c == 'i' || c == 'm'))
989 phase2_ungetc (c);
990 }
991
/* Number of open template literals `...${ */
/* Incremented by phase5_get () when the left part of a template literal
   has been read.  */
static int template_literal_depth;

/* Number of open '{' tokens, at each template literal level.
   The "current" element is brace_depths[template_literal_depth].  */
/* Allocated and grown by new_brace_depth_level ().  */
static int *brace_depths;
/* Number of allocated elements in brace_depths.  */
static size_t brace_depths_alloc;
1000
1001 /* Adds a new brace_depths level after template_literal_depth was
1002 incremented. */
1003 static void
new_brace_depth_level(void)1004 new_brace_depth_level (void)
1005 {
1006 if (template_literal_depth == brace_depths_alloc)
1007 {
1008 brace_depths_alloc = 2 * brace_depths_alloc + 1;
1009 /* Now template_literal_depth < brace_depths_alloc. */
1010 brace_depths =
1011 (int *) xrealloc (brace_depths, brace_depths_alloc * sizeof (int));
1012 }
1013 brace_depths[template_literal_depth] = 0;
1014 }
1015
/* Number of open XML elements.  */
static int xml_element_depth;
/* NOTE(review): presumably true while scanning JavaScript code embedded in
   XML markup; the code that sets and reads it is outside this part of the
   file - confirm there.  */
static bool inside_embedded_js_in_xml;
1019
/* Try to skip one piece of XML markup: a comment (!-- ... -->), a CDATA
   section (![CDATA[ ... ]]>) or a processing instruction (? ... ?>).
   The start markers do not include a '<', so presumably the caller has
   already consumed it - confirm against the caller.
   Returns true if one of the start markers was found and the markup was
   skipped up to and including the '>' that follows its end marker.
   Returns false
     - if no start marker matches; the characters read while trying are
       then pushed back,
     - after a warning, if an end marker is found that is not directly
       followed by '>',
     - after a warning, if the end of the input is reached.
   TP is not used by this function.  */
static bool
phase5_scan_xml_markup (token_ty *tp)
{
  struct
  {
    const char *start;
    const char *end;
  } markers[] =
      {
        { "!--", "--" },
        { "![CDATA[", "]]" },
        { "?", "?" }
      };
  int i;

  for (i = 0; i < SIZEOF (markers); i++)
    {
      const char *start = markers[i].start;
      const char *end = markers[i].end;
      int j;

      /* Look for a start marker.  */
      for (j = 0; start[j] != '\0'; j++)
        {
          int c;

          /* Make sure that everything read here can be pushed back.  */
          assert (phase2_pushback_length + j < SIZEOF (phase2_pushback));
          c = phase2_getc ();
          if (c == UEOF)
            goto eof;
          if (c != start[j])
            {
              int k = j;

              /* Mismatch: push back the offending character and then the
                 already matched part of the marker, last character first,
                 so that they are read again in the original order.  */
              phase2_ungetc (c);
              k--;

              for (; k >= 0; k--)
                phase2_ungetc (start[k]);
              break;
            }
        }

      if (start[j] == '\0')
        /* Skip until the end marker.  */
        for (;;)
          {
            int c;

            for (j = 0; end[j] != '\0'; j++)
              {
                assert (phase2_pushback_length + 1 < SIZEOF (phase2_pushback));
                c = phase2_getc ();
                if (c == UEOF)
                  goto eof;
                if (c != end[j])
                  {
                    /* Don't push the first character back so the next
                       iteration start from the second character.  */
                    if (j > 0)
                      {
                        int k = j;

                        phase2_ungetc (c);
                        k--;

                        for (; k > 0; k--)
                          phase2_ungetc (end[k]);
                      }
                    break;
                  }
              }

            if (end[j] == '\0')
              {
                /* The end marker was found; it must be followed by '>'.
                   NOTE(review): this also rejects input such as "]]]>" or
                   a lone '?' inside a processing instruction - confirm
                   whether that is intended.  */
                c = phase2_getc ();
                if (c == UEOF)
                  goto eof;
                if (c != '>')
                  {
                    error_with_progname = false;
                    error (0, 0,
                           _("%s:%d: warning: %s is not allowed"),
                           logical_file_name, line_number,
                           end);
                    error_with_progname = true;
                    return false;
                  }
                return true;
              }
          }
    }
  return false;

 eof:
  error_with_progname = false;
  error (0, 0,
         _("%s:%d: warning: unterminated XML markup"),
         logical_file_name, line_number);
  error_with_progname = true;
  return false;
}
1122
1123 static void
phase5_get(token_ty * tp)1124 phase5_get (token_ty *tp)
1125 {
1126 int c;
1127
1128 if (phase5_pushback_length)
1129 {
1130 *tp = phase5_pushback[--phase5_pushback_length];
1131 last_token_type = tp->type;
1132 return;
1133 }
1134
1135 for (;;)
1136 {
1137 tp->line_number = line_number;
1138 c = phase3_getc ();
1139
1140 switch (c)
1141 {
1142 case UEOF:
1143 tp->type = last_token_type = token_type_eof;
1144 return;
1145
1146 case '\n':
1147 if (last_non_comment_line > last_comment_line)
1148 savable_comment_reset ();
1149 /* FALLTHROUGH */
1150 case ' ':
1151 case '\t':
1152 case '\f':
1153 /* Ignore whitespace and comments. */
1154 continue;
1155 }
1156
1157 last_non_comment_line = tp->line_number;
1158
1159 switch (c)
1160 {
1161 case '.':
1162 {
1163 int c1 = phase3_getc ();
1164 phase3_ungetc (c1);
1165 if (!(c1 >= '0' && c1 <= '9'))
1166 {
1167
1168 tp->type = last_token_type = token_type_dot;
1169 return;
1170 }
1171 }
1172 /* FALLTHROUGH */
1173 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1174 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
1175 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1176 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1177 case 'Y': case 'Z':
1178 case '_':
1179 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1180 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1181 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1182 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1183 case 'y': case 'z':
1184 case '0': case '1': case '2': case '3': case '4':
1185 case '5': case '6': case '7': case '8': case '9':
1186 /* Symbol, or part of a number. */
1187 {
1188 static char *buffer;
1189 static int bufmax;
1190 int bufpos;
1191
1192 bufpos = 0;
1193 for (;;)
1194 {
1195 if (bufpos >= bufmax)
1196 {
1197 bufmax = 2 * bufmax + 10;
1198 buffer = xrealloc (buffer, bufmax);
1199 }
1200 buffer[bufpos++] = c;
1201 c = phase3_getc ();
1202 switch (c)
1203 {
1204 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1205 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
1206 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1207 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1208 case 'Y': case 'Z':
1209 case '_':
1210 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1211 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1212 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1213 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1214 case 'y': case 'z':
1215 case '0': case '1': case '2': case '3': case '4':
1216 case '5': case '6': case '7': case '8': case '9':
1217 continue;
1218 default:
1219 phase3_ungetc (c);
1220 break;
1221 }
1222 break;
1223 }
1224 if (bufpos >= bufmax)
1225 {
1226 bufmax = 2 * bufmax + 10;
1227 buffer = xrealloc (buffer, bufmax);
1228 }
1229 buffer[bufpos] = '\0';
1230 tp->string = xstrdup (buffer);
1231 if (strcmp (buffer, "return") == 0
1232 || strcmp (buffer, "else") == 0)
1233 tp->type = last_token_type = token_type_keyword;
1234 else
1235 tp->type = last_token_type = token_type_symbol;
1236 return;
1237 }
1238
1239 case '"': case '\'':
1240 /* Strings. */
1241 {
1242 int quote_char = c;
1243 lexical_context_ty saved_lexical_context = lexical_context;
1244 struct mixed_string_buffer msb;
1245
1246 lexical_context = lc_string;
1247 /* Start accumulating the string. */
1248 mixed_string_buffer_init (&msb, lexical_context,
1249 logical_file_name, line_number);
1250 for (;;)
1251 {
1252 int uc = phase7_getuc (quote_char);
1253
1254 /* Keep line_number in sync. */
1255 msb.line_number = line_number;
1256
1257 if (uc == P7_EOF || uc == P7_STRING_END)
1258 break;
1259
1260 if (IS_UNICODE (uc))
1261 {
1262 assert (UNICODE_VALUE (uc) >= 0
1263 && UNICODE_VALUE (uc) < 0x110000);
1264 mixed_string_buffer_append_unicode (&msb,
1265 UNICODE_VALUE (uc));
1266 }
1267 else
1268 mixed_string_buffer_append_char (&msb, uc);
1269 }
1270 tp->mixed_string = mixed_string_buffer_result (&msb);
1271 tp->comment = add_reference (savable_comment);
1272 lexical_context = saved_lexical_context;
1273 tp->type = last_token_type = token_type_string;
1274 return;
1275 }
1276
1277 case '`':
1278 /* Template literals. */
1279 {
1280 struct mixed_string_buffer msb;
1281
1282 lexical_context = lc_string;
1283 /* Start accumulating the string. */
1284 mixed_string_buffer_init (&msb, lexical_context,
1285 logical_file_name, line_number);
1286 for (;;)
1287 {
1288 int uc = phase7_getuc ('`');
1289
1290 /* Keep line_number in sync. */
1291 msb.line_number = line_number;
1292
1293 if (uc == P7_EOF || uc == P7_STRING_END)
1294 {
1295 tp->mixed_string = mixed_string_buffer_result (&msb);
1296 tp->comment = add_reference (savable_comment);
1297 tp->type = last_token_type = token_type_template;
1298 break;
1299 }
1300
1301 if (uc == P7_TEMPLATE_START_OF_EXPRESSION)
1302 {
1303 mixed_string_buffer_destroy (&msb);
1304 tp->type = last_token_type = token_type_ltemplate;
1305 template_literal_depth++;
1306 new_brace_depth_level ();
1307 break;
1308 }
1309
1310 if (IS_UNICODE (uc))
1311 {
1312 assert (UNICODE_VALUE (uc) >= 0
1313 && UNICODE_VALUE (uc) < 0x110000);
1314 mixed_string_buffer_append_unicode (&msb,
1315 UNICODE_VALUE (uc));
1316 }
1317 else
1318 mixed_string_buffer_append_char (&msb, uc);
1319 }
1320 lexical_context = lc_outside;
1321 return;
1322 }
1323
1324 case '+':
1325 tp->type = last_token_type = token_type_plus;
1326 return;
1327
1328 /* Identify operators. The multiple character ones are simply ignored
1329 * as they are recognized here and are otherwise not relevant. */
1330 case '-': case '*': /* '+' and '/' are not listed here! */
1331 case '%':
1332 case '~': case '!': case '|': case '&': case '^':
1333 case '?': case ':':
1334 tp->type = last_token_type = token_type_operator;
1335 return;
1336
1337 case '=':
1338 tp->type = last_token_type = token_type_equal;
1339 return;
1340
1341 case '<':
1342 {
1343 /* We assume:
1344 - XMLMarkup and XMLElement are not allowed after an expression,
1345 - embedded JavaScript expressions in XML do not recurse.
1346 */
1347 if (xml_element_depth > 0
1348 || (!inside_embedded_js_in_xml
1349 && ! is_after_expression ()))
1350 {
1351 /* Comments, PI, or CDATA. */
1352 if (phase5_scan_xml_markup (tp))
1353 /* BUG: *tp is not filled in here! */
1354 return;
1355 c = phase2_getc ();
1356
1357 if (c == '/')
1358 {
1359 /* Closing tag. */
1360 lexical_context = lc_xml_close_tag;
1361 }
1362 else
1363 {
1364 /* Opening element. */
1365 phase2_ungetc (c);
1366 lexical_context = lc_xml_open_tag;
1367 xml_element_depth++;
1368 }
1369 tp->type = last_token_type = token_type_xml_tag;
1370 }
1371 else
1372 tp->type = last_token_type = token_type_operator;
1373 }
1374 return;
1375
1376 case '>':
1377 if (xml_element_depth > 0 && !inside_embedded_js_in_xml)
1378 {
1379 switch (lexical_context)
1380 {
1381 case lc_xml_open_tag:
1382 lexical_context = lc_xml_content;
1383 tp->type = last_token_type = token_type_xml_element_start;
1384 return;
1385
1386 case lc_xml_close_tag:
1387 if (--xml_element_depth > 0)
1388 lexical_context = lc_xml_content;
1389 else
1390 lexical_context = lc_outside;
1391 tp->type = last_token_type = token_type_xml_element_end;
1392 return;
1393
1394 default:
1395 break;
1396 }
1397 }
1398 tp->type = last_token_type = token_type_operator;
1399 return;
1400
1401 case '/':
1402 if (xml_element_depth > 0 && !inside_embedded_js_in_xml)
1403 {
1404 /* If it appears in an opening tag of an XML element, it's
1405 part of '/>'. */
1406 if (lexical_context == lc_xml_open_tag)
1407 {
1408 c = phase2_getc ();
1409 if (c == '>')
1410 {
1411 if (--xml_element_depth > 0)
1412 lexical_context = lc_xml_content;
1413 else
1414 lexical_context = lc_outside;
1415 tp->type = last_token_type = token_type_xml_empty_element;
1416 return;
1417 }
1418 else
1419 phase2_ungetc (c);
1420 }
1421 }
1422
1423 /* Either a division operator or the start of a regular expression
1424 literal. If the '/' token is spotted after an expression, it's a
1425 division; otherwise it's a regular expression. */
1426 if (is_after_expression ())
1427 tp->type = last_token_type = token_type_operator;
1428 else
1429 {
1430 phase5_scan_regexp ();
1431 tp->type = last_token_type = token_type_regexp;
1432 }
1433 return;
1434
1435 case '{':
1436 if (xml_element_depth > 0 && !inside_embedded_js_in_xml)
1437 inside_embedded_js_in_xml = true;
1438 else
1439 brace_depths[template_literal_depth]++;
1440 tp->type = last_token_type = token_type_lbrace;
1441 return;
1442
1443 case '}':
1444 if (xml_element_depth > 0 && inside_embedded_js_in_xml)
1445 inside_embedded_js_in_xml = false;
1446 else if (brace_depths[template_literal_depth] > 0)
1447 brace_depths[template_literal_depth]--;
1448 else if (template_literal_depth > 0)
1449 {
1450 /* Middle or right part of template literal. */
1451 for (;;)
1452 {
1453 int uc = phase7_getuc ('`');
1454
1455 if (uc == P7_EOF || uc == P7_STRING_END)
1456 {
1457 tp->type = last_token_type = token_type_rtemplate;
1458 template_literal_depth--;
1459 break;
1460 }
1461
1462 if (uc == P7_TEMPLATE_START_OF_EXPRESSION)
1463 {
1464 tp->type = last_token_type = token_type_mtemplate;
1465 break;
1466 }
1467 }
1468 return;
1469 }
1470 tp->type = last_token_type = token_type_rbrace;
1471 return;
1472
1473 case '(':
1474 tp->type = last_token_type = token_type_lparen;
1475 return;
1476
1477 case ')':
1478 tp->type = last_token_type = token_type_rparen;
1479 return;
1480
1481 case ',':
1482 tp->type = last_token_type = token_type_comma;
1483 return;
1484
1485 case '[':
1486 tp->type = last_token_type = token_type_lbracket;
1487 return;
1488
1489 case ']':
1490 tp->type = last_token_type = token_type_rbracket;
1491 return;
1492
1493 default:
1494 /* We could carefully recognize each of the 2 and 3 character
1495 operators, but it is not necessary, as we only need to recognize
1496 gettext invocations. Don't bother. */
1497 tp->type = last_token_type = token_type_other;
1498 return;
1499 }
1500 }
1501 }
1502
1503 /* Supports only one pushback token. */
1504 static void
phase5_unget(token_ty * tp)1505 phase5_unget (token_ty *tp)
1506 {
1507 if (tp->type != token_type_eof)
1508 {
1509 if (phase5_pushback_length == SIZEOF (phase5_pushback))
1510 abort ();
1511 phase5_pushback[phase5_pushback_length++] = *tp;
1512 }
1513 }
1514
1515
1516 /* String concatenation with '+'.
1517 Handling of tagged template literals. */
1518
1519 static void
x_javascript_lex(token_ty * tp)1520 x_javascript_lex (token_ty *tp)
1521 {
1522 phase5_get (tp);
1523 if (tp->type == token_type_string || tp->type == token_type_template)
1524 {
1525 mixed_string_ty *sum = tp->mixed_string;
1526
1527 for (;;)
1528 {
1529 token_ty token2;
1530
1531 phase5_get (&token2);
1532 if (token2.type == token_type_plus)
1533 {
1534 token_ty token3;
1535
1536 phase5_get (&token3);
1537 if (token3.type == token_type_string
1538 || token3.type == token_type_template)
1539 {
1540 sum = mixed_string_concat_free1 (sum, token3.mixed_string);
1541
1542 free_token (&token3);
1543 free_token (&token2);
1544 continue;
1545 }
1546 phase5_unget (&token3);
1547 }
1548 phase5_unget (&token2);
1549 break;
1550 }
1551 tp->mixed_string = sum;
1552 }
1553 else if (tp->type == token_type_symbol)
1554 {
1555 token_ty token2;
1556
1557 phase5_get (&token2);
1558 if (token2.type == token_type_template)
1559 {
1560 /* The value of
1561 tag `abc`
1562 is the value of the function call
1563 tag (["abc"])
1564 We don't know anything about this value. Therefore, don't
1565 let the extractor see this template literal. */
1566 free_token (&token2);
1567 }
1568 else
1569 phase5_unget (&token2);
1570 }
1571 }
1572
1573
1574 /* ========================= Extracting strings. ========================== */
1575
1576
/* Context lookup table.
   Set by extract_javascript from its FLAG_TABLE argument.  extract_balanced
   looks up the name of each symbol token in it, to obtain the context
   iterator that is used if the symbol is followed by a '('.  */
static flag_context_list_table_ty *flag_context_list_table;
1579
1580
1581 /* The file is broken into tokens. Scan the token stream, looking for
1582 a keyword, followed by a left paren, followed by a string. When we
1583 see this sequence, we have something to remember. We assume we are
1584 looking at a valid JavaScript program, and leave the complaints about
1585 the grammar to the compiler.
1586
1587 Normal handling: Look for
1588 keyword ( ... msgid ... )
1589 Plural handling: Look for
1590 keyword ( ... msgid ... msgid_plural ... )
1591
1592 We use recursion because the arguments before msgid or between msgid
1593 and msgid_plural can contain subexpressions of the same form. */
1594
1595
1596 /* Extract messages until the next balanced closing parenthesis or bracket.
1597 Extracted messages are added to MLP.
1598 DELIM can be either token_type_rparen or token_type_rbracket, or
1599 token_type_eof to accept both.
1600 Return true upon eof, false upon closing parenthesis or bracket. */
1601 static bool
extract_balanced(message_list_ty * mlp,token_type_ty delim,flag_context_ty outer_context,flag_context_list_iterator_ty context_iter,struct arglist_parser * argparser)1602 extract_balanced (message_list_ty *mlp,
1603 token_type_ty delim,
1604 flag_context_ty outer_context,
1605 flag_context_list_iterator_ty context_iter,
1606 struct arglist_parser *argparser)
1607 {
1608 /* Current argument number. */
1609 int arg = 1;
1610 /* 0 when no keyword has been seen. 1 right after a keyword is seen. */
1611 int state;
1612 /* Parameters of the keyword just seen. Defined only in state 1. */
1613 const struct callshapes *next_shapes = NULL;
1614 /* Context iterator that will be used if the next token is a '('. */
1615 flag_context_list_iterator_ty next_context_iter =
1616 passthrough_context_list_iterator;
1617 /* Current context. */
1618 flag_context_ty inner_context =
1619 inherited_context (outer_context,
1620 flag_context_list_iterator_advance (&context_iter));
1621
1622 /* Start state is 0. */
1623 state = 0;
1624
1625 for (;;)
1626 {
1627 token_ty token;
1628
1629 x_javascript_lex (&token);
1630 switch (token.type)
1631 {
1632 case token_type_symbol:
1633 {
1634 void *keyword_value;
1635
1636 if (hash_find_entry (&keywords, token.string, strlen (token.string),
1637 &keyword_value)
1638 == 0)
1639 {
1640 next_shapes = (const struct callshapes *) keyword_value;
1641 state = 1;
1642 }
1643 else
1644 state = 0;
1645 }
1646 next_context_iter =
1647 flag_context_list_iterator (
1648 flag_context_list_table_lookup (
1649 flag_context_list_table,
1650 token.string, strlen (token.string)));
1651 free (token.string);
1652 continue;
1653
1654 case token_type_lparen:
1655 if (extract_balanced (mlp, token_type_rparen,
1656 inner_context, next_context_iter,
1657 arglist_parser_alloc (mlp,
1658 state ? next_shapes : NULL)))
1659 {
1660 arglist_parser_done (argparser, arg);
1661 return true;
1662 }
1663 next_context_iter = null_context_list_iterator;
1664 state = 0;
1665 continue;
1666
1667 case token_type_rparen:
1668 if (delim == token_type_rparen || delim == token_type_eof)
1669 {
1670 arglist_parser_done (argparser, arg);
1671 return false;
1672 }
1673 next_context_iter = null_context_list_iterator;
1674 state = 0;
1675 continue;
1676
1677 case token_type_comma:
1678 arg++;
1679 inner_context =
1680 inherited_context (outer_context,
1681 flag_context_list_iterator_advance (
1682 &context_iter));
1683 next_context_iter = passthrough_context_list_iterator;
1684 state = 0;
1685 continue;
1686
1687 case token_type_lbracket:
1688 if (extract_balanced (mlp, token_type_rbracket,
1689 null_context, null_context_list_iterator,
1690 arglist_parser_alloc (mlp, NULL)))
1691 {
1692 arglist_parser_done (argparser, arg);
1693 return true;
1694 }
1695 next_context_iter = null_context_list_iterator;
1696 state = 0;
1697 continue;
1698
1699 case token_type_rbracket:
1700 if (delim == token_type_rbracket || delim == token_type_eof)
1701 {
1702 arglist_parser_done (argparser, arg);
1703 return false;
1704 }
1705 next_context_iter = null_context_list_iterator;
1706 state = 0;
1707 continue;
1708
1709 case token_type_lbrace:
1710 if (extract_balanced (mlp, token_type_rbrace,
1711 null_context, null_context_list_iterator,
1712 arglist_parser_alloc (mlp, NULL)))
1713 {
1714 arglist_parser_done (argparser, arg);
1715 return true;
1716 }
1717 next_context_iter = null_context_list_iterator;
1718 state = 0;
1719 continue;
1720
1721 case token_type_rbrace:
1722 if (delim == token_type_rbrace || delim == token_type_eof)
1723 {
1724 arglist_parser_done (argparser, arg);
1725 return false;
1726 }
1727 next_context_iter = null_context_list_iterator;
1728 state = 0;
1729 continue;
1730
1731 case token_type_string:
1732 case token_type_template:
1733 {
1734 lex_pos_ty pos;
1735
1736 pos.file_name = logical_file_name;
1737 pos.line_number = token.line_number;
1738
1739 if (extract_all)
1740 {
1741 char *string = mixed_string_contents (token.mixed_string);
1742 mixed_string_free (token.mixed_string);
1743 remember_a_message (mlp, NULL, string, true, false,
1744 inner_context, &pos,
1745 NULL, token.comment, true);
1746 }
1747 else
1748 arglist_parser_remember (argparser, arg, token.mixed_string,
1749 inner_context,
1750 pos.file_name, pos.line_number,
1751 token.comment, true);
1752 }
1753 drop_reference (token.comment);
1754 next_context_iter = null_context_list_iterator;
1755 state = 0;
1756 continue;
1757
1758 case token_type_xml_element_start:
1759 if (extract_balanced (mlp, token_type_xml_element_end,
1760 null_context, null_context_list_iterator,
1761 arglist_parser_alloc (mlp, NULL)))
1762 {
1763 arglist_parser_done (argparser, arg);
1764 return true;
1765 }
1766 next_context_iter = null_context_list_iterator;
1767 state = 0;
1768 continue;
1769
1770 case token_type_xml_element_end:
1771 if (delim == token_type_xml_element_end || delim == token_type_eof)
1772 {
1773 arglist_parser_done (argparser, arg);
1774 return false;
1775 }
1776 next_context_iter = null_context_list_iterator;
1777 state = 0;
1778 continue;
1779
1780 case token_type_eof:
1781 arglist_parser_done (argparser, arg);
1782 return true;
1783
1784 case token_type_ltemplate:
1785 case token_type_mtemplate:
1786 case token_type_rtemplate:
1787 case token_type_keyword:
1788 case token_type_start:
1789 case token_type_dot:
1790 case token_type_plus:
1791 case token_type_regexp:
1792 case token_type_operator:
1793 case token_type_equal:
1794 case token_type_xml_tag:
1795 case token_type_xml_empty_element:
1796 case token_type_other:
1797 next_context_iter = null_context_list_iterator;
1798 state = 0;
1799 continue;
1800
1801 default:
1802 abort ();
1803 }
1804 }
1805 }
1806
1807
1808 void
extract_javascript(FILE * f,const char * real_filename,const char * logical_filename,flag_context_list_table_ty * flag_table,msgdomain_list_ty * mdlp)1809 extract_javascript (FILE *f,
1810 const char *real_filename, const char *logical_filename,
1811 flag_context_list_table_ty *flag_table,
1812 msgdomain_list_ty *mdlp)
1813 {
1814 message_list_ty *mlp = mdlp->item[0]->messages;
1815
1816 fp = f;
1817 real_file_name = real_filename;
1818 logical_file_name = xstrdup (logical_filename);
1819 line_number = 1;
1820
1821 phase1_pushback_length = 0;
1822
1823 lexical_context = lc_outside;
1824
1825 phase2_pushback_length = 0;
1826
1827 last_comment_line = -1;
1828 last_non_comment_line = -1;
1829
1830 xgettext_current_file_source_encoding =
1831 (xgettext_global_source_encoding != NULL ? xgettext_global_source_encoding :
1832 po_charset_ascii);
1833 #if HAVE_ICONV
1834 xgettext_current_file_source_iconv = xgettext_global_source_iconv;
1835 #endif
1836
1837 xgettext_current_source_encoding = xgettext_current_file_source_encoding;
1838 #if HAVE_ICONV
1839 xgettext_current_source_iconv = xgettext_current_file_source_iconv;
1840 #endif
1841
1842 continuation_or_nonblank_line = false;
1843
1844 phase5_pushback_length = 0;
1845 last_token_type = token_type_start;
1846
1847 template_literal_depth = 0;
1848 new_brace_depth_level ();
1849 xml_element_depth = 0;
1850 inside_embedded_js_in_xml = false;
1851
1852 flag_context_list_table = flag_table;
1853
1854 init_keywords ();
1855
1856 /* Eat tokens until eof is seen. When extract_balanced returns
1857 due to an unbalanced closing parenthesis, just restart it. */
1858 while (!extract_balanced (mlp, token_type_eof,
1859 null_context, null_context_list_iterator,
1860 arglist_parser_alloc (mlp, NULL)))
1861 ;
1862
1863 fp = NULL;
1864 real_file_name = NULL;
1865 logical_file_name = NULL;
1866 line_number = 0;
1867 }
1868