1 /* xgettext librep backend.
2 Copyright (C) 2001-2003, 2005-2009, 2018-2020 Free Software Foundation, Inc.
3
4 This file was written by Bruno Haible <haible@clisp.cons.org>, 2001.
5
6 This program is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3 of the License, or
9 (at your option) any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this program. If not, see <https://www.gnu.org/licenses/>. */
18
19 #ifdef HAVE_CONFIG_H
20 # include "config.h"
21 #endif
22
23 /* Specification. */
24 #include "x-librep.h"
25
26 #include <errno.h>
27 #include <stdbool.h>
28 #include <stdio.h>
29 #include <stdlib.h>
30 #include <string.h>
31
32 #include "c-ctype.h"
33 #include "message.h"
34 #include "xgettext.h"
35 #include "xg-pos.h"
36 #include "xg-mixed-string.h"
37 #include "xg-arglist-context.h"
38 #include "xg-arglist-callshape.h"
39 #include "xg-arglist-parser.h"
40 #include "xg-message.h"
41 #include "error.h"
42 #include "xalloc.h"
43 #include "mem-hash-map.h"
44 #include "gettext.h"
45
46 #define _(s) gettext(s)
47
48
49 /* Summary of librep syntax:
50 - ';' starts a comment until end of line.
51 - Block comments start with '#|' and end with '|#'.
52 - Numbers are constituted of an optional prefix (#b, #B for binary,
53 #o, #O for octal, #d, #D for decimal, #x, #X for hexadecimal,
54 #e, #E for exact, #i, #I for inexact), an optional sign (+ or -), and
55 the digits.
56 - Characters are written as '?' followed by the character, possibly
57 with an escape sequence, for examples '?a', '?\n', '?\177'.
58 - Strings are delimited by double quotes. Backslash introduces an escape
59 sequence. The following are understood: '\n', '\r', '\f', '\t', '\a',
60 '\\', '\^C', '\012' (octal), '\x12' (hexadecimal).
61 - Symbols: can contain meta-characters - whitespace or any from ()[]'";|\' -
62 if preceded by backslash or enclosed in |...|.
63 - Keywords: written as #:SYMBOL.
64 - () delimit lists.
65 - [] delimit vectors.
66 The reader is implemented in librep-0.14/src/lisp.c. */
67
68
69 /* ====================== Keyword set customization. ====================== */
70
/* If true extract all strings.
   Set by x_librep_extract_all; read_object consults it after reading a
   string literal and, when set, records that literal as a message.  */
static bool extract_all = false;

/* Keyword table.  Created on first use by x_librep_keyword and filled
   through insert_keyword_callshape; read_object looks up the symbol found
   in the function position of a list here.  */
static hash_table keywords;
/* True as long as the built-in default keyword still has to be installed.
   Cleared by x_librep_keyword (NULL) and by init_keywords.  */
static bool default_keywords = true;
76
77
78 void
x_librep_extract_all()79 x_librep_extract_all ()
80 {
81 extract_all = true;
82 }
83
84
85 void
x_librep_keyword(const char * name)86 x_librep_keyword (const char *name)
87 {
88 if (name == NULL)
89 default_keywords = false;
90 else
91 {
92 const char *end;
93 struct callshape shape;
94 const char *colon;
95
96 if (keywords.table == NULL)
97 hash_init (&keywords, 100);
98
99 split_keywordspec (name, &end, &shape);
100
101 /* The characters between name and end should form a valid Lisp
102 symbol. */
103 colon = strchr (name, ':');
104 if (colon == NULL || colon >= end)
105 insert_keyword_callshape (&keywords, name, end - name, &shape);
106 }
107 }
108
109 /* Finish initializing the keywords hash table.
110 Called after argument processing, before each file is processed. */
111 static void
init_keywords()112 init_keywords ()
113 {
114 if (default_keywords)
115 {
116 /* When adding new keywords here, also update the documentation in
117 xgettext.texi! */
118 x_librep_keyword ("_");
119 default_keywords = false;
120 }
121 }
122
/* Record the format-string flags of this backend through
   xgettext_record_flag: argument 1 of "_" is marked pass-librep-format and
   argument 2 of "format" is marked librep-format.  */
void
init_flag_table_librep (void)
{
  xgettext_record_flag ("_:1:pass-librep-format");
  xgettext_record_flag ("format:2:librep-format");
}
129
130
131 /* ======================== Reading of characters. ======================== */
132
/* The input file stream.
   Set by extract_librep for the duration of one extraction and reset to
   NULL afterwards; do_getc and do_ungetc operate on it.  */
static FILE *fp;
135
136
137 /* Fetch the next character from the input file. */
138 static int
do_getc()139 do_getc ()
140 {
141 int c = getc (fp);
142
143 if (c == EOF)
144 {
145 if (ferror (fp))
146 error (EXIT_FAILURE, errno,
147 _("error while reading \"%s\""), real_file_name);
148 }
149 else if (c == '\n')
150 line_number++;
151
152 return c;
153 }
154
155 /* Put back the last fetched character, not EOF. */
156 static void
do_ungetc(int c)157 do_ungetc (int c)
158 {
159 if (c == '\n')
160 line_number--;
161 ungetc (c, fp);
162 }
163
164
165 /* ========================== Reading of tokens. ========================== */
166
167
/* A token consists of a sequence of characters.
   The characters are kept in a heap buffer that init_token allocates,
   grow_token enlarges and free_token releases.  The buffer is not
   NUL-terminated; charcount gives its used length.  */
struct token
{
  int allocated;                /* number of allocated chars in 'chars' */
  int charcount;                /* number of used chars in 'chars' */
  char *chars;                  /* the token's constituents */
};
175
176 /* Initialize a 'struct token'. */
177 static inline void
init_token(struct token * tp)178 init_token (struct token *tp)
179 {
180 tp->allocated = 10;
181 tp->chars = XNMALLOC (tp->allocated, char);
182 tp->charcount = 0;
183 }
184
185 /* Free the memory pointed to by a 'struct token'. */
186 static inline void
free_token(struct token * tp)187 free_token (struct token *tp)
188 {
189 free (tp->chars);
190 }
191
192 /* Ensure there is enough room in the token for one more character. */
193 static inline void
grow_token(struct token * tp)194 grow_token (struct token *tp)
195 {
196 if (tp->charcount == tp->allocated)
197 {
198 tp->allocated *= 2;
199 tp->chars = (char *) xrealloc (tp->chars, tp->allocated * sizeof (char));
200 }
201 }
202
/* Read the next token.  If 'first' is given, it points to the first
   character, which has already been read.  Returns true for a symbol,
   false for a number.

   *TP is initialized here with init_token; releasing it with free_token is
   left to the caller.  The character that terminates the token is pushed
   back onto the input.  While the characters are collected, the function
   keeps track of whether they can still form a number.  */
static bool
read_token (struct token *tp, const int *first)
{
  int c;
  /* Variables for speculative number parsing:  */
  /* radix: -1 = nothing decided yet, 0 = cannot be a number,
     1 = a leading '0' was seen and the radix is still open,
     otherwise the radix (2, 8, 10 or 16).  */
  int radix = -1;
  /* Index in tp->chars of the first character after sign and prefix; the
     token counts as a number only if characters exist from there on.  */
  int nfirst = 0;
  bool exact = true;            /* no '.' or exponent seen so far */
  bool rational = false;        /* a '/' has been seen */
  bool exponent = false;        /* an exponent marker has been seen */
  bool had_sign = false;        /* a leading '+' or '-' has been seen */
  bool expecting_prefix = false; /* previous character was a prefix '#' */

  init_token (tp);

  if (first)
    c = *first;
  else
    c = do_getc ();

  for (;; c = do_getc ())
    {
      switch (c)
        {
        case EOF:
          goto done;

        /* Whitespace and syntax characters terminate the token.  */
        case ' ': case '\t': case '\n': case '\f': case '\r':
        case '(': case ')': case '[': case ']':
        case '\'': case '"': case ';': case ',': case '`':
          goto done;

        case '\\':
          /* Backslash quotes the next character; the token is a symbol.  */
          radix = 0;
          c = do_getc ();
          if (c == EOF)
            /* Invalid, but be tolerant.  */
            break;
          grow_token (tp);
          tp->chars[tp->charcount++] = c;
          break;

        case '|':
          /* |...| quotes everything up to the closing bar; the token is a
             symbol.  */
          radix = 0;
          for (;;)
            {
              c = do_getc ();
              if (c == EOF || c == '|')
                break;
              grow_token (tp);
              tp->chars[tp->charcount++] = c;
            }
          break;

        default:
          if (radix != 0)
            {
              if (expecting_prefix)
                {
                  /* The character after '#' selects radix or exactness.  */
                  switch (c)
                    {
                    case 'B': case 'b':
                      radix = 2;
                      break;
                    case 'O': case 'o':
                      radix = 8;
                      break;
                    case 'D': case 'd':
                      radix = 10;
                      break;
                    case 'X': case 'x':
                      radix = 16;
                      break;
                    case 'E': case 'e':
                    case 'I': case 'i':
                      break;
                    default:
                      radix = 0;
                      break;
                    }
                  expecting_prefix = false;
                  nfirst = tp->charcount + 1;
                }
              else if (tp->charcount == nfirst
                       && (c == '+' || c == '-' || c == '#'))
                {
                  /* Sign or prefix marker at the start of the number.  A
                     prefix after a sign is not accepted.  */
                  if (c == '#')
                    {
                      if (had_sign)
                        radix = 0;
                      else
                        expecting_prefix = true;
                    }
                  else
                    had_sign = true;
                  nfirst = tp->charcount + 1;
                }
              else
                {
                  switch (radix)
                    {
                    case -1:
                      /* First significant character.  */
                      if (c == '.')
                        {
                          radix = 10;
                          exact = false;
                        }
                      else if (!(c >= '0' && c <= '9'))
                        radix = 0;
                      else if (c == '0')
                        radix = 1;
                      else
                        radix = 10;
                      break;

                    case 1:
                      /* Character following a leading '0'.  */
                      switch (c)
                        {
                        case 'X': case 'x':
                          radix = 16;
                          nfirst = tp->charcount + 1;
                          break;
                        case '0': case '1': case '2': case '3': case '4':
                        case '5': case '6': case '7':
                          radix = 8;
                          nfirst = tp->charcount;
                          break;
                        case '.': case 'E': case 'e':
                          radix = 10;
                          exact = false;
                          break;
                        case '/':
                          radix = 10;
                          rational = true;
                          break;
                        default:
                          radix = 0;
                          break;
                        }
                      break;

                    default:
                      /* The radix is known; check that C fits.  */
                      switch (c)
                        {
                        case '.':
                          if (exact && radix == 10 && !rational)
                            exact = false;
                          else
                            radix = 0;
                          break;
                        case '/':
                          if (exact && !rational)
                            rational = true;
                          else
                            radix = 0;
                          break;
                        case 'E': case 'e':
                          if (radix == 10)
                            {
                              if (!rational && !exponent)
                                {
                                  exponent = true;
                                  exact = false;
                                }
                              else
                                radix = 0;
                              break;
                            }
                          /* In other radixes 'E'/'e' is checked as a
                             digit below.  */
                          /*FALLTHROUGH*/
                        default:
                          if (exponent && (c == '+' || c == '-'))
                            break;
                          if ((radix <= 10
                               && !(c >= '0' && c <= '0' + radix - 1))
                              || (radix == 16 && !c_isxdigit (c)))
                            radix = 0;
                          break;
                        }
                      break;
                    }
                }
            }
          else
            {
              /* Once the token is known to be a symbol, '#' ends it; the
                 caller deals with what follows.  */
              if (c == '#')
                goto done;
            }
          grow_token (tp);
          tp->chars[tp->charcount++] = c;
        }
    }
 done:
  /* Leave the terminating character for the caller.  */
  if (c != EOF)
    do_ungetc (c);
  if (radix > 0 && nfirst < tp->charcount)
    return false; /* number */
  else
    return true; /* symbol */
}
405
406
407 /* ========================= Accumulating comments ========================= */
408
409
/* Accumulation buffer for the text of the comment line being read.
   'buffer' holds 'bufmax' allocated bytes, of which the first 'buflen' are
   in use.  It is grown by comment_add and comment_line_end and reused from
   one comment line to the next.  */
static char *buffer;
static size_t bufmax;
static size_t buflen;
413
414 static inline void
comment_start()415 comment_start ()
416 {
417 buflen = 0;
418 }
419
420 static inline void
comment_add(int c)421 comment_add (int c)
422 {
423 if (buflen >= bufmax)
424 {
425 bufmax = 2 * bufmax + 10;
426 buffer = xrealloc (buffer, bufmax);
427 }
428 buffer[buflen++] = c;
429 }
430
431 static inline void
comment_line_end(size_t chars_to_remove)432 comment_line_end (size_t chars_to_remove)
433 {
434 buflen -= chars_to_remove;
435 while (buflen >= 1
436 && (buffer[buflen - 1] == ' ' || buffer[buflen - 1] == '\t'))
437 --buflen;
438 if (chars_to_remove == 0 && buflen >= bufmax)
439 {
440 bufmax = 2 * bufmax + 10;
441 buffer = xrealloc (buffer, bufmax);
442 }
443 buffer[buflen] = '\0';
444 savable_comment_add (buffer);
445 }
446
447
/* These are for tracking whether comments count as immediately before
   keyword.
   last_comment_line is the line of the most recent comment and
   last_non_comment_line the line of the most recent other object, both as
   recorded by read_object.  When a newline is read while
   last_non_comment_line > last_comment_line, the saved comments are
   dropped.  Both are reset to -1 by extract_librep.  */
static int last_comment_line;
static int last_non_comment_line;
452
453
454 /* ========================= Accumulating messages ========================= */
455
456
/* The message list that receives the extracted messages.  Set by
   extract_librep to mdlp->item[0]->messages.  */
static message_list_ty *mlp;
458
459
460 /* ============== Reading of objects. See CLHS 2 "Syntax". ============== */
461
462
/* We are only interested in symbols (e.g. the keyword _) and strings.
   Other objects need not be represented precisely.  */
enum object_type
{
  t_symbol,     /* symbol */
  t_string,     /* string */
  t_other,      /* other kind of real object */
  t_dot,        /* '.' pseudo object */
  t_close,      /* ')' or ']' pseudo object */
  t_eof         /* EOF marker */
};
474
/* The result of read_object.  For t_symbol and t_string, 'token' points to
   a heap-allocated token that free_object releases; for the other types it
   is not set.  */
struct object
{
  enum object_type type;
  struct token *token;          /* for t_symbol and t_string */
  int line_number_at_start;     /* for t_string */
};
481
482 /* Free the memory pointed to by a 'struct object'. */
483 static inline void
free_object(struct object * op)484 free_object (struct object *op)
485 {
486 if (op->type == t_symbol || op->type == t_string)
487 {
488 free_token (op->token);
489 free (op->token);
490 }
491 }
492
493 /* Convert a t_symbol/t_string token to a char*. */
494 static char *
string_of_object(const struct object * op)495 string_of_object (const struct object *op)
496 {
497 char *str;
498 int n;
499
500 if (!(op->type == t_symbol || op->type == t_string))
501 abort ();
502 n = op->token->charcount;
503 str = XNMALLOC (n + 1, char);
504 memcpy (str, op->token->chars, n);
505 str[n] = '\0';
506 return str;
507 }
508
/* Context lookup table.  Set by extract_librep from its flag_table
   argument; read_object looks up the symbol in the function position of a
   list here to obtain the flag contexts of the arguments.  */
static flag_context_list_table_ty *flag_context_list_table;
511
/* Returns the character represented by an escape sequence.
   C is the character that followed the backslash; the remaining characters
   of the sequence, if any, are read from the input.
   Recognized are: n, r, f, t, v, a; '^' followed by a character (result is
   that character masked with 0x1f); one to three octal digits; 'x' followed
   by any number of hexadecimal digits.  Numeric values are truncated to
   8 bits.  Any other character is returned unchanged.
   Returns EOF if the input ends right after '^'.  */
static int
do_getc_escaped (int c)
{
  switch (c)
    {
    case 'n':
      return '\n';
    case 'r':
      return '\r';
    case 'f':
      return '\f';
    case 't':
      return '\t';
    case 'v':
      return '\v';
    case 'a':
      return '\a';
    case '^':
      c = do_getc ();
      if (c == EOF)
        return EOF;
      return c & 0x1f;
    case '0': case '1': case '2': case '3': case '4':
    case '5': case '6': case '7':
      {
        int n = c - '0';

        /* At most two more octal digits belong to the sequence.  A
           character that is not an octal digit is pushed back.  */
        c = do_getc ();
        if (c != EOF)
          {
            if (c >= '0' && c <= '7')
              {
                n = (n << 3) + (c - '0');
                c = do_getc ();
                if (c != EOF)
                  {
                    if (c >= '0' && c <= '7')
                      n = (n << 3) + (c - '0');
                    else
                      do_ungetc (c);
                  }
              }
            else
              do_ungetc (c);
          }
        return (unsigned char) n;
      }
    case 'x':
      {
        /* The number of hex digits is unbounded, but only the low 8 bits
           of the value are returned.  Keep just those bits in an unsigned
           accumulator: shifting a signed int without limit would overflow,
           which is undefined behaviour.  */
        unsigned int n = 0;

        for (;;)
          {
            c = do_getc ();
            if (c == EOF)
              break;
            else if (c >= '0' && c <= '9')
              n = ((n << 4) + (unsigned int) (c - '0')) & 0xff;
            else if (c >= 'A' && c <= 'F')
              n = ((n << 4) + (unsigned int) (c - 'A' + 10)) & 0xff;
            else if (c >= 'a' && c <= 'f')
              n = ((n << 4) + (unsigned int) (c - 'a' + 10)) & 0xff;
            else
              {
                /* First character that is not a hex digit: not part of
                   the escape sequence.  */
                do_ungetc (c);
                break;
              }
          }
        return (unsigned char) n;
      }
    default:
      return c;
    }
}
587
/* Read the next object.
   Whitespace and comments in front of the object are skipped; the text of
   comments is passed on through comment_line_end.  On return, op->type
   tells what was read.  For t_symbol and t_string, op->token is freshly
   allocated and has to be released by the caller with free_object.
   Lists are read recursively: the symbol in the function position is
   looked up in the keywords table and in flag_context_list_table, and
   string arguments are handed to the argument list parser.
   OUTER_CONTEXT is the flag context from which the contexts of the
   arguments of a list are derived.  */
static void
read_object (struct object *op, flag_context_ty outer_context)
{
  for (;;)
    {
      int c;

      c = do_getc ();

      switch (c)
        {
        case EOF:
          op->type = t_eof;
          return;

        case '\n':
          /* Comments assumed to be grouped with a message must immediately
             precede it, with no non-whitespace token on a line between
             both.  */
          if (last_non_comment_line > last_comment_line)
            savable_comment_reset ();
          continue;

        case ' ': case '\t': case '\f': case '\r':
          continue;

        case '(':
          {
            int arg = 0;                /* Current argument number.  */
            flag_context_list_iterator_ty context_iter;
            const struct callshapes *shapes = NULL;
            struct arglist_parser *argparser = NULL;

            for (;; arg++)
              {
                struct object inner;
                flag_context_ty inner_context;

                /* context_iter is set while element 0 is processed, so it
                   is only consulted from element 1 on.  */
                if (arg == 0)
                  inner_context = null_context;
                else
                  inner_context =
                    inherited_context (outer_context,
                                       flag_context_list_iterator_advance (
                                         &context_iter));

                read_object (&inner, inner_context);

                /* Recognize end of list.  */
                if (inner.type == t_close)
                  {
                    op->type = t_other;
                    /* Don't bother converting "()" to "NIL".  */
                    last_non_comment_line = line_number;
                    if (argparser != NULL)
                      arglist_parser_done (argparser, arg);
                    return;
                  }

                /* Dots are not allowed in every position.
                   But be tolerant.  */

                /* EOF inside list is illegal.  But be tolerant.  */
                if (inner.type == t_eof)
                  break;

                if (arg == 0)
                  {
                    /* This is the function position.  */
                    if (inner.type == t_symbol)
                      {
                        char *symbol_name = string_of_object (&inner);
                        void *keyword_value;

                        /* shapes stays NULL when the symbol is not a
                           keyword.  */
                        if (hash_find_entry (&keywords,
                                             symbol_name, strlen (symbol_name),
                                             &keyword_value)
                            == 0)
                          shapes = (const struct callshapes *) keyword_value;

                        argparser = arglist_parser_alloc (mlp, shapes);

                        context_iter =
                          flag_context_list_iterator (
                            flag_context_list_table_lookup (
                              flag_context_list_table,
                              symbol_name, strlen (symbol_name)));

                        free (symbol_name);
                      }
                    else
                      context_iter = null_context_list_iterator;
                  }
                else
                  {
                    /* These are the argument positions.  */
                    if (argparser != NULL && inner.type == t_string)
                      {
                        char *s = string_of_object (&inner);
                        mixed_string_ty *ms =
                          mixed_string_alloc_simple (s, lc_string,
                                                     logical_file_name,
                                                     inner.line_number_at_start);
                        free (s);
                        arglist_parser_remember (argparser, arg, ms,
                                                 inner_context,
                                                 logical_file_name,
                                                 inner.line_number_at_start,
                                                 savable_comment, false);
                      }
                  }

                free_object (&inner);
              }

            /* Reached only through the EOF break above.  */
            if (argparser != NULL)
              arglist_parser_done (argparser, arg);
          }
          op->type = t_other;
          last_non_comment_line = line_number;
          return;

        case '[':
          {
            /* The elements of a vector are read and discarded.  */
            for (;;)
              {
                struct object inner;

                read_object (&inner, null_context);

                /* Recognize end of vector.  */
                if (inner.type == t_close)
                  {
                    op->type = t_other;
                    last_non_comment_line = line_number;
                    return;
                  }

                /* Dots are not allowed.  But be tolerant.  */

                /* EOF inside vector is illegal.  But be tolerant.  */
                if (inner.type == t_eof)
                  break;

                free_object (&inner);
              }
          }
          op->type = t_other;
          last_non_comment_line = line_number;
          return;

        case ')': case ']':
          /* Tell the caller about the end of list or vector.
             Unmatched closing parenthesis is illegal.  But be tolerant.  */
          op->type = t_close;
          last_non_comment_line = line_number;
          return;

        case ',':
          {
            int c = do_getc ();
            /* The ,@ handling inside lists is wrong anyway, because
               ,@form expands to an unknown number of elements.  */
            if (c != EOF && c != '@')
              do_ungetc (c);
          }
          /*FALLTHROUGH*/
        case '\'':
        case '`':
          {
            /* The object after the prefix character is read and
               discarded.  */
            struct object inner;

            read_object (&inner, null_context);

            /* Dots and EOF are not allowed here.  But be tolerant.  */

            free_object (&inner);

            op->type = t_other;
            last_non_comment_line = line_number;
            return;
          }

        case ';':
          {
            /* Line comment.  The run of semicolons at its start is not
               part of the comment text.  */
            bool all_semicolons = true;

            last_comment_line = line_number;
            comment_start ();
            for (;;)
              {
                int c = do_getc ();
                if (c == EOF || c == '\n' || c == '\f' || c == '\r')
                  break;
                if (c != ';')
                  all_semicolons = false;
                if (!all_semicolons)
                  {
                    /* We skip all leading white space, but not EOLs.  */
                    if (!(buflen == 0 && (c == ' ' || c == '\t')))
                      comment_add (c);
                  }
              }
            comment_line_end (0);
            continue;
          }

        case '"':
          {
            /* String literal.  The token receives the contents with the
               escape sequences already resolved.  */
            op->token = XMALLOC (struct token);
            init_token (op->token);
            op->line_number_at_start = line_number;
            for (;;)
              {
                int c = do_getc ();
                if (c == EOF)
                  /* Invalid input.  Be tolerant, no error message.  */
                  break;
                if (c == '"')
                  break;
                if (c == '\\')
                  {
                    c = do_getc ();
                    if (c == EOF)
                      /* Invalid input.  Be tolerant, no error message.  */
                      break;
                    if (c == '\n')
                      /* Ignore escaped newline.  */
                      ;
                    else
                      {
                        c = do_getc_escaped (c);
                        if (c == EOF)
                          /* Invalid input.  Be tolerant, no error message.  */
                          break;
                        grow_token (op->token);
                        op->token->chars[op->token->charcount++] = c;
                      }
                  }
                else
                  {
                    grow_token (op->token);
                    op->token->chars[op->token->charcount++] = c;
                  }
              }
            op->type = t_string;

            if (extract_all)
              {
                lex_pos_ty pos;

                pos.file_name = logical_file_name;
                pos.line_number = op->line_number_at_start;
                /* The copy made by string_of_object is passed on and not
                   freed here.  */
                remember_a_message (mlp, NULL, string_of_object (op), false,
                                    false, null_context, &pos,
                                    NULL, savable_comment, false);
              }
            last_non_comment_line = line_number;
            return;
          }

        case '?':
          /* Character literal, possibly written with an escape sequence.
             Its value is not needed.  */
          c = do_getc ();
          if (c == EOF)
            /* Invalid input.  Be tolerant, no error message.  */
            ;
          else if (c == '\\')
            {
              c = do_getc ();
              if (c == EOF)
                /* Invalid input.  Be tolerant, no error message.  */
                ;
              else
                {
                  c = do_getc_escaped (c);
                  if (c == EOF)
                    /* Invalid input.  Be tolerant, no error message.  */
                    ;
                }
            }
          op->type = t_other;
          last_non_comment_line = line_number;
          return;

        case '#':
          /* Dispatch macro handling.  */
          c = do_getc ();
          if (c == EOF)
            /* Invalid input.  Be tolerant, no error message.  */
            {
              op->type = t_other;
              return;
            }

          switch (c)
            {
            case '!':
              /* Only a "#!" in the first two bytes of the file starts a
                 comment.  */
              if (ftell (fp) == 2)
                /* Skip comment until !# */
                {
                  c = do_getc ();
                  for (;;)
                    {
                      if (c == EOF)
                        break;
                      if (c == '!')
                        {
                          c = do_getc ();
                          if (c == EOF || c == '#')
                            break;
                        }
                      else
                        c = do_getc ();
                    }
                  if (c == EOF)
                    {
                      /* EOF not allowed here.  But be tolerant.  */
                      op->type = t_eof;
                      return;
                    }
                  continue;
                }
              /*FALLTHROUGH*/
            case '\'':
            case ':':
              {
                /* The object after the prefix is read and discarded.  */
                struct object inner;
                read_object (&inner, null_context);
                /* Dots and EOF are not allowed here.
                   But be tolerant.  */
                free_object (&inner);
                op->type = t_other;
                last_non_comment_line = line_number;
                return;
              }

            case '[':
            case '(':
              {
                /* The bracket is pushed back, so that the recursive call
                   reads the whole vector or list.  */
                struct object inner;
                do_ungetc (c);
                read_object (&inner, null_context);
                /* Dots and EOF are not allowed here.
                   But be tolerant.  */
                free_object (&inner);
                op->type = t_other;
                last_non_comment_line = line_number;
                return;
              }

            case '|':
              {
                /* Block comment.  'depth' counts the nested "#|" that are
                   still open.  */
                int depth = 0;

                comment_start ();
                c = do_getc ();
                for (;;)
                  {
                    if (c == EOF)
                      break;
                    if (c == '|')
                      {
                        c = do_getc ();
                        if (c == EOF)
                          break;
                        if (c == '#')
                          {
                            if (depth == 0)
                              {
                                comment_line_end (0);
                                break;
                              }
                            depth--;
                            comment_add ('|');
                            comment_add ('#');
                            c = do_getc ();
                          }
                        else
                          comment_add ('|');
                      }
                    else if (c == '#')
                      {
                        c = do_getc ();
                        if (c == EOF)
                          break;
                        comment_add ('#');
                        if (c == '|')
                          {
                            depth++;
                            comment_add ('|');
                            c = do_getc ();
                          }
                      }
                    else
                      {
                        /* We skip all leading white space.  */
                        if (!(buflen == 0 && (c == ' ' || c == '\t')))
                          comment_add (c);
                        if (c == '\n')
                          {
                            /* The newline just added is removed again.  */
                            comment_line_end (1);
                            comment_start ();
                          }
                        c = do_getc ();
                      }
                  }
                if (c == EOF)
                  {
                    /* EOF not allowed here.  But be tolerant.  */
                    op->type = t_eof;
                    return;
                  }
                last_comment_line = line_number;
                continue;
              }

            case '\\':
              {
                /* The following token is read and discarded.  */
                struct token token;
                int first = '\\';
                read_token (&token, &first);
                free_token (&token);
                op->type = t_other;
                last_non_comment_line = line_number;
                return;
              }

            case 'T': case 't':
            case 'F': case 'f':
              op->type = t_other;
              last_non_comment_line = line_number;
              return;

            case 'B': case 'b':
            case 'O': case 'o':
            case 'D': case 'd':
            case 'X': case 'x':
            case 'E': case 'e':
            case 'I': case 'i':
              {
                /* Number with a prefix.  The prefix letter is pushed back
                   and the token is read starting from the '#'.  */
                struct token token;
                do_ungetc (c);
                c = '#';
                read_token (&token, &c);
                free_token (&token);
                op->type = t_other;
                last_non_comment_line = line_number;
                return;
              }

            default:
              /* Invalid input.  Be tolerant, no error message.  */
              op->type = t_other;
              last_non_comment_line = line_number;
              return;
            }

          /*NOTREACHED*/
          abort ();

        default:
          /* Read a token.  */
          {
            bool symbol;

            op->token = XMALLOC (struct token);
            symbol = read_token (op->token, &c);
            /* A lone '.' is reported as the dot pseudo object.  */
            if (op->token->charcount == 1 && op->token->chars[0] == '.')
              {
                free_token (op->token);
                free (op->token);
                op->type = t_dot;
                last_non_comment_line = line_number;
                return;
              }
            /* The value of a number is not needed.  */
            if (!symbol)
              {
                free_token (op->token);
                free (op->token);
                op->type = t_other;
                last_non_comment_line = line_number;
                return;
              }
            /* Distinguish between "foo" and "foo#bar".  */
            c = do_getc ();
            if (c == '#')
              {
                /* "foo#bar": both parts are discarded and the result is
                   not reported as a symbol.  */
                struct token second_token;

                free_token (op->token);
                free (op->token);
                read_token (&second_token, NULL);
                free_token (&second_token);
                op->type = t_other;
                last_non_comment_line = line_number;
                return;
              }
            else
              {
                if (c != EOF)
                  do_ungetc (c);
                op->type = t_symbol;
                last_non_comment_line = line_number;
                return;
              }
          }
        }
    }
}
1098
1099
1100 void
extract_librep(FILE * f,const char * real_filename,const char * logical_filename,flag_context_list_table_ty * flag_table,msgdomain_list_ty * mdlp)1101 extract_librep (FILE *f,
1102 const char *real_filename, const char *logical_filename,
1103 flag_context_list_table_ty *flag_table,
1104 msgdomain_list_ty *mdlp)
1105 {
1106 mlp = mdlp->item[0]->messages;
1107
1108 fp = f;
1109 real_file_name = real_filename;
1110 logical_file_name = xstrdup (logical_filename);
1111 line_number = 1;
1112
1113 last_comment_line = -1;
1114 last_non_comment_line = -1;
1115
1116 flag_context_list_table = flag_table;
1117
1118 init_keywords ();
1119
1120 /* Eat tokens until eof is seen. When read_object returns
1121 due to an unbalanced closing parenthesis, just restart it. */
1122 do
1123 {
1124 struct object toplevel_object;
1125
1126 read_object (&toplevel_object, null_context);
1127
1128 if (toplevel_object.type == t_eof)
1129 break;
1130
1131 free_object (&toplevel_object);
1132 }
1133 while (!feof (fp));
1134
1135 /* Close scanner. */
1136 fp = NULL;
1137 real_file_name = NULL;
1138 logical_file_name = NULL;
1139 line_number = 0;
1140 }
1141