1 /* xgettext librep backend.
2    Copyright (C) 2001-2003, 2005-2009, 2018-2020 Free Software Foundation, Inc.
3 
4    This file was written by Bruno Haible <haible@clisp.cons.org>, 2001.
5 
6    This program is free software: you can redistribute it and/or modify
7    it under the terms of the GNU General Public License as published by
8    the Free Software Foundation; either version 3 of the License, or
9    (at your option) any later version.
10 
11    This program is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14    GNU General Public License for more details.
15 
16    You should have received a copy of the GNU General Public License
17    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
18 
19 #ifdef HAVE_CONFIG_H
20 # include "config.h"
21 #endif
22 
23 /* Specification.  */
24 #include "x-librep.h"
25 
26 #include <errno.h>
27 #include <stdbool.h>
28 #include <stdio.h>
29 #include <stdlib.h>
30 #include <string.h>
31 
32 #include "c-ctype.h"
33 #include "message.h"
34 #include "xgettext.h"
35 #include "xg-pos.h"
36 #include "xg-mixed-string.h"
37 #include "xg-arglist-context.h"
38 #include "xg-arglist-callshape.h"
39 #include "xg-arglist-parser.h"
40 #include "xg-message.h"
41 #include "error.h"
42 #include "xalloc.h"
43 #include "mem-hash-map.h"
44 #include "gettext.h"
45 
46 #define _(s) gettext(s)
47 
48 
49 /* Summary of librep syntax:
50    - ';' starts a comment until end of line.
51    - Block comments start with '#|' and end with '|#'.
52    - Numbers are constituted of an optional prefix (#b, #B for binary,
53      #o, #O for octal, #d, #D for decimal, #x, #X for hexadecimal,
54      #e, #E for exact, #i, #I for inexact), an optional sign (+ or -), and
55      the digits.
   - Characters are written as '?' followed by the character, possibly
     with an escape sequence, for example '?a', '?\n', '?\177'.
58    - Strings are delimited by double quotes. Backslash introduces an escape
59      sequence. The following are understood: '\n', '\r', '\f', '\t', '\a',
60      '\\', '\^C', '\012' (octal), '\x12' (hexadecimal).
61    - Symbols: can contain meta-characters - whitespace or any from ()[]'";|\' -
62      if preceded by backslash or enclosed in |...|.
63    - Keywords: written as #:SYMBOL.
64    - () delimit lists.
65    - [] delimit vectors.
66    The reader is implemented in librep-0.14/src/lisp.c.  */
67 
68 
69 /* ====================== Keyword set customization.  ====================== */
70 
/* If true extract all strings.  Set by x_librep_extract_all; when set,
   read_object records every string literal it reads as a message.  */
static bool extract_all = false;

/* Keyword table.  Created lazily and filled by x_librep_keyword;
   read_object looks up the symbol in function position of a list here.  */
static hash_table keywords;
/* True while the built-in default keyword has not been registered yet.
   Cleared by x_librep_keyword (NULL) and by init_keywords.  */
static bool default_keywords = true;
76 
77 
78 void
x_librep_extract_all()79 x_librep_extract_all ()
80 {
81   extract_all = true;
82 }
83 
84 
85 void
x_librep_keyword(const char * name)86 x_librep_keyword (const char *name)
87 {
88   if (name == NULL)
89     default_keywords = false;
90   else
91     {
92       const char *end;
93       struct callshape shape;
94       const char *colon;
95 
96       if (keywords.table == NULL)
97         hash_init (&keywords, 100);
98 
99       split_keywordspec (name, &end, &shape);
100 
101       /* The characters between name and end should form a valid Lisp
102          symbol.  */
103       colon = strchr (name, ':');
104       if (colon == NULL || colon >= end)
105         insert_keyword_callshape (&keywords, name, end - name, &shape);
106     }
107 }
108 
109 /* Finish initializing the keywords hash table.
110    Called after argument processing, before each file is processed.  */
111 static void
init_keywords()112 init_keywords ()
113 {
114   if (default_keywords)
115     {
116       /* When adding new keywords here, also update the documentation in
117          xgettext.texi!  */
118       x_librep_keyword ("_");
119       default_keywords = false;
120     }
121 }
122 
/* Record the built-in flag specifications of this backend by passing
   them to xgettext_record_flag: one for argument 1 of "_" and one for
   argument 2 of "format".  */
void
init_flag_table_librep (void)
{
  xgettext_record_flag ("_:1:pass-librep-format");
  xgettext_record_flag ("format:2:librep-format");
}
129 
130 
131 /* ======================== Reading of characters.  ======================== */
132 
/* The input file stream.  Characters are read from it by do_getc and
   pushed back onto it by do_ungetc.  */
static FILE *fp;
135 
136 
137 /* Fetch the next character from the input file.  */
138 static int
do_getc()139 do_getc ()
140 {
141   int c = getc (fp);
142 
143   if (c == EOF)
144     {
145       if (ferror (fp))
146         error (EXIT_FAILURE, errno,
147                _("error while reading \"%s\""), real_file_name);
148     }
149   else if (c == '\n')
150    line_number++;
151 
152   return c;
153 }
154 
155 /* Put back the last fetched character, not EOF.  */
156 static void
do_ungetc(int c)157 do_ungetc (int c)
158 {
159   if (c == '\n')
160     line_number--;
161   ungetc (c, fp);
162 }
163 
164 
165 /* ========================== Reading of tokens.  ========================== */
166 
167 
/* A token consists of a sequence of characters.  The characters are kept
   in a growable array that is not NUL-terminated; 'charcount' gives the
   number of valid entries.  */
struct token
{
  int allocated;                /* number of chars allocated in 'chars' */
  int charcount;                /* number of chars in use */
  char *chars;                  /* the token's constituents */
};
175 
176 /* Initialize a 'struct token'.  */
177 static inline void
init_token(struct token * tp)178 init_token (struct token *tp)
179 {
180   tp->allocated = 10;
181   tp->chars = XNMALLOC (tp->allocated, char);
182   tp->charcount = 0;
183 }
184 
185 /* Free the memory pointed to by a 'struct token'.  */
186 static inline void
free_token(struct token * tp)187 free_token (struct token *tp)
188 {
189   free (tp->chars);
190 }
191 
192 /* Ensure there is enough room in the token for one more character.  */
193 static inline void
grow_token(struct token * tp)194 grow_token (struct token *tp)
195 {
196   if (tp->charcount == tp->allocated)
197     {
198       tp->allocated *= 2;
199       tp->chars = (char *) xrealloc (tp->chars, tp->allocated * sizeof (char));
200     }
201 }
202 
/* Read the next token.  If 'first' is given, it points to the first
   character, which has already been read.  Returns true for a symbol,
   false for a number.

   *TP is initialized here with init_token, so the caller releases it with
   free_token.  The character that terminates the token is pushed back
   onto the input.

   While the characters are collected, the function speculatively tracks
   whether they could still form a number:
     radix == -1   nothing decided yet,
     radix == 0    definitely not a number,
     radix == 1    a leading '0' was seen; the radix is still open,
     radix >= 2    the characters so far fit a number in that radix.
   nfirst is the count of leading characters (sign, '#' prefix, "0x") that
   do not count as digits; the token is a number only if more characters
   than that were collected.  */
static bool
read_token (struct token *tp, const int *first)
{
  int c;
  /* Variables for speculative number parsing:  */
  int radix = -1;
  int nfirst = 0;
  bool exact = true;              /* no '.' or exponent seen so far */
  bool rational = false;          /* a '/' was seen */
  bool exponent = false;          /* an exponent marker was seen */
  bool had_sign = false;          /* a leading '+' or '-' was seen */
  bool expecting_prefix = false;  /* previous character was a prefix '#' */

  init_token (tp);

  if (first)
    c = *first;
  else
    c = do_getc ();

  for (;; c = do_getc ())
    {
      switch (c)
        {
        case EOF:
          goto done;

        /* Whitespace and these delimiters end the token.  */
        case ' ': case '\t': case '\n': case '\f': case '\r':
        case '(': case ')': case '[': case ']':
        case '\'': case '"': case ';': case ',': case '`':
          goto done;

        case '\\':
          /* A backslash makes the token a symbol and takes the following
             character literally.  */
          radix = 0;
          c = do_getc ();
          if (c == EOF)
            /* Invalid, but be tolerant.  */
            break;
          grow_token (tp);
          tp->chars[tp->charcount++] = c;
          break;

        case '|':
          /* |...| makes the token a symbol; everything up to the closing
             bar (or EOF) is taken literally.  */
          radix = 0;
          for (;;)
            {
              c = do_getc ();
              if (c == EOF || c == '|')
                break;
              grow_token (tp);
              tp->chars[tp->charcount++] = c;
            }
          break;

        default:
          if (radix != 0)
            {
              /* The token may still turn out to be a number.  */
              if (expecting_prefix)
                {
                  /* This character follows a '#': it selects the radix or
                     the exactness.  */
                  switch (c)
                    {
                    case 'B': case 'b':
                      radix = 2;
                      break;
                    case 'O': case 'o':
                      radix = 8;
                      break;
                    case 'D': case 'd':
                      radix = 10;
                      break;
                    case 'X': case 'x':
                      radix = 16;
                      break;
                    case 'E': case 'e':
                    case 'I': case 'i':
                      /* Exactness prefix: the radix stays as it is.  */
                      break;
                    default:
                      radix = 0;
                      break;
                    }
                  expecting_prefix = false;
                  nfirst = tp->charcount + 1;
                }
              else if (tp->charcount == nfirst
                       && (c == '+' || c == '-' || c == '#'))
                {
                  /* A sign or a '#' at the position where the digits
                     would have to start.  */
                  if (c == '#')
                    {
                      /* A '#' after a sign rules out a number.  */
                      if (had_sign)
                        radix = 0;
                      else
                        expecting_prefix = true;
                    }
                  else
                    had_sign = true;
                  nfirst = tp->charcount + 1;
                }
              else
                {
                  switch (radix)
                    {
                    case -1:
                      /* First character of the digit part.  */
                      if (c == '.')
                        {
                          radix = 10;
                          exact = false;
                        }
                      else if (!(c >= '0' && c <= '9'))
                        radix = 0;
                      else if (c == '0')
                        radix = 1;
                      else
                        radix = 10;
                      break;

                    case 1:
                      /* Character right after a leading '0'.  */
                      switch (c)
                        {
                        case 'X': case 'x':
                          radix = 16;
                          nfirst = tp->charcount + 1;
                          break;
                        case '0': case '1': case '2': case '3': case '4':
                        case '5': case '6': case '7':
                          radix = 8;
                          nfirst = tp->charcount;
                          break;
                        case '.': case 'E': case 'e':
                          radix = 10;
                          exact = false;
                          break;
                        case '/':
                          radix = 10;
                          rational = true;
                          break;
                        default:
                          radix = 0;
                          break;
                        }
                      break;

                    default:
                      /* The radix is known; check that this character
                         still fits.  */
                      switch (c)
                        {
                        case '.':
                          /* Accepted once, in radix 10, and not in a
                             rational.  */
                          if (exact && radix == 10 && !rational)
                            exact = false;
                          else
                            radix = 0;
                          break;
                        case '/':
                          /* Accepted once, and only while still exact.  */
                          if (exact && !rational)
                            rational = true;
                          else
                            radix = 0;
                          break;
                        case 'E': case 'e':
                          if (radix == 10)
                            {
                              /* Exponent marker: accepted once, and not
                                 in a rational.  */
                              if (!rational && !exponent)
                                {
                                  exponent = true;
                                  exact = false;
                                }
                              else
                                radix = 0;
                              break;
                            }
                          /* In other radixes 'E'/'e' is checked like any
                             other digit candidate.  */
                          /*FALLTHROUGH*/
                        default:
                          /* Once an exponent was seen, '+' and '-' are
                             accepted.  */
                          if (exponent && (c == '+' || c == '-'))
                            break;
                          /* Anything else must be a digit of the current
                             radix.  */
                          if ((radix <= 10
                               && !(c >= '0' && c <= '0' + radix - 1))
                              || (radix == 16 && !c_isxdigit (c)))
                            radix = 0;
                          break;
                        }
                      break;
                    }
                }
            }
          else
            {
              /* Already known to be a symbol: a '#' ends it.  */
              if (c == '#')
                goto done;
            }
          grow_token (tp);
          tp->chars[tp->charcount++] = c;
        }
    }
 done:
  /* Leave the terminating character for the caller.  */
  if (c != EOF)
    do_ungetc (c);
  /* A number needs a valid radix and at least one character beyond the
     sign/prefix part.  */
  if (radix > 0 && nfirst < tp->charcount)
    return false; /* number */
  else
    return true; /* symbol */
}
405 
406 
407 /* ========================= Accumulating comments ========================= */
408 
409 
/* Growable buffer holding the text of the comment line being read.  */
static char *buffer;
/* Allocated size of 'buffer', in bytes.  */
static size_t bufmax;
/* Number of bytes of 'buffer' currently in use.  */
static size_t buflen;
413 
414 static inline void
comment_start()415 comment_start ()
416 {
417   buflen = 0;
418 }
419 
420 static inline void
comment_add(int c)421 comment_add (int c)
422 {
423   if (buflen >= bufmax)
424     {
425       bufmax = 2 * bufmax + 10;
426       buffer = xrealloc (buffer, bufmax);
427     }
428   buffer[buflen++] = c;
429 }
430 
431 static inline void
comment_line_end(size_t chars_to_remove)432 comment_line_end (size_t chars_to_remove)
433 {
434   buflen -= chars_to_remove;
435   while (buflen >= 1
436          && (buffer[buflen - 1] == ' ' || buffer[buflen - 1] == '\t'))
437     --buflen;
438   if (chars_to_remove == 0 && buflen >= bufmax)
439     {
440       bufmax = 2 * bufmax + 10;
441       buffer = xrealloc (buffer, bufmax);
442     }
443   buffer[buflen] = '\0';
444   savable_comment_add (buffer);
445 }
446 
447 
/* These are for tracking whether comments count as immediately before
   keyword.  read_object stores the current line_number into
   last_comment_line when it reads a comment, and into
   last_non_comment_line when it finishes another kind of object; at a
   newline it resets the saved comments if last_non_comment_line is the
   larger of the two.  */
static int last_comment_line;
static int last_non_comment_line;
452 
453 
454 /* ========================= Accumulating messages ========================= */
455 
456 
/* Message list that read_object hands to arglist_parser_alloc and
   remember_a_message.  */
static message_list_ty *mlp;
458 
459 
460 /* ============== Reading of objects.  See CLHS 2 "Syntax".  ============== */
461 
462 
/* We are only interested in symbols (e.g. GETTEXT or NGETTEXT) and strings.
   Other objects need not be represented precisely.  */
enum object_type
{
  t_symbol,     /* symbol */
  t_string,     /* string */
  t_other,      /* other kind of real object */
  t_dot,        /* '.' pseudo object */
  t_close,      /* ')' or ']' pseudo object */
  t_eof         /* EOF marker */
};
474 
/* Result of reading one object.  For t_symbol and t_string the 'token'
   member points to a heap-allocated token that free_object releases; for
   the other types free_object leaves it untouched.  */
struct object
{
  enum object_type type;
  struct token *token;          /* for t_symbol and t_string */
  int line_number_at_start;     /* for t_string */
};
481 
482 /* Free the memory pointed to by a 'struct object'.  */
483 static inline void
free_object(struct object * op)484 free_object (struct object *op)
485 {
486   if (op->type == t_symbol || op->type == t_string)
487     {
488       free_token (op->token);
489       free (op->token);
490     }
491 }
492 
493 /* Convert a t_symbol/t_string token to a char*.  */
494 static char *
string_of_object(const struct object * op)495 string_of_object (const struct object *op)
496 {
497   char *str;
498   int n;
499 
500   if (!(op->type == t_symbol || op->type == t_string))
501     abort ();
502   n = op->token->charcount;
503   str = XNMALLOC (n + 1, char);
504   memcpy (str, op->token->chars, n);
505   str[n] = '\0';
506   return str;
507 }
508 
/* Context lookup table.  read_object queries it by symbol name through
   flag_context_list_table_lookup.  */
static flag_context_list_table_ty *flag_context_list_table;
511 
/* Returns the character represented by an escape sequence.
   C is the character that followed the backslash; further characters of
   the sequence are read with do_getc as needed.
   Returns EOF only when the input ends right after "\^".  Octal (up to
   three digits) and hexadecimal (any number of digits) sequences yield
   the low 8 bits of the value.  A character with no special meaning is
   returned unchanged.  */
static int
do_getc_escaped (int c)
{
  switch (c)
    {
    case 'n':
      return '\n';
    case 'r':
      return '\r';
    case 'f':
      return '\f';
    case 't':
      return '\t';
    case 'v':
      return '\v';
    case 'a':
      return '\a';
    case '^':
      /* Control character: keep the low five bits of the next one.  */
      c = do_getc ();
      if (c == EOF)
        return EOF;
      return c & 0x1f;
    case '0': case '1': case '2': case '3': case '4':
    case '5': case '6': case '7':
      {
        int n = c - '0';
        int ndigits;

        /* At most two more octal digits belong to the sequence.  */
        for (ndigits = 1; ndigits < 3; ndigits++)
          {
            c = do_getc ();
            if (c == EOF)
              break;
            if (!(c >= '0' && c <= '7'))
              {
                do_ungetc (c);
                break;
              }
            n = (n << 3) + (c - '0');
          }
        return (unsigned char) n;
      }
    case 'x':
      {
        /* Only the low 8 bits are ever returned, so the accumulator is
           unsigned and masked at every step.  This keeps an arbitrarily
           long run of hex digits from overflowing a signed int.  */
        unsigned int n = 0;

        for (;;)
          {
            unsigned int digit;

            c = do_getc ();
            if (c == EOF)
              break;
            else if (c >= '0' && c <= '9')
              digit = c - '0';
            else if (c >= 'A' && c <= 'F')
              digit = c - 'A' + 10;
            else if (c >= 'a' && c <= 'f')
              digit = c - 'a' + 10;
            else
              {
                do_ungetc (c);
                break;
              }
            n = ((n << 4) | digit) & 0xff;
          }
        return (unsigned char) n;
      }
    default:
      return c;
    }
}
587 
588 /* Read the next object.  */
589 static void
read_object(struct object * op,flag_context_ty outer_context)590 read_object (struct object *op, flag_context_ty outer_context)
591 {
592   for (;;)
593     {
594       int c;
595 
596       c = do_getc ();
597 
598       switch (c)
599         {
600         case EOF:
601           op->type = t_eof;
602           return;
603 
604         case '\n':
605           /* Comments assumed to be grouped with a message must immediately
606              precede it, with no non-whitespace token on a line between
607              both.  */
608           if (last_non_comment_line > last_comment_line)
609             savable_comment_reset ();
610           continue;
611 
612         case ' ': case '\t': case '\f': case '\r':
613           continue;
614 
615         case '(':
616           {
617             int arg = 0;                /* Current argument number.  */
618             flag_context_list_iterator_ty context_iter;
619             const struct callshapes *shapes = NULL;
620             struct arglist_parser *argparser = NULL;
621 
622             for (;; arg++)
623               {
624                 struct object inner;
625                 flag_context_ty inner_context;
626 
627                 if (arg == 0)
628                   inner_context = null_context;
629                 else
630                   inner_context =
631                     inherited_context (outer_context,
632                                        flag_context_list_iterator_advance (
633                                          &context_iter));
634 
635                 read_object (&inner, inner_context);
636 
637                 /* Recognize end of list.  */
638                 if (inner.type == t_close)
639                   {
640                     op->type = t_other;
641                     /* Don't bother converting "()" to "NIL".  */
642                     last_non_comment_line = line_number;
643                     if (argparser != NULL)
644                       arglist_parser_done (argparser, arg);
645                     return;
646                   }
647 
648                 /* Dots are not allowed in every position.
649                    But be tolerant.  */
650 
651                 /* EOF inside list is illegal.  But be tolerant.  */
652                 if (inner.type == t_eof)
653                   break;
654 
655                 if (arg == 0)
656                   {
657                     /* This is the function position.  */
658                     if (inner.type == t_symbol)
659                       {
660                         char *symbol_name = string_of_object (&inner);
661                         void *keyword_value;
662 
663                         if (hash_find_entry (&keywords,
664                                              symbol_name, strlen (symbol_name),
665                                              &keyword_value)
666                             == 0)
667                           shapes = (const struct callshapes *) keyword_value;
668 
669                         argparser = arglist_parser_alloc (mlp, shapes);
670 
671                         context_iter =
672                           flag_context_list_iterator (
673                             flag_context_list_table_lookup (
674                               flag_context_list_table,
675                               symbol_name, strlen (symbol_name)));
676 
677                         free (symbol_name);
678                       }
679                     else
680                       context_iter = null_context_list_iterator;
681                   }
682                 else
683                   {
684                     /* These are the argument positions.  */
685                     if (argparser != NULL && inner.type == t_string)
686                       {
687                         char *s = string_of_object (&inner);
688                         mixed_string_ty *ms =
689                           mixed_string_alloc_simple (s, lc_string,
690                                                      logical_file_name,
691                                                      inner.line_number_at_start);
692                         free (s);
693                         arglist_parser_remember (argparser, arg, ms,
694                                                  inner_context,
695                                                  logical_file_name,
696                                                  inner.line_number_at_start,
697                                                  savable_comment, false);
698                       }
699                   }
700 
701                 free_object (&inner);
702               }
703 
704             if (argparser != NULL)
705               arglist_parser_done (argparser, arg);
706           }
707           op->type = t_other;
708           last_non_comment_line = line_number;
709           return;
710 
711         case '[':
712           {
713             for (;;)
714               {
715                 struct object inner;
716 
717                 read_object (&inner, null_context);
718 
719                 /* Recognize end of vector.  */
720                 if (inner.type == t_close)
721                   {
722                     op->type = t_other;
723                     last_non_comment_line = line_number;
724                     return;
725                   }
726 
727                 /* Dots are not allowed.  But be tolerant.  */
728 
729                 /* EOF inside vector is illegal.  But be tolerant.  */
730                 if (inner.type == t_eof)
731                   break;
732 
733                 free_object (&inner);
734               }
735           }
736           op->type = t_other;
737           last_non_comment_line = line_number;
738           return;
739 
740         case ')': case ']':
741           /* Tell the caller about the end of list or vector.
742              Unmatched closing parenthesis is illegal.  But be tolerant.  */
743           op->type = t_close;
744           last_non_comment_line = line_number;
745           return;
746 
747         case ',':
748           {
749             int c = do_getc ();
750             /* The ,@ handling inside lists is wrong anyway, because
751                ,@form expands to an unknown number of elements.  */
752             if (c != EOF && c != '@')
753               do_ungetc (c);
754           }
755           /*FALLTHROUGH*/
756         case '\'':
757         case '`':
758           {
759             struct object inner;
760 
761             read_object (&inner, null_context);
762 
763             /* Dots and EOF are not allowed here.  But be tolerant.  */
764 
765             free_object (&inner);
766 
767             op->type = t_other;
768             last_non_comment_line = line_number;
769             return;
770           }
771 
772         case ';':
773           {
774             bool all_semicolons = true;
775 
776             last_comment_line = line_number;
777             comment_start ();
778             for (;;)
779               {
780                 int c = do_getc ();
781                 if (c == EOF || c == '\n' || c == '\f' || c == '\r')
782                   break;
783                 if (c != ';')
784                   all_semicolons = false;
785                 if (!all_semicolons)
786                   {
787                     /* We skip all leading white space, but not EOLs.  */
788                     if (!(buflen == 0 && (c == ' ' || c == '\t')))
789                       comment_add (c);
790                   }
791               }
792             comment_line_end (0);
793             continue;
794           }
795 
796         case '"':
797           {
798             op->token = XMALLOC (struct token);
799             init_token (op->token);
800             op->line_number_at_start = line_number;
801             for (;;)
802               {
803                 int c = do_getc ();
804                 if (c == EOF)
805                   /* Invalid input.  Be tolerant, no error message.  */
806                   break;
807                 if (c == '"')
808                   break;
809                 if (c == '\\')
810                   {
811                     c = do_getc ();
812                     if (c == EOF)
813                       /* Invalid input.  Be tolerant, no error message.  */
814                       break;
815                     if (c == '\n')
816                       /* Ignore escaped newline.  */
817                       ;
818                     else
819                       {
820                         c = do_getc_escaped (c);
821                         if (c == EOF)
822                           /* Invalid input.  Be tolerant, no error message.  */
823                           break;
824                         grow_token (op->token);
825                         op->token->chars[op->token->charcount++] = c;
826                       }
827                   }
828                 else
829                   {
830                     grow_token (op->token);
831                     op->token->chars[op->token->charcount++] = c;
832                   }
833               }
834             op->type = t_string;
835 
836             if (extract_all)
837               {
838                 lex_pos_ty pos;
839 
840                 pos.file_name = logical_file_name;
841                 pos.line_number = op->line_number_at_start;
842                 remember_a_message (mlp, NULL, string_of_object (op), false,
843                                     false, null_context, &pos,
844                                     NULL, savable_comment, false);
845               }
846             last_non_comment_line = line_number;
847             return;
848           }
849 
850         case '?':
851           c = do_getc ();
852           if (c == EOF)
853             /* Invalid input.  Be tolerant, no error message.  */
854             ;
855           else if (c == '\\')
856             {
857               c = do_getc ();
858               if (c == EOF)
859                 /* Invalid input.  Be tolerant, no error message.  */
860                 ;
861               else
862                 {
863                   c = do_getc_escaped (c);
864                   if (c == EOF)
865                     /* Invalid input.  Be tolerant, no error message.  */
866                     ;
867                 }
868             }
869           op->type = t_other;
870           last_non_comment_line = line_number;
871           return;
872 
873         case '#':
874           /* Dispatch macro handling.  */
875           c = do_getc ();
876           if (c == EOF)
877             /* Invalid input.  Be tolerant, no error message.  */
878             {
879               op->type = t_other;
880               return;
881             }
882 
883           switch (c)
884             {
885             case '!':
886               if (ftell (fp) == 2)
887                 /* Skip comment until !# */
888                 {
889                   c = do_getc ();
890                   for (;;)
891                     {
892                       if (c == EOF)
893                         break;
894                       if (c == '!')
895                         {
896                           c = do_getc ();
897                           if (c == EOF || c == '#')
898                             break;
899                         }
900                       else
901                         c = do_getc ();
902                     }
903                   if (c == EOF)
904                     {
905                       /* EOF not allowed here.  But be tolerant.  */
906                       op->type = t_eof;
907                       return;
908                     }
909                   continue;
910                 }
911               /*FALLTHROUGH*/
912             case '\'':
913             case ':':
914               {
915                 struct object inner;
916                 read_object (&inner, null_context);
917                 /* Dots and EOF are not allowed here.
918                    But be tolerant.  */
919                 free_object (&inner);
920                 op->type = t_other;
921                 last_non_comment_line = line_number;
922                 return;
923               }
924 
925             case '[':
926             case '(':
927               {
928                 struct object inner;
929                 do_ungetc (c);
930                 read_object (&inner, null_context);
931                 /* Dots and EOF are not allowed here.
932                    But be tolerant.  */
933                 free_object (&inner);
934                 op->type = t_other;
935                 last_non_comment_line = line_number;
936                 return;
937               }
938 
939             case '|':
940               {
941                 int depth = 0;
942 
943                 comment_start ();
944                 c = do_getc ();
945                 for (;;)
946                   {
947                     if (c == EOF)
948                       break;
949                     if (c == '|')
950                       {
951                         c = do_getc ();
952                         if (c == EOF)
953                           break;
954                         if (c == '#')
955                           {
956                             if (depth == 0)
957                               {
958                                 comment_line_end (0);
959                                 break;
960                               }
961                             depth--;
962                             comment_add ('|');
963                             comment_add ('#');
964                             c = do_getc ();
965                           }
966                         else
967                           comment_add ('|');
968                       }
969                     else if (c == '#')
970                       {
971                         c = do_getc ();
972                         if (c == EOF)
973                           break;
974                         comment_add ('#');
975                         if (c == '|')
976                           {
977                             depth++;
978                             comment_add ('|');
979                             c = do_getc ();
980                           }
981                       }
982                     else
983                       {
984                         /* We skip all leading white space.  */
985                         if (!(buflen == 0 && (c == ' ' || c == '\t')))
986                           comment_add (c);
987                         if (c == '\n')
988                           {
989                             comment_line_end (1);
990                             comment_start ();
991                           }
992                         c = do_getc ();
993                       }
994                   }
995                 if (c == EOF)
996                   {
997                     /* EOF not allowed here.  But be tolerant.  */
998                     op->type = t_eof;
999                     return;
1000                   }
1001                 last_comment_line = line_number;
1002                 continue;
1003               }
1004 
1005             case '\\':
1006               {
1007                 struct token token;
1008                 int first = '\\';
1009                 read_token (&token, &first);
1010                 free_token (&token);
1011                 op->type = t_other;
1012                 last_non_comment_line = line_number;
1013                 return;
1014               }
1015 
1016             case 'T': case 't':
1017             case 'F': case 'f':
1018               op->type = t_other;
1019               last_non_comment_line = line_number;
1020               return;
1021 
1022             case 'B': case 'b':
1023             case 'O': case 'o':
1024             case 'D': case 'd':
1025             case 'X': case 'x':
1026             case 'E': case 'e':
1027             case 'I': case 'i':
1028               {
1029                 struct token token;
1030                 do_ungetc (c);
1031                 c = '#';
1032                 read_token (&token, &c);
1033                 free_token (&token);
1034                 op->type = t_other;
1035                 last_non_comment_line = line_number;
1036                 return;
1037               }
1038 
1039             default:
1040               /* Invalid input.  Be tolerant, no error message.  */
1041               op->type = t_other;
1042               last_non_comment_line = line_number;
1043               return;
1044             }
1045 
1046           /*NOTREACHED*/
1047           abort ();
1048 
1049         default:
1050           /* Read a token.  */
1051           {
1052             bool symbol;
1053 
1054             op->token = XMALLOC (struct token);
1055             symbol = read_token (op->token, &c);
1056             if (op->token->charcount == 1 && op->token->chars[0] == '.')
1057               {
1058                 free_token (op->token);
1059                 free (op->token);
1060                 op->type = t_dot;
1061                 last_non_comment_line = line_number;
1062                 return;
1063               }
1064             if (!symbol)
1065               {
1066                 free_token (op->token);
1067                 free (op->token);
1068                 op->type = t_other;
1069                 last_non_comment_line = line_number;
1070                 return;
1071               }
1072             /* Distinguish between "foo" and "foo#bar".  */
1073             c = do_getc ();
1074             if (c == '#')
1075               {
1076                 struct token second_token;
1077 
1078                 free_token (op->token);
1079                 free (op->token);
1080                 read_token (&second_token, NULL);
1081                 free_token (&second_token);
1082                 op->type = t_other;
1083                 last_non_comment_line = line_number;
1084                 return;
1085               }
1086             else
1087               {
1088                 if (c != EOF)
1089                   do_ungetc (c);
1090                 op->type = t_symbol;
1091                 last_non_comment_line = line_number;
1092                 return;
1093               }
1094           }
1095         }
1096     }
1097 }
1098 
1099 
1100 void
extract_librep(FILE * f,const char * real_filename,const char * logical_filename,flag_context_list_table_ty * flag_table,msgdomain_list_ty * mdlp)1101 extract_librep (FILE *f,
1102                 const char *real_filename, const char *logical_filename,
1103                 flag_context_list_table_ty *flag_table,
1104                 msgdomain_list_ty *mdlp)
1105 {
1106   mlp = mdlp->item[0]->messages;
1107 
1108   fp = f;
1109   real_file_name = real_filename;
1110   logical_file_name = xstrdup (logical_filename);
1111   line_number = 1;
1112 
1113   last_comment_line = -1;
1114   last_non_comment_line = -1;
1115 
1116   flag_context_list_table = flag_table;
1117 
1118   init_keywords ();
1119 
1120   /* Eat tokens until eof is seen.  When read_object returns
1121      due to an unbalanced closing parenthesis, just restart it.  */
1122   do
1123     {
1124       struct object toplevel_object;
1125 
1126       read_object (&toplevel_object, null_context);
1127 
1128       if (toplevel_object.type == t_eof)
1129         break;
1130 
1131       free_object (&toplevel_object);
1132     }
1133   while (!feof (fp));
1134 
1135   /* Close scanner.  */
1136   fp = NULL;
1137   real_file_name = NULL;
1138   logical_file_name = NULL;
1139   line_number = 0;
1140 }
1141