1 /* Reading PO files, abstract class.
2    Copyright (C) 1995-1996, 1998, 2000-2009, 2013, 2015 Free Software
3    Foundation, Inc.
4 
5    This file was written by Peter Miller <millerp@canb.auug.org.au>
6 
7    This program is free software: you can redistribute it and/or modify
8    it under the terms of the GNU General Public License as published by
9    the Free Software Foundation; either version 3 of the License, or
10    (at your option) any later version.
11 
12    This program is distributed in the hope that it will be useful,
13    but WITHOUT ANY WARRANTY; without even the implied warranty of
14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15    GNU General Public License for more details.
16 
17    You should have received a copy of the GNU General Public License
18    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
19 
20 
21 #ifdef HAVE_CONFIG_H
22 # include "config.h"
23 #endif
24 
25 /* Specification.  */
26 #include "read-catalog-abstract.h"
27 
28 #include <limits.h>
29 #include <stdlib.h>
30 #include <string.h>
31 
32 #include "xalloc.h"
33 #include "xvasprintf.h"
34 #include "po-xerror.h"
35 #include "error.h"
36 #include "gettext.h"
37 
38 /* Local variables.  */
39 static abstract_catalog_reader_ty *callback_arg;
40 
41 
42 /* ========================================================================= */
43 /* Allocating and freeing instances of abstract_catalog_reader_ty.  */
44 
45 
46 abstract_catalog_reader_ty *
catalog_reader_alloc(abstract_catalog_reader_class_ty * method_table)47 catalog_reader_alloc (abstract_catalog_reader_class_ty *method_table)
48 {
49   abstract_catalog_reader_ty *pop;
50 
51   pop = (abstract_catalog_reader_ty *) xmalloc (method_table->size);
52   pop->methods = method_table;
53   if (method_table->constructor)
54     method_table->constructor (pop);
55   return pop;
56 }
57 
58 
59 void
catalog_reader_free(abstract_catalog_reader_ty * pop)60 catalog_reader_free (abstract_catalog_reader_ty *pop)
61 {
62   if (pop->methods->destructor)
63     pop->methods->destructor (pop);
64   free (pop);
65 }
66 
67 
68 /* ========================================================================= */
69 /* Inline functions to invoke the methods.  */
70 
71 
72 static inline void
call_parse_brief(abstract_catalog_reader_ty * pop)73 call_parse_brief (abstract_catalog_reader_ty *pop)
74 {
75   if (pop->methods->parse_brief)
76     pop->methods->parse_brief (pop);
77 }
78 
79 static inline void
call_parse_debrief(abstract_catalog_reader_ty * pop)80 call_parse_debrief (abstract_catalog_reader_ty *pop)
81 {
82   if (pop->methods->parse_debrief)
83     pop->methods->parse_debrief (pop);
84 }
85 
86 static inline void
call_directive_domain(abstract_catalog_reader_ty * pop,char * name)87 call_directive_domain (abstract_catalog_reader_ty *pop, char *name)
88 {
89   if (pop->methods->directive_domain)
90     pop->methods->directive_domain (pop, name);
91 }
92 
93 static inline void
call_directive_message(abstract_catalog_reader_ty * pop,char * msgctxt,char * msgid,lex_pos_ty * msgid_pos,char * msgid_plural,char * msgstr,size_t msgstr_len,lex_pos_ty * msgstr_pos,char * prev_msgctxt,char * prev_msgid,char * prev_msgid_plural,bool force_fuzzy,bool obsolete)94 call_directive_message (abstract_catalog_reader_ty *pop,
95                         char *msgctxt,
96                         char *msgid,
97                         lex_pos_ty *msgid_pos,
98                         char *msgid_plural,
99                         char *msgstr, size_t msgstr_len,
100                         lex_pos_ty *msgstr_pos,
101                         char *prev_msgctxt,
102                         char *prev_msgid,
103                         char *prev_msgid_plural,
104                         bool force_fuzzy, bool obsolete)
105 {
106   if (pop->methods->directive_message)
107     pop->methods->directive_message (pop, msgctxt,
108                                      msgid, msgid_pos, msgid_plural,
109                                      msgstr, msgstr_len, msgstr_pos,
110                                      prev_msgctxt,
111                                      prev_msgid,
112                                      prev_msgid_plural,
113                                      force_fuzzy, obsolete);
114 }
115 
116 static inline void
call_comment(abstract_catalog_reader_ty * pop,const char * s)117 call_comment (abstract_catalog_reader_ty *pop, const char *s)
118 {
119   if (pop->methods->comment != NULL)
120     pop->methods->comment (pop, s);
121 }
122 
123 static inline void
call_comment_dot(abstract_catalog_reader_ty * pop,const char * s)124 call_comment_dot (abstract_catalog_reader_ty *pop, const char *s)
125 {
126   if (pop->methods->comment_dot != NULL)
127     pop->methods->comment_dot (pop, s);
128 }
129 
130 static inline void
call_comment_filepos(abstract_catalog_reader_ty * pop,const char * name,size_t line)131 call_comment_filepos (abstract_catalog_reader_ty *pop, const char *name,
132                       size_t line)
133 {
134   if (pop->methods->comment_filepos)
135     pop->methods->comment_filepos (pop, name, line);
136 }
137 
138 static inline void
call_comment_special(abstract_catalog_reader_ty * pop,const char * s)139 call_comment_special (abstract_catalog_reader_ty *pop, const char *s)
140 {
141   if (pop->methods->comment_special != NULL)
142     pop->methods->comment_special (pop, s);
143 }
144 
145 
146 /* ========================================================================= */
147 /* Exported functions.  */
148 
149 
150 static inline void
parse_start(abstract_catalog_reader_ty * pop)151 parse_start (abstract_catalog_reader_ty *pop)
152 {
153   /* The parse will call the po_callback_... functions (see below)
154      when the various directive are recognised.  The callback_arg
155      variable is used to tell these functions which instance is to
156      have the relevant method invoked.  */
157   callback_arg = pop;
158 
159   call_parse_brief (pop);
160 }
161 
162 static inline void
parse_end(abstract_catalog_reader_ty * pop)163 parse_end (abstract_catalog_reader_ty *pop)
164 {
165   call_parse_debrief (pop);
166   callback_arg = NULL;
167 }
168 
169 
170 void
catalog_reader_parse(abstract_catalog_reader_ty * pop,FILE * fp,const char * real_filename,const char * logical_filename,catalog_input_format_ty input_syntax)171 catalog_reader_parse (abstract_catalog_reader_ty *pop, FILE *fp,
172                       const char *real_filename, const char *logical_filename,
173                       catalog_input_format_ty input_syntax)
174 {
175   error_message_count = 0;
176 
177   /* Parse the stream's content.  */
178   parse_start (pop);
179   input_syntax->parse (pop, fp, real_filename, logical_filename);
180   parse_end (pop);
181 
182   if (error_message_count > 0)
183     po_xerror (PO_SEVERITY_FATAL_ERROR, NULL,
184                /*real_filename*/ NULL, (size_t)(-1), (size_t)(-1), false,
185                xasprintf (ngettext ("found %d fatal error",
186                                     "found %d fatal errors",
187                                     error_message_count),
188                           error_message_count));
189 }
190 
191 
192 /* ========================================================================= */
193 /* Callbacks used by po-gram.y or po-lex.c, indirectly from
194    catalog_reader_parse.  */
195 
196 
197 /* This function is called by po_gram_lex() whenever a domain directive
198    has been seen.  */
199 void
po_callback_domain(char * name)200 po_callback_domain (char *name)
201 {
202   /* assert(callback_arg); */
203   call_directive_domain (callback_arg, name);
204 }
205 
206 
207 /* This function is called by po_gram_lex() whenever a message has been
208    seen.  */
209 void
po_callback_message(char * msgctxt,char * msgid,lex_pos_ty * msgid_pos,char * msgid_plural,char * msgstr,size_t msgstr_len,lex_pos_ty * msgstr_pos,char * prev_msgctxt,char * prev_msgid,char * prev_msgid_plural,bool force_fuzzy,bool obsolete)210 po_callback_message (char *msgctxt,
211                      char *msgid, lex_pos_ty *msgid_pos, char *msgid_plural,
212                      char *msgstr, size_t msgstr_len, lex_pos_ty *msgstr_pos,
213                      char *prev_msgctxt,
214                      char *prev_msgid,
215                      char *prev_msgid_plural,
216                      bool force_fuzzy, bool obsolete)
217 {
218   /* assert(callback_arg); */
219   call_directive_message (callback_arg, msgctxt,
220                           msgid, msgid_pos, msgid_plural,
221                           msgstr, msgstr_len, msgstr_pos,
222                           prev_msgctxt, prev_msgid, prev_msgid_plural,
223                           force_fuzzy, obsolete);
224 }
225 
226 
227 void
po_callback_comment(const char * s)228 po_callback_comment (const char *s)
229 {
230   /* assert(callback_arg); */
231   call_comment (callback_arg, s);
232 }
233 
234 
235 void
po_callback_comment_dot(const char * s)236 po_callback_comment_dot (const char *s)
237 {
238   /* assert(callback_arg); */
239   call_comment_dot (callback_arg, s);
240 }
241 
242 
243 /* This function is called by po_parse_comment_filepos(), once for each
244    filename.  */
245 void
po_callback_comment_filepos(const char * name,size_t line)246 po_callback_comment_filepos (const char *name, size_t line)
247 {
248   /* assert(callback_arg); */
249   call_comment_filepos (callback_arg, name, line);
250 }
251 
252 
253 void
po_callback_comment_special(const char * s)254 po_callback_comment_special (const char *s)
255 {
256   /* assert(callback_arg); */
257   call_comment_special (callback_arg, s);
258 }
259 
260 
261 /* Parse a special comment and put the result in *fuzzyp, formatp, *rangep,
262    *wrapp.  */
263 void
po_parse_comment_special(const char * s,bool * fuzzyp,enum is_format formatp[NFORMATS],struct argument_range * rangep,enum is_wrap * wrapp,enum is_syntax_check scp[NSYNTAXCHECKS])264 po_parse_comment_special (const char *s,
265                           bool *fuzzyp, enum is_format formatp[NFORMATS],
266                           struct argument_range *rangep, enum is_wrap *wrapp,
267                           enum is_syntax_check scp[NSYNTAXCHECKS])
268 {
269   size_t i;
270 
271   *fuzzyp = false;
272   for (i = 0; i < NFORMATS; i++)
273     formatp[i] = undecided;
274   rangep->min = -1;
275   rangep->max = -1;
276   *wrapp = undecided;
277   for (i = 0; i < NSYNTAXCHECKS; i++)
278     scp[i] = undecided;
279 
280   while (*s != '\0')
281     {
282       const char *t;
283 
284       /* Skip whitespace.  */
285       while (*s != '\0' && strchr ("\n \t\r\f\v,", *s) != NULL)
286         s++;
287 
288       /* Collect a token.  */
289       t = s;
290       while (*s != '\0' && strchr ("\n \t\r\f\v,", *s) == NULL)
291         s++;
292       if (s != t)
293         {
294           size_t len = s - t;
295 
296           /* Accept fuzzy flag.  */
297           if (len == 5 && memcmp (t, "fuzzy", 5) == 0)
298             {
299               *fuzzyp = true;
300               continue;
301             }
302 
303           /* Accept format description.  */
304           if (len >= 7 && memcmp (t + len - 7, "-format", 7) == 0)
305             {
306               const char *p;
307               size_t n;
308               enum is_format value;
309 
310               p = t;
311               n = len - 7;
312 
313               if (n >= 3 && memcmp (p, "no-", 3) == 0)
314                 {
315                   p += 3;
316                   n -= 3;
317                   value = no;
318                 }
319               else if (n >= 9 && memcmp (p, "possible-", 9) == 0)
320                 {
321                   p += 9;
322                   n -= 9;
323                   value = possible;
324                 }
325               else if (n >= 11 && memcmp (p, "impossible-", 11) == 0)
326                 {
327                   p += 11;
328                   n -= 11;
329                   value = impossible;
330                 }
331               else
332                 value = yes;
333 
334               for (i = 0; i < NFORMATS; i++)
335                 if (strlen (format_language[i]) == n
336                     && memcmp (format_language[i], p, n) == 0)
337                   {
338                     formatp[i] = value;
339                     break;
340                   }
341               if (i < NFORMATS)
342                 continue;
343             }
344 
345           /* Accept range description "range: <min>..<max>".  */
346           if (len == 6 && memcmp (t, "range:", 6) == 0)
347             {
348               /* Skip whitespace.  */
349               while (*s != '\0' && strchr ("\n \t\r\f\v,", *s) != NULL)
350                 s++;
351 
352               /* Collect a token.  */
353               t = s;
354               while (*s != '\0' && strchr ("\n \t\r\f\v,", *s) == NULL)
355                 s++;
356               /* Parse it.  */
357               if (*t >= '0' && *t <= '9')
358                 {
359                   unsigned int min = 0;
360 
361                   for (; *t >= '0' && *t <= '9'; t++)
362                     {
363                       if (min <= INT_MAX / 10)
364                         {
365                           min = 10 * min + (*t - '0');
366                           if (min > INT_MAX)
367                             min = INT_MAX;
368                         }
369                       else
370                         /* Avoid integer overflow.  */
371                         min = INT_MAX;
372                     }
373                   if (*t++ == '.')
374                     if (*t++ == '.')
375                       if (*t >= '0' && *t <= '9')
376                         {
377                           unsigned int max = 0;
378                           for (; *t >= '0' && *t <= '9'; t++)
379                             {
380                               if (max <= INT_MAX / 10)
381                                 {
382                                   max = 10 * max + (*t - '0');
383                                   if (max > INT_MAX)
384                                     max = INT_MAX;
385                                 }
386                               else
387                                 /* Avoid integer overflow.  */
388                                 max = INT_MAX;
389                             }
390                           if (min <= max)
391                             {
392                               rangep->min = min;
393                               rangep->max = max;
394                               continue;
395                             }
396                         }
397                 }
398             }
399 
400           /* Accept wrap description.  */
401           if (len == 4 && memcmp (t, "wrap", 4) == 0)
402             {
403               *wrapp = yes;
404               continue;
405             }
406           if (len == 7 && memcmp (t, "no-wrap", 7) == 0)
407             {
408               *wrapp = no;
409               continue;
410             }
411 
412           /* Accept syntax check description.  */
413           if (len >= 6 && memcmp (t + len - 6, "-check", 6) == 0)
414             {
415               const char *p;
416               size_t n;
417               enum is_syntax_check value;
418 
419               p = t;
420               n = len - 6;
421 
422               if (n >= 3 && memcmp (p, "no-", 3) == 0)
423                 {
424                   p += 3;
425                   n -= 3;
426                   value = no;
427                 }
428               else
429                 value = yes;
430 
431               for (i = 0; i < NSYNTAXCHECKS; i++)
432                 if (strlen (syntax_check_name[i]) == n
433                     && memcmp (syntax_check_name[i], p, n) == 0)
434                   {
435                     scp[i] = value;
436                     break;
437                   }
438               if (i < NSYNTAXCHECKS)
439                 continue;
440             }
441 
442           /* Unknown special comment marker.  It may have been generated
443              from a future xgettext version.  Ignore it.  */
444         }
445     }
446 }
447 
448 
449 /* Parse a GNU style file comment.
450    Syntax: an arbitrary number of
451              STRING COLON NUMBER
452            or
453              STRING
454    The latter style, without line number, occurs in PO files converted e.g.
455    from Pascal .rst files or from OpenOffice resource files.
456    Call po_callback_comment_filepos for each of them.  */
457 static void
po_parse_comment_filepos(const char * s)458 po_parse_comment_filepos (const char *s)
459 {
460   while (*s != '\0')
461     {
462       while (*s == ' ' || *s == '\t' || *s == '\n')
463         s++;
464       if (*s != '\0')
465         {
466           const char *string_start = s;
467 
468           do
469             s++;
470           while (!(*s == '\0' || *s == ' ' || *s == '\t' || *s == '\n'));
471 
472           /* See if there is a COLON and NUMBER after the STRING, separated
473              through optional spaces.  */
474           {
475             const char *p = s;
476 
477             while (*p == ' ' || *p == '\t' || *p == '\n')
478               p++;
479 
480             if (*p == ':')
481               {
482                 p++;
483 
484                 while (*p == ' ' || *p == '\t' || *p == '\n')
485                   p++;
486 
487                 if (*p >= '0' && *p <= '9')
488                   {
489                     /* Accumulate a number.  */
490                     size_t n = 0;
491 
492                     do
493                       {
494                         n = n * 10 + (*p - '0');
495                         p++;
496                       }
497                     while (*p >= '0' && *p <= '9');
498 
499                     if (*p == '\0' || *p == ' ' || *p == '\t' || *p == '\n')
500                       {
501                         /* Parsed a GNU style file comment with spaces.  */
502                         const char *string_end = s;
503                         size_t string_length = string_end - string_start;
504                         char *string = XNMALLOC (string_length + 1, char);
505 
506                         memcpy (string, string_start, string_length);
507                         string[string_length] = '\0';
508 
509                         po_callback_comment_filepos (string, n);
510 
511                         free (string);
512 
513                         s = p;
514                         continue;
515                       }
516                   }
517               }
518           }
519 
520           /* See if there is a COLON at the end of STRING and a NUMBER after
521              it, separated through optional spaces.  */
522           if (s[-1] == ':')
523             {
524               const char *p = s;
525 
526               while (*p == ' ' || *p == '\t' || *p == '\n')
527                 p++;
528 
529               if (*p >= '0' && *p <= '9')
530                 {
531                   /* Accumulate a number.  */
532                   size_t n = 0;
533 
534                   do
535                     {
536                       n = n * 10 + (*p - '0');
537                       p++;
538                     }
539                   while (*p >= '0' && *p <= '9');
540 
541                   if (*p == '\0' || *p == ' ' || *p == '\t' || *p == '\n')
542                     {
543                       /* Parsed a GNU style file comment with spaces.  */
544                       const char *string_end = s - 1;
545                       size_t string_length = string_end - string_start;
546                       char *string = XNMALLOC (string_length + 1, char);
547 
548                       memcpy (string, string_start, string_length);
549                       string[string_length] = '\0';
550 
551                       po_callback_comment_filepos (string, n);
552 
553                       free (string);
554 
555                       s = p;
556                       continue;
557                     }
558                 }
559             }
560 
561           /* See if there is a COLON and NUMBER at the end of the STRING,
562              without separating spaces.  */
563           {
564             const char *p = s;
565 
566             while (p > string_start)
567               {
568                 p--;
569                 if (!(*p >= '0' && *p <= '9'))
570                   {
571                     p++;
572                     break;
573                   }
574               }
575 
576             /* p now points to the beginning of the trailing digits segment
577                at the end of STRING.  */
578 
579             if (p < s
580                 && p > string_start + 1
581                 && p[-1] == ':')
582               {
583                 /* Parsed a GNU style file comment without spaces.  */
584                 const char *string_end = p - 1;
585 
586                 /* Accumulate a number.  */
587                 {
588                   size_t n = 0;
589 
590                   do
591                     {
592                       n = n * 10 + (*p - '0');
593                       p++;
594                     }
595                   while (p < s);
596 
597                   {
598                     size_t string_length = string_end - string_start;
599                     char *string = XNMALLOC (string_length + 1, char);
600 
601                     memcpy (string, string_start, string_length);
602                     string[string_length] = '\0';
603 
604                     po_callback_comment_filepos (string, n);
605 
606                     free (string);
607 
608                     continue;
609                   }
610                 }
611               }
612           }
613 
614           /* Parsed a file comment without line number.  */
615           {
616             const char *string_end = s;
617             size_t string_length = string_end - string_start;
618             char *string = XNMALLOC (string_length + 1, char);
619 
620             memcpy (string, string_start, string_length);
621             string[string_length] = '\0';
622 
623             po_callback_comment_filepos (string, (size_t)(-1));
624 
625             free (string);
626           }
627         }
628     }
629 }
630 
631 
632 /* Parse a SunOS or Solaris style file comment.
633    Syntax of SunOS style:
634      FILE_KEYWORD COLON STRING COMMA LINE_KEYWORD COLON NUMBER
635    Syntax of Solaris style:
636      FILE_KEYWORD COLON STRING COMMA LINE_KEYWORD NUMBER_KEYWORD COLON NUMBER
637    where
638      FILE_KEYWORD ::= "file" | "File"
639      COLON ::= ":"
640      COMMA ::= ","
641      LINE_KEYWORD ::= "line"
642      NUMBER_KEYWORD ::= "number"
643      NUMBER ::= [0-9]+
644    Return true if parsed, false if not a comment of this form. */
645 static bool
po_parse_comment_solaris_filepos(const char * s)646 po_parse_comment_solaris_filepos (const char *s)
647 {
648   if (s[0] == ' '
649       && (s[1] == 'F' || s[1] == 'f')
650       && s[2] == 'i' && s[3] == 'l' && s[4] == 'e'
651       && s[5] == ':')
652     {
653       const char *string_start;
654       const char *string_end;
655 
656       {
657         const char *p = s + 6;
658 
659         while (*p == ' ' || *p == '\t')
660           p++;
661         string_start = p;
662       }
663 
664       for (string_end = string_start; *string_end != '\0'; string_end++)
665         {
666           const char *p = string_end;
667 
668           while (*p == ' ' || *p == '\t')
669             p++;
670 
671           if (*p == ',')
672             {
673               p++;
674 
675               while (*p == ' ' || *p == '\t')
676                 p++;
677 
678               if (p[0] == 'l' && p[1] == 'i' && p[2] == 'n' && p[3] == 'e')
679                 {
680                   p += 4;
681 
682                   while (*p == ' ' || *p == '\t')
683                     p++;
684 
685                   if (p[0] == 'n' && p[1] == 'u' && p[2] == 'm'
686                       && p[3] == 'b' && p[4] == 'e' && p[5] == 'r')
687                     {
688                       p += 6;
689                       while (*p == ' ' || *p == '\t')
690                         p++;
691                     }
692 
693                   if (*p == ':')
694                     {
695                       p++;
696 
697                       if (*p >= '0' && *p <= '9')
698                         {
699                           /* Accumulate a number.  */
700                           size_t n = 0;
701 
702                           do
703                             {
704                               n = n * 10 + (*p - '0');
705                               p++;
706                             }
707                           while (*p >= '0' && *p <= '9');
708 
709                           while (*p == ' ' || *p == '\t' || *p == '\n')
710                             p++;
711 
712                           if (*p == '\0')
713                             {
714                               /* Parsed a Sun style file comment.  */
715                               size_t string_length = string_end - string_start;
716                               char *string =
717                                 XNMALLOC (string_length + 1, char);
718 
719                               memcpy (string, string_start, string_length);
720                               string[string_length] = '\0';
721 
722                               po_callback_comment_filepos (string, n);
723 
724                               free (string);
725                               return true;
726                             }
727                         }
728                     }
729                 }
730             }
731         }
732     }
733 
734   return false;
735 }
736 
737 
/* This function is called by po_gram_lex() whenever a comment is
   seen.  It looks at the first character of the comment text S to see
   what sort of comment it is, and then dispatches it to the appropriate
   method: call_comment, call_comment_dot, call_comment_filepos (via
   po_parse_comment_filepos or po_parse_comment_solaris_filepos), or
   call_comment_special.  */
void
po_callback_comment_dispatcher (const char *s)
{
  switch (*s)
    {
    case '.':
      s++;
      /* There is usually a space before the comment.  People don't
         consider it part of the comment, therefore remove it here.  */
      if (*s == ' ')
        s++;
      po_callback_comment_dot (s);
      break;

    case ':':
      /* Parse the file location string.  The appropriate callback will be
         invoked.  */
      po_parse_comment_filepos (s + 1);
      break;

    case ',':
    case '!':
      /* Get all entries in the special comment line.  */
      po_callback_comment_special (s + 1);
      break;

    default:
      /* It looks like a plain vanilla comment, but Solaris-style file
         position lines do, too.  Try to parse the lot.  If the parse
         succeeds, the appropriate callback has been invoked and nothing
         is left to do.  */
      if (!po_parse_comment_solaris_filepos (s))
        {
          /* There is usually a space before the comment.  People don't
             consider it part of the comment, therefore remove it here.  */
          if (*s == ' ')
            s++;
          po_callback_comment (s);
        }
      break;
    }
}
783