1 /* Reading PO files, abstract class.
2 Copyright (C) 1995-1996, 1998, 2000-2009, 2013, 2015 Free Software
3 Foundation, Inc.
4
5 This file was written by Peter Miller <millerp@canb.auug.org.au>
6
7 This program is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program. If not, see <https://www.gnu.org/licenses/>. */
19
20
21 #ifdef HAVE_CONFIG_H
22 # include "config.h"
23 #endif
24
25 /* Specification. */
26 #include "read-catalog-abstract.h"
27
28 #include <limits.h>
29 #include <stdlib.h>
30 #include <string.h>
31
32 #include "xalloc.h"
33 #include "xvasprintf.h"
34 #include "po-xerror.h"
35 #include "error.h"
36 #include "gettext.h"
37
38 /* Local variables. */
39 static abstract_catalog_reader_ty *callback_arg;
40
41
42 /* ========================================================================= */
43 /* Allocating and freeing instances of abstract_catalog_reader_ty. */
44
45
46 abstract_catalog_reader_ty *
catalog_reader_alloc(abstract_catalog_reader_class_ty * method_table)47 catalog_reader_alloc (abstract_catalog_reader_class_ty *method_table)
48 {
49 abstract_catalog_reader_ty *pop;
50
51 pop = (abstract_catalog_reader_ty *) xmalloc (method_table->size);
52 pop->methods = method_table;
53 if (method_table->constructor)
54 method_table->constructor (pop);
55 return pop;
56 }
57
58
59 void
catalog_reader_free(abstract_catalog_reader_ty * pop)60 catalog_reader_free (abstract_catalog_reader_ty *pop)
61 {
62 if (pop->methods->destructor)
63 pop->methods->destructor (pop);
64 free (pop);
65 }
66
67
68 /* ========================================================================= */
69 /* Inline functions to invoke the methods. */
70
71
72 static inline void
call_parse_brief(abstract_catalog_reader_ty * pop)73 call_parse_brief (abstract_catalog_reader_ty *pop)
74 {
75 if (pop->methods->parse_brief)
76 pop->methods->parse_brief (pop);
77 }
78
79 static inline void
call_parse_debrief(abstract_catalog_reader_ty * pop)80 call_parse_debrief (abstract_catalog_reader_ty *pop)
81 {
82 if (pop->methods->parse_debrief)
83 pop->methods->parse_debrief (pop);
84 }
85
86 static inline void
call_directive_domain(abstract_catalog_reader_ty * pop,char * name)87 call_directive_domain (abstract_catalog_reader_ty *pop, char *name)
88 {
89 if (pop->methods->directive_domain)
90 pop->methods->directive_domain (pop, name);
91 }
92
93 static inline void
call_directive_message(abstract_catalog_reader_ty * pop,char * msgctxt,char * msgid,lex_pos_ty * msgid_pos,char * msgid_plural,char * msgstr,size_t msgstr_len,lex_pos_ty * msgstr_pos,char * prev_msgctxt,char * prev_msgid,char * prev_msgid_plural,bool force_fuzzy,bool obsolete)94 call_directive_message (abstract_catalog_reader_ty *pop,
95 char *msgctxt,
96 char *msgid,
97 lex_pos_ty *msgid_pos,
98 char *msgid_plural,
99 char *msgstr, size_t msgstr_len,
100 lex_pos_ty *msgstr_pos,
101 char *prev_msgctxt,
102 char *prev_msgid,
103 char *prev_msgid_plural,
104 bool force_fuzzy, bool obsolete)
105 {
106 if (pop->methods->directive_message)
107 pop->methods->directive_message (pop, msgctxt,
108 msgid, msgid_pos, msgid_plural,
109 msgstr, msgstr_len, msgstr_pos,
110 prev_msgctxt,
111 prev_msgid,
112 prev_msgid_plural,
113 force_fuzzy, obsolete);
114 }
115
116 static inline void
call_comment(abstract_catalog_reader_ty * pop,const char * s)117 call_comment (abstract_catalog_reader_ty *pop, const char *s)
118 {
119 if (pop->methods->comment != NULL)
120 pop->methods->comment (pop, s);
121 }
122
123 static inline void
call_comment_dot(abstract_catalog_reader_ty * pop,const char * s)124 call_comment_dot (abstract_catalog_reader_ty *pop, const char *s)
125 {
126 if (pop->methods->comment_dot != NULL)
127 pop->methods->comment_dot (pop, s);
128 }
129
130 static inline void
call_comment_filepos(abstract_catalog_reader_ty * pop,const char * name,size_t line)131 call_comment_filepos (abstract_catalog_reader_ty *pop, const char *name,
132 size_t line)
133 {
134 if (pop->methods->comment_filepos)
135 pop->methods->comment_filepos (pop, name, line);
136 }
137
138 static inline void
call_comment_special(abstract_catalog_reader_ty * pop,const char * s)139 call_comment_special (abstract_catalog_reader_ty *pop, const char *s)
140 {
141 if (pop->methods->comment_special != NULL)
142 pop->methods->comment_special (pop, s);
143 }
144
145
146 /* ========================================================================= */
147 /* Exported functions. */
148
149
150 static inline void
parse_start(abstract_catalog_reader_ty * pop)151 parse_start (abstract_catalog_reader_ty *pop)
152 {
153 /* The parse will call the po_callback_... functions (see below)
154 when the various directive are recognised. The callback_arg
155 variable is used to tell these functions which instance is to
156 have the relevant method invoked. */
157 callback_arg = pop;
158
159 call_parse_brief (pop);
160 }
161
162 static inline void
parse_end(abstract_catalog_reader_ty * pop)163 parse_end (abstract_catalog_reader_ty *pop)
164 {
165 call_parse_debrief (pop);
166 callback_arg = NULL;
167 }
168
169
170 void
catalog_reader_parse(abstract_catalog_reader_ty * pop,FILE * fp,const char * real_filename,const char * logical_filename,catalog_input_format_ty input_syntax)171 catalog_reader_parse (abstract_catalog_reader_ty *pop, FILE *fp,
172 const char *real_filename, const char *logical_filename,
173 catalog_input_format_ty input_syntax)
174 {
175 error_message_count = 0;
176
177 /* Parse the stream's content. */
178 parse_start (pop);
179 input_syntax->parse (pop, fp, real_filename, logical_filename);
180 parse_end (pop);
181
182 if (error_message_count > 0)
183 po_xerror (PO_SEVERITY_FATAL_ERROR, NULL,
184 /*real_filename*/ NULL, (size_t)(-1), (size_t)(-1), false,
185 xasprintf (ngettext ("found %d fatal error",
186 "found %d fatal errors",
187 error_message_count),
188 error_message_count));
189 }
190
191
192 /* ========================================================================= */
193 /* Callbacks used by po-gram.y or po-lex.c, indirectly from
194 catalog_reader_parse. */
195
196
197 /* This function is called by po_gram_lex() whenever a domain directive
198 has been seen. */
199 void
po_callback_domain(char * name)200 po_callback_domain (char *name)
201 {
202 /* assert(callback_arg); */
203 call_directive_domain (callback_arg, name);
204 }
205
206
207 /* This function is called by po_gram_lex() whenever a message has been
208 seen. */
209 void
po_callback_message(char * msgctxt,char * msgid,lex_pos_ty * msgid_pos,char * msgid_plural,char * msgstr,size_t msgstr_len,lex_pos_ty * msgstr_pos,char * prev_msgctxt,char * prev_msgid,char * prev_msgid_plural,bool force_fuzzy,bool obsolete)210 po_callback_message (char *msgctxt,
211 char *msgid, lex_pos_ty *msgid_pos, char *msgid_plural,
212 char *msgstr, size_t msgstr_len, lex_pos_ty *msgstr_pos,
213 char *prev_msgctxt,
214 char *prev_msgid,
215 char *prev_msgid_plural,
216 bool force_fuzzy, bool obsolete)
217 {
218 /* assert(callback_arg); */
219 call_directive_message (callback_arg, msgctxt,
220 msgid, msgid_pos, msgid_plural,
221 msgstr, msgstr_len, msgstr_pos,
222 prev_msgctxt, prev_msgid, prev_msgid_plural,
223 force_fuzzy, obsolete);
224 }
225
226
227 void
po_callback_comment(const char * s)228 po_callback_comment (const char *s)
229 {
230 /* assert(callback_arg); */
231 call_comment (callback_arg, s);
232 }
233
234
235 void
po_callback_comment_dot(const char * s)236 po_callback_comment_dot (const char *s)
237 {
238 /* assert(callback_arg); */
239 call_comment_dot (callback_arg, s);
240 }
241
242
243 /* This function is called by po_parse_comment_filepos(), once for each
244 filename. */
245 void
po_callback_comment_filepos(const char * name,size_t line)246 po_callback_comment_filepos (const char *name, size_t line)
247 {
248 /* assert(callback_arg); */
249 call_comment_filepos (callback_arg, name, line);
250 }
251
252
253 void
po_callback_comment_special(const char * s)254 po_callback_comment_special (const char *s)
255 {
256 /* assert(callback_arg); */
257 call_comment_special (callback_arg, s);
258 }
259
260
261 /* Parse a special comment and put the result in *fuzzyp, formatp, *rangep,
262 *wrapp. */
263 void
po_parse_comment_special(const char * s,bool * fuzzyp,enum is_format formatp[NFORMATS],struct argument_range * rangep,enum is_wrap * wrapp,enum is_syntax_check scp[NSYNTAXCHECKS])264 po_parse_comment_special (const char *s,
265 bool *fuzzyp, enum is_format formatp[NFORMATS],
266 struct argument_range *rangep, enum is_wrap *wrapp,
267 enum is_syntax_check scp[NSYNTAXCHECKS])
268 {
269 size_t i;
270
271 *fuzzyp = false;
272 for (i = 0; i < NFORMATS; i++)
273 formatp[i] = undecided;
274 rangep->min = -1;
275 rangep->max = -1;
276 *wrapp = undecided;
277 for (i = 0; i < NSYNTAXCHECKS; i++)
278 scp[i] = undecided;
279
280 while (*s != '\0')
281 {
282 const char *t;
283
284 /* Skip whitespace. */
285 while (*s != '\0' && strchr ("\n \t\r\f\v,", *s) != NULL)
286 s++;
287
288 /* Collect a token. */
289 t = s;
290 while (*s != '\0' && strchr ("\n \t\r\f\v,", *s) == NULL)
291 s++;
292 if (s != t)
293 {
294 size_t len = s - t;
295
296 /* Accept fuzzy flag. */
297 if (len == 5 && memcmp (t, "fuzzy", 5) == 0)
298 {
299 *fuzzyp = true;
300 continue;
301 }
302
303 /* Accept format description. */
304 if (len >= 7 && memcmp (t + len - 7, "-format", 7) == 0)
305 {
306 const char *p;
307 size_t n;
308 enum is_format value;
309
310 p = t;
311 n = len - 7;
312
313 if (n >= 3 && memcmp (p, "no-", 3) == 0)
314 {
315 p += 3;
316 n -= 3;
317 value = no;
318 }
319 else if (n >= 9 && memcmp (p, "possible-", 9) == 0)
320 {
321 p += 9;
322 n -= 9;
323 value = possible;
324 }
325 else if (n >= 11 && memcmp (p, "impossible-", 11) == 0)
326 {
327 p += 11;
328 n -= 11;
329 value = impossible;
330 }
331 else
332 value = yes;
333
334 for (i = 0; i < NFORMATS; i++)
335 if (strlen (format_language[i]) == n
336 && memcmp (format_language[i], p, n) == 0)
337 {
338 formatp[i] = value;
339 break;
340 }
341 if (i < NFORMATS)
342 continue;
343 }
344
345 /* Accept range description "range: <min>..<max>". */
346 if (len == 6 && memcmp (t, "range:", 6) == 0)
347 {
348 /* Skip whitespace. */
349 while (*s != '\0' && strchr ("\n \t\r\f\v,", *s) != NULL)
350 s++;
351
352 /* Collect a token. */
353 t = s;
354 while (*s != '\0' && strchr ("\n \t\r\f\v,", *s) == NULL)
355 s++;
356 /* Parse it. */
357 if (*t >= '0' && *t <= '9')
358 {
359 unsigned int min = 0;
360
361 for (; *t >= '0' && *t <= '9'; t++)
362 {
363 if (min <= INT_MAX / 10)
364 {
365 min = 10 * min + (*t - '0');
366 if (min > INT_MAX)
367 min = INT_MAX;
368 }
369 else
370 /* Avoid integer overflow. */
371 min = INT_MAX;
372 }
373 if (*t++ == '.')
374 if (*t++ == '.')
375 if (*t >= '0' && *t <= '9')
376 {
377 unsigned int max = 0;
378 for (; *t >= '0' && *t <= '9'; t++)
379 {
380 if (max <= INT_MAX / 10)
381 {
382 max = 10 * max + (*t - '0');
383 if (max > INT_MAX)
384 max = INT_MAX;
385 }
386 else
387 /* Avoid integer overflow. */
388 max = INT_MAX;
389 }
390 if (min <= max)
391 {
392 rangep->min = min;
393 rangep->max = max;
394 continue;
395 }
396 }
397 }
398 }
399
400 /* Accept wrap description. */
401 if (len == 4 && memcmp (t, "wrap", 4) == 0)
402 {
403 *wrapp = yes;
404 continue;
405 }
406 if (len == 7 && memcmp (t, "no-wrap", 7) == 0)
407 {
408 *wrapp = no;
409 continue;
410 }
411
412 /* Accept syntax check description. */
413 if (len >= 6 && memcmp (t + len - 6, "-check", 6) == 0)
414 {
415 const char *p;
416 size_t n;
417 enum is_syntax_check value;
418
419 p = t;
420 n = len - 6;
421
422 if (n >= 3 && memcmp (p, "no-", 3) == 0)
423 {
424 p += 3;
425 n -= 3;
426 value = no;
427 }
428 else
429 value = yes;
430
431 for (i = 0; i < NSYNTAXCHECKS; i++)
432 if (strlen (syntax_check_name[i]) == n
433 && memcmp (syntax_check_name[i], p, n) == 0)
434 {
435 scp[i] = value;
436 break;
437 }
438 if (i < NSYNTAXCHECKS)
439 continue;
440 }
441
442 /* Unknown special comment marker. It may have been generated
443 from a future xgettext version. Ignore it. */
444 }
445 }
446 }
447
448
449 /* Parse a GNU style file comment.
450 Syntax: an arbitrary number of
451 STRING COLON NUMBER
452 or
453 STRING
454 The latter style, without line number, occurs in PO files converted e.g.
455 from Pascal .rst files or from OpenOffice resource files.
456 Call po_callback_comment_filepos for each of them. */
457 static void
po_parse_comment_filepos(const char * s)458 po_parse_comment_filepos (const char *s)
459 {
460 while (*s != '\0')
461 {
462 while (*s == ' ' || *s == '\t' || *s == '\n')
463 s++;
464 if (*s != '\0')
465 {
466 const char *string_start = s;
467
468 do
469 s++;
470 while (!(*s == '\0' || *s == ' ' || *s == '\t' || *s == '\n'));
471
472 /* See if there is a COLON and NUMBER after the STRING, separated
473 through optional spaces. */
474 {
475 const char *p = s;
476
477 while (*p == ' ' || *p == '\t' || *p == '\n')
478 p++;
479
480 if (*p == ':')
481 {
482 p++;
483
484 while (*p == ' ' || *p == '\t' || *p == '\n')
485 p++;
486
487 if (*p >= '0' && *p <= '9')
488 {
489 /* Accumulate a number. */
490 size_t n = 0;
491
492 do
493 {
494 n = n * 10 + (*p - '0');
495 p++;
496 }
497 while (*p >= '0' && *p <= '9');
498
499 if (*p == '\0' || *p == ' ' || *p == '\t' || *p == '\n')
500 {
501 /* Parsed a GNU style file comment with spaces. */
502 const char *string_end = s;
503 size_t string_length = string_end - string_start;
504 char *string = XNMALLOC (string_length + 1, char);
505
506 memcpy (string, string_start, string_length);
507 string[string_length] = '\0';
508
509 po_callback_comment_filepos (string, n);
510
511 free (string);
512
513 s = p;
514 continue;
515 }
516 }
517 }
518 }
519
520 /* See if there is a COLON at the end of STRING and a NUMBER after
521 it, separated through optional spaces. */
522 if (s[-1] == ':')
523 {
524 const char *p = s;
525
526 while (*p == ' ' || *p == '\t' || *p == '\n')
527 p++;
528
529 if (*p >= '0' && *p <= '9')
530 {
531 /* Accumulate a number. */
532 size_t n = 0;
533
534 do
535 {
536 n = n * 10 + (*p - '0');
537 p++;
538 }
539 while (*p >= '0' && *p <= '9');
540
541 if (*p == '\0' || *p == ' ' || *p == '\t' || *p == '\n')
542 {
543 /* Parsed a GNU style file comment with spaces. */
544 const char *string_end = s - 1;
545 size_t string_length = string_end - string_start;
546 char *string = XNMALLOC (string_length + 1, char);
547
548 memcpy (string, string_start, string_length);
549 string[string_length] = '\0';
550
551 po_callback_comment_filepos (string, n);
552
553 free (string);
554
555 s = p;
556 continue;
557 }
558 }
559 }
560
561 /* See if there is a COLON and NUMBER at the end of the STRING,
562 without separating spaces. */
563 {
564 const char *p = s;
565
566 while (p > string_start)
567 {
568 p--;
569 if (!(*p >= '0' && *p <= '9'))
570 {
571 p++;
572 break;
573 }
574 }
575
576 /* p now points to the beginning of the trailing digits segment
577 at the end of STRING. */
578
579 if (p < s
580 && p > string_start + 1
581 && p[-1] == ':')
582 {
583 /* Parsed a GNU style file comment without spaces. */
584 const char *string_end = p - 1;
585
586 /* Accumulate a number. */
587 {
588 size_t n = 0;
589
590 do
591 {
592 n = n * 10 + (*p - '0');
593 p++;
594 }
595 while (p < s);
596
597 {
598 size_t string_length = string_end - string_start;
599 char *string = XNMALLOC (string_length + 1, char);
600
601 memcpy (string, string_start, string_length);
602 string[string_length] = '\0';
603
604 po_callback_comment_filepos (string, n);
605
606 free (string);
607
608 continue;
609 }
610 }
611 }
612 }
613
614 /* Parsed a file comment without line number. */
615 {
616 const char *string_end = s;
617 size_t string_length = string_end - string_start;
618 char *string = XNMALLOC (string_length + 1, char);
619
620 memcpy (string, string_start, string_length);
621 string[string_length] = '\0';
622
623 po_callback_comment_filepos (string, (size_t)(-1));
624
625 free (string);
626 }
627 }
628 }
629 }
630
631
632 /* Parse a SunOS or Solaris style file comment.
633 Syntax of SunOS style:
634 FILE_KEYWORD COLON STRING COMMA LINE_KEYWORD COLON NUMBER
635 Syntax of Solaris style:
636 FILE_KEYWORD COLON STRING COMMA LINE_KEYWORD NUMBER_KEYWORD COLON NUMBER
637 where
638 FILE_KEYWORD ::= "file" | "File"
639 COLON ::= ":"
640 COMMA ::= ","
641 LINE_KEYWORD ::= "line"
642 NUMBER_KEYWORD ::= "number"
643 NUMBER ::= [0-9]+
644 Return true if parsed, false if not a comment of this form. */
645 static bool
po_parse_comment_solaris_filepos(const char * s)646 po_parse_comment_solaris_filepos (const char *s)
647 {
648 if (s[0] == ' '
649 && (s[1] == 'F' || s[1] == 'f')
650 && s[2] == 'i' && s[3] == 'l' && s[4] == 'e'
651 && s[5] == ':')
652 {
653 const char *string_start;
654 const char *string_end;
655
656 {
657 const char *p = s + 6;
658
659 while (*p == ' ' || *p == '\t')
660 p++;
661 string_start = p;
662 }
663
664 for (string_end = string_start; *string_end != '\0'; string_end++)
665 {
666 const char *p = string_end;
667
668 while (*p == ' ' || *p == '\t')
669 p++;
670
671 if (*p == ',')
672 {
673 p++;
674
675 while (*p == ' ' || *p == '\t')
676 p++;
677
678 if (p[0] == 'l' && p[1] == 'i' && p[2] == 'n' && p[3] == 'e')
679 {
680 p += 4;
681
682 while (*p == ' ' || *p == '\t')
683 p++;
684
685 if (p[0] == 'n' && p[1] == 'u' && p[2] == 'm'
686 && p[3] == 'b' && p[4] == 'e' && p[5] == 'r')
687 {
688 p += 6;
689 while (*p == ' ' || *p == '\t')
690 p++;
691 }
692
693 if (*p == ':')
694 {
695 p++;
696
697 if (*p >= '0' && *p <= '9')
698 {
699 /* Accumulate a number. */
700 size_t n = 0;
701
702 do
703 {
704 n = n * 10 + (*p - '0');
705 p++;
706 }
707 while (*p >= '0' && *p <= '9');
708
709 while (*p == ' ' || *p == '\t' || *p == '\n')
710 p++;
711
712 if (*p == '\0')
713 {
714 /* Parsed a Sun style file comment. */
715 size_t string_length = string_end - string_start;
716 char *string =
717 XNMALLOC (string_length + 1, char);
718
719 memcpy (string, string_start, string_length);
720 string[string_length] = '\0';
721
722 po_callback_comment_filepos (string, n);
723
724 free (string);
725 return true;
726 }
727 }
728 }
729 }
730 }
731 }
732 }
733
734 return false;
735 }
736
737
/* This function is called by po_gram_lex() whenever a comment is
   seen.  It looks at the first character of S to see what sort of comment
   it is, and then dispatches it to the appropriate handler:
     '.'          po_callback_comment_dot
     ':'          po_parse_comment_filepos, which invokes
                  po_callback_comment_filepos for each position found
     ',' or '!'   po_callback_comment_special
     other        po_parse_comment_solaris_filepos, or po_callback_comment
                  if S is not a Sun-style file position line.  */
void
po_callback_comment_dispatcher (const char *s)
{
  switch (*s)
    {
    case '.':
      s++;
      /* There is usually a space before the comment.  People don't
         consider it part of the comment, therefore remove it here.  */
      if (*s == ' ')
        s++;
      po_callback_comment_dot (s);
      break;

    case ':':
      /* Parse the file location string.  The appropriate callback will be
         invoked.  */
      po_parse_comment_filepos (s + 1);
      break;

    case ',':
    case '!':
      /* Get all entries in the special comment line.  */
      po_callback_comment_special (s + 1);
      break;

    default:
      /* It looks like a plain vanilla comment, but Solaris-style file
         position lines do, too.  Try to parse the lot.  If the parse
         succeeds, the appropriate callback has been invoked already and
         nothing is left to do.  */
      if (!po_parse_comment_solaris_filepos (s))
        {
          /* There is usually a space before the comment.  People don't
             consider it part of the comment, therefore remove it here.  */
          if (*s == ' ')
            s++;
          po_callback_comment (s);
        }
      break;
    }
}
783