1 /* GNU gettext - internationalization aids
2 Copyright (C) 1995-1998, 2000-2010, 2012, 2016, 2018-2020 Free Software
3 Foundation, Inc.
4 This file was written by Peter Miller <millerp@canb.auug.org.au>
5
6 This program is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3 of the License, or
9 (at your option) any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this program. If not, see <https://www.gnu.org/licenses/>. */
18
19 #ifdef HAVE_CONFIG_H
20 # include <config.h>
21 #endif
22
#include <getopt.h>
#include <limits.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <locale.h>
29
30 #include "noreturn.h"
31 #include "closeout.h"
32 #include "dir-list.h"
33 #include "error.h"
34 #include "error-progname.h"
35 #include "progname.h"
36 #include "relocatable.h"
37 #include "basename-lgpl.h"
38 #include "message.h"
39 #include "read-catalog.h"
40 #include "read-po.h"
41 #include "read-properties.h"
42 #include "read-stringtable.h"
43 #include "xmalloca.h"
44 #include "po-charset.h"
45 #include "msgl-iconv.h"
46 #include "msgl-fsearch.h"
47 #include "c-strstr.h"
48 #include "c-strcase.h"
49 #include "propername.h"
50 #include "gettext.h"
51
52 #define _(str) gettext (str)
53
54
/* Apply the .pot file to each of the domains in the PO file.
   Enabled by -m / --multi-domain.  */
static bool multi_domain_mode = false;

/* Determines whether to use fuzzy matching.
   Disabled by -N / --no-fuzzy-matching.  */
static bool use_fuzzy_matching = true;

/* Whether to consider fuzzy messages as translations.
   Enabled by --use-fuzzy; when false, a fuzzy non-header definition is
   reported as an error by match_domain ().  */
static bool include_fuzzies = false;

/* Whether to consider untranslated messages as translations.
   Enabled by --use-untranslated; when false, a definition with an empty
   msgstr is reported as an error by match_domain ().  */
static bool include_untranslated = false;
66
/* Long options.
   Options that also have a single-letter form use that letter as their
   code.  Options without a short form use codes above CHAR_MAX; main ()
   matches those codes in its option switch.  */
static const struct option long_options[] =
{
  { "directory", required_argument, NULL, 'D' },
  { "help", no_argument, NULL, 'h' },
  { "multi-domain", no_argument, NULL, 'm' },
  { "no-fuzzy-matching", no_argument, NULL, 'N' },
  { "properties-input", no_argument, NULL, 'P' },
  { "stringtable-input", no_argument, NULL, CHAR_MAX + 1 },
  { "use-fuzzy", no_argument, NULL, CHAR_MAX + 2 },
  { "use-untranslated", no_argument, NULL, CHAR_MAX + 3 },
  { "version", no_argument, NULL, 'V' },
  { NULL, 0, NULL, 0 }
};


/* Forward declaration of local functions.
   usage () never returns: it always ends by calling exit ().  */
_GL_NORETURN_FUNC static void usage (int status);
static void compare (const char *fn1, const char *fn2,
                     catalog_input_format_ty input_syntax);
87
88
89 int
main(int argc,char * argv[])90 main (int argc, char *argv[])
91 {
92 int optchar;
93 bool do_help;
94 bool do_version;
95 catalog_input_format_ty input_syntax = &input_format_po;
96
97 /* Set program name for messages. */
98 set_program_name (argv[0]);
99 error_print_progname = maybe_print_progname;
100 gram_max_allowed_errors = UINT_MAX;
101
102 /* Set locale via LC_ALL. */
103 setlocale (LC_ALL, "");
104
105 /* Set the text message domain. */
106 bindtextdomain (PACKAGE, relocate (LOCALEDIR));
107 bindtextdomain ("bison-runtime", relocate (BISON_LOCALEDIR));
108 textdomain (PACKAGE);
109
110 /* Ensure that write errors on stdout are detected. */
111 atexit (close_stdout);
112
113 do_help = false;
114 do_version = false;
115 while ((optchar = getopt_long (argc, argv, "D:hmNPV", long_options, NULL))
116 != EOF)
117 switch (optchar)
118 {
119 case '\0': /* long option */
120 break;
121
122 case 'D':
123 dir_list_append (optarg);
124 break;
125
126 case 'h':
127 do_help = true;
128 break;
129
130 case 'm':
131 multi_domain_mode = true;
132 break;
133
134 case 'N':
135 use_fuzzy_matching = false;
136 break;
137
138 case 'P':
139 input_syntax = &input_format_properties;
140 break;
141
142 case 'V':
143 do_version = true;
144 break;
145
146 case CHAR_MAX + 1: /* --stringtable-input */
147 input_syntax = &input_format_stringtable;
148 break;
149
150 case CHAR_MAX + 2: /* --use-fuzzy */
151 include_fuzzies = true;
152 break;
153
154 case CHAR_MAX + 3: /* --use-untranslated */
155 include_untranslated = true;
156 break;
157
158 default:
159 usage (EXIT_FAILURE);
160 break;
161 }
162
163 /* Version information is requested. */
164 if (do_version)
165 {
166 printf ("%s (GNU %s) %s\n", last_component (program_name),
167 PACKAGE, VERSION);
168 /* xgettext: no-wrap */
169 printf (_("Copyright (C) %s Free Software Foundation, Inc.\n\
170 License GPLv3+: GNU GPL version 3 or later <%s>\n\
171 This is free software: you are free to change and redistribute it.\n\
172 There is NO WARRANTY, to the extent permitted by law.\n\
173 "),
174 "1995-2020", "https://gnu.org/licenses/gpl.html");
175 printf (_("Written by %s.\n"), proper_name ("Peter Miller"));
176 exit (EXIT_SUCCESS);
177 }
178
179 /* Help is requested. */
180 if (do_help)
181 usage (EXIT_SUCCESS);
182
183 /* Test whether we have an .po file name as argument. */
184 if (optind >= argc)
185 {
186 error (EXIT_SUCCESS, 0, _("no input files given"));
187 usage (EXIT_FAILURE);
188 }
189 if (optind + 2 != argc)
190 {
191 error (EXIT_SUCCESS, 0, _("exactly 2 input files required"));
192 usage (EXIT_FAILURE);
193 }
194
195 /* compare the two files */
196 compare (argv[optind], argv[optind + 1], input_syntax);
197 exit (EXIT_SUCCESS);
198 }
199
200
201 /* Display usage information and exit. */
202 static void
usage(int status)203 usage (int status)
204 {
205 if (status != EXIT_SUCCESS)
206 fprintf (stderr, _("Try '%s --help' for more information.\n"),
207 program_name);
208 else
209 {
210 printf (_("\
211 Usage: %s [OPTION] def.po ref.pot\n\
212 "), program_name);
213 printf ("\n");
214 /* xgettext: no-wrap */
215 printf (_("\
216 Compare two Uniforum style .po files to check that both contain the same\n\
217 set of msgid strings. The def.po file is an existing PO file with the\n\
218 translations. The ref.pot file is the last created PO file, or a PO Template\n\
219 file (generally created by xgettext). This is useful for checking that\n\
220 you have translated each and every message in your program. Where an exact\n\
221 match cannot be found, fuzzy matching is used to produce better diagnostics.\n\
222 "));
223 printf ("\n");
224 printf (_("\
225 Mandatory arguments to long options are mandatory for short options too.\n"));
226 printf ("\n");
227 printf (_("\
228 Input file location:\n"));
229 printf (_("\
230 def.po translations\n"));
231 printf (_("\
232 ref.pot references to the sources\n"));
233 printf (_("\
234 -D, --directory=DIRECTORY add DIRECTORY to list for input files search\n"));
235 printf ("\n");
236 printf (_("\
237 Operation modifiers:\n"));
238 printf (_("\
239 -m, --multi-domain apply ref.pot to each of the domains in def.po\n"));
240 printf (_("\
241 -N, --no-fuzzy-matching do not use fuzzy matching\n"));
242 printf (_("\
243 --use-fuzzy consider fuzzy entries\n"));
244 printf (_("\
245 --use-untranslated consider untranslated entries\n"));
246 printf ("\n");
247 printf (_("\
248 Input file syntax:\n"));
249 printf (_("\
250 -P, --properties-input input files are in Java .properties syntax\n"));
251 printf (_("\
252 --stringtable-input input files are in NeXTstep/GNUstep .strings\n\
253 syntax\n"));
254 printf ("\n");
255 printf (_("\
256 Informative output:\n"));
257 printf (_("\
258 -h, --help display this help and exit\n"));
259 printf (_("\
260 -V, --version output version information and exit\n"));
261 printf ("\n");
262 /* TRANSLATORS: The first placeholder is the web address of the Savannah
263 project of this package. The second placeholder is the bug-reporting
264 email address for this package. Please add _another line_ saying
265 "Report translation bugs to <...>\n" with the address for translation
266 bugs (typically your translation team's web or email address). */
267 printf(_("\
268 Report bugs in the bug tracker at <%s>\n\
269 or by email to <%s>.\n"),
270 "https://savannah.gnu.org/projects/gettext",
271 "bug-gettext@gnu.org");
272 }
273
274 exit (status);
275 }
276
277
278 /* Return true if a message should be kept. */
279 static bool
is_message_selected(const message_ty * mp)280 is_message_selected (const message_ty *mp)
281 {
282 /* Always keep the header entry. */
283 if (is_header (mp))
284 return true;
285
286 return !mp->obsolete;
287 }
288
289
290 /* Remove obsolete messages from a message list. Return the modified list. */
291 static msgdomain_list_ty *
remove_obsoletes(msgdomain_list_ty * mdlp)292 remove_obsoletes (msgdomain_list_ty *mdlp)
293 {
294 size_t k;
295
296 for (k = 0; k < mdlp->nitems; k++)
297 message_list_remove_if_not (mdlp->item[k]->messages, is_message_selected);
298
299 return mdlp;
300 }
301
302
303 static void
match_domain(const char * fn1,const char * fn2,message_list_ty * defmlp,message_fuzzy_index_ty ** defmlp_findex,const char * def_canon_charset,message_list_ty * refmlp,int * nerrors)304 match_domain (const char *fn1, const char *fn2,
305 message_list_ty *defmlp, message_fuzzy_index_ty **defmlp_findex,
306 const char *def_canon_charset,
307 message_list_ty *refmlp,
308 int *nerrors)
309 {
310 size_t j;
311
312 for (j = 0; j < refmlp->nitems; j++)
313 {
314 message_ty *refmsg;
315 message_ty *defmsg;
316
317 refmsg = refmlp->item[j];
318
319 /* See if it is in the other file. */
320 defmsg = message_list_search (defmlp, refmsg->msgctxt, refmsg->msgid);
321 if (defmsg)
322 {
323 if (!include_untranslated && defmsg->msgstr[0] == '\0')
324 {
325 (*nerrors)++;
326 po_gram_error_at_line (&defmsg->pos,
327 _("this message is untranslated"));
328 }
329 else if (!include_fuzzies && defmsg->is_fuzzy && !is_header (defmsg))
330 {
331 (*nerrors)++;
332 po_gram_error_at_line (&defmsg->pos,
333 _("this message needs to be reviewed by the translator"));
334 }
335 else
336 defmsg->used = 1;
337 }
338 else
339 {
340 /* If the message was not defined at all, try to find a very
341 similar message, it could be a typo, or the suggestion may
342 help. */
343 (*nerrors)++;
344 if (use_fuzzy_matching)
345 {
346 if (false)
347 {
348 /* Old, slow code. */
349 defmsg =
350 message_list_search_fuzzy (defmlp,
351 refmsg->msgctxt, refmsg->msgid);
352 }
353 else
354 {
355 /* Speedup through early abort in fstrcmp(), combined with
356 pre-sorting of the messages through a hashed index. */
357 /* Create the fuzzy index lazily. */
358 if (*defmlp_findex == NULL)
359 *defmlp_findex =
360 message_fuzzy_index_alloc (defmlp, def_canon_charset);
361 defmsg =
362 message_fuzzy_index_search (*defmlp_findex,
363 refmsg->msgctxt, refmsg->msgid,
364 FUZZY_THRESHOLD, false);
365 }
366 }
367 else
368 defmsg = NULL;
369 if (defmsg)
370 {
371 po_gram_error_at_line (&refmsg->pos,
372 _("this message is used but not defined..."));
373 error_message_count--;
374 po_gram_error_at_line (&defmsg->pos,
375 _("...but this definition is similar"));
376 defmsg->used = 1;
377 }
378 else
379 po_gram_error_at_line (&refmsg->pos,
380 _("this message is used but not defined in %s"),
381 fn1);
382 }
383 }
384 }
385
386
387 static void
compare(const char * fn1,const char * fn2,catalog_input_format_ty input_syntax)388 compare (const char *fn1, const char *fn2, catalog_input_format_ty input_syntax)
389 {
390 msgdomain_list_ty *def;
391 msgdomain_list_ty *ref;
392 int nerrors;
393 size_t j, k;
394 const char *def_canon_charset;
395 message_list_ty *empty_list;
396
397 /* This is the master file, created by a human. */
398 def = remove_obsoletes (read_catalog_file (fn1, input_syntax));
399
400 /* This is the generated file, created by groping the sources with
401 the xgettext program. */
402 ref = remove_obsoletes (read_catalog_file (fn2, input_syntax));
403
404 /* The references file can be either in ASCII or in UTF-8. If it is
405 in UTF-8, we have to convert the definitions to UTF-8 as well. */
406 {
407 bool was_utf8 = false;
408 for (k = 0; k < ref->nitems; k++)
409 {
410 message_list_ty *mlp = ref->item[k]->messages;
411
412 for (j = 0; j < mlp->nitems; j++)
413 if (is_header (mlp->item[j]) /* && !mlp->item[j]->obsolete */)
414 {
415 const char *header = mlp->item[j]->msgstr;
416
417 if (header != NULL)
418 {
419 const char *charsetstr = c_strstr (header, "charset=");
420
421 if (charsetstr != NULL)
422 {
423 size_t len;
424
425 charsetstr += strlen ("charset=");
426 len = strcspn (charsetstr, " \t\n");
427 if (len == strlen ("UTF-8")
428 && c_strncasecmp (charsetstr, "UTF-8", len) == 0)
429 was_utf8 = true;
430 }
431 }
432 }
433 }
434 if (was_utf8)
435 def = iconv_msgdomain_list (def, "UTF-8", true, fn1);
436 }
437
438 /* Determine canonicalized encoding name of the definitions now, after
439 conversion. Only used for fuzzy matching. */
440 if (use_fuzzy_matching)
441 {
442 def_canon_charset = def->encoding;
443 if (def_canon_charset == NULL)
444 {
445 char *charset = NULL;
446
447 /* Get the encoding of the definitions file. */
448 for (k = 0; k < def->nitems; k++)
449 {
450 message_list_ty *mlp = def->item[k]->messages;
451
452 for (j = 0; j < mlp->nitems; j++)
453 if (is_header (mlp->item[j]) && !mlp->item[j]->obsolete)
454 {
455 const char *header = mlp->item[j]->msgstr;
456
457 if (header != NULL)
458 {
459 const char *charsetstr = c_strstr (header, "charset=");
460
461 if (charsetstr != NULL)
462 {
463 size_t len;
464
465 charsetstr += strlen ("charset=");
466 len = strcspn (charsetstr, " \t\n");
467 charset = (char *) xmalloca (len + 1);
468 memcpy (charset, charsetstr, len);
469 charset[len] = '\0';
470 break;
471 }
472 }
473 }
474 if (charset != NULL)
475 break;
476 }
477 if (charset != NULL)
478 def_canon_charset = po_charset_canonicalize (charset);
479 if (def_canon_charset == NULL)
480 /* Unspecified encoding. Assume unibyte encoding. */
481 def_canon_charset = po_charset_ascii;
482 }
483 }
484 else
485 def_canon_charset = NULL;
486
487 empty_list = message_list_alloc (false);
488
489 /* Every entry in the xgettext generated file must be matched by a
490 (single) entry in the human created file. */
491 nerrors = 0;
492 if (!multi_domain_mode)
493 for (k = 0; k < ref->nitems; k++)
494 {
495 const char *domain = ref->item[k]->domain;
496 message_list_ty *refmlp = ref->item[k]->messages;
497 message_list_ty *defmlp;
498 message_fuzzy_index_ty *defmlp_findex;
499
500 defmlp = msgdomain_list_sublist (def, domain, false);
501 if (defmlp == NULL)
502 defmlp = empty_list;
503
504 defmlp_findex = NULL;
505
506 match_domain (fn1, fn2, defmlp, &defmlp_findex, def_canon_charset,
507 refmlp, &nerrors);
508
509 if (defmlp_findex != NULL)
510 message_fuzzy_index_free (defmlp_findex);
511 }
512 else
513 {
514 /* Apply the references messages in the default domain to each of
515 the definition domains. */
516 message_list_ty *refmlp = ref->item[0]->messages;
517
518 for (k = 0; k < def->nitems; k++)
519 {
520 message_list_ty *defmlp = def->item[k]->messages;
521
522 /* Ignore the default message domain if it has no messages. */
523 if (k > 0 || defmlp->nitems > 0)
524 {
525 message_fuzzy_index_ty *defmlp_findex = NULL;
526
527 match_domain (fn1, fn2, defmlp, &defmlp_findex, def_canon_charset,
528 refmlp, &nerrors);
529
530 if (defmlp_findex != NULL)
531 message_fuzzy_index_free (defmlp_findex);
532 }
533 }
534 }
535
536 /* Look for messages in the definition file, which are not present
537 in the reference file, indicating messages which defined but not
538 used in the program. */
539 for (k = 0; k < def->nitems; ++k)
540 {
541 message_list_ty *defmlp = def->item[k]->messages;
542
543 for (j = 0; j < defmlp->nitems; j++)
544 {
545 message_ty *defmsg = defmlp->item[j];
546
547 if (!defmsg->used)
548 po_gram_error_at_line (&defmsg->pos,
549 _("warning: this message is not used"));
550 }
551 }
552
553 /* Exit with status 1 on any error. */
554 if (nerrors > 0)
555 error (EXIT_FAILURE, 0,
556 ngettext ("found %d fatal error", "found %d fatal errors", nerrors),
557 nerrors);
558 }
559