1 /* GNU gettext - internationalization aids
2    Copyright (C) 1995-1998, 2000-2010, 2012, 2016, 2018-2020 Free Software
3    Foundation, Inc.
4    This file was written by Peter Miller <millerp@canb.auug.org.au>
5 
6    This program is free software: you can redistribute it and/or modify
7    it under the terms of the GNU General Public License as published by
8    the Free Software Foundation; either version 3 of the License, or
9    (at your option) any later version.
10 
11    This program is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14    GNU General Public License for more details.
15 
16    You should have received a copy of the GNU General Public License
17    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
18 
19 #ifdef HAVE_CONFIG_H
20 # include <config.h>
21 #endif
22 
23 #include <getopt.h>
24 #include <limits.h>
25 #include <stdbool.h>
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <locale.h>
29 
30 #include "noreturn.h"
31 #include "closeout.h"
32 #include "dir-list.h"
33 #include "error.h"
34 #include "error-progname.h"
35 #include "progname.h"
36 #include "relocatable.h"
37 #include "basename-lgpl.h"
38 #include "message.h"
39 #include "read-catalog.h"
40 #include "read-po.h"
41 #include "read-properties.h"
42 #include "read-stringtable.h"
43 #include "xmalloca.h"
44 #include "po-charset.h"
45 #include "msgl-iconv.h"
46 #include "msgl-fsearch.h"
47 #include "c-strstr.h"
48 #include "c-strcase.h"
49 #include "propername.h"
50 #include "gettext.h"
51 
/* Shorthand used throughout this file: look up the translation of STR
   by passing it to gettext ().  */
#define _(str) gettext (str)
53 
54 
/* Apply the .pot file to each of the domains in the PO file.
   Off by default; main () turns it on for -m / --multi-domain.  */
static bool multi_domain_mode = false;

/* Determines whether to use fuzzy matching.
   On by default; main () turns it off for -N / --no-fuzzy-matching.  */
static bool use_fuzzy_matching = true;

/* Whether to consider fuzzy messages as translations.
   Off by default; main () turns it on for --use-fuzzy.  */
static bool include_fuzzies = false;

/* Whether to consider untranslated messages as translations.
   Off by default; main () turns it on for --use-untranslated.  */
static bool include_untranslated = false;
66 
/* Long options.
   Every entry has a NULL flag pointer, so getopt_long () returns the value
   in the last column.  Options that also have a short form use that letter;
   long-only options use values above CHAR_MAX so that they cannot collide
   with any short option character.  The values must stay in sync with the
   switch statement in main ().  The all-zero entry terminates the table.  */
static const struct option long_options[] =
{
  { "directory", required_argument, NULL, 'D' },
  { "help", no_argument, NULL, 'h' },
  { "multi-domain", no_argument, NULL, 'm' },
  { "no-fuzzy-matching", no_argument, NULL, 'N' },
  { "properties-input", no_argument, NULL, 'P' },
  { "stringtable-input", no_argument, NULL, CHAR_MAX + 1 },
  { "use-fuzzy", no_argument, NULL, CHAR_MAX + 2 },
  { "use-untranslated", no_argument, NULL, CHAR_MAX + 3 },
  { "version", no_argument, NULL, 'V' },
  { NULL, 0, NULL, 0 }
};
81 
82 
/* Forward declaration of local functions.  */
/* usage: print a short hint (failure) or the full help text (success),
   then exit with STATUS.  Never returns.  */
_GL_NORETURN_FUNC static void usage (int status);
/* compare: check the definitions file FN1 against the references file FN2,
   both read using INPUT_SYNTAX.  */
static void compare (const char *fn1, const char *fn2,
                     catalog_input_format_ty input_syntax);
87 
88 
89 int
main(int argc,char * argv[])90 main (int argc, char *argv[])
91 {
92   int optchar;
93   bool do_help;
94   bool do_version;
95   catalog_input_format_ty input_syntax = &input_format_po;
96 
97   /* Set program name for messages.  */
98   set_program_name (argv[0]);
99   error_print_progname = maybe_print_progname;
100   gram_max_allowed_errors = UINT_MAX;
101 
102   /* Set locale via LC_ALL.  */
103   setlocale (LC_ALL, "");
104 
105   /* Set the text message domain.  */
106   bindtextdomain (PACKAGE, relocate (LOCALEDIR));
107   bindtextdomain ("bison-runtime", relocate (BISON_LOCALEDIR));
108   textdomain (PACKAGE);
109 
110   /* Ensure that write errors on stdout are detected.  */
111   atexit (close_stdout);
112 
113   do_help = false;
114   do_version = false;
115   while ((optchar = getopt_long (argc, argv, "D:hmNPV", long_options, NULL))
116          != EOF)
117     switch (optchar)
118       {
119       case '\0':                /* long option */
120         break;
121 
122       case 'D':
123         dir_list_append (optarg);
124         break;
125 
126       case 'h':
127         do_help = true;
128         break;
129 
130       case 'm':
131         multi_domain_mode = true;
132         break;
133 
134       case 'N':
135         use_fuzzy_matching = false;
136         break;
137 
138       case 'P':
139         input_syntax = &input_format_properties;
140         break;
141 
142       case 'V':
143         do_version = true;
144         break;
145 
146       case CHAR_MAX + 1:        /* --stringtable-input */
147         input_syntax = &input_format_stringtable;
148         break;
149 
150       case CHAR_MAX + 2:        /* --use-fuzzy */
151         include_fuzzies = true;
152         break;
153 
154       case CHAR_MAX + 3:        /* --use-untranslated */
155         include_untranslated = true;
156         break;
157 
158       default:
159         usage (EXIT_FAILURE);
160         break;
161       }
162 
163   /* Version information is requested.  */
164   if (do_version)
165     {
166       printf ("%s (GNU %s) %s\n", last_component (program_name),
167               PACKAGE, VERSION);
168       /* xgettext: no-wrap */
169       printf (_("Copyright (C) %s Free Software Foundation, Inc.\n\
170 License GPLv3+: GNU GPL version 3 or later <%s>\n\
171 This is free software: you are free to change and redistribute it.\n\
172 There is NO WARRANTY, to the extent permitted by law.\n\
173 "),
174               "1995-2020", "https://gnu.org/licenses/gpl.html");
175       printf (_("Written by %s.\n"), proper_name ("Peter Miller"));
176       exit (EXIT_SUCCESS);
177     }
178 
179   /* Help is requested.  */
180   if (do_help)
181     usage (EXIT_SUCCESS);
182 
183   /* Test whether we have an .po file name as argument.  */
184   if (optind >= argc)
185     {
186       error (EXIT_SUCCESS, 0, _("no input files given"));
187       usage (EXIT_FAILURE);
188     }
189   if (optind + 2 != argc)
190     {
191       error (EXIT_SUCCESS, 0, _("exactly 2 input files required"));
192       usage (EXIT_FAILURE);
193     }
194 
195   /* compare the two files */
196   compare (argv[optind], argv[optind + 1], input_syntax);
197   exit (EXIT_SUCCESS);
198 }
199 
200 
201 /* Display usage information and exit.  */
202 static void
usage(int status)203 usage (int status)
204 {
205   if (status != EXIT_SUCCESS)
206     fprintf (stderr, _("Try '%s --help' for more information.\n"),
207              program_name);
208   else
209     {
210       printf (_("\
211 Usage: %s [OPTION] def.po ref.pot\n\
212 "), program_name);
213       printf ("\n");
214       /* xgettext: no-wrap */
215       printf (_("\
216 Compare two Uniforum style .po files to check that both contain the same\n\
217 set of msgid strings.  The def.po file is an existing PO file with the\n\
218 translations.  The ref.pot file is the last created PO file, or a PO Template\n\
219 file (generally created by xgettext).  This is useful for checking that\n\
220 you have translated each and every message in your program.  Where an exact\n\
221 match cannot be found, fuzzy matching is used to produce better diagnostics.\n\
222 "));
223       printf ("\n");
224       printf (_("\
225 Mandatory arguments to long options are mandatory for short options too.\n"));
226       printf ("\n");
227       printf (_("\
228 Input file location:\n"));
229       printf (_("\
230   def.po                      translations\n"));
231       printf (_("\
232   ref.pot                     references to the sources\n"));
233       printf (_("\
234   -D, --directory=DIRECTORY   add DIRECTORY to list for input files search\n"));
235       printf ("\n");
236       printf (_("\
237 Operation modifiers:\n"));
238       printf (_("\
239   -m, --multi-domain          apply ref.pot to each of the domains in def.po\n"));
240       printf (_("\
241   -N, --no-fuzzy-matching     do not use fuzzy matching\n"));
242       printf (_("\
243       --use-fuzzy             consider fuzzy entries\n"));
244       printf (_("\
245       --use-untranslated      consider untranslated entries\n"));
246       printf ("\n");
247       printf (_("\
248 Input file syntax:\n"));
249       printf (_("\
250   -P, --properties-input      input files are in Java .properties syntax\n"));
251       printf (_("\
252       --stringtable-input     input files are in NeXTstep/GNUstep .strings\n\
253                               syntax\n"));
254       printf ("\n");
255       printf (_("\
256 Informative output:\n"));
257       printf (_("\
258   -h, --help                  display this help and exit\n"));
259       printf (_("\
260   -V, --version               output version information and exit\n"));
261       printf ("\n");
262       /* TRANSLATORS: The first placeholder is the web address of the Savannah
263          project of this package.  The second placeholder is the bug-reporting
264          email address for this package.  Please add _another line_ saying
265          "Report translation bugs to <...>\n" with the address for translation
266          bugs (typically your translation team's web or email address).  */
267       printf(_("\
268 Report bugs in the bug tracker at <%s>\n\
269 or by email to <%s>.\n"),
270              "https://savannah.gnu.org/projects/gettext",
271              "bug-gettext@gnu.org");
272     }
273 
274   exit (status);
275 }
276 
277 
278 /* Return true if a message should be kept.  */
279 static bool
is_message_selected(const message_ty * mp)280 is_message_selected (const message_ty *mp)
281 {
282   /* Always keep the header entry.  */
283   if (is_header (mp))
284     return true;
285 
286   return !mp->obsolete;
287 }
288 
289 
290 /* Remove obsolete messages from a message list.  Return the modified list.  */
291 static msgdomain_list_ty *
remove_obsoletes(msgdomain_list_ty * mdlp)292 remove_obsoletes (msgdomain_list_ty *mdlp)
293 {
294   size_t k;
295 
296   for (k = 0; k < mdlp->nitems; k++)
297     message_list_remove_if_not (mdlp->item[k]->messages, is_message_selected);
298 
299   return mdlp;
300 }
301 
302 
303 static void
match_domain(const char * fn1,const char * fn2,message_list_ty * defmlp,message_fuzzy_index_ty ** defmlp_findex,const char * def_canon_charset,message_list_ty * refmlp,int * nerrors)304 match_domain (const char *fn1, const char *fn2,
305               message_list_ty *defmlp, message_fuzzy_index_ty **defmlp_findex,
306               const char *def_canon_charset,
307               message_list_ty *refmlp,
308               int *nerrors)
309 {
310   size_t j;
311 
312   for (j = 0; j < refmlp->nitems; j++)
313     {
314       message_ty *refmsg;
315       message_ty *defmsg;
316 
317       refmsg = refmlp->item[j];
318 
319       /* See if it is in the other file.  */
320       defmsg = message_list_search (defmlp, refmsg->msgctxt, refmsg->msgid);
321       if (defmsg)
322         {
323           if (!include_untranslated && defmsg->msgstr[0] == '\0')
324             {
325               (*nerrors)++;
326               po_gram_error_at_line (&defmsg->pos,
327                                      _("this message is untranslated"));
328             }
329           else if (!include_fuzzies && defmsg->is_fuzzy && !is_header (defmsg))
330             {
331               (*nerrors)++;
332               po_gram_error_at_line (&defmsg->pos,
333                                      _("this message needs to be reviewed by the translator"));
334             }
335           else
336             defmsg->used = 1;
337         }
338       else
339         {
340           /* If the message was not defined at all, try to find a very
341              similar message, it could be a typo, or the suggestion may
342              help.  */
343           (*nerrors)++;
344           if (use_fuzzy_matching)
345             {
346               if (false)
347                 {
348                   /* Old, slow code.  */
349                   defmsg =
350                     message_list_search_fuzzy (defmlp,
351                                                refmsg->msgctxt, refmsg->msgid);
352                 }
353               else
354                 {
355                   /* Speedup through early abort in fstrcmp(), combined with
356                      pre-sorting of the messages through a hashed index.  */
357                   /* Create the fuzzy index lazily.  */
358                   if (*defmlp_findex == NULL)
359                     *defmlp_findex =
360                       message_fuzzy_index_alloc (defmlp, def_canon_charset);
361                   defmsg =
362                     message_fuzzy_index_search (*defmlp_findex,
363                                                 refmsg->msgctxt, refmsg->msgid,
364                                                 FUZZY_THRESHOLD, false);
365                 }
366             }
367           else
368             defmsg = NULL;
369           if (defmsg)
370             {
371               po_gram_error_at_line (&refmsg->pos,
372                                      _("this message is used but not defined..."));
373               error_message_count--;
374               po_gram_error_at_line (&defmsg->pos,
375                                      _("...but this definition is similar"));
376               defmsg->used = 1;
377             }
378           else
379             po_gram_error_at_line (&refmsg->pos,
380                                    _("this message is used but not defined in %s"),
381                                    fn1);
382         }
383     }
384 }
385 
386 
387 static void
compare(const char * fn1,const char * fn2,catalog_input_format_ty input_syntax)388 compare (const char *fn1, const char *fn2, catalog_input_format_ty input_syntax)
389 {
390   msgdomain_list_ty *def;
391   msgdomain_list_ty *ref;
392   int nerrors;
393   size_t j, k;
394   const char *def_canon_charset;
395   message_list_ty *empty_list;
396 
397   /* This is the master file, created by a human.  */
398   def = remove_obsoletes (read_catalog_file (fn1, input_syntax));
399 
400   /* This is the generated file, created by groping the sources with
401      the xgettext program.  */
402   ref = remove_obsoletes (read_catalog_file (fn2, input_syntax));
403 
404   /* The references file can be either in ASCII or in UTF-8.  If it is
405      in UTF-8, we have to convert the definitions to UTF-8 as well.  */
406   {
407     bool was_utf8 = false;
408     for (k = 0; k < ref->nitems; k++)
409       {
410         message_list_ty *mlp = ref->item[k]->messages;
411 
412         for (j = 0; j < mlp->nitems; j++)
413           if (is_header (mlp->item[j]) /* && !mlp->item[j]->obsolete */)
414             {
415               const char *header = mlp->item[j]->msgstr;
416 
417               if (header != NULL)
418                 {
419                   const char *charsetstr = c_strstr (header, "charset=");
420 
421                   if (charsetstr != NULL)
422                     {
423                       size_t len;
424 
425                       charsetstr += strlen ("charset=");
426                       len = strcspn (charsetstr, " \t\n");
427                       if (len == strlen ("UTF-8")
428                           && c_strncasecmp (charsetstr, "UTF-8", len) == 0)
429                         was_utf8 = true;
430                     }
431                 }
432             }
433         }
434     if (was_utf8)
435       def = iconv_msgdomain_list (def, "UTF-8", true, fn1);
436   }
437 
438   /* Determine canonicalized encoding name of the definitions now, after
439      conversion.  Only used for fuzzy matching.  */
440   if (use_fuzzy_matching)
441     {
442       def_canon_charset = def->encoding;
443       if (def_canon_charset == NULL)
444         {
445           char *charset = NULL;
446 
447           /* Get the encoding of the definitions file.  */
448           for (k = 0; k < def->nitems; k++)
449             {
450               message_list_ty *mlp = def->item[k]->messages;
451 
452               for (j = 0; j < mlp->nitems; j++)
453                 if (is_header (mlp->item[j]) && !mlp->item[j]->obsolete)
454                   {
455                     const char *header = mlp->item[j]->msgstr;
456 
457                     if (header != NULL)
458                       {
459                         const char *charsetstr = c_strstr (header, "charset=");
460 
461                         if (charsetstr != NULL)
462                           {
463                             size_t len;
464 
465                             charsetstr += strlen ("charset=");
466                             len = strcspn (charsetstr, " \t\n");
467                             charset = (char *) xmalloca (len + 1);
468                             memcpy (charset, charsetstr, len);
469                             charset[len] = '\0';
470                             break;
471                           }
472                       }
473                   }
474               if (charset != NULL)
475                 break;
476             }
477           if (charset != NULL)
478             def_canon_charset = po_charset_canonicalize (charset);
479           if (def_canon_charset == NULL)
480             /* Unspecified encoding.  Assume unibyte encoding.  */
481             def_canon_charset = po_charset_ascii;
482         }
483     }
484   else
485     def_canon_charset = NULL;
486 
487   empty_list = message_list_alloc (false);
488 
489   /* Every entry in the xgettext generated file must be matched by a
490      (single) entry in the human created file.  */
491   nerrors = 0;
492   if (!multi_domain_mode)
493     for (k = 0; k < ref->nitems; k++)
494       {
495         const char *domain = ref->item[k]->domain;
496         message_list_ty *refmlp = ref->item[k]->messages;
497         message_list_ty *defmlp;
498         message_fuzzy_index_ty *defmlp_findex;
499 
500         defmlp = msgdomain_list_sublist (def, domain, false);
501         if (defmlp == NULL)
502           defmlp = empty_list;
503 
504         defmlp_findex = NULL;
505 
506         match_domain (fn1, fn2, defmlp, &defmlp_findex, def_canon_charset,
507                       refmlp, &nerrors);
508 
509         if (defmlp_findex != NULL)
510           message_fuzzy_index_free (defmlp_findex);
511       }
512   else
513     {
514       /* Apply the references messages in the default domain to each of
515          the definition domains.  */
516       message_list_ty *refmlp = ref->item[0]->messages;
517 
518       for (k = 0; k < def->nitems; k++)
519         {
520           message_list_ty *defmlp = def->item[k]->messages;
521 
522           /* Ignore the default message domain if it has no messages.  */
523           if (k > 0 || defmlp->nitems > 0)
524             {
525               message_fuzzy_index_ty *defmlp_findex = NULL;
526 
527               match_domain (fn1, fn2, defmlp, &defmlp_findex, def_canon_charset,
528                             refmlp, &nerrors);
529 
530               if (defmlp_findex != NULL)
531                 message_fuzzy_index_free (defmlp_findex);
532             }
533         }
534     }
535 
536   /* Look for messages in the definition file, which are not present
537      in the reference file, indicating messages which defined but not
538      used in the program.  */
539   for (k = 0; k < def->nitems; ++k)
540     {
541       message_list_ty *defmlp = def->item[k]->messages;
542 
543       for (j = 0; j < defmlp->nitems; j++)
544         {
545           message_ty *defmsg = defmlp->item[j];
546 
547           if (!defmsg->used)
548             po_gram_error_at_line (&defmsg->pos,
549                                    _("warning: this message is not used"));
550         }
551     }
552 
553   /* Exit with status 1 on any error.  */
554   if (nerrors > 0)
555     error (EXIT_FAILURE, 0,
556            ngettext ("found %d fatal error", "found %d fatal errors", nerrors),
557            nerrors);
558 }
559