1 /* Manipulates attributes of messages in translation catalogs.
2    Copyright (C) 2001-2007, 2009-2010, 2012-2014, 2016, 2018-2020 Free Software
3    Foundation, Inc.
4    Written by Bruno Haible <haible@clisp.cons.org>, 2001.
5 
6    This program is free software: you can redistribute it and/or modify
7    it under the terms of the GNU General Public License as published by
8    the Free Software Foundation; either version 3 of the License, or
9    (at your option) any later version.
10 
11    This program is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14    GNU General Public License for more details.
15 
16    You should have received a copy of the GNU General Public License
17    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
18 
19 
20 #ifdef HAVE_CONFIG_H
21 # include "config.h"
22 #endif
23 
24 #include <getopt.h>
25 #include <limits.h>
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <locale.h>
29 
30 #include <textstyle.h>
31 
32 #include "noreturn.h"
33 #include "closeout.h"
34 #include "dir-list.h"
35 #include "error.h"
36 #include "error-progname.h"
37 #include "progname.h"
38 #include "relocatable.h"
39 #include "basename-lgpl.h"
40 #include "message.h"
41 #include "read-catalog.h"
42 #include "read-po.h"
43 #include "read-properties.h"
44 #include "read-stringtable.h"
45 #include "write-catalog.h"
46 #include "write-po.h"
47 #include "write-properties.h"
48 #include "write-stringtable.h"
49 #include "propername.h"
50 #include "xalloc.h"
51 #include "gettext.h"
52 
53 #define _(str) gettext (str)
54 
55 
/* Nonzero when the PO file has to be written even if it is empty.
   getopt_long stores 1 here when --force-po is given (see long_options);
   main hands the value on to msgdomain_list_print.  */
static int force_po = 0;
58 
/* Message subsets that can be dropped from the output.  Each flag is one bit
   so that several of them can be OR-ed together in to_remove.  The comment
   next to a flag names the command line option(s) that set it.  */
enum
{
  REMOVE_UNTRANSLATED = 0x01,   /* --translated */
  REMOVE_TRANSLATED   = 0x02,   /* --untranslated */
  REMOVE_FUZZY        = 0x04,   /* --no-fuzzy */
  REMOVE_NONFUZZY     = 0x08,   /* --only-fuzzy, --fuzzy */
  REMOVE_OBSOLETE     = 0x10,   /* --no-obsolete */
  REMOVE_NONOBSOLETE  = 0x20    /* --only-obsolete, --obsolete */
};

/* Union of the REMOVE_* flags requested on the command line.  */
static int to_remove;
70 
/* Attribute changes that can be applied to the messages which remain after
   filtering.  Each flag is one bit so that several of them can be OR-ed
   together in to_change.  The comment next to a flag names the command line
   option(s) that set it.  */
enum
{
  SET_FUZZY          = 0x01,    /* --set-fuzzy */
  RESET_FUZZY        = 0x02,    /* --clear-fuzzy, --fuzzy */
  SET_OBSOLETE       = 0x04,    /* --set-obsolete */
  RESET_OBSOLETE     = 0x08,    /* --clear-obsolete, --obsolete */
  REMOVE_PREV        = 0x10,    /* --clear-previous */
  ADD_PREV           = 0x20,    /* --previous */
  REMOVE_TRANSLATION = 0x40     /* --empty */
};

/* Union of the change flags requested on the command line.  */
static int to_change;
83 
84 /* Long options.  */
85 static const struct option long_options[] =
86 {
87   { "add-location", optional_argument, NULL, 'n' },
88   { "clear-fuzzy", no_argument, NULL, CHAR_MAX + 8 },
89   { "clear-obsolete", no_argument, NULL, CHAR_MAX + 10 },
90   { "clear-previous", no_argument, NULL, CHAR_MAX + 18 },
91   { "empty", no_argument, NULL, CHAR_MAX + 23 },
92   { "color", optional_argument, NULL, CHAR_MAX + 19 },
93   { "directory", required_argument, NULL, 'D' },
94   { "escape", no_argument, NULL, 'E' },
95   { "force-po", no_argument, &force_po, 1 },
96   { "fuzzy", no_argument, NULL, CHAR_MAX + 11 },
97   { "help", no_argument, NULL, 'h' },
98   { "ignore-file", required_argument, NULL, CHAR_MAX + 15 },
99   { "indent", no_argument, NULL, 'i' },
100   { "no-escape", no_argument, NULL, 'e' },
101   { "no-fuzzy", no_argument, NULL, CHAR_MAX + 3 },
102   { "no-location", no_argument, NULL, CHAR_MAX + 22 },
103   { "no-obsolete", no_argument, NULL, CHAR_MAX + 5 },
104   { "no-wrap", no_argument, NULL, CHAR_MAX + 13 },
105   { "obsolete", no_argument, NULL, CHAR_MAX + 12 },
106   { "only-file", required_argument, NULL, CHAR_MAX + 14 },
107   { "only-fuzzy", no_argument, NULL, CHAR_MAX + 4 },
108   { "only-obsolete", no_argument, NULL, CHAR_MAX + 6 },
109   { "output-file", required_argument, NULL, 'o' },
110   { "previous", no_argument, NULL, CHAR_MAX + 21 },
111   { "properties-input", no_argument, NULL, 'P' },
112   { "properties-output", no_argument, NULL, 'p' },
113   { "set-fuzzy", no_argument, NULL, CHAR_MAX + 7 },
114   { "set-obsolete", no_argument, NULL, CHAR_MAX + 9 },
115   { "sort-by-file", no_argument, NULL, 'F' },
116   { "sort-output", no_argument, NULL, 's' },
117   { "stringtable-input", no_argument, NULL, CHAR_MAX + 16 },
118   { "stringtable-output", no_argument, NULL, CHAR_MAX + 17 },
119   { "strict", no_argument, NULL, 'S' },
120   { "style", required_argument, NULL, CHAR_MAX + 20 },
121   { "translated", no_argument, NULL, CHAR_MAX + 1 },
122   { "untranslated", no_argument, NULL, CHAR_MAX + 2 },
123   { "version", no_argument, NULL, 'V' },
124   { "width", required_argument, NULL, 'w' },
125   { NULL, 0, NULL, 0 }
126 };
127 
128 
129 /* Forward declaration of local functions.  */
130 _GL_NORETURN_FUNC static void usage (int status);
131 static msgdomain_list_ty *process_msgdomain_list (msgdomain_list_ty *mdlp,
132                                                   msgdomain_list_ty *only_mdlp,
133                                                 msgdomain_list_ty *ignore_mdlp);
134 
135 
136 int
main(int argc,char ** argv)137 main (int argc, char **argv)
138 {
139   int optchar;
140   bool do_help;
141   bool do_version;
142   char *output_file;
143   const char *input_file;
144   const char *only_file;
145   const char *ignore_file;
146   msgdomain_list_ty *only_mdlp;
147   msgdomain_list_ty *ignore_mdlp;
148   msgdomain_list_ty *result;
149   catalog_input_format_ty input_syntax = &input_format_po;
150   catalog_output_format_ty output_syntax = &output_format_po;
151   bool sort_by_msgid = false;
152   bool sort_by_filepos = false;
153 
154   /* Set program name for messages.  */
155   set_program_name (argv[0]);
156   error_print_progname = maybe_print_progname;
157 
158   /* Set locale via LC_ALL.  */
159   setlocale (LC_ALL, "");
160 
161   /* Set the text message domain.  */
162   bindtextdomain (PACKAGE, relocate (LOCALEDIR));
163   bindtextdomain ("bison-runtime", relocate (BISON_LOCALEDIR));
164   textdomain (PACKAGE);
165 
166   /* Ensure that write errors on stdout are detected.  */
167   atexit (close_stdout);
168 
169   /* Set default values for variables.  */
170   do_help = false;
171   do_version = false;
172   output_file = NULL;
173   input_file = NULL;
174   only_file = NULL;
175   ignore_file = NULL;
176 
177   while ((optchar = getopt_long (argc, argv, "D:eEFhino:pPsVw:", long_options,
178                                  NULL)) != EOF)
179     switch (optchar)
180       {
181       case '\0':                /* Long option.  */
182         break;
183 
184       case 'D':
185         dir_list_append (optarg);
186         break;
187 
188       case 'e':
189         message_print_style_escape (false);
190         break;
191 
192       case 'E':
193         message_print_style_escape (true);
194         break;
195 
196       case 'F':
197         sort_by_filepos = true;
198         break;
199 
200       case 'h':
201         do_help = true;
202         break;
203 
204       case 'i':
205         message_print_style_indent ();
206         break;
207 
208       case 'n':
209         if (handle_filepos_comment_option (optarg))
210           usage (EXIT_FAILURE);
211         break;
212 
213       case 'o':
214         output_file = optarg;
215         break;
216 
217       case 'p':
218         output_syntax = &output_format_properties;
219         break;
220 
221       case 'P':
222         input_syntax = &input_format_properties;
223         break;
224 
225       case 's':
226         sort_by_msgid = true;
227         break;
228 
229       case 'S':
230         message_print_style_uniforum ();
231         break;
232 
233       case 'V':
234         do_version = true;
235         break;
236 
237       case 'w':
238         {
239           int value;
240           char *endp;
241           value = strtol (optarg, &endp, 10);
242           if (endp != optarg)
243             message_page_width_set (value);
244         }
245         break;
246 
247       case CHAR_MAX + 1: /* --translated */
248         to_remove |= REMOVE_UNTRANSLATED;
249         break;
250 
251       case CHAR_MAX + 2: /* --untranslated */
252         to_remove |= REMOVE_TRANSLATED;
253         break;
254 
255       case CHAR_MAX + 3: /* --no-fuzzy */
256         to_remove |= REMOVE_FUZZY;
257         break;
258 
259       case CHAR_MAX + 4: /* --only-fuzzy */
260         to_remove |= REMOVE_NONFUZZY;
261         break;
262 
263       case CHAR_MAX + 5: /* --no-obsolete */
264         to_remove |= REMOVE_OBSOLETE;
265         break;
266 
267       case CHAR_MAX + 6: /* --only-obsolete */
268         to_remove |= REMOVE_NONOBSOLETE;
269         break;
270 
271       case CHAR_MAX + 7: /* --set-fuzzy */
272         to_change |= SET_FUZZY;
273         break;
274 
275       case CHAR_MAX + 8: /* --clear-fuzzy */
276         to_change |= RESET_FUZZY;
277         break;
278 
279       case CHAR_MAX + 9: /* --set-obsolete */
280         to_change |= SET_OBSOLETE;
281         break;
282 
283       case CHAR_MAX + 10: /* --clear-obsolete */
284         to_change |= RESET_OBSOLETE;
285         break;
286 
287       case CHAR_MAX + 11: /* --fuzzy */
288         to_remove |= REMOVE_NONFUZZY;
289         to_change |= RESET_FUZZY;
290         break;
291 
292       case CHAR_MAX + 12: /* --obsolete */
293         to_remove |= REMOVE_NONOBSOLETE;
294         to_change |= RESET_OBSOLETE;
295         break;
296 
297       case CHAR_MAX + 13: /* --no-wrap */
298         message_page_width_ignore ();
299         break;
300 
301       case CHAR_MAX + 14: /* --only-file */
302         only_file = optarg;
303         break;
304 
305       case CHAR_MAX + 15: /* --ignore-file */
306         ignore_file = optarg;
307         break;
308 
309       case CHAR_MAX + 16: /* --stringtable-input */
310         input_syntax = &input_format_stringtable;
311         break;
312 
313       case CHAR_MAX + 17: /* --stringtable-output */
314         output_syntax = &output_format_stringtable;
315         break;
316 
317       case CHAR_MAX + 18: /* --clear-previous */
318         to_change |= REMOVE_PREV;
319         break;
320 
321       case CHAR_MAX + 19: /* --color */
322         if (handle_color_option (optarg) || color_test_mode)
323           usage (EXIT_FAILURE);
324         break;
325 
326       case CHAR_MAX + 20: /* --style */
327         handle_style_option (optarg);
328         break;
329 
330       case CHAR_MAX + 21: /* --previous */
331         to_change |= ADD_PREV;
332         break;
333 
334       case CHAR_MAX + 22: /* --no-location */
335         message_print_style_filepos (filepos_comment_none);
336         break;
337 
338       case CHAR_MAX + 23: /* --empty */
339         to_change |= REMOVE_TRANSLATION;
340         break;
341 
342       default:
343         usage (EXIT_FAILURE);
344         /* NOTREACHED */
345       }
346 
347   /* Version information requested.  */
348   if (do_version)
349     {
350       printf ("%s (GNU %s) %s\n", last_component (program_name),
351               PACKAGE, VERSION);
352       /* xgettext: no-wrap */
353       printf (_("Copyright (C) %s Free Software Foundation, Inc.\n\
354 License GPLv3+: GNU GPL version 3 or later <%s>\n\
355 This is free software: you are free to change and redistribute it.\n\
356 There is NO WARRANTY, to the extent permitted by law.\n\
357 "),
358               "2001-2020", "https://gnu.org/licenses/gpl.html");
359       printf (_("Written by %s.\n"), proper_name ("Bruno Haible"));
360       exit (EXIT_SUCCESS);
361     }
362 
363   /* Help is requested.  */
364   if (do_help)
365     usage (EXIT_SUCCESS);
366 
367   /* Test whether we have an .po file name as argument.  */
368   if (optind == argc)
369     input_file = "-";
370   else if (optind + 1 == argc)
371     input_file = argv[optind];
372   else
373     {
374       error (EXIT_SUCCESS, 0, _("at most one input file allowed"));
375       usage (EXIT_FAILURE);
376     }
377 
378   /* Verify selected options.  */
379   if (sort_by_msgid && sort_by_filepos)
380     error (EXIT_FAILURE, 0, _("%s and %s are mutually exclusive"),
381            "--sort-output", "--sort-by-file");
382 
383   /* Read input file.  */
384   result = read_catalog_file (input_file, input_syntax);
385 
386   /* Read optional files that limit the extent of the attribute changes.  */
387   only_mdlp = (only_file != NULL
388                ? read_catalog_file (only_file, input_syntax)
389                : NULL);
390   ignore_mdlp = (ignore_file != NULL
391                  ? read_catalog_file (ignore_file, input_syntax)
392                  : NULL);
393 
394   /* Filter the messages and manipulate the attributes.  */
395   result = process_msgdomain_list (result, only_mdlp, ignore_mdlp);
396 
397   /* Sorting the list of messages.  */
398   if (sort_by_filepos)
399     msgdomain_list_sort_by_filepos (result);
400   else if (sort_by_msgid)
401     msgdomain_list_sort_by_msgid (result);
402 
403   /* Write the PO file.  */
404   msgdomain_list_print (result, output_file, output_syntax, force_po, false);
405 
406   exit (EXIT_SUCCESS);
407 }
408 
409 
410 /* Display usage information and exit.  */
411 static void
usage(int status)412 usage (int status)
413 {
414   if (status != EXIT_SUCCESS)
415     fprintf (stderr, _("Try '%s --help' for more information.\n"),
416              program_name);
417   else
418     {
419       printf (_("\
420 Usage: %s [OPTION] [INPUTFILE]\n\
421 "), program_name);
422       printf ("\n");
423       /* xgettext: no-wrap */
424       printf (_("\
425 Filters the messages of a translation catalog according to their attributes,\n\
426 and manipulates the attributes.\n"));
427       printf ("\n");
428       printf (_("\
429 Mandatory arguments to long options are mandatory for short options too.\n"));
430       printf ("\n");
431       printf (_("\
432 Input file location:\n"));
433       printf (_("\
434   INPUTFILE                   input PO file\n"));
435       printf (_("\
436   -D, --directory=DIRECTORY   add DIRECTORY to list for input files search\n"));
437       printf (_("\
438 If no input file is given or if it is -, standard input is read.\n"));
439       printf ("\n");
440       printf (_("\
441 Output file location:\n"));
442       printf (_("\
443   -o, --output-file=FILE      write output to specified file\n"));
444       printf (_("\
445 The results are written to standard output if no output file is specified\n\
446 or if it is -.\n"));
447       printf ("\n");
448       printf (_("\
449 Message selection:\n"));
450       printf (_("\
451       --translated            keep translated, remove untranslated messages\n"));
452       printf (_("\
453       --untranslated          keep untranslated, remove translated messages\n"));
454       printf (_("\
455       --no-fuzzy              remove 'fuzzy' marked messages\n"));
456       printf (_("\
457       --only-fuzzy            keep 'fuzzy' marked messages\n"));
458       printf (_("\
459       --no-obsolete           remove obsolete #~ messages\n"));
460       printf (_("\
461       --only-obsolete         keep obsolete #~ messages\n"));
462       printf ("\n");
463       printf (_("\
464 Attribute manipulation:\n"));
465       printf (_("\
466       --set-fuzzy             set all messages 'fuzzy'\n"));
467       printf (_("\
468       --clear-fuzzy           set all messages non-'fuzzy'\n"));
469       printf (_("\
470       --set-obsolete          set all messages obsolete\n"));
471       printf (_("\
472       --clear-obsolete        set all messages non-obsolete\n"));
473       printf (_("\
474       --previous              when setting 'fuzzy', keep previous msgids\n\
475                               of translated messages.\n"));
476       printf (_("\
477       --clear-previous        remove the \"previous msgid\" from all messages\n"));
478       printf (_("\
479       --empty                 when removing 'fuzzy', also set msgstr empty\n"));
480       printf (_("\
481       --only-file=FILE.po     manipulate only entries listed in FILE.po\n"));
482       printf (_("\
483       --ignore-file=FILE.po   manipulate only entries not listed in FILE.po\n"));
484       printf (_("\
485       --fuzzy                 synonym for --only-fuzzy --clear-fuzzy\n"));
486       printf (_("\
487       --obsolete              synonym for --only-obsolete --clear-obsolete\n"));
488       printf ("\n");
489       printf (_("\
490 Input file syntax:\n"));
491       printf (_("\
492   -P, --properties-input      input file is in Java .properties syntax\n"));
493       printf (_("\
494       --stringtable-input     input file is in NeXTstep/GNUstep .strings syntax\n"));
495       printf ("\n");
496       printf (_("\
497 Output details:\n"));
498       printf (_("\
499       --color                 use colors and other text attributes always\n\
500       --color=WHEN            use colors and other text attributes if WHEN.\n\
501                               WHEN may be 'always', 'never', 'auto', or 'html'.\n"));
502       printf (_("\
503       --style=STYLEFILE       specify CSS style rule file for --color\n"));
504       printf (_("\
505   -e, --no-escape             do not use C escapes in output (default)\n"));
506       printf (_("\
507   -E, --escape                use C escapes in output, no extended chars\n"));
508       printf (_("\
509       --force-po              write PO file even if empty\n"));
510       printf (_("\
511   -i, --indent                write the .po file using indented style\n"));
512       printf (_("\
513       --no-location           do not write '#: filename:line' lines\n"));
514       printf (_("\
515   -n, --add-location          generate '#: filename:line' lines (default)\n"));
516       printf (_("\
517       --strict                write out strict Uniforum conforming .po file\n"));
518       printf (_("\
519   -p, --properties-output     write out a Java .properties file\n"));
520       printf (_("\
521       --stringtable-output    write out a NeXTstep/GNUstep .strings file\n"));
522       printf (_("\
523   -w, --width=NUMBER          set output page width\n"));
524       printf (_("\
525       --no-wrap               do not break long message lines, longer than\n\
526                               the output page width, into several lines\n"));
527       printf (_("\
528   -s, --sort-output           generate sorted output\n"));
529       printf (_("\
530   -F, --sort-by-file          sort output by file location\n"));
531       printf ("\n");
532       printf (_("\
533 Informative output:\n"));
534       printf (_("\
535   -h, --help                  display this help and exit\n"));
536       printf (_("\
537   -V, --version               output version information and exit\n"));
538       printf ("\n");
539       /* TRANSLATORS: The first placeholder is the web address of the Savannah
540          project of this package.  The second placeholder is the bug-reporting
541          email address for this package.  Please add _another line_ saying
542          "Report translation bugs to <...>\n" with the address for translation
543          bugs (typically your translation team's web or email address).  */
544       printf(_("\
545 Report bugs in the bug tracker at <%s>\n\
546 or by email to <%s>.\n"),
547              "https://savannah.gnu.org/projects/gettext",
548              "bug-gettext@gnu.org");
549     }
550 
551   exit (status);
552 }
553 
554 
555 /* Return true if a message should be kept.  */
556 static bool
is_message_selected(const message_ty * mp)557 is_message_selected (const message_ty *mp)
558 {
559   /* Always keep the header entry.  */
560   if (is_header (mp))
561     return true;
562 
563   if ((to_remove & (REMOVE_UNTRANSLATED | REMOVE_TRANSLATED))
564       && (mp->msgstr[0] == '\0'
565           ? to_remove & REMOVE_UNTRANSLATED
566           : to_remove & REMOVE_TRANSLATED))
567     return false;
568 
569   if ((to_remove & (REMOVE_FUZZY | REMOVE_NONFUZZY))
570       && (mp->is_fuzzy
571           ? to_remove & REMOVE_FUZZY
572           : to_remove & REMOVE_NONFUZZY))
573     return false;
574 
575   if ((to_remove & (REMOVE_OBSOLETE | REMOVE_NONOBSOLETE))
576       && (mp->obsolete
577           ? to_remove & REMOVE_OBSOLETE
578           : to_remove & REMOVE_NONOBSOLETE))
579     return false;
580 
581   return true;
582 }
583 
584 
585 static void
process_message_list(message_list_ty * mlp,message_list_ty * only_mlp,message_list_ty * ignore_mlp)586 process_message_list (message_list_ty *mlp,
587                       message_list_ty *only_mlp, message_list_ty *ignore_mlp)
588 {
589   /* Keep only the selected messages.  */
590   message_list_remove_if_not (mlp, is_message_selected);
591 
592   /* Change the attributes.  */
593   if (to_change)
594     {
595       size_t j;
596 
597       for (j = 0; j < mlp->nitems; j++)
598         {
599           message_ty *mp = mlp->item[j];
600 
601           /* Attribute changes only affect messages listed in --only-file
602              and not listed in --ignore-file.  */
603           if ((only_mlp
604                ? message_list_search (only_mlp, mp->msgctxt, mp->msgid) != NULL
605                : true)
606               && (ignore_mlp
607                   ? message_list_search (ignore_mlp, mp->msgctxt, mp->msgid) == NULL
608                   : true))
609             {
610               if (to_change & SET_FUZZY)
611                 {
612                   if ((to_change & ADD_PREV) && !is_header (mp)
613                       && !mp->is_fuzzy && mp->msgstr[0] != '\0')
614                     {
615                       mp->prev_msgctxt =
616                         (mp->msgctxt != NULL ? xstrdup (mp->msgctxt) : NULL);
617                       mp->prev_msgid =
618                         (mp->msgid != NULL ? xstrdup (mp->msgid) : NULL);
619                       mp->prev_msgid_plural =
620                         (mp->msgid_plural != NULL
621                          ? xstrdup (mp->msgid_plural)
622                          : NULL);
623                     }
624                   mp->is_fuzzy = true;
625                 }
626 
627               if (to_change & RESET_FUZZY)
628                 {
629                   if ((to_change & REMOVE_TRANSLATION)
630                       && mp->is_fuzzy && !mp->obsolete)
631                     {
632                       unsigned long int nplurals = 0;
633                       char *msgstr;
634                       size_t pos;
635 
636                       for (pos = 0; pos < mp->msgstr_len; ++pos)
637                         if (!mp->msgstr[pos])
638                           ++nplurals;
639                       free ((char *) mp->msgstr);
640                       msgstr = XNMALLOC (nplurals, char);
641                       memset (msgstr, '\0', nplurals);
642                       mp->msgstr = msgstr;
643                       mp->msgstr_len = nplurals;
644                     }
645                   mp->is_fuzzy = false;
646                 }
647               /* Always keep the header entry non-obsolete.  */
648               if ((to_change & SET_OBSOLETE) && !is_header (mp))
649                 mp->obsolete = true;
650               if (to_change & RESET_OBSOLETE)
651                 mp->obsolete = false;
652               if (to_change & REMOVE_PREV)
653                 {
654                   mp->prev_msgctxt = NULL;
655                   mp->prev_msgid = NULL;
656                   mp->prev_msgid_plural = NULL;
657                 }
658             }
659         }
660     }
661 }
662 
663 
664 static msgdomain_list_ty *
process_msgdomain_list(msgdomain_list_ty * mdlp,msgdomain_list_ty * only_mdlp,msgdomain_list_ty * ignore_mdlp)665 process_msgdomain_list (msgdomain_list_ty *mdlp,
666                         msgdomain_list_ty *only_mdlp,
667                         msgdomain_list_ty *ignore_mdlp)
668 {
669   size_t k;
670 
671   for (k = 0; k < mdlp->nitems; k++)
672     process_message_list (mdlp->item[k]->messages,
673                           only_mdlp
674                           ? msgdomain_list_sublist (only_mdlp,
675                                                     mdlp->item[k]->domain,
676                                                     true)
677                           : NULL,
678                           ignore_mdlp
679                           ? msgdomain_list_sublist (ignore_mdlp,
680                                                     mdlp->item[k]->domain,
681                                                     false)
682                           : NULL);
683 
684   return mdlp;
685 }
686