1 /* Unicode CLDR plural rule parser and converter
2    Copyright (C) 2015, 2018-2020 Free Software Foundation, Inc.
3 
4    This file was written by Daiki Ueno <ueno@gnu.org>, 2015.
5 
6    This program is free software: you can redistribute it and/or modify
7    it under the terms of the GNU General Public License as published by
8    the Free Software Foundation; either version 3 of the License, or
9    (at your option) any later version.
10 
11    This program is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14    GNU General Public License for more details.
15 
16    You should have received a copy of the GNU General Public License
17    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
18 
19 #ifdef HAVE_CONFIG_H
20 # include <config.h>
21 #endif
22 
23 #include "basename-lgpl.h"
24 #include "cldr-plural-exp.h"
25 #include "closeout.h"
26 #include "c-ctype.h"
27 #include <errno.h>
28 #include <error.h>
29 #include <getopt.h>
30 #include "gettext.h"
31 #include <libxml/tree.h>
32 #include <libxml/parser.h>
33 #include <locale.h>
34 #include "progname.h"
35 #include "propername.h"
36 #include "relocatable.h"
37 #include <stdlib.h>
38 #include <string.h>
39 #include "xalloc.h"
40 
41 #define _(s) gettext(s)
42 
43 
44 static char *
extract_rules(FILE * fp,const char * real_filename,const char * logical_filename,const char * locale)45 extract_rules (FILE *fp,
46                const char *real_filename, const char *logical_filename,
47                const char *locale)
48 {
49   xmlDocPtr doc;
50   xmlNodePtr node, n;
51   size_t locale_length;
52   char *buffer = NULL, *p;
53   size_t bufmax = 0;
54   size_t buflen = 0;
55 
56   doc = xmlReadFd (fileno (fp), logical_filename, NULL,
57                    XML_PARSE_NONET
58                    | XML_PARSE_NOWARNING
59                    | XML_PARSE_NOBLANKS);
60   if (doc == NULL)
61     error (EXIT_FAILURE, 0, _("Could not parse file %s as XML"), logical_filename);
62 
63   node = xmlDocGetRootElement (doc);
64   if (!node || !xmlStrEqual (node->name, BAD_CAST "supplementalData"))
65     {
66       error_at_line (0, 0,
67                      logical_filename,
68                      xmlGetLineNo (node),
69                      _("The root element must be <%s>"),
70                      "supplementalData");
71       goto out;
72     }
73 
74   for (n = node->children; n; n = n->next)
75     {
76       if (n->type == XML_ELEMENT_NODE
77           && xmlStrEqual (n->name, BAD_CAST "plurals"))
78         break;
79     }
80   if (!n)
81     {
82       error (0, 0, _("The element <%s> does not contain a <%s> element"),
83              "supplementalData", "plurals");
84       goto out;
85     }
86 
87   locale_length = strlen (locale);
88   for (n = n->children; n; n = n->next)
89     {
90       xmlChar *locales;
91       xmlChar *cp;
92       xmlNodePtr n2;
93       bool found = false;
94 
95       if (n->type != XML_ELEMENT_NODE
96           || !xmlStrEqual (n->name, BAD_CAST "pluralRules"))
97         continue;
98 
99       if (!xmlHasProp (n, BAD_CAST "locales"))
100         {
101           error_at_line (0, 0,
102                          logical_filename,
103                          xmlGetLineNo (n),
104                          _("The element <%s> does not have attribute <%s>"),
105                          "pluralRules", "locales");
106           continue;
107         }
108 
109       cp = locales = xmlGetProp (n, BAD_CAST "locales");
110       while (*cp != '\0')
111         {
112           while (c_isspace (*cp))
113             cp++;
114           if (xmlStrncmp (cp, BAD_CAST locale, locale_length) == 0
115               && (*(cp + locale_length) == '\0'
116                   || c_isspace (*(cp + locale_length))))
117             {
118               found = true;
119               break;
120             }
121           while (*cp && !c_isspace (*cp))
122             cp++;
123         }
124       xmlFree (locales);
125 
126       if (!found)
127         continue;
128 
129       for (n2 = n->children; n2; n2 = n2->next)
130         {
131           xmlChar *count;
132           xmlChar *content;
133           size_t length;
134 
135           if (n2->type != XML_ELEMENT_NODE
136               || !xmlStrEqual (n2->name, BAD_CAST "pluralRule"))
137             continue;
138 
139           if (!xmlHasProp (n2, BAD_CAST "count"))
140             {
141               error_at_line (0, 0,
142                              logical_filename,
143                              xmlGetLineNo (n2),
144                              _("The element <%s> does not have attribute <%s>"),
145                              "pluralRule", "count");
146               break;
147             }
148 
149           count = xmlGetProp (n2, BAD_CAST "count");
150           content = xmlNodeGetContent (n2);
151           length = xmlStrlen (count) + strlen (": ")
152             + xmlStrlen (content) + strlen ("; ");
153 
154           if (buflen + length + 1 > bufmax)
155             {
156               bufmax *= 2;
157               if (bufmax < buflen + length + 1)
158                 bufmax = buflen + length + 1;
159               buffer = (char *) xrealloc (buffer, bufmax);
160             }
161 
162           sprintf (buffer + buflen, "%s: %s; ", count, content);
163           xmlFree (count);
164           xmlFree (content);
165 
166           buflen += length;
167         }
168     }
169 
170   if (buffer)
171     {
172       /* Scrub the last semicolon, if any.  */
173       p = strrchr (buffer, ';');
174       if (p)
175         *p = '\0';
176     }
177 
178  out:
179   xmlFreeDoc (doc);
180   return buffer;
181 }
182 
183 /* Display usage information and exit.  */
184 static void
usage(int status)185 usage (int status)
186 {
187   if (status != EXIT_SUCCESS)
188     fprintf (stderr, _("Try '%s --help' for more information.\n"),
189              program_name);
190   else
191     {
192       printf (_("\
193 Usage: %s [OPTION...] [LOCALE RULES]...\n\
194 "), program_name);
195       printf ("\n");
196       /* xgettext: no-wrap */
197       printf (_("\
198 Extract or convert Unicode CLDR plural rules.\n\
199 \n\
200 If both LOCALE and RULES are specified, it reads CLDR plural rules for\n\
201 LOCALE from RULES and print them in a form suitable for gettext use.\n\
202 If no argument is given, it reads CLDR plural rules from the standard input.\n\
203 "));
204       printf ("\n");
205       /* xgettext: no-wrap */
206       printf (_("\
207 Mandatory arguments to long options are mandatory for short options too.\n\
208 Similarly for optional arguments.\n\
209 "));
210       printf ("\n");
211       printf (_("\
212   -c, --cldr                  print plural rules in the CLDR format\n"));
213       printf (_("\
214   -h, --help                  display this help and exit\n"));
215       printf (_("\
216   -V, --version               output version information and exit\n"));
217       printf ("\n");
218       /* TRANSLATORS: The first placeholder is the web address of the Savannah
219          project of this package.  The second placeholder is the bug-reporting
220          email address for this package.  Please add _another line_ saying
221          "Report translation bugs to <...>\n" with the address for translation
222          bugs (typically your translation team's web or email address).  */
223       printf(_("\
224 Report bugs in the bug tracker at <%s>\n\
225 or by email to <%s>.\n"),
226              "https://savannah.gnu.org/projects/gettext",
227              "bug-gettext@gnu.org");
228     }
229   exit (status);
230 }
231 
/* Table of long options for getopt_long.  Each entry takes no argument and
   maps to the value of its short-option letter; the all-zero entry ends the
   table.  */
static const struct option long_options[] =
{
  { .name = "cldr",    .has_arg = no_argument, .flag = NULL, .val = 'c' },
  { .name = "help",    .has_arg = no_argument, .flag = NULL, .val = 'h' },
  { .name = "version", .has_arg = no_argument, .flag = NULL, .val = 'V' },
  { .name = NULL,      .has_arg = 0,           .flag = NULL, .val = 0 }
};
240 
241 int
main(int argc,char ** argv)242 main (int argc, char **argv)
243 {
244   bool opt_cldr_format = false;
245   bool do_help = false;
246   bool do_version = false;
247   int optchar;
248 
249   /* Set program name for messages.  */
250   set_program_name (argv[0]);
251 
252   /* Set locale via LC_ALL.  */
253   setlocale (LC_ALL, "");
254 
255   /* Set the text message domain.  */
256   bindtextdomain (PACKAGE, relocate (LOCALEDIR));
257   bindtextdomain ("bison-runtime", relocate (BISON_LOCALEDIR));
258   textdomain (PACKAGE);
259 
260   /* Ensure that write errors on stdout are detected.  */
261   atexit (close_stdout);
262 
263   while ((optchar = getopt_long (argc, argv, "chV", long_options, NULL)) != EOF)
264     switch (optchar)
265       {
266       case '\0':                /* Long option.  */
267         break;
268 
269       case 'c':
270         opt_cldr_format = true;
271         break;
272 
273       case 'h':
274         do_help = true;
275         break;
276 
277       case 'V':
278         do_version = true;
279         break;
280 
281       default:
282         usage (EXIT_FAILURE);
283         /* NOTREACHED */
284       }
285 
286   /* Version information requested.  */
287   if (do_version)
288     {
289       printf ("%s (GNU %s) %s\n", last_component (program_name),
290               PACKAGE, VERSION);
291       /* xgettext: no-wrap */
292       printf (_("Copyright (C) %s Free Software Foundation, Inc.\n\
293 License GPLv3+: GNU GPL version 3 or later <%s>\n\
294 This is free software: you are free to change and redistribute it.\n\
295 There is NO WARRANTY, to the extent permitted by law.\n\
296 "),
297               "2015-2020", "https://gnu.org/licenses/gpl.html");
298       printf (_("Written by %s.\n"), proper_name ("Daiki Ueno"));
299       exit (EXIT_SUCCESS);
300     }
301 
302   /* Help is requested.  */
303   if (do_help)
304     usage (EXIT_SUCCESS);
305 
306   if (argc == optind + 2)
307     {
308       /* Two arguments: Read CLDR rules from a file.  */
309       const char *locale = argv[optind];
310       const char *logical_filename = argv[optind + 1];
311       char *extracted_rules;
312       FILE *fp;
313 
314       LIBXML_TEST_VERSION
315 
316       fp = fopen (logical_filename, "r");
317       if (fp == NULL)
318         error (1, 0, _("%s cannot be read"), logical_filename);
319 
320       extracted_rules = extract_rules (fp, logical_filename, logical_filename,
321                                        locale);
322       fclose (fp);
323       if (extracted_rules == NULL)
324         error (1, 0, _("cannot extract rules for %s"), locale);
325 
326       if (opt_cldr_format)
327         printf ("%s\n", extracted_rules);
328       else
329         {
330           struct cldr_plural_rule_list_ty *result;
331 
332           result = cldr_plural_parse (extracted_rules);
333           if (result == NULL)
334             error (1, 0, _("cannot parse CLDR rule"));
335 
336           cldr_plural_rule_list_print (result, stdout);
337           cldr_plural_rule_list_free (result);
338         }
339       free (extracted_rules);
340     }
341   else if (argc == optind)
342     {
343       /* No argument: Read CLDR rules from standard input.  */
344       char *line = NULL;
345       size_t line_size = 0;
346       for (;;)
347         {
348           int line_len;
349           struct cldr_plural_rule_list_ty *result;
350 
351           line_len = getline (&line, &line_size, stdin);
352           if (line_len < 0)
353             break;
354           if (line_len > 0 && line[line_len - 1] == '\n')
355             line[--line_len] = '\0';
356 
357           result = cldr_plural_parse (line);
358           if (result)
359             {
360               cldr_plural_rule_list_print (result, stdout);
361               cldr_plural_rule_list_free (result);
362             }
363         }
364 
365       free (line);
366     }
367   else
368     {
369       error (1, 0, _("extra operand %s"), argv[optind]);
370     }
371 
372   return 0;
373 }
374