1 /* KUIT (KDE User Interface Text) format strings.
2    Copyright (C) 2015, 2018-2019 Free Software Foundation, Inc.
3    Written by Daiki Ueno <ueno@gnu.org>, 2015.
4 
5    This program is free software: you can redistribute it and/or modify
6    it under the terms of the GNU General Public License as published by
7    the Free Software Foundation; either version 3 of the License, or
8    (at your option) any later version.
9 
10    This program is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13    GNU General Public License for more details.
14 
15    You should have received a copy of the GNU General Public License
16    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
17 
18 #ifdef HAVE_CONFIG_H
19 # include <config.h>
20 #endif
21 
#include <assert.h>
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>

#include "format.h"
#include "unistr.h"
#include "xalloc.h"
#include "xvasprintf.h"
#include "gettext.h"
31 
32 #if IN_LIBGETTEXTPO
33 /* Use included markup parser to avoid extra dependency from
34    libgettextpo to libxml2.  */
35 # ifndef FORMAT_KDE_KUIT_FALLBACK_MARKUP
36 #  define FORMAT_KDE_KUIT_USE_FALLBACK_MARKUP 1
37 # endif
38 #else
39 #  define FORMAT_KDE_KUIT_USE_LIBXML2 1
40 #endif
41 
42 #if FORMAT_KDE_KUIT_USE_LIBXML2
43 # include <libxml/parser.h>
44 #elif FORMAT_KDE_KUIT_USE_FALLBACK_MARKUP
45 # include "markup.h"
46 #endif
47 
48 
49 #define _(str) gettext (str)
50 
/* Number of elements of the array A.  A must be a real array, not a
   pointer.  The argument is parenthesized so that any array-valued
   expression may be passed.  */
#define SIZEOF(a) (sizeof (a) / sizeof ((a)[0]))
52 
53 
54 /* KUIT (KDE User Interface Text) is an XML-like markup which augments
55    translatable strings with semantic information:
56    https://api.kde.org/frameworks/ki18n/html/prg_guide.html#kuit_markup
57    KUIT can be seen as a fragment of a well-formed XML document,
58    except that it allows '&' as a Qt accelerator marker and '%' as a
59    format directive.  */
60 
/* Descriptor of a parsed KUIT format string.  It merely wraps the
   descriptor of the underlying KDE format string.  */
struct spec
{
  void *base;   /* value returned by formatstring_kde.parse */
};
66 
67 #define XML_NS "https://www.gnu.org/s/gettext/kde"
68 
69 struct char_range
70 {
71   ucs4_t start;
72   ucs4_t end;
73 };
74 
75 /* Character ranges for NameStartChar defined in:
76    https://www.w3.org/TR/REC-xml/#NT-NameStartChar  */
77 static const struct char_range name_chars1[] =
78   {
79     { ':', ':' },
80     { 'A', 'Z' },
81     { '_', '_' },
82     { 'a', 'z' },
83     { 0xC0, 0xD6 },
84     { 0xD8, 0xF6 },
85     { 0xF8, 0x2FF },
86     { 0x370, 0x37D },
87     { 0x37F, 0x1FFF },
88     { 0x200C, 0x200D },
89     { 0x2070, 0x218F },
90     { 0x2C00, 0x2FEF },
91     { 0x3001, 0xD7FF },
92     { 0xF900, 0xFDCF },
93     { 0xFDF0, 0xFFFD },
94     { 0x10000, 0xEFFFF }
95   };
96 
97 /* Character ranges for NameChar, excluding NameStartChar:
98    https://www.w3.org/TR/REC-xml/#NT-NameChar  */
99 static const struct char_range name_chars2[] =
100   {
101     { '-', '-' },
102     { '.', '.' },
103     { '0', '9' },
104     { 0xB7, 0xB7 },
105     { 0x0300, 0x036F },
106     { 0x203F, 0x2040 }
107   };
108 
109 /* Return true if INPUT is an XML reference.  */
110 static bool
is_reference(const char * input)111 is_reference (const char *input)
112 {
113   const char *str = input;
114   const char *str_limit = str + strlen (input);
115   ucs4_t uc;
116   int i;
117 
118   str += u8_mbtouc (&uc, (const unsigned char *) str, str_limit - str);
119   assert (uc == '&');
120 
121   str += u8_mbtouc (&uc, (const unsigned char *) str, str_limit - str);
122 
123   /* CharRef */
124   if (uc == '#')
125     {
126       str += u8_mbtouc (&uc, (const unsigned char *) str, str_limit - str);
127       if (uc == 'x')
128         {
129           while (str < str_limit)
130             {
131               str += u8_mbtouc (&uc, (const unsigned char *) str,
132                                 str_limit - str);
133               if (!(('0' <= uc && uc <= '9')
134                     || ('A' <= uc && uc <= 'F')
135                     || ('a' <= uc && uc <= 'f')))
136                 break;
137             }
138           return uc == ';';
139         }
140       else if ('0' <= uc && uc <= '9')
141         {
142           while (str < str_limit)
143             {
144               str += u8_mbtouc (&uc, (const unsigned char *) str,
145                                 str_limit - str);
146               if (!('0' <= uc && uc <= '9'))
147                 break;
148             }
149           return uc == ';';
150         }
151     }
152   else
153     {
154       /* EntityRef */
155       for (i = 0; i < SIZEOF (name_chars1); i++)
156         if (name_chars1[i].start <= uc && uc <= name_chars1[i].end)
157           break;
158 
159       if (i == SIZEOF (name_chars1))
160         return false;
161 
162       while (str < str_limit)
163         {
164           str += u8_mbtouc (&uc, (const unsigned char *) str, str_limit - str);
165           for (i = 0; i < SIZEOF (name_chars1); i++)
166             if (name_chars1[i].start <= uc && uc <= name_chars1[i].end)
167               break;
168           if (i == SIZEOF (name_chars1))
169             {
170               for (i = 0; i < SIZEOF (name_chars2); i++)
171                 if (name_chars2[i].start <= uc && uc <= name_chars2[i].end)
172                   break;
173               if (i == SIZEOF (name_chars2))
174                 return false;
175             }
176         }
177       return uc == ';';
178     }
179 
180   return false;
181 }
182 
183 
184 static void *
format_parse(const char * format,bool translated,char * fdi,char ** invalid_reason)185 format_parse (const char *format, bool translated, char *fdi,
186               char **invalid_reason)
187 {
188   struct spec spec;
189   struct spec *result;
190   const char *str;
191   const char *str_limit;
192   size_t amp_count;
193   char *buffer, *bp;
194 
195   spec.base = NULL;
196 
197   /* Preprocess the input, putting the content in a <gt:kuit> element.  */
198   str = format;
199   str_limit = str + strlen (format);
200 
201   for (amp_count = 0; str < str_limit; amp_count++)
202     {
203       const char *amp = strchrnul (str, '&');
204       if (*amp != '&')
205         break;
206       str = amp + 1;
207     }
208 
209   buffer = xmalloc (amp_count * 4
210                     + strlen (format)
211                     + strlen ("<gt:kuit xmlns:gt=\"" XML_NS "\"></gt:kuit>")
212                     + 1);
213   *buffer = '\0';
214 
215   bp = buffer;
216   bp = stpcpy (bp, "<gt:kuit xmlns:gt=\"" XML_NS "\">");
217   str = format;
218   while (str < str_limit)
219     {
220       const char *amp = strchrnul (str, '&');
221 
222       bp = stpncpy (bp, str, amp - str);
223       if (*amp != '&')
224         break;
225 
226       bp = stpcpy (bp, is_reference (amp) ? "&" : "&amp;");
227       str = amp + 1;
228     }
229   stpcpy (bp, "</gt:kuit>");
230 
231 #if FORMAT_KDE_KUIT_USE_LIBXML2
232     {
233       xmlDocPtr doc;
234 
235       doc = xmlReadMemory (buffer, strlen (buffer), "", NULL,
236                            XML_PARSE_NONET
237                            | XML_PARSE_NOWARNING
238                            | XML_PARSE_NOERROR
239                            | XML_PARSE_NOBLANKS);
240       if (doc == NULL)
241         {
242           xmlError *err = xmlGetLastError ();
243           *invalid_reason =
244             xasprintf (_("error while parsing: %s"),
245                        err->message);
246           free (buffer);
247           xmlFreeDoc (doc);
248           return NULL;
249         }
250 
251       free (buffer);
252       xmlFreeDoc (doc);
253     }
254 #elif FORMAT_KDE_KUIT_USE_FALLBACK_MARKUP
255     {
256       markup_parser_ty parser;
257       markup_parse_context_ty *context;
258 
259       memset (&parser, 0, sizeof (markup_parser_ty));
260       context = markup_parse_context_new (&parser, 0, NULL);
261       if (!markup_parse_context_parse (context, buffer, strlen (buffer)))
262         {
263           *invalid_reason =
264             xasprintf (_("error while parsing: %s"),
265                        markup_parse_context_get_error (context));
266           free (buffer);
267           markup_parse_context_free (context);
268           return NULL;
269         }
270 
271       if (!markup_parse_context_end_parse (context))
272         {
273           *invalid_reason =
274             xasprintf (_("error while parsing: %s"),
275                        markup_parse_context_get_error (context));
276           free (buffer);
277           markup_parse_context_free (context);
278           return NULL;
279         }
280 
281       free (buffer);
282       markup_parse_context_free (context);
283     }
284 #else
285     /* No support for XML.  */
286     free (buffer);
287 #endif
288 
289   spec.base = formatstring_kde.parse (format, translated, fdi, invalid_reason);
290   if (spec.base == NULL)
291     return NULL;
292 
293   result = XMALLOC (struct spec);
294   *result = spec;
295   return result;
296 }
297 
298 static void
format_free(void * descr)299 format_free (void *descr)
300 {
301   struct spec *spec = descr;
302   formatstring_kde.free (spec->base);
303   free (spec);
304 }
305 
306 static int
format_get_number_of_directives(void * descr)307 format_get_number_of_directives (void *descr)
308 {
309   struct spec *spec = descr;
310   return formatstring_kde.get_number_of_directives (spec->base);
311 }
312 
313 static bool
format_check(void * msgid_descr,void * msgstr_descr,bool equality,formatstring_error_logger_t error_logger,const char * pretty_msgid,const char * pretty_msgstr)314 format_check (void *msgid_descr, void *msgstr_descr, bool equality,
315               formatstring_error_logger_t error_logger,
316               const char *pretty_msgid, const char *pretty_msgstr)
317 {
318   struct spec *msgid_spec = msgid_descr;
319   struct spec *msgstr_spec = msgstr_descr;
320 
321   return formatstring_kde.check (msgid_spec->base, msgstr_spec->base, equality,
322                                  error_logger,
323                                  pretty_msgid, pretty_msgstr);
324 }
325 
326 struct formatstring_parser formatstring_kde_kuit =
327 {
328   format_parse,
329   format_free,
330   format_get_number_of_directives,
331   NULL,
332   format_check
333 };
334 
335 
336 #ifdef TEST
337 
/* Test program: Print the number of format directives that format_parse
   finds in each string read from standard input.  */

#include <stdio.h>

/* Print a summary of DESCR, a value returned by format_parse:
   "INVALID" if DESCR is NULL, otherwise the number of directives in
   parentheses.  The wrapped KDE descriptor is opaque in this file, so
   nothing more detailed can be printed from here.  */
static void
format_print (void *descr)
{
  if (descr == NULL)
    {
      printf ("INVALID");
      return;
    }

  printf ("(%d)", format_get_number_of_directives (descr));
}
372 
373 int
main()374 main ()
375 {
376   for (;;)
377     {
378       char *line = NULL;
379       size_t line_size = 0;
380       int line_len;
381       char *invalid_reason;
382       void *descr;
383 
384       line_len = getline (&line, &line_size, stdin);
385       if (line_len < 0)
386         break;
387       if (line_len > 0 && line[line_len - 1] == '\n')
388         line[--line_len] = '\0';
389 
390       invalid_reason = NULL;
391       descr = format_parse (line, false, NULL, &invalid_reason);
392 
393       format_print (descr);
394       printf ("\n");
395       if (descr == NULL)
396         printf ("%s\n", invalid_reason);
397 
398       free (invalid_reason);
399       free (line);
400     }
401 
402   return 0;
403 }
404 
405 /*
406  * For Emacs M-x compile
407  * Local Variables:
408  * compile-command: "/bin/sh ../libtool --tag=CC --mode=link gcc -o a.out -static -O -g -Wall -I.. -I../gnulib-lib -I../../gettext-runtime/intl -DHAVE_CONFIG_H -DTEST format-kde-kuit.c ../gnulib-lib/libgettextlib.la"
409  * End:
410  */
411 
412 #endif /* TEST */
413