1 /* Internationalization Tag Set (ITS) handling
2    Copyright (C) 2015, 2018-2020 Free Software Foundation, Inc.
3 
4    This file was written by Daiki Ueno <ueno@gnu.org>, 2015.
5 
6    This program is free software: you can redistribute it and/or modify
7    it under the terms of the GNU General Public License as published by
8    the Free Software Foundation; either version 3 of the License, or
9    (at your option) any later version.
10 
11    This program is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14    GNU General Public License for more details.
15 
16    You should have received a copy of the GNU General Public License
17    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
18 
19 #ifdef HAVE_CONFIG_H
20 #include <config.h>
21 #endif
22 
23 /* Specification.  */
24 #include "its.h"
25 
26 #include <assert.h>
27 #include <errno.h>
28 #include "error.h"
29 #include "gettext.h"
30 #include "mem-hash-map.h"
31 #include <stdint.h>
32 #include <libxml/tree.h>
33 #include <libxml/parser.h>
34 #include <libxml/xmlwriter.h>
35 #include <libxml/xpath.h>
36 #include <libxml/xpathInternals.h>
37 #include <stdlib.h>
38 #include "trim.h"
39 #include "xalloc.h"
40 #include "xvasprintf.h"
41 
42 #define _(str) gettext (str)
43 
44 /* The Internationalization Tag Set (ITS) 2.0 standard is available at:
45    https://www.w3.org/TR/its20/
46 
47    This implementation supports only a few data categories, useful for
48    gettext-based projects.  Other data categories can be added by
49    extending the its_rule_class_ty class and registering it in
50    init_classes().
51 
52    The message extraction is performed in three steps.  In the first
53    step, its_rule_list_apply() assigns values to nodes in an XML
54    document.  In the second step, its_rule_list_extract_nodes() marks
55    translatable nodes.  In the final step,
56    its_rule_list_extract_text() extracts text contents from the marked
57    nodes.
58 
   The values assigned to a node are represented as an array of
   key-value pairs, where both keys and values are strings.  The arrays
   live in a pool; node->_private holds the 1-based index of the node's
   array in that pool.  To retrieve the values for a node, use
   its_rule_list_eval().  */
63 
64 #define ITS_NS "http://www.w3.org/2005/11/its"
65 #define XML_NS "http://www.w3.org/XML/1998/namespace"
66 #define GT_NS "https://www.gnu.org/s/gettext/ns/its/extensions/1.0"
67 
/* A single key-value pair.  Both strings are heap-allocated copies that
   are released by its_value_list_destroy().  */
struct its_value_ty
{
  char *name;   /* The key.  */
  char *value;  /* The value associated with NAME.  */
};
73 
/* A growable array of key-value pairs.  */
struct its_value_list_ty
{
  struct its_value_ty *items;   /* The pairs; the first NITEMS are in use.  */
  size_t nitems;                /* Number of pairs currently stored.  */
  size_t nitems_max;            /* Number of pairs ITEMS has room for.  */
};
80 
81 static void
its_value_list_append(struct its_value_list_ty * values,const char * name,const char * value)82 its_value_list_append (struct its_value_list_ty *values,
83                        const char *name,
84                        const char *value)
85 {
86   struct its_value_ty _value;
87 
88   _value.name = xstrdup (name);
89   _value.value = xstrdup (value);
90 
91   if (values->nitems == values->nitems_max)
92     {
93       values->nitems_max = 2 * values->nitems_max + 1;
94       values->items =
95         xrealloc (values->items,
96                   sizeof (struct its_value_ty) * values->nitems_max);
97     }
98   memcpy (&values->items[values->nitems++], &_value,
99           sizeof (struct its_value_ty));
100 }
101 
102 static const char *
its_value_list_get_value(struct its_value_list_ty * values,const char * name)103 its_value_list_get_value (struct its_value_list_ty *values,
104                           const char *name)
105 {
106   size_t i;
107 
108   for (i = 0; i < values->nitems; i++)
109     {
110       struct its_value_ty *value = &values->items[i];
111       if (strcmp (value->name, name) == 0)
112         return value->value;
113     }
114   return NULL;
115 }
116 
117 static void
its_value_list_set_value(struct its_value_list_ty * values,const char * name,const char * value)118 its_value_list_set_value (struct its_value_list_ty *values,
119                           const char *name,
120                           const char *value)
121 {
122   size_t i;
123 
124   for (i = 0; i < values->nitems; i++)
125     {
126       struct its_value_ty *_value = &values->items[i];
127       if (strcmp (_value->name, name) == 0)
128         {
129           free (_value->value);
130           _value->value = xstrdup (value);
131           break;
132         }
133     }
134 
135   if (i == values->nitems)
136     its_value_list_append (values, name, value);
137 }
138 
139 static void
its_value_list_merge(struct its_value_list_ty * values,struct its_value_list_ty * other)140 its_value_list_merge (struct its_value_list_ty *values,
141                       struct its_value_list_ty *other)
142 {
143   size_t i;
144 
145   for (i = 0; i < other->nitems; i++)
146     {
147       struct its_value_ty *other_value = &other->items[i];
148       size_t j;
149 
150       for (j = 0; j < values->nitems; j++)
151         {
152           struct its_value_ty *value = &values->items[j];
153 
154           if (strcmp (value->name, other_value->name) == 0
155               && strcmp (value->value, other_value->value) != 0)
156             {
157               free (value->value);
158               value->value = xstrdup (other_value->value);
159               break;
160             }
161         }
162 
163       if (j == values->nitems)
164         its_value_list_append (values, other_value->name, other_value->value);
165     }
166 }
167 
168 static void
its_value_list_destroy(struct its_value_list_ty * values)169 its_value_list_destroy (struct its_value_list_ty *values)
170 {
171   size_t i;
172 
173   for (i = 0; i < values->nitems; i++)
174     {
175       free (values->items[i].name);
176       free (values->items[i].value);
177     }
178   free (values->items);
179 }
180 
/* A growable array of value lists.  Nodes refer to their value list by
   its 1-based position in ITEMS (stored in node->_private), because the
   address of an element can change when ITEMS is reallocated.  */
struct its_pool_ty
{
  struct its_value_list_ty *items;  /* The value lists.  */
  size_t nitems;                    /* Number of lists in use.  */
  size_t nitems_max;                /* Number of lists ITEMS has room for.  */
};
187 
188 static struct its_value_list_ty *
its_pool_alloc_value_list(struct its_pool_ty * pool)189 its_pool_alloc_value_list (struct its_pool_ty *pool)
190 {
191   struct its_value_list_ty *values;
192 
193   if (pool->nitems == pool->nitems_max)
194     {
195       pool->nitems_max = 2 * pool->nitems_max + 1;
196       pool->items =
197         xrealloc (pool->items,
198                   sizeof (struct its_value_list_ty) * pool->nitems_max);
199     }
200 
201   values = &pool->items[pool->nitems++];
202   memset (values, 0, sizeof (struct its_value_list_ty));
203   return values;
204 }
205 
206 static const char *
its_pool_get_value_for_node(struct its_pool_ty * pool,xmlNode * node,const char * name)207 its_pool_get_value_for_node (struct its_pool_ty *pool, xmlNode *node,
208                               const char *name)
209 {
210   intptr_t index = (intptr_t) node->_private;
211   if (index > 0)
212     {
213       struct its_value_list_ty *values;
214 
215       assert (index <= pool->nitems);
216       values = &pool->items[index - 1];
217 
218       return its_value_list_get_value (values, name);
219     }
220   return NULL;
221 }
222 
223 static void
its_pool_destroy(struct its_pool_ty * pool)224 its_pool_destroy (struct its_pool_ty *pool)
225 {
226   size_t i;
227 
228   for (i = 0; i < pool->nitems; i++)
229     its_value_list_destroy (&pool->items[i]);
230   free (pool->items);
231 }
232 
/* A growable array of rules, together with the pool that holds the
   value lists assigned to nodes.  */
struct its_rule_list_ty
{
  struct its_rule_ty **items;   /* The rules.  */
  size_t nitems;                /* Number of rules in use.  */
  size_t nitems_max;            /* Number of rules ITEMS has room for.  */

  struct its_pool_ty pool;      /* Value lists referenced by nodes.  */
};
241 
/* A growable array of pointers to XML nodes.  */
struct its_node_list_ty
{
  xmlNode **items;      /* The node pointers.  */
  size_t nitems;        /* Number of pointers in use.  */
  size_t nitems_max;    /* Number of pointers ITEMS has room for.  */
};
248 
249 static void
its_node_list_append(struct its_node_list_ty * nodes,xmlNode * node)250 its_node_list_append (struct its_node_list_ty *nodes,
251                       xmlNode *node)
252 {
253   if (nodes->nitems == nodes->nitems_max)
254     {
255       nodes->nitems_max = 2 * nodes->nitems_max + 1;
256       nodes->items =
257         xrealloc (nodes->items, sizeof (xmlNode *) * nodes->nitems_max);
258     }
259   nodes->items[nodes->nitems++] = node;
260 }
261 
/* Base class representing an ITS rule in global definition.  Each data
   category provides one instance of this structure as its method
   table.  */
struct its_rule_class_ty
{
  /* How many bytes to malloc for an instance of this class.  */
  size_t size;

  /* What to do immediately after the instance is malloc()ed.  NODE is
     the XML element the rule is constructed from.  */
  void (*constructor) (struct its_rule_ty *pop, xmlNode *node);

  /* What to do immediately before the instance is free()ed.  */
  void (*destructor) (struct its_rule_ty *pop);

  /* How to apply the rule to all elements in DOC.  Values are recorded
     in POOL.  */
  void (* apply) (struct its_rule_ty *pop, struct its_pool_ty *pool,
                  xmlDoc *doc);

  /* How to evaluate the value of NODE according to the rule.  The
     evaluators in this file return a freshly allocated list.  */
  struct its_value_list_ty *(* eval) (struct its_rule_ty *pop,
                                      struct its_pool_ty *pool, xmlNode *node);
};
282 
/* Members common to every rule instance:
     methods     the method table of the rule's class,
     selector    the XPath expression selecting the nodes the rule
                 applies to,
     values      the key-value pairs merged into each selected node,
     namespaces  NULL, or a NULL-terminated array of namespaces that are
                 registered in the XPath context before SELECTOR is
                 evaluated.  */
#define ITS_RULE_TY                             \
  struct its_rule_class_ty *methods;            \
  char *selector;                               \
  struct its_value_list_ty values;              \
  xmlNs **namespaces;

/* A rule instance containing only the common members.  */
struct its_rule_ty
{
  ITS_RULE_TY
};
293 
/* Table of known rule classes.  NOTE(review): presumably filled by
   init_classes(), which is outside this part of the file, and keyed by
   rule element name; confirm there.  */
static hash_table classes;
295 
296 static void
its_rule_destructor(struct its_rule_ty * pop)297 its_rule_destructor (struct its_rule_ty *pop)
298 {
299   free (pop->selector);
300   its_value_list_destroy (&pop->values);
301   if (pop->namespaces)
302     {
303       size_t i;
304       for (i = 0; pop->namespaces[i] != NULL; i++)
305         xmlFreeNs (pop->namespaces[i]);
306       free (pop->namespaces);
307     }
308 }
309 
310 static void
its_rule_apply(struct its_rule_ty * rule,struct its_pool_ty * pool,xmlDoc * doc)311 its_rule_apply (struct its_rule_ty *rule, struct its_pool_ty *pool, xmlDoc *doc)
312 {
313   xmlXPathContext *context;
314   xmlXPathObject *object;
315   size_t i;
316 
317   if (!rule->selector)
318     {
319       error (0, 0, _("selector is not specified"));
320       return;
321     }
322 
323   context = xmlXPathNewContext (doc);
324   if (!context)
325     {
326       error (0, 0, _("cannot create XPath context"));
327       return;
328     }
329 
330   if (rule->namespaces)
331     {
332       size_t i;
333       for (i = 0; rule->namespaces[i] != NULL; i++)
334         {
335           xmlNs *ns = rule->namespaces[i];
336           xmlXPathRegisterNs (context, ns->prefix, ns->href);
337         }
338     }
339 
340   object = xmlXPathEval (BAD_CAST rule->selector, context);
341   if (!object)
342     {
343       xmlXPathFreeContext (context);
344       error (0, 0, _("cannot evaluate XPath expression: %s"), rule->selector);
345       return;
346     }
347 
348   if (object->nodesetval)
349     {
350       xmlNodeSet *nodes = object->nodesetval;
351       for (i = 0; i < nodes->nodeNr; i++)
352         {
353           xmlNode *node = nodes->nodeTab[i];
354           struct its_value_list_ty *values;
355 
356           /* We can't store VALUES in NODE, since the address can
357              change when realloc()ed.  */
358           intptr_t index = (intptr_t) node->_private;
359 
360           assert (index <= pool->nitems);
361           if (index > 0)
362             values = &pool->items[index - 1];
363           else
364             {
365               values = its_pool_alloc_value_list (pool);
366               node->_private = (void *) pool->nitems;
367             }
368 
369           its_value_list_merge (values, &rule->values);
370         }
371     }
372 
373   xmlXPathFreeObject (object);
374   xmlXPathFreeContext (context);
375 }
376 
377 static char *
_its_get_attribute(xmlNode * node,const char * attr,const char * namespace)378 _its_get_attribute (xmlNode *node, const char *attr, const char *namespace)
379 {
380   xmlChar *value;
381   char *result;
382 
383   value = xmlGetNsProp (node, BAD_CAST attr, BAD_CAST namespace);
384 
385   result = xstrdup ((const char *) value);
386   xmlFree (value);
387 
388   return result;
389 }
390 
/* Returns a copy of TEXT with whitespace treated according to
   WHITESPACE:
   - ITS_WHITESPACE_PRESERVE: TEXT is copied unchanged.
   - ITS_WHITESPACE_TRIM: the result of trim (TEXT) is returned.
   - ITS_WHITESPACE_NORMALIZE_PARAGRAPH: the text is split into
     paragraphs.  A paragraph ends at a newline that is followed by a run
     of spaces, tabs and newlines containing at least one more newline.
     Within each paragraph, leading and trailing whitespace is removed
     and every other run of whitespace becomes a single space.
     Paragraphs are joined with two newlines.
   - any other value: every run of spaces, tabs and newlines becomes a
     single space, including runs at the very beginning or end.
   Whitespace here means space, tab and newline only.  Except for the
   TRIM case, which depends on trim(), the result is obtained from
   xstrdup().  */
static char *
normalize_whitespace (const char *text, enum its_whitespace_type_ty whitespace)
{
  switch (whitespace)
    {
    case ITS_WHITESPACE_PRESERVE:
      return xstrdup (text);

    case ITS_WHITESPACE_TRIM:
      return trim (text);

    case ITS_WHITESPACE_NORMALIZE_PARAGRAPH:
      /* Normalize whitespaces within the text, keeping paragraph
         boundaries.  */
      {
        char *result = xstrdup (text);
        /* Go through the string, shrinking it, reading from *p++
           and writing to *out++.  (result <= out <= p.)  */
        const char *start_of_paragraph;
        char *out;

        out = result;
        for (start_of_paragraph = result; *start_of_paragraph != '\0';)
          {
            const char *end_of_paragraph;
            const char *next_paragraph;

            /* Find the next paragraph boundary.  */
            {
              const char *p;

              for (p = start_of_paragraph;;)
                {
                  const char *nl = strchrnul (p, '\n');
                  if (*nl == '\0')
                    {
                      /* No further newline: the paragraph extends to the
                         end of the string and is the last one.  */
                      end_of_paragraph = nl;
                      next_paragraph = end_of_paragraph;
                      break;
                    }
                  p = nl + 1;
                  {
                    /* A second newline inside the whitespace that follows
                       NL makes this a paragraph boundary.  */
                    const char *past_whitespace = p + strspn (p, " \t\n");
                    if (memchr (p, '\n', past_whitespace - p) != NULL)
                      {
                        end_of_paragraph = nl;
                        next_paragraph = past_whitespace;
                        break;
                      }
                    p = past_whitespace;
                  }
                }
            }

            /* Normalize whitespaces in the paragraph.  */
            {
              const char *p;

              /* Remove whitespace at the beginning of the paragraph.  */
              for (p = start_of_paragraph; p < end_of_paragraph; p++)
                if (!(*p == ' ' || *p == '\t' || *p == '\n'))
                  break;

              for (; p < end_of_paragraph;)
                {
                  if (*p == ' ' || *p == '\t' || *p == '\n')
                    {
                      /* Normalize whitespace inside the paragraph, and
                         remove whitespace at the end of the paragraph.  */
                      do
                        p++;
                      while (p < end_of_paragraph
                             && (*p == ' ' || *p == '\t' || *p == '\n'));
                      if (p < end_of_paragraph)
                        *out++ = ' ';
                    }
                  else
                    *out++ = *p++;
                }
            }

            /* Separate this paragraph from the next one, if any.  The
               boundary that was skipped is at least two characters long,
               so the write stays behind the read position.  */
            if (*next_paragraph != '\0')
              {
                memcpy (out, "\n\n", 2);
                out += 2;
              }
            start_of_paragraph = next_paragraph;
          }
        *out = '\0';
        return result;
      }
    default:
      /* Normalize whitespaces within the text, but do not eliminate whitespace
         at the beginning nor the end of the text.  */
      {
        char *result = xstrdup (text);
        char *out;
        const char *p;

        /* Shrink the copy in place: OUT never gets ahead of P.  */
        out = result;
        for (p = result; *p != '\0';)
          {
            if (*p == ' ' || *p == '\t' || *p == '\n')
              {
                do
                  p++;
                while (*p == ' ' || *p == '\t' || *p == '\n');
                *out++ = ' ';
              }
            else
              *out++ = *p++;
          }
        *out = '\0';
        return result;
      }
    }
}
508 
509 static char *
_its_encode_special_chars(const char * content,bool is_attribute)510 _its_encode_special_chars (const char *content, bool is_attribute)
511 {
512   const char *str;
513   size_t amount = 0;
514   char *result, *p;
515 
516   for (str = content; *str != '\0'; str++)
517     {
518       switch (*str)
519         {
520         case '&':
521           amount += sizeof ("&amp;");
522           break;
523         case '<':
524           amount += sizeof ("&lt;");
525           break;
526         case '>':
527           amount += sizeof ("&gt;");
528           break;
529         case '"':
530           if (is_attribute)
531             amount += sizeof ("&quot;");
532           else
533             amount += 1;
534           break;
535         default:
536           amount += 1;
537           break;
538         }
539     }
540 
541   result = XNMALLOC (amount + 1, char);
542   *result = '\0';
543   p = result;
544   for (str = content; *str != '\0'; str++)
545     {
546       switch (*str)
547         {
548         case '&':
549           p = stpcpy (p, "&amp;");
550           break;
551         case '<':
552           p = stpcpy (p, "&lt;");
553           break;
554         case '>':
555           p = stpcpy (p, "&gt;");
556           break;
557         case '"':
558           if (is_attribute)
559             p = stpcpy (p, "&quot;");
560           else
561             *p++ = '"';
562           break;
563         default:
564           *p++ = *str;
565           break;
566         }
567     }
568   *p = '\0';
569   return result;
570 }
571 
572 static char *
_its_collect_text_content(xmlNode * node,enum its_whitespace_type_ty whitespace,bool no_escape)573 _its_collect_text_content (xmlNode *node,
574                            enum its_whitespace_type_ty whitespace,
575                            bool no_escape)
576 {
577   char *buffer = NULL;
578   size_t bufmax = 0;
579   size_t bufpos = 0;
580   xmlNode *n;
581 
582   for (n = node->children; n; n = n->next)
583     {
584       char *content = NULL;
585 
586       switch (n->type)
587         {
588         case XML_TEXT_NODE:
589         case XML_CDATA_SECTION_NODE:
590           {
591             xmlChar *xcontent = xmlNodeGetContent (n);
592             char *econtent;
593             const char *ccontent;
594 
595             /* We can't expect xmlTextWriterWriteString() encode
596                special characters as we write text outside of the
597                element.  */
598             if (no_escape)
599               econtent = xstrdup ((const char *) xcontent);
600             else
601               econtent =
602                 _its_encode_special_chars ((const char *) xcontent,
603                                            node->type == XML_ATTRIBUTE_NODE);
604             xmlFree (xcontent);
605 
606             /* Skip whitespaces at the beginning of the text, if this
607                is the first node.  */
608             ccontent = econtent;
609             if (whitespace == ITS_WHITESPACE_NORMALIZE && !n->prev)
610               ccontent = ccontent + strspn (ccontent, " \t\n");
611             content =
612               normalize_whitespace (ccontent, whitespace);
613             free (econtent);
614 
615             /* Skip whitespaces at the end of the text, if this
616                is the last node.  */
617             if (whitespace == ITS_WHITESPACE_NORMALIZE && !n->next)
618               {
619                 char *p = content + strlen (content);
620                 for (; p > content; p--)
621                   {
622                     int c = *(p - 1);
623                     if (!(c == ' ' || c == '\t' || c == '\n'))
624                       {
625                         *p = '\0';
626                         break;
627                       }
628                   }
629               }
630           }
631           break;
632 
633         case XML_ELEMENT_NODE:
634           {
635             xmlOutputBuffer *buffer = xmlAllocOutputBuffer (NULL);
636             xmlTextWriter *writer = xmlNewTextWriter (buffer);
637             char *p = _its_collect_text_content (n, whitespace,
638                                                  no_escape);
639             const char *ccontent;
640 
641             xmlTextWriterStartElement (writer, BAD_CAST n->name);
642             if (n->properties)
643               {
644                 xmlAttr *attr = n->properties;
645                 for (; attr; attr = attr->next)
646                   {
647                     xmlChar *prop = xmlGetProp (n, attr->name);
648                     xmlTextWriterWriteAttribute (writer,
649                                                  attr->name,
650                                                  prop);
651                     xmlFree (prop);
652                   }
653               }
654             if (*p != '\0')
655               xmlTextWriterWriteRaw (writer, BAD_CAST p);
656             xmlTextWriterEndElement (writer);
657             ccontent = (const char *) xmlOutputBufferGetContent (buffer);
658             content = normalize_whitespace (ccontent, whitespace);
659             xmlFreeTextWriter (writer);
660             free (p);
661           }
662           break;
663 
664         case XML_ENTITY_REF_NODE:
665           content = xasprintf ("&%s;", (const char *) n->name);
666           break;
667 
668         default:
669           break;
670         }
671 
672       if (content != NULL)
673         {
674           size_t length = strlen (content);
675 
676           if (bufpos + length + 1 >= bufmax)
677             {
678               bufmax = 2 * bufmax + length + 1;
679               buffer = xrealloc (buffer, bufmax);
680             }
681           strcpy (&buffer[bufpos], content);
682           bufpos += length;
683         }
684       free (content);
685     }
686 
687   if (buffer == NULL)
688     buffer = xstrdup ("");
689   return buffer;
690 }
691 
692 static void
_its_error_missing_attribute(xmlNode * node,const char * attribute)693 _its_error_missing_attribute (xmlNode *node, const char *attribute)
694 {
695   error (0, 0, _("\"%s\" node does not contain \"%s\""),
696          node->name, attribute);
697 }
698 
699 /* Implementation of Translate data category.  */
700 static void
its_translate_rule_constructor(struct its_rule_ty * pop,xmlNode * node)701 its_translate_rule_constructor (struct its_rule_ty *pop, xmlNode *node)
702 {
703   char *prop;
704 
705   if (!xmlHasProp (node, BAD_CAST "selector"))
706     {
707       _its_error_missing_attribute (node, "selector");
708       return;
709     }
710 
711   if (!xmlHasProp (node, BAD_CAST "translate"))
712     {
713       _its_error_missing_attribute (node, "translate");
714       return;
715     }
716 
717   prop = _its_get_attribute (node, "selector", NULL);
718   if (prop)
719     pop->selector = prop;
720 
721   prop = _its_get_attribute (node, "translate", NULL);
722   its_value_list_append (&pop->values, "translate", prop);
723   free (prop);
724 }
725 
726 struct its_value_list_ty *
its_translate_rule_eval(struct its_rule_ty * pop,struct its_pool_ty * pool,xmlNode * node)727 its_translate_rule_eval (struct its_rule_ty *pop, struct its_pool_ty *pool,
728                          xmlNode *node)
729 {
730   struct its_value_list_ty *result;
731 
732   result = XCALLOC (1, struct its_value_list_ty);
733 
734   switch (node->type)
735     {
736     case XML_ATTRIBUTE_NODE:
737       /* Attribute nodes don't inherit from the parent elements.  */
738       {
739         const char *value =
740           its_pool_get_value_for_node (pool, node, "translate");
741         if (value != NULL)
742           {
743             its_value_list_set_value (result, "translate", value);
744             return result;
745           }
746 
747         /* The default value is translate="no".  */
748         its_value_list_append (result, "translate", "no");
749       }
750       break;
751 
752     case XML_ELEMENT_NODE:
753       /* Inherit from the parent elements.  */
754       {
755         const char *value;
756 
757         /* A local attribute overrides the global rule.  */
758         if (xmlHasNsProp (node, BAD_CAST "translate", BAD_CAST ITS_NS))
759           {
760             char *prop;
761 
762             prop = _its_get_attribute (node, "translate", ITS_NS);
763             its_value_list_append (result, "translate", prop);
764             free (prop);
765             return result;
766           }
767 
768         /* Check value for the current node.  */
769         value = its_pool_get_value_for_node (pool, node, "translate");
770         if (value != NULL)
771           {
772             its_value_list_set_value (result, "translate", value);
773             return result;
774           }
775 
776         /* Recursively check value for the parent node.  */
777         if (node->parent == NULL
778             || node->parent->type != XML_ELEMENT_NODE)
779           /* The default value is translate="yes".  */
780           its_value_list_append (result, "translate", "yes");
781         else
782           {
783             struct its_value_list_ty *values;
784 
785             values = its_translate_rule_eval (pop, pool, node->parent);
786             its_value_list_merge (result, values);
787             its_value_list_destroy (values);
788             free (values);
789           }
790       }
791       break;
792 
793     default:
794       break;
795     }
796 
797   return result;
798 }
799 
/* Method table of the Translate data category.  The initializers follow
   the member order of struct its_rule_class_ty.  */
static struct its_rule_class_ty its_translate_rule_class =
  {
    sizeof (struct its_rule_ty),        /* size */
    its_translate_rule_constructor,     /* constructor */
    its_rule_destructor,                /* destructor */
    its_rule_apply,                     /* apply */
    its_translate_rule_eval,            /* eval */
  };
808 
809 /* Implementation of Localization Note data category.  */
810 static void
its_localization_note_rule_constructor(struct its_rule_ty * pop,xmlNode * node)811 its_localization_note_rule_constructor (struct its_rule_ty *pop, xmlNode *node)
812 {
813   char *prop;
814   xmlNode *n;
815 
816   if (!xmlHasProp (node, BAD_CAST "selector"))
817     {
818       _its_error_missing_attribute (node, "selector");
819       return;
820     }
821 
822   if (!xmlHasProp (node, BAD_CAST "locNoteType"))
823     {
824       _its_error_missing_attribute (node, "locNoteType");
825       return;
826     }
827 
828   prop = _its_get_attribute (node, "selector", NULL);
829   if (prop)
830     pop->selector = prop;
831 
832   for (n = node->children; n; n = n->next)
833     {
834       if (n->type == XML_ELEMENT_NODE
835           && xmlStrEqual (n->name, BAD_CAST "locNote")
836           && xmlStrEqual (n->ns->href, BAD_CAST ITS_NS))
837         break;
838     }
839 
840   prop = _its_get_attribute (node, "locNoteType", NULL);
841   if (prop)
842     its_value_list_append (&pop->values, "locNoteType", prop);
843   free (prop);
844 
845   if (n)
846     {
847       /* FIXME: Respect space attribute.  */
848       char *content = _its_collect_text_content (n, ITS_WHITESPACE_NORMALIZE,
849                                                  false);
850       its_value_list_append (&pop->values, "locNote", content);
851       free (content);
852     }
853   else if (xmlHasProp (node, BAD_CAST "locNotePointer"))
854     {
855       prop = _its_get_attribute (node, "locNotePointer", NULL);
856       its_value_list_append (&pop->values, "locNotePointer", prop);
857       free (prop);
858     }
859   /* FIXME: locNoteRef and locNoteRefPointer */
860 }
861 
862 struct its_value_list_ty *
its_localization_note_rule_eval(struct its_rule_ty * pop,struct its_pool_ty * pool,xmlNode * node)863 its_localization_note_rule_eval (struct its_rule_ty *pop,
864                                  struct its_pool_ty *pool,
865                                  xmlNode *node)
866 {
867   struct its_value_list_ty *result;
868 
869   result = XCALLOC (1, struct its_value_list_ty);
870 
871   switch (node->type)
872     {
873     case XML_ATTRIBUTE_NODE:
874       /* Attribute nodes don't inherit from the parent elements.  */
875       {
876         const char *value;
877 
878         value = its_pool_get_value_for_node (pool, node, "locNoteType");
879         if (value != NULL)
880           its_value_list_set_value (result, "locNoteType", value);
881 
882         value = its_pool_get_value_for_node (pool, node, "locNote");
883         if (value != NULL)
884           {
885             its_value_list_set_value (result, "locNote", value);
886             return result;
887           }
888 
889         value = its_pool_get_value_for_node (pool, node, "locNotePointer");
890         if (value != NULL)
891           {
892             its_value_list_set_value (result, "locNotePointer", value);
893             return result;
894           }
895       }
896       break;
897 
898     case XML_ELEMENT_NODE:
899       /* Inherit from the parent elements.  */
900       {
901         const char *value;
902 
903         /* Local attributes overrides the global rule.  */
904         if (xmlHasNsProp (node, BAD_CAST "locNote", BAD_CAST ITS_NS)
905             || xmlHasNsProp (node, BAD_CAST "locNoteRef", BAD_CAST ITS_NS)
906             || xmlHasNsProp (node, BAD_CAST "locNoteType", BAD_CAST ITS_NS))
907           {
908             char *prop;
909 
910             if (xmlHasNsProp (node, BAD_CAST "locNote", BAD_CAST ITS_NS))
911               {
912                 prop = _its_get_attribute (node, "locNote", ITS_NS);
913                 its_value_list_append (result, "locNote", prop);
914                 free (prop);
915               }
916 
917             /* FIXME: locNoteRef */
918 
919             if (xmlHasNsProp (node, BAD_CAST "locNoteType", BAD_CAST ITS_NS))
920               {
921                 prop = _its_get_attribute (node, "locNoteType", ITS_NS);
922                 its_value_list_append (result, "locNoteType", prop);
923                 free (prop);
924               }
925 
926             return result;
927           }
928 
929         /* Check value for the current node.  */
930         value = its_pool_get_value_for_node (pool, node, "locNoteType");
931         if (value != NULL)
932           its_value_list_set_value (result, "locNoteType", value);
933 
934         value = its_pool_get_value_for_node (pool, node, "locNote");
935         if (value != NULL)
936           {
937             its_value_list_set_value (result, "locNote", value);
938             return result;
939           }
940 
941         value = its_pool_get_value_for_node (pool, node, "locNotePointer");
942         if (value != NULL)
943           {
944             its_value_list_set_value (result, "locNotePointer", value);
945             return result;
946           }
947 
948         /* Recursively check value for the parent node.  */
949         if (node->parent == NULL
950             || node->parent->type != XML_ELEMENT_NODE)
951           return result;
952         else
953           {
954             struct its_value_list_ty *values;
955 
956             values = its_localization_note_rule_eval (pop, pool, node->parent);
957             its_value_list_merge (result, values);
958             its_value_list_destroy (values);
959             free (values);
960           }
961       }
962       break;
963 
964     default:
965       break;
966     }
967 
968   /* The default value is None.  */
969   return result;
970 }
971 
/* Method table of the Localization Note data category.  The
   initializers follow the member order of struct its_rule_class_ty.  */
static struct its_rule_class_ty its_localization_note_rule_class =
  {
    sizeof (struct its_rule_ty),                /* size */
    its_localization_note_rule_constructor,     /* constructor */
    its_rule_destructor,                        /* destructor */
    its_rule_apply,                             /* apply */
    its_localization_note_rule_eval,            /* eval */
  };
980 
981 /* Implementation of Element Within Text data category.  */
982 static void
its_element_within_text_rule_constructor(struct its_rule_ty * pop,xmlNode * node)983 its_element_within_text_rule_constructor (struct its_rule_ty *pop,
984                                           xmlNode *node)
985 {
986   char *prop;
987 
988   if (!xmlHasProp (node, BAD_CAST "selector"))
989     {
990       _its_error_missing_attribute (node, "selector");
991       return;
992     }
993 
994   if (!xmlHasProp (node, BAD_CAST "withinText"))
995     {
996       _its_error_missing_attribute (node, "withinText");
997       return;
998     }
999 
1000   prop = _its_get_attribute (node, "selector", NULL);
1001   if (prop)
1002     pop->selector = prop;
1003 
1004   prop = _its_get_attribute (node, "withinText", NULL);
1005   its_value_list_append (&pop->values, "withinText", prop);
1006   free (prop);
1007 }
1008 
1009 struct its_value_list_ty *
its_element_within_text_rule_eval(struct its_rule_ty * pop,struct its_pool_ty * pool,xmlNode * node)1010 its_element_within_text_rule_eval (struct its_rule_ty *pop,
1011                                    struct its_pool_ty *pool,
1012                                    xmlNode *node)
1013 {
1014   struct its_value_list_ty *result;
1015   const char *value;
1016 
1017   result = XCALLOC (1, struct its_value_list_ty);
1018 
1019   if (node->type != XML_ELEMENT_NODE)
1020     return result;
1021 
1022   /* A local attribute overrides the global rule.  */
1023   if (xmlHasNsProp (node, BAD_CAST "withinText", BAD_CAST ITS_NS))
1024     {
1025       char *prop;
1026 
1027       prop = _its_get_attribute (node, "withinText", ITS_NS);
1028       its_value_list_append (result, "withinText", prop);
1029       free (prop);
1030       return result;
1031     }
1032 
1033   /* Doesn't inherit from the parent elements, and the default value
1034      is None.  */
1035   value = its_pool_get_value_for_node (pool, node, "withinText");
1036   if (value != NULL)
1037     its_value_list_set_value (result, "withinText", value);
1038 
1039   return result;
1040 }
1041 
1042 static struct its_rule_class_ty its_element_within_text_rule_class =
1043   {
1044     sizeof (struct its_rule_ty),
1045     its_element_within_text_rule_constructor,
1046     its_rule_destructor,
1047     its_rule_apply,
1048     its_element_within_text_rule_eval,
1049   };
1050 
1051 /* Implementation of Preserve Space data category.  */
1052 static void
its_preserve_space_rule_constructor(struct its_rule_ty * pop,xmlNode * node)1053 its_preserve_space_rule_constructor (struct its_rule_ty *pop,
1054                                      xmlNode *node)
1055 {
1056   char *prop;
1057 
1058   if (!xmlHasProp (node, BAD_CAST "selector"))
1059     {
1060       _its_error_missing_attribute (node, "selector");
1061       return;
1062     }
1063 
1064   if (!xmlHasProp (node, BAD_CAST "space"))
1065     {
1066       _its_error_missing_attribute (node, "space");
1067       return;
1068     }
1069 
1070   prop = _its_get_attribute (node, "selector", NULL);
1071   if (prop)
1072     pop->selector = prop;
1073 
1074   prop = _its_get_attribute (node, "space", NULL);
1075   if (prop
1076       && !(strcmp (prop, "preserve") ==0
1077            || strcmp (prop, "default") == 0
1078            /* gettext extension: remove leading/trailing whitespaces only.  */
1079            || (node->ns && xmlStrEqual (node->ns->href, BAD_CAST GT_NS)
1080                && strcmp (prop, "trim") == 0)
1081            /* gettext extension: same as default except keeping
1082               paragraph boundaries.  */
1083            || (node->ns && xmlStrEqual (node->ns->href, BAD_CAST GT_NS)
1084                && strcmp (prop, "paragraph") == 0)))
1085     {
1086       error (0, 0, _("invalid attribute value \"%s\" for \"%s\""),
1087              prop, "space");
1088       free (prop);
1089       return;
1090     }
1091 
1092   its_value_list_append (&pop->values, "space", prop);
1093   free (prop);
1094 }
1095 
1096 struct its_value_list_ty *
its_preserve_space_rule_eval(struct its_rule_ty * pop,struct its_pool_ty * pool,xmlNode * node)1097 its_preserve_space_rule_eval (struct its_rule_ty *pop,
1098                               struct its_pool_ty *pool,
1099                               xmlNode *node)
1100 {
1101   struct its_value_list_ty *result;
1102   struct its_value_list_ty *values;
1103   const char *value;
1104 
1105   result = XCALLOC (1, struct its_value_list_ty);
1106 
1107   if (node->type != XML_ELEMENT_NODE)
1108     return result;
1109 
1110   /* A local attribute overrides the global rule.  */
1111   if (xmlHasNsProp (node, BAD_CAST "space", BAD_CAST XML_NS))
1112     {
1113       char *prop;
1114 
1115       prop = _its_get_attribute (node, "space", XML_NS);
1116       its_value_list_append (result, "space", prop);
1117       free (prop);
1118       return result;
1119     }
1120 
1121   /* Check value for the current node.  */
1122   value = its_pool_get_value_for_node (pool, node, "space");
1123   if (value != NULL)
1124     {
1125       its_value_list_set_value (result, "space", value);
1126       return result;
1127     }
1128 
1129   if (node->parent == NULL
1130       || node->parent->type != XML_ELEMENT_NODE)
1131     {
1132       /* The default value is space="default".  */
1133       its_value_list_append (result, "space", "default");
1134       return result;
1135     }
1136 
1137   /* Recursively check value for the parent node.  */
1138   values = its_preserve_space_rule_eval (pop, pool, node->parent);
1139   its_value_list_merge (result, values);
1140   its_value_list_destroy (values);
1141   free (values);
1142 
1143   return result;
1144 }
1145 
1146 static struct its_rule_class_ty its_preserve_space_rule_class =
1147   {
1148     sizeof (struct its_rule_ty),
1149     its_preserve_space_rule_constructor,
1150     its_rule_destructor,
1151     its_rule_apply,
1152     its_preserve_space_rule_eval,
1153   };
1154 
1155 /* Implementation of Context data category.  */
1156 static void
its_extension_context_rule_constructor(struct its_rule_ty * pop,xmlNode * node)1157 its_extension_context_rule_constructor (struct its_rule_ty *pop, xmlNode *node)
1158 {
1159   char *prop;
1160 
1161   if (!xmlHasProp (node, BAD_CAST "selector"))
1162     {
1163       _its_error_missing_attribute (node, "selector");
1164       return;
1165     }
1166 
1167   if (!xmlHasProp (node, BAD_CAST "contextPointer"))
1168     {
1169       _its_error_missing_attribute (node, "contextPointer");
1170       return;
1171     }
1172 
1173   prop = _its_get_attribute (node, "selector", NULL);
1174   if (prop)
1175     pop->selector = prop;
1176 
1177   prop = _its_get_attribute (node, "contextPointer", NULL);
1178   its_value_list_append (&pop->values, "contextPointer", prop);
1179   free (prop);
1180 
1181   if (xmlHasProp (node, BAD_CAST "textPointer"))
1182     {
1183       prop = _its_get_attribute (node, "textPointer", NULL);
1184       its_value_list_append (&pop->values, "textPointer", prop);
1185       free (prop);
1186     }
1187 }
1188 
1189 struct its_value_list_ty *
its_extension_context_rule_eval(struct its_rule_ty * pop,struct its_pool_ty * pool,xmlNode * node)1190 its_extension_context_rule_eval (struct its_rule_ty *pop,
1191                                  struct its_pool_ty *pool,
1192                                  xmlNode *node)
1193 {
1194   struct its_value_list_ty *result;
1195   const char *value;
1196 
1197   result = XCALLOC (1, struct its_value_list_ty);
1198 
1199   /* Doesn't inherit from the parent elements, and the default value
1200      is None.  */
1201   value = its_pool_get_value_for_node (pool, node, "contextPointer");
1202   if (value != NULL)
1203     its_value_list_set_value (result, "contextPointer", value);
1204 
1205   value = its_pool_get_value_for_node (pool, node, "textPointer");
1206   if (value != NULL)
1207     its_value_list_set_value (result, "textPointer", value);
1208 
1209   return result;
1210 }
1211 
1212 static struct its_rule_class_ty its_extension_context_rule_class =
1213   {
1214     sizeof (struct its_rule_ty),
1215     its_extension_context_rule_constructor,
1216     its_rule_destructor,
1217     its_rule_apply,
1218     its_extension_context_rule_eval,
1219   };
1220 
1221 /* Implementation of Escape Special Characters data category.  */
1222 static void
its_extension_escape_rule_constructor(struct its_rule_ty * pop,xmlNode * node)1223 its_extension_escape_rule_constructor (struct its_rule_ty *pop, xmlNode *node)
1224 {
1225   char *prop;
1226 
1227   if (!xmlHasProp (node, BAD_CAST "selector"))
1228     {
1229       _its_error_missing_attribute (node, "selector");
1230       return;
1231     }
1232 
1233   if (!xmlHasProp (node, BAD_CAST "escape"))
1234     {
1235       _its_error_missing_attribute (node, "escape");
1236       return;
1237     }
1238 
1239   prop = _its_get_attribute (node, "selector", NULL);
1240   if (prop)
1241     pop->selector = prop;
1242 
1243   prop = _its_get_attribute (node, "escape", NULL);
1244   its_value_list_append (&pop->values, "escape", prop);
1245   free (prop);
1246 }
1247 
1248 struct its_value_list_ty *
its_extension_escape_rule_eval(struct its_rule_ty * pop,struct its_pool_ty * pool,xmlNode * node)1249 its_extension_escape_rule_eval (struct its_rule_ty *pop,
1250                                 struct its_pool_ty *pool,
1251                                 xmlNode *node)
1252 {
1253   struct its_value_list_ty *result;
1254 
1255   result = XCALLOC (1, struct its_value_list_ty);
1256 
1257   switch (node->type)
1258     {
1259     case XML_ATTRIBUTE_NODE:
1260       /* Attribute nodes don't inherit from the parent elements.  */
1261       {
1262         const char *value =
1263           its_pool_get_value_for_node (pool, node, "escape");
1264         if (value != NULL)
1265           {
1266             its_value_list_set_value (result, "escape", value);
1267             return result;
1268           }
1269       }
1270       break;
1271 
1272     case XML_ELEMENT_NODE:
1273       /* Inherit from the parent elements.  */
1274       {
1275         const char *value;
1276 
1277         /* Check value for the current node.  */
1278         value = its_pool_get_value_for_node (pool, node, "escape");
1279         if (value != NULL)
1280           {
1281             its_value_list_set_value (result, "escape", value);
1282             return result;
1283           }
1284 
1285         /* Recursively check value for the parent node.  */
1286         if (node->parent != NULL
1287             && node->parent->type == XML_ELEMENT_NODE)
1288           {
1289             struct its_value_list_ty *values;
1290 
1291             values = its_extension_escape_rule_eval (pop, pool, node->parent);
1292             its_value_list_merge (result, values);
1293             its_value_list_destroy (values);
1294             free (values);
1295           }
1296       }
1297       break;
1298 
1299     default:
1300       break;
1301     }
1302 
1303   return result;
1304 }
1305 
1306 static struct its_rule_class_ty its_extension_escape_rule_class =
1307   {
1308     sizeof (struct its_rule_ty),
1309     its_extension_escape_rule_constructor,
1310     its_rule_destructor,
1311     its_rule_apply,
1312     its_extension_escape_rule_eval,
1313   };
1314 
1315 static struct its_rule_ty *
its_rule_alloc(struct its_rule_class_ty * method_table,xmlNode * node)1316 its_rule_alloc (struct its_rule_class_ty *method_table, xmlNode *node)
1317 {
1318   struct its_rule_ty *pop;
1319 
1320   pop = (struct its_rule_ty *) xcalloc (1, method_table->size);
1321   pop->methods = method_table;
1322   if (method_table->constructor)
1323     method_table->constructor (pop, node);
1324   return pop;
1325 }
1326 
1327 static struct its_rule_ty *
its_rule_parse(xmlDoc * doc,xmlNode * node)1328 its_rule_parse (xmlDoc *doc, xmlNode *node)
1329 {
1330   const char *name = (const char *) node->name;
1331   void *value;
1332 
1333   if (hash_find_entry (&classes, name, strlen (name), &value) == 0)
1334     {
1335       struct its_rule_ty *result;
1336       xmlNs **namespaces;
1337 
1338       result = its_rule_alloc ((struct its_rule_class_ty *) value, node);
1339       namespaces = xmlGetNsList (doc, node);
1340       if (namespaces)
1341         {
1342           size_t i;
1343           for (i = 0; namespaces[i] != NULL; i++)
1344             ;
1345           result->namespaces = XCALLOC (i + 1, xmlNs *);
1346           for (i = 0; namespaces[i] != NULL; i++)
1347             result->namespaces[i] = xmlCopyNamespace (namespaces[i]);
1348         }
1349       xmlFree (namespaces);
1350       return result;
1351     }
1352 
1353   return NULL;
1354 }
1355 
1356 static void
its_rule_destroy(struct its_rule_ty * pop)1357 its_rule_destroy (struct its_rule_ty *pop)
1358 {
1359   if (pop->methods->destructor)
1360     pop->methods->destructor (pop);
1361 }
1362 
1363 static void
init_classes(void)1364 init_classes (void)
1365 {
1366 #define ADD_RULE_CLASS(n, c) \
1367   hash_insert_entry (&classes, n, strlen (n), &c);
1368 
1369   ADD_RULE_CLASS ("translateRule", its_translate_rule_class);
1370   ADD_RULE_CLASS ("locNoteRule", its_localization_note_rule_class);
1371   ADD_RULE_CLASS ("withinTextRule", its_element_within_text_rule_class);
1372   ADD_RULE_CLASS ("preserveSpaceRule", its_preserve_space_rule_class);
1373   ADD_RULE_CLASS ("contextRule", its_extension_context_rule_class);
1374   ADD_RULE_CLASS ("escapeRule", its_extension_escape_rule_class);
1375 
1376 #undef ADD_RULE_CLASS
1377 }
1378 
1379 struct its_rule_list_ty *
its_rule_list_alloc(void)1380 its_rule_list_alloc (void)
1381 {
1382   struct its_rule_list_ty *result;
1383 
1384   if (classes.table == NULL)
1385     {
1386       hash_init (&classes, 10);
1387       init_classes ();
1388     }
1389 
1390   result = XCALLOC (1, struct its_rule_list_ty);
1391   return result;
1392 }
1393 
1394 void
its_rule_list_free(struct its_rule_list_ty * rules)1395 its_rule_list_free (struct its_rule_list_ty *rules)
1396 {
1397   size_t i;
1398 
1399   for (i = 0; i < rules->nitems; i++)
1400     {
1401       its_rule_destroy (rules->items[i]);
1402       free (rules->items[i]);
1403     }
1404   free (rules->items);
1405   its_pool_destroy (&rules->pool);
1406 }
1407 
1408 static bool
its_rule_list_add_from_doc(struct its_rule_list_ty * rules,xmlDoc * doc)1409 its_rule_list_add_from_doc (struct its_rule_list_ty *rules,
1410                             xmlDoc *doc)
1411 {
1412   xmlNode *root, *node;
1413 
1414   root = xmlDocGetRootElement (doc);
1415   if (!(xmlStrEqual (root->name, BAD_CAST "rules")
1416         && xmlStrEqual (root->ns->href, BAD_CAST ITS_NS)))
1417     {
1418       error (0, 0, _("the root element is not \"rules\""
1419                      " under namespace %s"),
1420              ITS_NS);
1421       xmlFreeDoc (doc);
1422       return false;
1423     }
1424 
1425   for (node = root->children; node; node = node->next)
1426     {
1427       struct its_rule_ty *rule;
1428 
1429       rule = its_rule_parse (doc, node);
1430       if (rule != NULL)
1431         {
1432           if (rules->nitems == rules->nitems_max)
1433             {
1434               rules->nitems_max = 2 * rules->nitems_max + 1;
1435               rules->items =
1436                 xrealloc (rules->items,
1437                           sizeof (struct its_rule_ty *) * rules->nitems_max);
1438             }
1439           rules->items[rules->nitems++] = rule;
1440         }
1441     }
1442 
1443   return true;
1444 }
1445 
1446 bool
its_rule_list_add_from_file(struct its_rule_list_ty * rules,const char * filename)1447 its_rule_list_add_from_file (struct its_rule_list_ty *rules,
1448                              const char *filename)
1449 {
1450   xmlDoc *doc;
1451   bool result;
1452 
1453   doc = xmlReadFile (filename, "utf-8",
1454                      XML_PARSE_NONET
1455                      | XML_PARSE_NOWARNING
1456                      | XML_PARSE_NOBLANKS
1457                      | XML_PARSE_NOERROR);
1458   if (doc == NULL)
1459     {
1460       xmlError *err = xmlGetLastError ();
1461       error (0, 0, _("cannot read %s: %s"), filename, err->message);
1462       return false;
1463     }
1464 
1465   result = its_rule_list_add_from_doc (rules, doc);
1466   xmlFreeDoc (doc);
1467   return result;
1468 }
1469 
1470 bool
its_rule_list_add_from_string(struct its_rule_list_ty * rules,const char * rule)1471 its_rule_list_add_from_string (struct its_rule_list_ty *rules,
1472                                const char *rule)
1473 {
1474   xmlDoc *doc;
1475   bool result;
1476 
1477   doc = xmlReadMemory (rule, strlen (rule),
1478                        "(internal)",
1479                        NULL,
1480                        XML_PARSE_NONET
1481                        | XML_PARSE_NOWARNING
1482                        | XML_PARSE_NOBLANKS
1483                        | XML_PARSE_NOERROR);
1484   if (doc == NULL)
1485     {
1486       xmlError *err = xmlGetLastError ();
1487       error (0, 0, _("cannot read %s: %s"), "(internal)", err->message);
1488       return false;
1489     }
1490 
1491   result = its_rule_list_add_from_doc (rules, doc);
1492   xmlFreeDoc (doc);
1493   return result;
1494 }
1495 
1496 static void
its_rule_list_apply(struct its_rule_list_ty * rules,xmlDoc * doc)1497 its_rule_list_apply (struct its_rule_list_ty *rules, xmlDoc *doc)
1498 {
1499   size_t i;
1500 
1501   for (i = 0; i < rules->nitems; i++)
1502     {
1503       struct its_rule_ty *rule = rules->items[i];
1504       rule->methods->apply (rule, &rules->pool, doc);
1505     }
1506 }
1507 
1508 static struct its_value_list_ty *
its_rule_list_eval(its_rule_list_ty * rules,xmlNode * node)1509 its_rule_list_eval (its_rule_list_ty *rules, xmlNode *node)
1510 {
1511   struct its_value_list_ty *result;
1512   size_t i;
1513 
1514   result = XCALLOC (1, struct its_value_list_ty);
1515   for (i = 0; i < rules->nitems; i++)
1516     {
1517       struct its_rule_ty *rule = rules->items[i];
1518       struct its_value_list_ty *values;
1519 
1520       values = rule->methods->eval (rule, &rules->pool, node);
1521       its_value_list_merge (result, values);
1522       its_value_list_destroy (values);
1523       free (values);
1524     }
1525 
1526   return result;
1527 }
1528 
1529 static bool
its_rule_list_is_translatable(its_rule_list_ty * rules,xmlNode * node,int depth)1530 its_rule_list_is_translatable (its_rule_list_ty *rules,
1531                                xmlNode *node,
1532                                int depth)
1533 {
1534   struct its_value_list_ty *values;
1535   const char *value;
1536   xmlNode *n;
1537 
1538   if (node->type != XML_ELEMENT_NODE
1539       && node->type != XML_ATTRIBUTE_NODE)
1540     return false;
1541 
1542   values = its_rule_list_eval (rules, node);
1543 
1544   /* Check if NODE has translate="yes".  */
1545   value = its_value_list_get_value (values, "translate");
1546   if (!(value && strcmp (value, "yes") == 0))
1547     {
1548       its_value_list_destroy (values);
1549       free (values);
1550       return false;
1551     }
1552 
1553   /* Check if NODE has withinText="yes", if NODE is not top-level.  */
1554   if (depth > 0)
1555     {
1556       value = its_value_list_get_value (values, "withinText");
1557       if (!(value && strcmp (value, "yes") == 0))
1558         {
1559           its_value_list_destroy (values);
1560           free (values);
1561           return false;
1562         }
1563     }
1564 
1565   its_value_list_destroy (values);
1566   free (values);
1567 
1568   for (n = node->children; n; n = n->next)
1569     {
1570       switch (n->type)
1571         {
1572         case XML_ELEMENT_NODE:
1573           if (!its_rule_list_is_translatable (rules, n, depth + 1))
1574             return false;
1575           break;
1576 
1577         case XML_TEXT_NODE:
1578         case XML_CDATA_SECTION_NODE:
1579         case XML_ENTITY_REF_NODE:
1580         case XML_COMMENT_NODE:
1581           break;
1582 
1583         default:
1584           return false;
1585         }
1586     }
1587 
1588   return true;
1589 }
1590 
1591 static void
its_rule_list_extract_nodes(its_rule_list_ty * rules,struct its_node_list_ty * nodes,xmlNode * node)1592 its_rule_list_extract_nodes (its_rule_list_ty *rules,
1593                              struct its_node_list_ty *nodes,
1594                              xmlNode *node)
1595 {
1596   if (node->type == XML_ELEMENT_NODE)
1597     {
1598       xmlNode *n;
1599 
1600       if (node->properties)
1601         {
1602           xmlAttr *attr = node->properties;
1603           for (; attr; attr = attr->next)
1604             {
1605               xmlNode *n = (xmlNode *) attr;
1606               if (its_rule_list_is_translatable (rules, n, 0))
1607                 its_node_list_append (nodes, n);
1608             }
1609         }
1610 
1611       if (its_rule_list_is_translatable (rules, node, 0))
1612         its_node_list_append (nodes, node);
1613       else
1614         {
1615           for (n = node->children; n; n = n->next)
1616             its_rule_list_extract_nodes (rules, nodes, n);
1617         }
1618     }
1619 }
1620 
1621 static char *
_its_get_content(struct its_rule_list_ty * rules,xmlNode * node,const char * pointer,enum its_whitespace_type_ty whitespace,bool no_escape)1622 _its_get_content (struct its_rule_list_ty *rules, xmlNode *node,
1623                   const char *pointer,
1624                   enum its_whitespace_type_ty whitespace,
1625                   bool no_escape)
1626 {
1627   xmlXPathContext *context;
1628   xmlXPathObject *object;
1629   size_t i;
1630   char *result = NULL;
1631 
1632   context = xmlXPathNewContext (node->doc);
1633   if (!context)
1634     {
1635       error (0, 0, _("cannot create XPath context"));
1636       return NULL;
1637     }
1638 
1639   for (i = 0; i < rules->nitems; i++)
1640     {
1641       struct its_rule_ty *rule = rules->items[i];
1642       if (rule->namespaces)
1643         {
1644           size_t i;
1645           for (i = 0; rule->namespaces[i] != NULL; i++)
1646             {
1647               xmlNs *ns = rule->namespaces[i];
1648               xmlXPathRegisterNs (context, ns->prefix, ns->href);
1649             }
1650         }
1651     }
1652 
1653   xmlXPathSetContextNode (node, context);
1654   object = xmlXPathEvalExpression (BAD_CAST pointer, context);
1655   if (!object)
1656     {
1657       xmlXPathFreeContext (context);
1658       error (0, 0, _("cannot evaluate XPath location path: %s"),
1659              pointer);
1660       return NULL;
1661     }
1662 
1663   switch (object->type)
1664     {
1665     case XPATH_NODESET:
1666       {
1667         xmlNodeSet *nodes = object->nodesetval;
1668         string_list_ty sl;
1669         size_t i;
1670 
1671         string_list_init (&sl);
1672         for (i = 0; i < nodes->nodeNr; i++)
1673           {
1674             char *content = _its_collect_text_content (nodes->nodeTab[i],
1675                                                        whitespace,
1676                                                        no_escape);
1677             string_list_append (&sl, content);
1678             free (content);
1679           }
1680         result = string_list_concat (&sl);
1681         string_list_destroy (&sl);
1682       }
1683       break;
1684 
1685     case XPATH_STRING:
1686       result = xstrdup ((const char *) object->stringval);
1687       break;
1688 
1689     default:
1690       break;
1691     }
1692 
1693   xmlXPathFreeObject (object);
1694   xmlXPathFreeContext (context);
1695 
1696   return result;
1697 }
1698 
1699 static void
_its_comment_append(string_list_ty * comments,const char * data)1700 _its_comment_append (string_list_ty *comments, const char *data)
1701 {
1702   /* Split multiline comment into lines, and remove leading and trailing
1703      whitespace.  */
1704   char *copy = xstrdup (data);
1705   char *p;
1706   char *q;
1707 
1708   for (p = copy; (q = strchr (p, '\n')) != NULL; p = q + 1)
1709     {
1710       while (p[0] == ' ' || p[0] == '\t')
1711         p++;
1712       while (q > p && (q[-1] == ' ' || q[-1] == '\t'))
1713         q--;
1714       *q = '\0';
1715       string_list_append (comments, p);
1716     }
1717   q = p + strlen (p);
1718   while (p[0] == ' ' || p[0] == '\t')
1719     p++;
1720   while (q > p && (q[-1] == ' ' || q[-1] == '\t'))
1721     q--;
1722   *q = '\0';
1723   string_list_append (comments, p);
1724   free (copy);
1725 }
1726 
1727 static void
its_rule_list_extract_text(its_rule_list_ty * rules,xmlNode * node,const char * logical_filename,flag_context_list_table_ty * flag_table,message_list_ty * mlp,its_extract_callback_ty callback)1728 its_rule_list_extract_text (its_rule_list_ty *rules,
1729                             xmlNode *node,
1730                             const char *logical_filename,
1731                             flag_context_list_table_ty *flag_table,
1732                             message_list_ty *mlp,
1733                             its_extract_callback_ty callback)
1734 {
1735   if (node->type == XML_ELEMENT_NODE
1736       || node->type == XML_ATTRIBUTE_NODE)
1737     {
1738       struct its_value_list_ty *values;
1739       const char *value;
1740       char *msgid = NULL, *msgctxt = NULL, *comment = NULL;
1741       enum its_whitespace_type_ty whitespace;
1742       bool no_escape;
1743 
1744       values = its_rule_list_eval (rules, node);
1745 
1746       value = its_value_list_get_value (values, "locNote");
1747       if (value)
1748         comment = xstrdup (value);
1749       else
1750         {
1751           value = its_value_list_get_value (values, "escape");
1752           no_escape = value != NULL && strcmp (value, "no") == 0;
1753 
1754           value = its_value_list_get_value (values, "locNotePointer");
1755           if (value)
1756             comment = _its_get_content (rules, node, value, ITS_WHITESPACE_TRIM,
1757                                         no_escape);
1758         }
1759 
1760       if (comment != NULL && *comment != '\0')
1761         {
1762           string_list_ty comments;
1763           char *tmp;
1764 
1765           string_list_init (&comments);
1766           _its_comment_append (&comments, comment);
1767           tmp = string_list_join (&comments, "\n", '\0', false);
1768           free (comment);
1769           comment = tmp;
1770         }
1771       else
1772         /* Extract comments preceding the node.  */
1773         {
1774           xmlNode *sibling;
1775           string_list_ty comments;
1776 
1777           string_list_init (&comments);
1778           for (sibling = node->prev; sibling; sibling = sibling->prev)
1779             if (sibling->type != XML_COMMENT_NODE || sibling->prev == NULL)
1780               break;
1781           if (sibling)
1782             {
1783               if (sibling->type != XML_COMMENT_NODE)
1784                 sibling = sibling->next;
1785               for (; sibling && sibling->type == XML_COMMENT_NODE;
1786                    sibling = sibling->next)
1787                 {
1788                   xmlChar *content = xmlNodeGetContent (sibling);
1789                   _its_comment_append (&comments, (const char *) content);
1790                   xmlFree (content);
1791                 }
1792               free (comment);
1793               comment = string_list_join (&comments, "\n", '\0', false);
1794               string_list_destroy (&comments);
1795             }
1796         }
1797 
1798       value = its_value_list_get_value (values, "space");
1799       if (value && strcmp (value, "preserve") == 0)
1800         whitespace = ITS_WHITESPACE_PRESERVE;
1801       else if (value && strcmp (value, "trim") == 0)
1802         whitespace = ITS_WHITESPACE_TRIM;
1803       else if (value && strcmp (value, "paragraph") == 0)
1804         whitespace = ITS_WHITESPACE_NORMALIZE_PARAGRAPH;
1805       else
1806         whitespace = ITS_WHITESPACE_NORMALIZE;
1807 
1808       value = its_value_list_get_value (values, "escape");
1809       no_escape = value != NULL && strcmp (value, "no") == 0;
1810 
1811       value = its_value_list_get_value (values, "contextPointer");
1812       if (value)
1813         msgctxt = _its_get_content (rules, node, value, ITS_WHITESPACE_PRESERVE,
1814                                     no_escape);
1815 
1816       value = its_value_list_get_value (values, "textPointer");
1817       if (value)
1818         msgid = _its_get_content (rules, node, value, ITS_WHITESPACE_PRESERVE,
1819                                   no_escape);
1820       its_value_list_destroy (values);
1821       free (values);
1822 
1823       if (msgid == NULL)
1824         msgid = _its_collect_text_content (node, whitespace, no_escape);
1825       if (*msgid != '\0')
1826         {
1827           lex_pos_ty pos;
1828           char *marker;
1829 
1830           pos.file_name = xstrdup (logical_filename);
1831           pos.line_number = xmlGetLineNo (node);
1832 
1833           if (node->type == XML_ELEMENT_NODE)
1834             {
1835               assert (node->parent);
1836               marker = xasprintf ("%s/%s", node->parent->name, node->name);
1837             }
1838           else
1839             {
1840               assert (node->parent && node->parent->parent);
1841               marker = xasprintf ("%s/%s@%s",
1842                                   node->parent->parent->name,
1843                                   node->parent->name,
1844                                   node->name);
1845             }
1846 
1847           if (msgctxt != NULL && *msgctxt == '\0')
1848             {
1849               free (msgctxt);
1850               msgctxt = NULL;
1851             }
1852 
1853           callback (mlp, msgctxt, msgid, &pos, comment, marker, whitespace);
1854           free (marker);
1855         }
1856       free (msgctxt);
1857       free (msgid);
1858       free (comment);
1859     }
1860 }
1861 
1862 void
its_rule_list_extract(its_rule_list_ty * rules,FILE * fp,const char * real_filename,const char * logical_filename,flag_context_list_table_ty * flag_table,msgdomain_list_ty * mdlp,its_extract_callback_ty callback)1863 its_rule_list_extract (its_rule_list_ty *rules,
1864                        FILE *fp, const char *real_filename,
1865                        const char *logical_filename,
1866                        flag_context_list_table_ty *flag_table,
1867                        msgdomain_list_ty *mdlp,
1868                        its_extract_callback_ty callback)
1869 {
1870   xmlDoc *doc;
1871   struct its_node_list_ty nodes;
1872   size_t i;
1873 
1874   doc = xmlReadFd (fileno (fp), logical_filename, NULL,
1875                    XML_PARSE_NONET
1876                    | XML_PARSE_NOWARNING
1877                    | XML_PARSE_NOBLANKS
1878                    | XML_PARSE_NOERROR);
1879   if (doc == NULL)
1880     {
1881       xmlError *err = xmlGetLastError ();
1882       error (0, 0, _("cannot read %s: %s"), logical_filename, err->message);
1883       return;
1884     }
1885 
1886   its_rule_list_apply (rules, doc);
1887 
1888   memset (&nodes, 0, sizeof (struct its_node_list_ty));
1889   its_rule_list_extract_nodes (rules,
1890                                &nodes,
1891                                xmlDocGetRootElement (doc));
1892 
1893   for (i = 0; i < nodes.nitems; i++)
1894     its_rule_list_extract_text (rules, nodes.items[i],
1895                                 logical_filename,
1896                                 flag_table,
1897                                 mdlp->item[0]->messages,
1898                                 callback);
1899 
1900   free (nodes.items);
1901   xmlFreeDoc (doc);
1902 }
1903 
1904 struct its_merge_context_ty
1905 {
1906   its_rule_list_ty *rules;
1907   xmlDoc *doc;
1908   struct its_node_list_ty nodes;
1909 };
1910 
1911 static void
its_merge_context_merge_node(struct its_merge_context_ty * context,xmlNode * node,const char * language,message_list_ty * mlp)1912 its_merge_context_merge_node (struct its_merge_context_ty *context,
1913                               xmlNode *node,
1914                               const char *language,
1915                               message_list_ty *mlp)
1916 {
1917   if (node->type == XML_ELEMENT_NODE)
1918     {
1919       struct its_value_list_ty *values;
1920       const char *value;
1921       char *msgid = NULL, *msgctxt = NULL;
1922       enum its_whitespace_type_ty whitespace;
1923       bool no_escape;
1924 
1925       values = its_rule_list_eval (context->rules, node);
1926 
1927       value = its_value_list_get_value (values, "space");
1928       if (value && strcmp (value, "preserve") == 0)
1929         whitespace = ITS_WHITESPACE_PRESERVE;
1930       else if (value && strcmp (value, "trim") == 0)
1931         whitespace = ITS_WHITESPACE_TRIM;
1932       else if (value && strcmp (value, "paragraph") == 0)
1933         whitespace = ITS_WHITESPACE_NORMALIZE_PARAGRAPH;
1934       else
1935         whitespace = ITS_WHITESPACE_NORMALIZE;
1936 
1937       value = its_value_list_get_value (values, "escape");
1938       no_escape = value != NULL && strcmp (value, "no") == 0;
1939 
1940       value = its_value_list_get_value (values, "contextPointer");
1941       if (value)
1942         msgctxt = _its_get_content (context->rules, node, value,
1943                                     ITS_WHITESPACE_PRESERVE, no_escape);
1944 
1945       value = its_value_list_get_value (values, "textPointer");
1946       if (value)
1947         msgid = _its_get_content (context->rules, node, value,
1948                                   ITS_WHITESPACE_PRESERVE, no_escape);
1949       its_value_list_destroy (values);
1950       free (values);
1951 
1952       if (msgid == NULL)
1953         msgid = _its_collect_text_content (node, whitespace, no_escape);
1954       if (*msgid != '\0')
1955         {
1956           message_ty *mp;
1957 
1958           mp = message_list_search (mlp, msgctxt, msgid);
1959           if (mp && *mp->msgstr != '\0')
1960             {
1961               xmlNode *translated;
1962 
1963               translated = xmlNewNode (node->ns, node->name);
1964               xmlSetProp (translated, BAD_CAST "xml:lang", BAD_CAST language);
1965 
1966               xmlNodeAddContent (translated, BAD_CAST mp->msgstr);
1967               xmlAddNextSibling (node, translated);
1968             }
1969         }
1970       free (msgctxt);
1971       free (msgid);
1972     }
1973 }
1974 
1975 void
its_merge_context_merge(its_merge_context_ty * context,const char * language,message_list_ty * mlp)1976 its_merge_context_merge (its_merge_context_ty *context,
1977                          const char *language,
1978                          message_list_ty *mlp)
1979 {
1980   size_t i;
1981 
1982   for (i = 0; i < context->nodes.nitems; i++)
1983     its_merge_context_merge_node (context, context->nodes.items[i],
1984                                   language,
1985                                   mlp);
1986 }
1987 
1988 struct its_merge_context_ty *
its_merge_context_alloc(its_rule_list_ty * rules,const char * filename)1989 its_merge_context_alloc (its_rule_list_ty *rules,
1990                          const char *filename)
1991 {
1992   xmlDoc *doc;
1993   struct its_merge_context_ty *result;
1994 
1995   doc = xmlReadFile (filename, NULL,
1996                      XML_PARSE_NONET
1997                      | XML_PARSE_NOWARNING
1998                      | XML_PARSE_NOBLANKS
1999                      | XML_PARSE_NOERROR);
2000   if (doc == NULL)
2001     {
2002       xmlError *err = xmlGetLastError ();
2003       error (0, 0, _("cannot read %s: %s"), filename, err->message);
2004       return NULL;
2005     }
2006 
2007   its_rule_list_apply (rules, doc);
2008 
2009   result = XMALLOC (struct its_merge_context_ty);
2010   result->rules = rules;
2011   result->doc = doc;
2012 
2013   /* Collect translatable nodes.  */
2014   memset (&result->nodes, 0, sizeof (struct its_node_list_ty));
2015   its_rule_list_extract_nodes (result->rules,
2016                                &result->nodes,
2017                                xmlDocGetRootElement (result->doc));
2018 
2019   return result;
2020 }
2021 
2022 void
its_merge_context_write(struct its_merge_context_ty * context,FILE * fp)2023 its_merge_context_write (struct its_merge_context_ty *context,
2024                          FILE *fp)
2025 {
2026   xmlDocFormatDump (fp, context->doc, 1);
2027 }
2028 
2029 void
its_merge_context_free(struct its_merge_context_ty * context)2030 its_merge_context_free (struct its_merge_context_ty *context)
2031 {
2032   xmlFreeDoc (context->doc);
2033   free (context->nodes.items);
2034   free (context);
2035 }
2036