1 /*
2  * Copyright (C) 2008 Emweb bv, Herent, Belgium.
3  *
4  * See the LICENSE file for terms of use.
5  */
6 #ifndef WT_CNOR
7 #include <fstream>
8 #include <cstring>
9 
10 #include "Wt/WLocale.h"
11 #include "Wt/WLogger.h"
12 #include "Wt/WMessageResources.h"
13 #include "Wt/WStringStream.h"
14 
15 #include "DomElement.h"
16 #include "WebUtils.h"
17 
18 #include "3rdparty/rapidxml/rapidxml.hpp"
19 #include "3rdparty/rapidxml/rapidxml_print.hpp"
20 
21 using namespace Wt;
22 using namespace Wt::rapidxml;
23 
24 #ifndef WT_NO_SPIRIT
25 
26 #include <boost/version.hpp>
27 
28 #if BOOST_VERSION < 103600
29 #include <boost/spirit.hpp>
30 #include <boost/spirit/phoenix/binders.hpp>
31 #else
32 #include <boost/spirit/include/classic_core.hpp>
33 #include <boost/spirit/include/classic_attribute.hpp>
34 #include <boost/spirit/include/phoenix1_binders.hpp>
35 #endif
36 
37 namespace {
38 
39 #if BOOST_VERSION < 103600
40   using namespace boost::spirit;
41 #else
42   using namespace boost::spirit::classic;
43 #endif
44   using namespace boost;
45 
/*
 * Boost.Spirit (classic) grammar that parses and evaluates a C-like integer
 * expression in which the identifier 'n' stands for a caller supplied value.
 *
 * Supported constructs, from lowest to highest binding: the ternary
 * operator (?:), ||, &&, == and !=, the relational operators, + and -,
 * then *, / and %, and finally unsigned literals, 'n' and parenthesised
 * groups. All intermediate values are ::int64_t.
 *
 * The value of the parsed statement is written to the int reference passed
 * to the constructor (see set_result()).
 */
struct CExpressionParser : grammar<CExpressionParser>
{
  // Scratch state shared by the ternary semantic actions. It is held by
  // reference in the grammar so that the const action members can update it.
  struct ParseState
  {
    // Set by set_cond()/set_not_cond(), read by ternary_op().
    bool condition_;
  };

  // n      : value substituted for the identifier 'n'
  // result : receives the value of the statement (via set_result())
  // state  : scratch state for the ternary operator; must outlive the parse
  CExpressionParser(::int64_t n, int &result, ParseState &state) :
    n_(n),
    result_(result),
    state_(state)
  {}

  // Closure attached to every value-producing rule: 'value' holds the
  // rule's result, 'condition' is only used by the expression rule to
  // remember the outcome of the ternary condition.
  struct value_closure : closure<value_closure, ::int64_t, ::int64_t>
  {
    member1 value;
    member2 condition;
  };

  template <typename ScannerT>
  struct definition
  {
    // Builds the rules. Semantic actions either assign to the closure
    // members directly or call back into the grammar object (self) through
    // phoenix bind().
    definition(CExpressionParser const& self)
    {
      using namespace boost::spirit;
      using namespace phoenix;

      // '(' expression ')': yields the value of the inner expression.
      group
        = '('
          >> expression[group.value = arg1]
          >> ')'
        ;

      // A statement is a single expression followed by either end_p or a
      // semicolon; the expression's value is handed to set_result().
      statement
        =   ( expression[bind(&CExpressionParser::set_result)(self, arg1)]
          )
        >> (end_p | ';')
        ;

      // Unsigned integer literal.
      literal
        = uint_p[literal.value = arg1]
        ;

      // A literal, a parenthesised group, or the variable 'n'.
      factor
        = literal[factor.value = arg1]
        | group[factor.value = arg1]
        | ch_p('n')[factor.value = bind(&CExpressionParser::get_n)(self)]
        ;

      // Multiplicative operators, applied left to right.
      // NOTE(review): '/' and '%' are applied without any visible check for
      // a zero right-hand operand.
      term
        = factor[term.value = arg1]
          >> *( ('*' >> factor[term.value *= arg1])
              | ('/' >> factor[term.value /= arg1])
	      | ('%' >> factor[term.value %= arg1])
            )
        ;

      // Additive operators, applied left to right.
      additive_expression
        = term[additive_expression.value = arg1]
          >> *( ('+' >> term[additive_expression.value += arg1])
              | ('-' >> term[additive_expression.value -= arg1])
            )
        ;

      // Top level rule: an or_expression optionally followed by
      // '?' expression ':' expression.
      //
      // The or_expression result is stored both as the value and as the
      // condition. After each branch has been parsed, set_cond() (first
      // branch) or set_not_cond() (second branch) records in the shared
      // ParseState whether that branch is the selected one, and
      // ternary_op() then overwrites the value with the branch result only
      // when it is selected.
      //
      // NOTE(review): both branches are always parsed and evaluated; only
      // the assignment of the result is conditional.
      expression
	= or_expression[expression.value = arg1]
	               [expression.condition = arg1]
	>> !( '?'
	      >> expression[bind(&CExpressionParser::set_cond)
			    (self, expression.condition)]
	                   [bind(&CExpressionParser::ternary_op)
			    (self, expression.value, arg1)]
	      >> ':'
	      >> expression[bind(&CExpressionParser::set_not_cond)
			    (self, expression.condition)]
	                   [bind(&CExpressionParser::ternary_op)
			    (self, expression.value, arg1)]
			    )
	;

      // Logical or; the right operand is always parsed and evaluated.
      or_expression
        = and_expression[or_expression.value = arg1]
	  >> *( "||" >> and_expression[bind(&CExpressionParser::or_op)
				       (self, or_expression.value, arg1)] )
        ;

      // Logical and; the right operand is always parsed and evaluated.
      and_expression
        = eq_expression[and_expression.value = arg1]
          >> *( "&&" >> eq_expression[bind(&CExpressionParser::and_op)
				       (self, and_expression.value, arg1)] )
        ;

      // Equality operators; the result (0 or 1) replaces the left value.
      eq_expression
        = relational_expression[eq_expression.value = arg1]
          >> *( ("==" >> relational_expression[bind(&CExpressionParser::eq_op)
					       (self,
						eq_expression.value,
						arg1)])
	      | ("!=" >> relational_expression[bind(&CExpressionParser::neq_op)
					       (self,
						eq_expression.value,
						arg1)])
            )
        ;

      // Relational operators; the result (0 or 1) replaces the left value.
      // NOTE(review): the ">" and "<" alternatives are listed before ">="
      // and "<="; presumably the longer operators are only reached after
      // the shorter alternative fails on the '=' that follows - confirm.
      relational_expression
        = additive_expression[relational_expression.value = arg1]
          >> *( (">" >> additive_expression[bind(&CExpressionParser::gt_op)
					    (self,
					     relational_expression.value,
					     arg1)])
	      | (">=" >> additive_expression[bind(&CExpressionParser::gte_op)
					     (self,
					      relational_expression.value,
					      arg1)])
	      | ("<" >> additive_expression[bind(&CExpressionParser::lt_op)
					    (self,
					     relational_expression.value,
					     arg1)])
	      | ("<=" >> additive_expression[bind(&CExpressionParser::lte_op)
					     (self,
					      relational_expression.value,
					      arg1)])
            )
        ;
    }

    // Entry rule of the grammar.
    rule<ScannerT> const&
    start() const { return statement; }

    // 'statement' produces no value itself; all other rules carry a
    // value_closure.
    rule<ScannerT> statement;
    rule<ScannerT, value_closure::context_t> expression, factor,
      group, literal, term, additive_expression, or_expression, and_expression,
      eq_expression, relational_expression;
  };

private:
  // Value substituted for the identifier 'n'.
  ::int64_t get_n() const { return n_; }

  // Comparison helpers: store the boolean outcome (0 or 1) of "x <op> y"
  // back into x.
  void eq_op(::int64_t &x, ::int64_t y) const { x = x == y; }
  void neq_op(::int64_t &x, ::int64_t y) const { x = x != y; }

  void lt_op(::int64_t &x, ::int64_t y) const { x = x < y;}
  void gt_op(::int64_t &x, ::int64_t y) const { x = x > y;}
  void lte_op(::int64_t &x, ::int64_t y) const { x = x <= y;}
  void gte_op(::int64_t &x, ::int64_t y) const { x = x >= y;}

  // Replaces result with the branch value y when the branch that was just
  // parsed is the selected one (as recorded in the shared ParseState).
  void ternary_op(::int64_t &result, ::int64_t y) const
  {
    if (state_.condition_)
      result = y;
  }

  // Marks the branch that was just parsed as selected when condition is
  // non-zero (used for the branch after '?').
  void set_cond(::int64_t condition) const
  {
    state_.condition_ = condition;
  }

  // Marks the branch that was just parsed as selected when condition is
  // zero (used for the branch after ':').
  void set_not_cond(::int64_t condition) const
  {
    state_.condition_ = !condition;
  }

  // Logical helpers: store the boolean outcome (0 or 1) back into x.
  void or_op(::int64_t &x, ::int64_t y) const { x = x || y; }
  void and_op(::int64_t &x, ::int64_t y) const { x = x && y; }

  // Stores the statement value in the caller's result variable; the
  // ::int64_t expression value is passed here as an int.
  void set_result(int result) const { result_ = result; }
private :
  ::int64_t n_;        // value of 'n'
  int &result_;        // caller owned destination of the statement value
  ParseState &state_;  // caller owned scratch state for the ternary actions

public :
  // Returns the current content of the caller's result variable.
  int result() { return result_; }
};
222 
223 } // anonymous namespace
224 
225 #endif //WT_NO_SPIRIT
226 
227 namespace {
fixSelfClosingTags(xml_node<> * x_node)228   void fixSelfClosingTags(xml_node<> *x_node)
229   {
230     for (xml_node<> *x_child = x_node->first_node(); x_child;
231 	 x_child = x_child->next_sibling())
232       fixSelfClosingTags(x_child);
233 
234     if (!x_node->first_node()
235 	&& x_node->value_size() == 0
236 	&& !Wt::DomElement::isSelfClosingTag
237 	(std::string(x_node->name(), x_node->name_size()))) {
238       // We need to add an emtpy data node since <div /> is illegal HTML
239       // (but valid XML / XHTML)
240       xml_node<> *empty	= x_node->document()->allocate_node(node_data);
241       x_node->append_node(empty);
242     }
243   }
244 
copy_chars(const char * begin,const char * end,char * out)245   char *copy_chars(const char *begin, const char *end, char *out)
246   {
247     while (begin != end)
248       *out++ = *begin++;
249     return out;
250   }
251 
readElementContent(xml_node<> * x_parent,std::unique_ptr<char[]> & buf)252   std::string readElementContent(xml_node<> *x_parent,
253 				 std::unique_ptr<char[]>& buf)
254   {
255     char *ptr = buf.get();
256 
257     if (x_parent->type() == node_cdata) {
258       return std::string(x_parent->value(), x_parent->value_size());
259     } else {
260       for (xml_node<> *x_child = x_parent->first_node();
261 	   x_child; x_child = x_child->next_sibling()) {
262 	if (x_child->type() == node_cdata) {
263 	  ptr = copy_chars(x_child->value(),
264 			   x_child->value() + x_child->value_size(), ptr);
265 	} else {
266 	  fixSelfClosingTags(x_child);
267 	  ptr = print(ptr, *x_child, print_no_indenting);
268 	}
269       }
270 
271       return std::string(buf.get(), ptr - buf.get());
272     }
273   }
274 
attributeValueToInt(xml_attribute<> * x_attribute)275   int attributeValueToInt(xml_attribute<> *x_attribute)
276   {
277     return Utils::stoi(std::string(x_attribute->value(),
278 				 x_attribute->value_size()));
279   }
280 }
281 
282 namespace Wt {
283 
284 LOGGER("WMessageResources");
285 
WMessageResources(const std::string & path,bool loadInMemory)286 WMessageResources::WMessageResources(const std::string& path,
287 				     bool loadInMemory)
288   : loadInMemory_(loadInMemory),
289     path_(path),
290     builtin_(nullptr)
291 { }
292 
WMessageResources(const char * builtin)293 WMessageResources::WMessageResources(const char *builtin)
294   : loadInMemory_(true),
295     builtin_(builtin)
296 {
297   std::istringstream s(builtin,  std::ios::in | std::ios::binary);
298   readResourceStream(s, resources_[""], "<internal resource bundle>");
299 }
300 
keys(const WLocale & locale)301 std::set<std::string> WMessageResources::keys(const WLocale& locale) const
302 {
303   load(locale);
304 
305   std::set<std::string> keys;
306 
307   for (auto& r : resources_) {
308     if (r.first == locale.name()) {
309       for (auto& k : r.second.map_)
310 	keys.insert(k.first);
311       break;
312     }
313   }
314 
315   return keys;
316 }
317 
load(const WLocale & locale)318 void WMessageResources::load(const WLocale& locale) const
319 {
320   if (!path_.empty()) {
321     Resource& target = resources_[locale.name()];
322     std::string l = locale.name();
323 
324     target.map_.clear();
325 
326     for (;;) {
327       if (readResourceFile(l, target))
328 	break;
329 
330       /* try a lesser specified variant */
331       std::string::size_type i = l.rfind('-');
332       if (i != std::string::npos)
333 	l.erase(i);
334       else {
335 	if (locale.name().empty())
336 	  LOG_ERROR("Could not load resource bundle: " << path_ << ".xml");
337 	break;
338       }
339     }
340   }
341 }
342 
hibernate()343 void WMessageResources::hibernate()
344 {
345   if (!loadInMemory_) {
346     resources_.clear();
347   }
348 }
349 
resolveKey(const WLocale & locale,const std::string & key)350 LocalizedString WMessageResources::resolveKey(const WLocale& locale, const std::string& key)
351   const
352 {
353   LocalizedString result = resolve(locale.name(), key);
354   if (result)
355     return result;
356 
357   return resolve(std::string(), key);
358 }
359 
resolve(const std::string & locale,const std::string & key)360 LocalizedString WMessageResources::resolve(const std::string& locale, const std::string& key)
361   const
362 {
363   if (resources_.find(locale) == resources_.end())
364     load(locale);
365 
366   const Resource& res = resources_[locale];
367 
368   KeyValuesMap::const_iterator j = res.map_.find(key);
369   if (j != res.map_.end()) {
370     if (j->second.size() > 1 )
371       return LocalizedString{};
372     return LocalizedString{j->second[0], TextFormat::XHTML};
373   }
374 
375   return LocalizedString{};
376 }
377 
findCase(const std::vector<std::string> & cases,std::string pluralExpression,::uint64_t amount)378 std::string WMessageResources::findCase(const std::vector<std::string> &cases,
379 					std::string pluralExpression,
380 					::uint64_t amount)
381   const
382 {
383 #ifdef WT_NO_SPIRIT
384   throw WException("WString::trn() requires the spirit library.");
385 #else
386   int c = evalPluralCase(pluralExpression, amount);
387 
388   if (c > (int)cases.size() - 1 || c < 0) {
389     WStringStream error;
390     error << "Expression '" << pluralExpression << "' evaluates to '"
391 	  << c << "' for n=" << std::to_string(amount);
392 
393     if (c < 0)
394       error << " and values smaller than 0 are not allowed.";
395     else
396       error << " which is greater than the list of cases (size="
397 	    << (int)cases.size() << ").";
398 
399     throw WException(error.c_str());
400   }
401 
402   return cases[c];
403 #endif // WT_NO_SPIRIT
404 }
405 
resolvePluralKey(const WLocale & locale,const std::string & key,::uint64_t amount)406 LocalizedString WMessageResources::resolvePluralKey(const WLocale& locale,
407 					 const std::string& key,
408 					 ::uint64_t amount) const
409 {
410   LocalizedString result = resolvePlural(locale.name(), key, amount);
411   if (result)
412     return result;
413 
414   return resolvePlural(std::string(), key, amount);
415 }
416 
resolvePlural(const std::string & locale,const std::string & key,::uint64_t amount)417 LocalizedString WMessageResources::resolvePlural(const std::string& locale,
418 				      const std::string& key,
419 				      ::uint64_t amount) const
420 {
421   if (resources_.find(locale) == resources_.end())
422     load(locale);
423 
424   Resource& res = resources_[locale];
425 
426   KeyValuesMap::const_iterator j = res.map_.find(key);
427   if (j != res.map_.end()) {
428     if (j->second.size() != res.pluralCount_ )
429       return LocalizedString{};
430     std::string result = findCase(j->second, res.pluralExpression_, amount);
431     return LocalizedString{result, TextFormat::XHTML};
432   } else
433     return LocalizedString{};
434 }
435 
readResourceFile(const std::string & locale,Resource & resource)436 bool WMessageResources::readResourceFile(const std::string& locale,
437 				         Resource& resource) const
438 {
439   if (!path_.empty()) {
440     std::string fileName
441       = path_ + (locale.length() > 0 ? "_" : "") + locale + ".xml";
442 
443     std::ifstream s(fileName.c_str(), std::ios::binary);
444     return readResourceStream(s, resource, fileName);
445   } else {
446     return false;
447   }
448 }
449 
readResourceStream(std::istream & s,Resource & resource,const std::string & fileName)450 bool WMessageResources::readResourceStream(std::istream &s,
451 					   Resource& resource,
452                                            const std::string &fileName) const
453 {
454   if (!s)
455     return false;
456 
457   s.seekg(0, std::ios::end);
458   int length = s.tellg();
459   s.seekg(0, std::ios::beg);
460 
461   enum { UTF8, UTF16LE, UTF16BE } encoding = UTF8;
462 
463   // See if we have UTF16 BOM
464   if (length >= 2) {
465     unsigned char m1, m2;
466     m1 = s.get();
467     m2 = s.get();
468 
469     if (m1 == 0xFE && m2 == 0xFF)
470       encoding = UTF16BE;
471     else if (m1 == 0xFF && m2 == 0xFE)
472       encoding = UTF16LE;
473     else {
474       s.seekg(0, std::ios::beg);
475     }
476   }
477 
478   std::unique_ptr<char[]> text
479     (new char[encoding == UTF8 ? length + 1 : (length-2)*2 + 1]);
480 
481   if (encoding != UTF8) {
482     // Transcode from UTF16 stream to CharEncoding::UTF8 text
483     const int BUFSIZE = 2048;
484     unsigned char buf[BUFSIZE];
485 
486     unsigned long firstWord = 0;
487     char *out = text.get();
488 
489     for (;;) {
490       s.read((char *)buf, BUFSIZE);
491       int read = s.gcount();
492 
493       for (int i = 0; i < read; i += 2) {
494 	unsigned long ch;
495 
496 	// read next 2-byte char
497 	if (encoding == UTF16LE) {
498 	  ch = buf[i+1];
499 	  ch = (ch << 8) | buf[i];
500 	} else {
501 	  ch = buf[i];
502 	  ch = (ch << 8) | buf[i+1];
503 	}
504 
505 	if (firstWord) {
506 	  // second word of multi-word
507 	  if (ch < 0xDC00 || ch > 0xDFFF) {
508 	    read = 0;
509 	    break;
510 	  }
511 
512 	  unsigned long cp = 0x10000 + (((firstWord & 0x3FF) << 10)
513 					| (ch & 0x3FF));
514 
515 	  Wt::rapidxml::xml_document<>::insert_coded_character<0>(out, cp);
516 
517 	  firstWord = 0;
518 	} else if (ch >= 0xD800 && ch <= 0xDBFF) {
519 	  // first word of multi-word
520 	  firstWord = ch;
521 	} else {
522 	  // single-word
523 	  Wt::rapidxml::xml_document<>::insert_coded_character<0>(out, ch);
524 
525 	  firstWord = 0;
526 	}
527       }
528 
529       if (read != BUFSIZE)
530 	break;
531     }
532 
533     length = out - text.get();
534   } else {
535     s.read(text.get(), length);
536   }
537 
538   text[length] = 0;
539 
540   try {
541     xml_document<> doc;
542     doc.parse<parse_no_string_terminators
543       | parse_comment_nodes
544       | parse_xhtml_entity_translation
545       | parse_validate_closing_tags>(text.get());
546 
547     xml_node<> *x_root = doc.first_node("messages");
548     if (!x_root)
549       throw parse_error("Expected <messages> root element", text.get());
550 
551     xml_attribute<> *x_nplurals = x_root->first_attribute("nplurals");
552     xml_attribute<> *x_plural = x_root->first_attribute("plural");
553     if (x_nplurals && !x_plural)
554       throw parse_error("Expected 'plural' attribute in <messages>",
555 			x_root->value());
556     if (x_plural && !x_nplurals)
557       throw parse_error("Expected 'nplurals' attribute in <messages>",
558 			x_root->value());
559     if (x_nplurals && x_plural) {
560       resource.pluralCount_ = attributeValueToInt(x_nplurals);
561       resource.pluralExpression_
562 	= std::string(x_plural->value(), x_plural->value_size());
563     } else {
564       resource.pluralCount_ = 0;
565     }
566 
567     // factor 2 in case we expanded <span/> to <span></span>
568     std::unique_ptr<char[]> buf(new char[length * 2]);
569 
570     for (xml_node<> *x_message = x_root->first_node("message");
571 	 x_message; x_message = x_message->next_sibling("message")) {
572       xml_attribute<> *x_id = x_message->first_attribute("id");
573       if (!x_id)
574 	throw parse_error("Missing message id", x_message->value());
575 
576       std::string id(x_id->value(), x_id->value_size());
577 
578       xml_node<> *x_plural = x_message->first_node("plural");
579       if (x_plural) {
580 	if (resource.pluralCount_ == 0)
581 	  throw parse_error("Expected 'nplurals' attribute in <message>",
582 			    x_plural->value());
583 
584 	resource.map_[id] = std::vector<std::string>();
585 	resource.map_[id].reserve(resource.pluralCount_);
586 
587 	std::vector<bool> visited;
588 	visited.reserve(resource.pluralCount_);
589 
590 	for (unsigned i = 0; i < resource.pluralCount_; i++) {
591 	  resource.map_[id].push_back(std::string());
592 	  visited.push_back(false);
593 	}
594 
595 	for (; x_plural; x_plural = x_plural->next_sibling("plural")) {
596 	  xml_attribute<> *x_case = x_plural->first_attribute("case");
597 	  int c = attributeValueToInt(x_case);
598 	  if (c >= (int)resource.pluralCount_)
599 	    throw parse_error("The attribute 'case' used in <plural> is greater"
600 			      " than the nplurals <messages> attribute.",
601 			      x_plural->value());
602 	  visited[c] = true;
603 	  resource.map_[id][c] = readElementContent(x_plural, buf);
604 	}
605 
606 	for (unsigned i = 0; i < resource.pluralCount_; i++)
607 	  if (!visited[i])
608 	    throw parse_error("Missing plural case in <message>",
609 			      x_message->value());
610       } else {
611 	resource.map_[id] = std::vector<std::string>();
612 	resource.map_[id].reserve(1);
613 	resource.map_[id].push_back(readElementContent(x_message, buf));
614       }
615     }
616   } catch (parse_error& e) {
617     LOG_ERROR("Error reading " << fileName
618 	      << ": at character " << (int)(e.where<char>() - text.get())
619 	      << ": " << e.what());
620   }
621 
622   return true;
623 }
624 
evalPluralCase(const std::string & expression,::uint64_t n)625 int WMessageResources::evalPluralCase(const std::string &expression,
626 				      ::uint64_t n)
627 {
628   int result = 0;
629 
630 #ifndef WT_NO_SPIRIT
631   CExpressionParser::ParseState state;
632   CExpressionParser p(n, result, state);
633   std::string tmp = expression;
634   parse(tmp.begin(), tmp.end(), p, space_p);
635 #endif // WT_NO_SPIRIT
636 
637   return result;
638 }
639 
640 }
641 #endif // WT_CNOR
642 
643