1 /*
2 * Copyright (C) 2008 Emweb bv, Herent, Belgium.
3 *
4 * See the LICENSE file for terms of use.
5 */
6 #ifndef WT_CNOR
7 #include <fstream>
8 #include <cstring>
9
10 #include "Wt/WLocale.h"
11 #include "Wt/WLogger.h"
12 #include "Wt/WMessageResources.h"
13 #include "Wt/WStringStream.h"
14
15 #include "DomElement.h"
16 #include "WebUtils.h"
17
18 #include "3rdparty/rapidxml/rapidxml.hpp"
19 #include "3rdparty/rapidxml/rapidxml_print.hpp"
20
21 using namespace Wt;
22 using namespace Wt::rapidxml;
23
24 #ifndef WT_NO_SPIRIT
25
26 #include <boost/version.hpp>
27
28 #if BOOST_VERSION < 103600
29 #include <boost/spirit.hpp>
30 #include <boost/spirit/phoenix/binders.hpp>
31 #else
32 #include <boost/spirit/include/classic_core.hpp>
33 #include <boost/spirit/include/classic_attribute.hpp>
34 #include <boost/spirit/include/phoenix1_binders.hpp>
35 #endif
36
37 namespace {
38
39 #if BOOST_VERSION < 103600
40 using namespace boost::spirit;
41 #else
42 using namespace boost::spirit::classic;
43 #endif
44 using namespace boost;
45
46 struct CExpressionParser : grammar<CExpressionParser>
47 {
48 struct ParseState
49 {
50 bool condition_;
51 };
52
CExpressionParserCExpressionParser53 CExpressionParser(::int64_t n, int &result, ParseState &state) :
54 n_(n),
55 result_(result),
56 state_(state)
57 {}
58
59 struct value_closure : closure<value_closure, ::int64_t, ::int64_t>
60 {
61 member1 value;
62 member2 condition;
63 };
64
65 template <typename ScannerT>
66 struct definition
67 {
definitionCExpressionParser::definition68 definition(CExpressionParser const& self)
69 {
70 using namespace boost::spirit;
71 using namespace phoenix;
72
73 group
74 = '('
75 >> expression[group.value = arg1]
76 >> ')'
77 ;
78
79 // A statement can end at the end of the line, or with a semicolon.
80 statement
81 = ( expression[bind(&CExpressionParser::set_result)(self, arg1)]
82 )
83 >> (end_p | ';')
84 ;
85
86 literal
87 = uint_p[literal.value = arg1]
88 ;
89
90 factor
91 = literal[factor.value = arg1]
92 | group[factor.value = arg1]
93 | ch_p('n')[factor.value = bind(&CExpressionParser::get_n)(self)]
94 ;
95
96 term
97 = factor[term.value = arg1]
98 >> *( ('*' >> factor[term.value *= arg1])
99 | ('/' >> factor[term.value /= arg1])
100 | ('%' >> factor[term.value %= arg1])
101 )
102 ;
103
104 additive_expression
105 = term[additive_expression.value = arg1]
106 >> *( ('+' >> term[additive_expression.value += arg1])
107 | ('-' >> term[additive_expression.value -= arg1])
108 )
109 ;
110
111 expression
112 = or_expression[expression.value = arg1]
113 [expression.condition = arg1]
114 >> !( '?'
115 >> expression[bind(&CExpressionParser::set_cond)
116 (self, expression.condition)]
117 [bind(&CExpressionParser::ternary_op)
118 (self, expression.value, arg1)]
119 >> ':'
120 >> expression[bind(&CExpressionParser::set_not_cond)
121 (self, expression.condition)]
122 [bind(&CExpressionParser::ternary_op)
123 (self, expression.value, arg1)]
124 )
125 ;
126
127 or_expression
128 = and_expression[or_expression.value = arg1]
129 >> *( "||" >> and_expression[bind(&CExpressionParser::or_op)
130 (self, or_expression.value, arg1)] )
131 ;
132
133 and_expression
134 = eq_expression[and_expression.value = arg1]
135 >> *( "&&" >> eq_expression[bind(&CExpressionParser::and_op)
136 (self, and_expression.value, arg1)] )
137 ;
138
139 eq_expression
140 = relational_expression[eq_expression.value = arg1]
141 >> *( ("==" >> relational_expression[bind(&CExpressionParser::eq_op)
142 (self,
143 eq_expression.value,
144 arg1)])
145 | ("!=" >> relational_expression[bind(&CExpressionParser::neq_op)
146 (self,
147 eq_expression.value,
148 arg1)])
149 )
150 ;
151
152 relational_expression
153 = additive_expression[relational_expression.value = arg1]
154 >> *( (">" >> additive_expression[bind(&CExpressionParser::gt_op)
155 (self,
156 relational_expression.value,
157 arg1)])
158 | (">=" >> additive_expression[bind(&CExpressionParser::gte_op)
159 (self,
160 relational_expression.value,
161 arg1)])
162 | ("<" >> additive_expression[bind(&CExpressionParser::lt_op)
163 (self,
164 relational_expression.value,
165 arg1)])
166 | ("<=" >> additive_expression[bind(&CExpressionParser::lte_op)
167 (self,
168 relational_expression.value,
169 arg1)])
170 )
171 ;
172 }
173
174 rule<ScannerT> const&
startCExpressionParser::definition175 start() const { return statement; }
176
177 rule<ScannerT> statement;
178 rule<ScannerT, value_closure::context_t> expression, factor,
179 group, literal, term, additive_expression, or_expression, and_expression,
180 eq_expression, relational_expression;
181 };
182
183 private:
get_nCExpressionParser184 ::int64_t get_n() const { return n_; }
185
eq_opCExpressionParser186 void eq_op(::int64_t &x, ::int64_t y) const { x = x == y; }
neq_opCExpressionParser187 void neq_op(::int64_t &x, ::int64_t y) const { x = x != y; }
188
lt_opCExpressionParser189 void lt_op(::int64_t &x, ::int64_t y) const { x = x < y;}
gt_opCExpressionParser190 void gt_op(::int64_t &x, ::int64_t y) const { x = x > y;}
lte_opCExpressionParser191 void lte_op(::int64_t &x, ::int64_t y) const { x = x <= y;}
gte_opCExpressionParser192 void gte_op(::int64_t &x, ::int64_t y) const { x = x >= y;}
193
ternary_opCExpressionParser194 void ternary_op(::int64_t &result, ::int64_t y) const
195 {
196 if (state_.condition_)
197 result = y;
198 }
199
set_condCExpressionParser200 void set_cond(::int64_t condition) const
201 {
202 state_.condition_ = condition;
203 }
204
set_not_condCExpressionParser205 void set_not_cond(::int64_t condition) const
206 {
207 state_.condition_ = !condition;
208 }
209
or_opCExpressionParser210 void or_op(::int64_t &x, ::int64_t y) const { x = x || y; }
and_opCExpressionParser211 void and_op(::int64_t &x, ::int64_t y) const { x = x && y; }
212
set_resultCExpressionParser213 void set_result(int result) const { result_ = result; }
214 private :
215 ::int64_t n_;
216 int &result_;
217 ParseState &state_;
218
219 public :
resultCExpressionParser220 int result() { return result_; }
221 };
222
223 } // anonymous namespace
224
225 #endif //WT_NO_SPIRIT
226
227 namespace {
fixSelfClosingTags(xml_node<> * x_node)228 void fixSelfClosingTags(xml_node<> *x_node)
229 {
230 for (xml_node<> *x_child = x_node->first_node(); x_child;
231 x_child = x_child->next_sibling())
232 fixSelfClosingTags(x_child);
233
234 if (!x_node->first_node()
235 && x_node->value_size() == 0
236 && !Wt::DomElement::isSelfClosingTag
237 (std::string(x_node->name(), x_node->name_size()))) {
238 // We need to add an emtpy data node since <div /> is illegal HTML
239 // (but valid XML / XHTML)
240 xml_node<> *empty = x_node->document()->allocate_node(node_data);
241 x_node->append_node(empty);
242 }
243 }
244
/*
 * Copies the characters in [begin, end) to out, front to back, and returns
 * the position just past the last character written.
 */
char *copy_chars(const char *begin, const char *end, char *out)
{
  for (const char *src = begin; src != end; ++src, ++out)
    *out = *src;

  return out;
}
251
readElementContent(xml_node<> * x_parent,std::unique_ptr<char[]> & buf)252 std::string readElementContent(xml_node<> *x_parent,
253 std::unique_ptr<char[]>& buf)
254 {
255 char *ptr = buf.get();
256
257 if (x_parent->type() == node_cdata) {
258 return std::string(x_parent->value(), x_parent->value_size());
259 } else {
260 for (xml_node<> *x_child = x_parent->first_node();
261 x_child; x_child = x_child->next_sibling()) {
262 if (x_child->type() == node_cdata) {
263 ptr = copy_chars(x_child->value(),
264 x_child->value() + x_child->value_size(), ptr);
265 } else {
266 fixSelfClosingTags(x_child);
267 ptr = print(ptr, *x_child, print_no_indenting);
268 }
269 }
270
271 return std::string(buf.get(), ptr - buf.get());
272 }
273 }
274
attributeValueToInt(xml_attribute<> * x_attribute)275 int attributeValueToInt(xml_attribute<> *x_attribute)
276 {
277 return Utils::stoi(std::string(x_attribute->value(),
278 x_attribute->value_size()));
279 }
280 }
281
282 namespace Wt {
283
284 LOGGER("WMessageResources");
285
WMessageResources(const std::string & path,bool loadInMemory)286 WMessageResources::WMessageResources(const std::string& path,
287 bool loadInMemory)
288 : loadInMemory_(loadInMemory),
289 path_(path),
290 builtin_(nullptr)
291 { }
292
WMessageResources(const char * builtin)293 WMessageResources::WMessageResources(const char *builtin)
294 : loadInMemory_(true),
295 builtin_(builtin)
296 {
297 std::istringstream s(builtin, std::ios::in | std::ios::binary);
298 readResourceStream(s, resources_[""], "<internal resource bundle>");
299 }
300
keys(const WLocale & locale)301 std::set<std::string> WMessageResources::keys(const WLocale& locale) const
302 {
303 load(locale);
304
305 std::set<std::string> keys;
306
307 for (auto& r : resources_) {
308 if (r.first == locale.name()) {
309 for (auto& k : r.second.map_)
310 keys.insert(k.first);
311 break;
312 }
313 }
314
315 return keys;
316 }
317
load(const WLocale & locale)318 void WMessageResources::load(const WLocale& locale) const
319 {
320 if (!path_.empty()) {
321 Resource& target = resources_[locale.name()];
322 std::string l = locale.name();
323
324 target.map_.clear();
325
326 for (;;) {
327 if (readResourceFile(l, target))
328 break;
329
330 /* try a lesser specified variant */
331 std::string::size_type i = l.rfind('-');
332 if (i != std::string::npos)
333 l.erase(i);
334 else {
335 if (locale.name().empty())
336 LOG_ERROR("Could not load resource bundle: " << path_ << ".xml");
337 break;
338 }
339 }
340 }
341 }
342
hibernate()343 void WMessageResources::hibernate()
344 {
345 if (!loadInMemory_) {
346 resources_.clear();
347 }
348 }
349
resolveKey(const WLocale & locale,const std::string & key)350 LocalizedString WMessageResources::resolveKey(const WLocale& locale, const std::string& key)
351 const
352 {
353 LocalizedString result = resolve(locale.name(), key);
354 if (result)
355 return result;
356
357 return resolve(std::string(), key);
358 }
359
resolve(const std::string & locale,const std::string & key)360 LocalizedString WMessageResources::resolve(const std::string& locale, const std::string& key)
361 const
362 {
363 if (resources_.find(locale) == resources_.end())
364 load(locale);
365
366 const Resource& res = resources_[locale];
367
368 KeyValuesMap::const_iterator j = res.map_.find(key);
369 if (j != res.map_.end()) {
370 if (j->second.size() > 1 )
371 return LocalizedString{};
372 return LocalizedString{j->second[0], TextFormat::XHTML};
373 }
374
375 return LocalizedString{};
376 }
377
findCase(const std::vector<std::string> & cases,std::string pluralExpression,::uint64_t amount)378 std::string WMessageResources::findCase(const std::vector<std::string> &cases,
379 std::string pluralExpression,
380 ::uint64_t amount)
381 const
382 {
383 #ifdef WT_NO_SPIRIT
384 throw WException("WString::trn() requires the spirit library.");
385 #else
386 int c = evalPluralCase(pluralExpression, amount);
387
388 if (c > (int)cases.size() - 1 || c < 0) {
389 WStringStream error;
390 error << "Expression '" << pluralExpression << "' evaluates to '"
391 << c << "' for n=" << std::to_string(amount);
392
393 if (c < 0)
394 error << " and values smaller than 0 are not allowed.";
395 else
396 error << " which is greater than the list of cases (size="
397 << (int)cases.size() << ").";
398
399 throw WException(error.c_str());
400 }
401
402 return cases[c];
403 #endif // WT_NO_SPIRIT
404 }
405
resolvePluralKey(const WLocale & locale,const std::string & key,::uint64_t amount)406 LocalizedString WMessageResources::resolvePluralKey(const WLocale& locale,
407 const std::string& key,
408 ::uint64_t amount) const
409 {
410 LocalizedString result = resolvePlural(locale.name(), key, amount);
411 if (result)
412 return result;
413
414 return resolvePlural(std::string(), key, amount);
415 }
416
resolvePlural(const std::string & locale,const std::string & key,::uint64_t amount)417 LocalizedString WMessageResources::resolvePlural(const std::string& locale,
418 const std::string& key,
419 ::uint64_t amount) const
420 {
421 if (resources_.find(locale) == resources_.end())
422 load(locale);
423
424 Resource& res = resources_[locale];
425
426 KeyValuesMap::const_iterator j = res.map_.find(key);
427 if (j != res.map_.end()) {
428 if (j->second.size() != res.pluralCount_ )
429 return LocalizedString{};
430 std::string result = findCase(j->second, res.pluralExpression_, amount);
431 return LocalizedString{result, TextFormat::XHTML};
432 } else
433 return LocalizedString{};
434 }
435
readResourceFile(const std::string & locale,Resource & resource)436 bool WMessageResources::readResourceFile(const std::string& locale,
437 Resource& resource) const
438 {
439 if (!path_.empty()) {
440 std::string fileName
441 = path_ + (locale.length() > 0 ? "_" : "") + locale + ".xml";
442
443 std::ifstream s(fileName.c_str(), std::ios::binary);
444 return readResourceStream(s, resource, fileName);
445 } else {
446 return false;
447 }
448 }
449
readResourceStream(std::istream & s,Resource & resource,const std::string & fileName)450 bool WMessageResources::readResourceStream(std::istream &s,
451 Resource& resource,
452 const std::string &fileName) const
453 {
454 if (!s)
455 return false;
456
457 s.seekg(0, std::ios::end);
458 int length = s.tellg();
459 s.seekg(0, std::ios::beg);
460
461 enum { UTF8, UTF16LE, UTF16BE } encoding = UTF8;
462
463 // See if we have UTF16 BOM
464 if (length >= 2) {
465 unsigned char m1, m2;
466 m1 = s.get();
467 m2 = s.get();
468
469 if (m1 == 0xFE && m2 == 0xFF)
470 encoding = UTF16BE;
471 else if (m1 == 0xFF && m2 == 0xFE)
472 encoding = UTF16LE;
473 else {
474 s.seekg(0, std::ios::beg);
475 }
476 }
477
478 std::unique_ptr<char[]> text
479 (new char[encoding == UTF8 ? length + 1 : (length-2)*2 + 1]);
480
481 if (encoding != UTF8) {
482 // Transcode from UTF16 stream to CharEncoding::UTF8 text
483 const int BUFSIZE = 2048;
484 unsigned char buf[BUFSIZE];
485
486 unsigned long firstWord = 0;
487 char *out = text.get();
488
489 for (;;) {
490 s.read((char *)buf, BUFSIZE);
491 int read = s.gcount();
492
493 for (int i = 0; i < read; i += 2) {
494 unsigned long ch;
495
496 // read next 2-byte char
497 if (encoding == UTF16LE) {
498 ch = buf[i+1];
499 ch = (ch << 8) | buf[i];
500 } else {
501 ch = buf[i];
502 ch = (ch << 8) | buf[i+1];
503 }
504
505 if (firstWord) {
506 // second word of multi-word
507 if (ch < 0xDC00 || ch > 0xDFFF) {
508 read = 0;
509 break;
510 }
511
512 unsigned long cp = 0x10000 + (((firstWord & 0x3FF) << 10)
513 | (ch & 0x3FF));
514
515 Wt::rapidxml::xml_document<>::insert_coded_character<0>(out, cp);
516
517 firstWord = 0;
518 } else if (ch >= 0xD800 && ch <= 0xDBFF) {
519 // first word of multi-word
520 firstWord = ch;
521 } else {
522 // single-word
523 Wt::rapidxml::xml_document<>::insert_coded_character<0>(out, ch);
524
525 firstWord = 0;
526 }
527 }
528
529 if (read != BUFSIZE)
530 break;
531 }
532
533 length = out - text.get();
534 } else {
535 s.read(text.get(), length);
536 }
537
538 text[length] = 0;
539
540 try {
541 xml_document<> doc;
542 doc.parse<parse_no_string_terminators
543 | parse_comment_nodes
544 | parse_xhtml_entity_translation
545 | parse_validate_closing_tags>(text.get());
546
547 xml_node<> *x_root = doc.first_node("messages");
548 if (!x_root)
549 throw parse_error("Expected <messages> root element", text.get());
550
551 xml_attribute<> *x_nplurals = x_root->first_attribute("nplurals");
552 xml_attribute<> *x_plural = x_root->first_attribute("plural");
553 if (x_nplurals && !x_plural)
554 throw parse_error("Expected 'plural' attribute in <messages>",
555 x_root->value());
556 if (x_plural && !x_nplurals)
557 throw parse_error("Expected 'nplurals' attribute in <messages>",
558 x_root->value());
559 if (x_nplurals && x_plural) {
560 resource.pluralCount_ = attributeValueToInt(x_nplurals);
561 resource.pluralExpression_
562 = std::string(x_plural->value(), x_plural->value_size());
563 } else {
564 resource.pluralCount_ = 0;
565 }
566
567 // factor 2 in case we expanded <span/> to <span></span>
568 std::unique_ptr<char[]> buf(new char[length * 2]);
569
570 for (xml_node<> *x_message = x_root->first_node("message");
571 x_message; x_message = x_message->next_sibling("message")) {
572 xml_attribute<> *x_id = x_message->first_attribute("id");
573 if (!x_id)
574 throw parse_error("Missing message id", x_message->value());
575
576 std::string id(x_id->value(), x_id->value_size());
577
578 xml_node<> *x_plural = x_message->first_node("plural");
579 if (x_plural) {
580 if (resource.pluralCount_ == 0)
581 throw parse_error("Expected 'nplurals' attribute in <message>",
582 x_plural->value());
583
584 resource.map_[id] = std::vector<std::string>();
585 resource.map_[id].reserve(resource.pluralCount_);
586
587 std::vector<bool> visited;
588 visited.reserve(resource.pluralCount_);
589
590 for (unsigned i = 0; i < resource.pluralCount_; i++) {
591 resource.map_[id].push_back(std::string());
592 visited.push_back(false);
593 }
594
595 for (; x_plural; x_plural = x_plural->next_sibling("plural")) {
596 xml_attribute<> *x_case = x_plural->first_attribute("case");
597 int c = attributeValueToInt(x_case);
598 if (c >= (int)resource.pluralCount_)
599 throw parse_error("The attribute 'case' used in <plural> is greater"
600 " than the nplurals <messages> attribute.",
601 x_plural->value());
602 visited[c] = true;
603 resource.map_[id][c] = readElementContent(x_plural, buf);
604 }
605
606 for (unsigned i = 0; i < resource.pluralCount_; i++)
607 if (!visited[i])
608 throw parse_error("Missing plural case in <message>",
609 x_message->value());
610 } else {
611 resource.map_[id] = std::vector<std::string>();
612 resource.map_[id].reserve(1);
613 resource.map_[id].push_back(readElementContent(x_message, buf));
614 }
615 }
616 } catch (parse_error& e) {
617 LOG_ERROR("Error reading " << fileName
618 << ": at character " << (int)(e.where<char>() - text.get())
619 << ": " << e.what());
620 }
621
622 return true;
623 }
624
evalPluralCase(const std::string & expression,::uint64_t n)625 int WMessageResources::evalPluralCase(const std::string &expression,
626 ::uint64_t n)
627 {
628 int result = 0;
629
630 #ifndef WT_NO_SPIRIT
631 CExpressionParser::ParseState state;
632 CExpressionParser p(n, result, state);
633 std::string tmp = expression;
634 parse(tmp.begin(), tmp.end(), p, space_p);
635 #endif // WT_NO_SPIRIT
636
637 return result;
638 }
639
640 }
641 #endif // WT_CNOR
642
643