1 /*
2  * This file is part of StarDict.
3  *
4  * StarDict is free software: you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License as published by
6  * the Free Software Foundation, either version 3 of the License, or
7  * (at your option) any later version.
8  *
9  * StarDict is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with StarDict.  If not, see <http://www.gnu.org/licenses/>.
16  */
17 
18 #ifdef HAVE_CONFIG_H
19 #  include "config.h"
20 #endif
21 
22 #include <libxml/xmlreader.h>
23 #include <libxml/xinclude.h>
24 #include <cstring>
25 #include <iostream>
26 #include <vector>
27 #include <algorithm>
28 #include "lib_stardict_text2bin.h"
29 #include "ifo_file.h"
30 #include "libcommon.h"
31 #include "lib_chars.h"
32 #include "lib_common_dict.h"
33 #include "lib_textual_dict_parser.h"
34 #include "lib_dict_verify.h"
35 
/* Error and warning message format strings (printf-style, passed to g_critical / g_warning).
 * Messages that begin with "%u:" take the source line number as their first argument. */
#define parser_err \
	"Parser error."
#define duplicate_elem_err \
	"%u: duplicate element %s."
#define attribute_value_not_specified_err \
	"%u: attribute '%s' is not specified."
#define attribute_value_unknown_err \
	"%u: unknown value of the attribute '%s': %s."
#define element_blank_value \
	"%u: element '%s' has blank value."
#define attribute_content_type_absent_err \
	"%u: Content type attribute is not specified neither for the definition element, " \
	"nor for any element above."
#define attribute_value_empty_err \
	"%u: value of the attribute '%s' is an empty string."
#define save_into_temp_file_err \
	"Unable to save into a temporary file."
#define duplicate_key_err \
	"%u: duplicate key: %s."
#define duplicate_synonym_err \
		"%u: duplicate synonym: %s."
#define incorrect_dir_sep_err \
		"%u: Incorrect directory separator, use '/' char."
#define resource_list_empty_err \
		"%u: empty resource list."
#define article_key_list_empty_err \
		"%u: article: empty key list."
#define article_definition_list_empty_err \
		"%u: article: empty definition list."
#define missing_info_section_err \
		"missing info section."
#define article_list_empty \
	"article list empty."
#define unknown_content_type_str_err \
		"%u: unknown content type '%s'. It must be one ASCII char."
#define unknown_content_type_chr_err \
	"%u: unknown content type '%c'."
#define content_type_r_in_definition_err \
		"%u: use 'definition-r' element for content type 'r'."
#define element_invalid_utf8_value_err \
		"%u: element %s. Invalid utf-8 text value: %s."
#define article_key_forbidden_chars_err \
	"%u: key contains forbidden chars: %s."
#define article_synonym_forbidden_chars_err \
	"%u: synonym contains forbidden chars: %s."
#define normalization_failed_err \
		"%u: utf8 normalization failed. String: %s."
/* The two "too long" messages take: line number, the string, its length, the maximum length. */
#define article_key_long_err \
		"%u: Key is too long: %s. Key length = %u, maximum length = %d."
#define article_synonym_long_err \
		"%u: Synonym is too long: %s. Synonym length = %u, maximum length = %d."
/* Ends with invalid_chars_in_textual_data_msg, a string literal that is not defined in this
 * part of the file (presumably it comes from one of the included project headers). */
#define element_invalid_text_err \
		"%u: element '%s'. Invalid text: ''\n%s\n'''\n" \
		invalid_chars_in_textual_data_msg

/* Informational message format strings. */
#define missing_req_info_item_msg \
	"missing required info item %s."
#define parse_xml_done_msg \
	"xml parsing done."
#define allow_unknown_content_type_msg \
	"OK, allow unknown content type."
#define xinclude_process_msg \
		"%u: processing xinclude: %s"
99 
namespace xml {
	/* ResourceWrapper needs an address of a function with external linkage.
	 * xmlFree is a global variable - a pointer to a function.
	 * Provide a wrapper function.
	 *
	 * p - memory block to release; it is passed to xmlFree unchanged. */
	void stardict_xmlFree(void* p)
	{
		xmlFree(p);
	}
	/* Holder for xmlChar strings obtained from libxml2 (e.g. xmlTextReaderName).
	 * ResourceWrapper is declared elsewhere; presumably it calls stardict_xmlFree
	 * on the held pointer when the holder is destroyed. */
	typedef ResourceWrapper<xmlChar, void*, void, stardict_xmlFree> CharStr;
}
110 
note_type_str(xmlReaderTypes type)111 static const char* note_type_str(xmlReaderTypes type)
112 {
113 	return note_type_str(type);
114 }
115 
note_type_str(int type)116 static const char* note_type_str(int type)
117 {
118 	const char* types[] = {
119 		"NONE",
120 		"ELEMENT",
121 		"ATTRIBUTE",
122 		"TEXT",
123 		"CDATA",
124 		"ENTITY_REFERENCE",
125 		"ENTITY",
126 		"PROCESSING_INSTRUCTION",
127 		"COMMENT",
128 		"DOCUMENT",
129 		"DOCUMENT_TYPE",
130 		"DOCUMENT_FRAGMENT",
131 		"NOTATION",
132 		"WHITESPACE",
133 		"SIGNIFICANT_WHITESPACE",
134 		"END_ELEMENT",
135 		"END_ENTITY",
136 		"XML_DECLARATION"
137 	};
138 	if(type < 0 || type > XML_READER_TYPE_XML_DECLARATION)
139 		return "ERROR";
140 	return types[type];
141 }
142 
#if 0
/* Debugging aid, currently compiled out.
 * Prints one line describing the node the reader is positioned on: line number,
 * depth, node type name, node name, empty-element flag, has-attributes flag,
 * attribute count and, when the node has one, its value in quotes. */
static void processNode(xmlTextReaderPtr reader) {
    xmlChar *name, *value;

    name = xmlTextReaderName(reader);
    // substitute a placeholder so that the %s below always has a string to print
    if (name == NULL)
        name = xmlStrdup(BAD_CAST "--");
    value = xmlTextReaderValue(reader);
    xmlNodePtr node = xmlTextReaderCurrentNode(reader);

    printf("%hu: %d %s %s %d %d %d",
    		node ? node->line : 0,
            xmlTextReaderDepth(reader),
            note_type_str(xmlTextReaderNodeType(reader)),
            name,
            xmlTextReaderIsEmptyElement(reader),
            xmlTextReaderHasAttributes(reader),
            xmlTextReaderAttributeCount(reader)
            );
    xmlFree(name);
    if (value == NULL)
        printf("\n");
    else {
        printf(" '%s'\n", value);
        xmlFree(value);
    }
}
#endif
171 
/* Implementation details
 *
 * Most of the elements have a special method that reads that particular element.
 * For example, we have read_article, read_contents, etc.
 * The method is invoked after the start tag of the element has been read.
 * For example, after reading the article start tag we invoke the read_article method.
 * The method in question must read its element completely and return a logical result:
 * EXIT_FAILURE or EXIT_SUCCESS.
 * A method may correct small errors that do not affect the rest of the document.
 * If a method returns EXIT_SUCCESS, the following elements can be read without problems.
 * If a method returns EXIT_FAILURE, we should terminate the parsing process immediately.
 * */
/* Reader of a textual (XML) dictionary.
 * parse() walks the document with a libxml2 xmlTextReader and stores what it reads
 * into the common_dict_t object supplied by the caller. */
class textual_dict_parser_t
{
public:
	textual_dict_parser_t(void);
	/* Parse the file xmlfilename into norm_dict.
	 * Returns EXIT_FAILURE or EXIT_SUCCESS. */
	int parse(const std::string& xmlfilename, common_dict_t* norm_dict);
	/* true removes XML_PARSE_XINCLUDE from the reader options, false adds it back. */
	void set_custom_include(bool b);
	/* true when XML_PARSE_XINCLUDE is not among the reader options. */
	bool get_custom_include(void) const;
private:
	/* Negative results of the read_xml_* helpers. read_xml_element returns a
	 * non-negative index into its exp_elems argument when an expected element is read. */
	enum ReadResult { rrEOF = -1, rrError = -2, rrEndElement = -3};
	typedef unsigned int line_number_t;
	// parser life cycle
	int prepare_parser(void);
	void close_parser(void);
	void remove_reader(void);
	// one reader method per element, see "Implementation details" above
	int read_all(void);
	int read_stardict(void);
	int read_info(void);
	int read_info_items(void);
	int read_info_item(const char* elem, bool (DictInfo::* is_item)(void) const,
		void (DictInfo:: *set_item)(const std::string& ));
	int read_contents(void);
	int read_article(void);
	int read_contents_r(article_def_t& article_def);
	TLoadResult read_att_type(char& type);
	// validation of the data that was read
	int check_blank_info_items(void);
	int check_new_line_info_items(void);
	int check_mandatory_info_items(void);
	int check_article_key(const std::string& value, line_number_t article_item_line_number);
	int check_article_synonym(const std::string& value, line_number_t article_item_line_number);
	// low-level access to the xml reader
	int read_xml_element(const char** exp_elems, const char* open_elem);
	int read_xml_end_element(const char* open_elem);
	int read_xml_text_item(const char* elem, std::string& value);
	int read_xml_text(const char* open_elem);
	int read_xml_attribute(const char* att_name, std::string& value);
	void check_blank_info_item(const char* elem, bool (DictInfo::* is_item)(void) const,
		const std::string& (DictInfo:: *get_item)(void) const, void (DictInfo:: *unset_item)(void));
	int check_new_line_info_item(const char* elem, bool (DictInfo::* is_item)(void) const,
			const std::string& (DictInfo:: *get_item)(void) const);
	char get_effective_content_type(void) const;
	// error reporting helpers
	void unexpected_node_type_element(const char** exp_elems);
	void unexpected_node_type_text(void);
	void unexpected_node_type_end_element(const char* open_elem);
	void unexpected_element(const char** exp_elems);
	void unexpected_eof(const char** exp_elems);
	void unexpected_eof(void);
	void unexpected_end_element(const char** exp_elems, const char* open_elem);
	void unmatched_end_element(const char* open_elem, const char* end_elem);
	void unexpected_empty_element(void);
	void unexpected_non_empty_element(void);
	line_number_t get_line_number(void);
	int next_node(void);
	/* file being parsed; set by parse(), cleared by close_parser() */
	std::string xmlfilename;
	/* reader created by prepare_parser(), freed by remove_reader(); NULL when idle */
	xmlTextReaderPtr xml_reader;
	/* XInclude context, freed by remove_reader(); NULL when idle.
	 * It is not created in the part of the file documented here. */
	xmlXIncludeCtxtPtr xincctxt;
	static const int default_reader_options =
		XML_PARSE_NOENT | XML_PARSE_NOBLANKS | XML_PARSE_NOCDATA | XML_PARSE_XINCLUDE;
	/* options passed to xmlReaderForFile, see set_custom_include() */
	int reader_options;
	/* An entry of the element stack: element name plus its content type. */
	struct elem_data_t
	{
		elem_data_t(const char* name, char content_type = 0)
		:
			name(name),
			content_type(content_type)
		{

		}
		/* the pointer is stored as is, the string is not copied */
		const char* name;
		char content_type;
	private:
		// declared but not defined in this part of the file: an entry always has a name
		elem_data_t(void);
	};
	/* Stack of elements.
	 * Each time a new element start tag is read, the name of the read element is added to the stack.
	 * When an end element is read, the top element of the stack is removed.
	 * Some elements (article, contents, definition) may have a content type attribute.
	 * The value of this attribute is one char that is assigned to the content_type field.
	 * For other elements and when the attribute in question is not specified, the content_type field
	 * must be = 0. */
	std::vector<elem_data_t> elem_stack;
	/* destination dictionary; set by parse(), reset to NULL by close_parser() */
	common_dict_t* norm_dict;
};
264 
textual_dict_parser_t(void)265 textual_dict_parser_t::textual_dict_parser_t(void)
266 :
267 	xml_reader(NULL),
268 	xincctxt(NULL),
269 	reader_options(default_reader_options),
270 	norm_dict(NULL)
271 {
272 
273 }
274 
275 /* Return value:
276  * EXIT_FAILURE or EXIT_SUCCESS */
parse(const std::string & xmlfilename,common_dict_t * norm_dict)277 int textual_dict_parser_t::parse(const std::string& xmlfilename,
278 		common_dict_t* norm_dict)
279 {
280 	this->xmlfilename = xmlfilename;
281 	this->norm_dict = norm_dict;
282 
283 	auto_executor_t<textual_dict_parser_t> auto_exec(*this, &textual_dict_parser_t::close_parser);
284 	if(prepare_parser())
285 		return EXIT_FAILURE;
286 	g_message("processing %s...", xmlfilename.c_str());
287 	if(read_all())
288 		return EXIT_FAILURE;
289 #if 0
290 	int ret = next_node();
291 	while (ret == 1) {
292 		processNode(xml_reader);
293 		ret = next_node();
294 	}
295 	if (ret != 0) {
296 		g_critical("%s : failed to parse.", xmlfilename.c_str());
297 		return EXIT_FAILURE;
298 	}
299 #endif
300 	g_message(parse_xml_done_msg);
301 	return EXIT_SUCCESS;
302 }
303 
set_custom_include(bool b)304 void textual_dict_parser_t::set_custom_include(bool b)
305 {
306 	if(b)
307 		reader_options &= ~XML_PARSE_XINCLUDE;
308 	else
309 		reader_options |= XML_PARSE_XINCLUDE;
310 }
311 
get_custom_include(void) const312 bool textual_dict_parser_t::get_custom_include(void) const
313 {
314 	return (reader_options & XML_PARSE_XINCLUDE) == 0;
315 }
316 
317 /* Return value:
318  * EXIT_FAILURE or EXIT_SUCCESS */
prepare_parser(void)319 int textual_dict_parser_t::prepare_parser(void)
320 {
321 	remove_reader();
322 	xml_reader = xmlReaderForFile(xmlfilename.c_str(), NULL,
323 		reader_options);
324 	if (!xml_reader) {
325 		g_critical(open_read_file_err, xmlfilename.c_str(), "");
326 		return EXIT_FAILURE;
327 	}
328 	if(!norm_dict) {
329 		g_critical("norm_dict == NULL.");
330 		return EXIT_FAILURE;
331 	}
332 	if(norm_dict->reset())
333 		return EXIT_FAILURE;
334 	return EXIT_SUCCESS;
335 }
336 
close_parser(void)337 void textual_dict_parser_t::close_parser(void)
338 {
339 	remove_reader();
340 	norm_dict = NULL;
341 	xmlfilename.clear();
342 }
343 
remove_reader(void)344 void textual_dict_parser_t::remove_reader(void)
345 {
346 	if(xml_reader)
347 		xmlFreeTextReader(xml_reader);
348 	xml_reader = NULL;
349 	if(xincctxt)
350 		xmlXIncludeFreeContext(xincctxt);
351 	xincctxt = NULL;
352 }
353 
354 /* Return value:
355  * EXIT_FAILURE or EXIT_SUCCESS */
read_all(void)356 int textual_dict_parser_t::read_all(void)
357 {
358 	const char* const open_elem = "";
359 	static const char* exp_elems[] = {
360 		"stardict",
361 		NULL
362 	};
363 	int ret = read_xml_element(exp_elems, open_elem);
364 	if(ret == rrEOF) {
365 		unexpected_eof(exp_elems);
366 		return EXIT_FAILURE;
367 	}
368 	if(ret == rrError)
369 		return EXIT_FAILURE;
370 	if(ret == rrEndElement) {
371 		unexpected_end_element(exp_elems, open_elem);
372 		return EXIT_FAILURE;
373 	}
374 	if(xmlTextReaderIsEmptyElement(xml_reader)) {
375 		unexpected_empty_element();
376 		return EXIT_FAILURE;
377 	}
378 	g_assert(ret == 0);
379 	elem_stack.push_back(elem_data_t(exp_elems[0]));
380 	if(read_stardict())
381 		return EXIT_FAILURE;
382 	return EXIT_SUCCESS;
383 }
384 
385 /* read root stardict element
386  * Return value:
387  * EXIT_FAILURE or EXIT_SUCCESS */
read_stardict(void)388 int textual_dict_parser_t::read_stardict(void)
389 {
390 	static const char* exp_elems[] = {
391 		"info",
392 		"contents",
393 		"article",
394 		NULL
395 	};
396 	bool have_info = false;
397 	while(true) {
398 		int ret = read_xml_element(exp_elems, elem_stack.back().name);
399 		if(ret == rrEOF) {
400 			unexpected_eof(exp_elems);
401 			return EXIT_FAILURE;
402 		}
403 		if(ret == rrError)
404 			return EXIT_FAILURE;
405 		if(ret == rrEndElement) {
406 			elem_stack.pop_back();
407 			break;
408 		}
409 		if(xmlTextReaderIsEmptyElement(xml_reader)) {
410 			unexpected_empty_element();
411 			return EXIT_FAILURE;
412 		}
413 		elem_stack.push_back(elem_data_t(exp_elems[ret]));
414 		switch(ret) {
415 		case 0:
416 			if(have_info) {
417 				g_critical(duplicate_elem_err, get_line_number(), exp_elems[ret]);
418 				return EXIT_FAILURE;
419 			}
420 			if(read_info())
421 				return EXIT_FAILURE;
422 			have_info = true;
423 			break;
424 		case 1:
425 			if(read_contents())
426 				return EXIT_FAILURE;
427 			break;
428 		case 2:
429 			if(read_article())
430 				return EXIT_FAILURE;
431 			break;
432 		default:
433 			g_assert_not_reached();
434 		}
435 	}
436 	if(!have_info) {
437 		g_critical(missing_info_section_err);
438 		return EXIT_FAILURE;
439 	}
440 	if(norm_dict->articles.empty()) {
441 		g_critical(article_list_empty);
442 		return EXIT_FAILURE;
443 	}
444 	g_message("total articles: %lu.", static_cast<unsigned long>(norm_dict->articles.size()));
445 	return EXIT_SUCCESS;
446 }
447 
448 /* Return value:
449  * EXIT_FAILURE or EXIT_SUCCESS */
read_info(void)450 int textual_dict_parser_t::read_info(void)
451 {
452 	norm_dict->dict_info.clear();
453 	if(read_info_items())
454 		return EXIT_FAILURE;
455 	if(check_blank_info_items())
456 		return EXIT_FAILURE;
457 	if(check_new_line_info_items())
458 		return EXIT_FAILURE;
459 	if(check_mandatory_info_items())
460 		return EXIT_FAILURE;
461 	return EXIT_SUCCESS;
462 }
463 
464 /* Return value:
465  * EXIT_FAILURE or EXIT_SUCCESS */
read_info_items(void)466 int textual_dict_parser_t::read_info_items(void)
467 {
468 	static const char* exp_elems[] = {
469 		"version",
470 		"bookname",
471 		"author",
472 		"email",
473 		"website",
474 		"description",
475 		"date",
476 		"dicttype",
477 		NULL
478 	};
479 	while(true) {
480 		int ret = read_xml_element(exp_elems, elem_stack.back().name);
481 		if(ret == rrEOF) {
482 			unexpected_eof(exp_elems);
483 			return EXIT_FAILURE;
484 		}
485 		if(ret == rrError)
486 			return EXIT_FAILURE;
487 		if(ret == rrEndElement) {
488 			elem_stack.pop_back();
489 			break;
490 		}
491 		if(xmlTextReaderIsEmptyElement(xml_reader)) {
492 			unexpected_empty_element();
493 			return EXIT_FAILURE;
494 		}
495 		// do not change elem_stack for leaf elements
496 		switch(ret) {
497 		case 0:
498 			if(read_info_item("version", &DictInfo::is_version, &DictInfo::set_version))
499 				return EXIT_FAILURE;
500 			break;
501 		case 1:
502 			if(read_info_item("bookname", &DictInfo::is_bookname, &DictInfo::set_bookname))
503 				return EXIT_FAILURE;
504 			break;
505 		case 2:
506 			if(read_info_item("author", &DictInfo::is_author, &DictInfo::set_author))
507 				return EXIT_FAILURE;
508 			break;
509 		case 3:
510 			if(read_info_item("email", &DictInfo::is_email, &DictInfo::set_email))
511 				return EXIT_FAILURE;
512 			break;
513 		case 4:
514 			if(read_info_item("website", &DictInfo::is_website, &DictInfo::set_website))
515 				return EXIT_FAILURE;
516 			break;
517 		case 5:
518 			if(read_info_item("description", &DictInfo::is_description, &DictInfo::set_description))
519 				return EXIT_FAILURE;
520 			break;
521 		case 6:
522 			if(read_info_item("date", &DictInfo::is_date, &DictInfo::set_date))
523 				return EXIT_FAILURE;
524 			break;
525 		case 7:
526 			if(read_info_item("dicttype", &DictInfo::is_dicttype, &DictInfo::set_dicttype))
527 				return EXIT_FAILURE;
528 			break;
529 		default:
530 			g_assert_not_reached();
531 		}
532 	}
533 	return EXIT_SUCCESS;
534 }
535 
536 /* Return value:
537  * EXIT_FAILURE or EXIT_SUCCESS */
read_info_item(const char * elem,bool (DictInfo::* is_item)(void)const,void (DictInfo::* set_item)(const std::string &))538 int textual_dict_parser_t::read_info_item(const char* elem, bool (DictInfo::* is_item)(void) const,
539 	void (DictInfo:: *set_item)(const std::string& ))
540 {
541 	bool ignore = false;
542 	if((norm_dict->dict_info.*is_item)()) {
543 		g_warning(duplicate_elem_err, get_line_number(), elem);
544 		g_message(fixed_ignore_msg);
545 		ignore = true;
546 	}
547 	std::string value;
548 	if(read_xml_text_item(elem, value))
549 		return EXIT_FAILURE;
550 	if(value.empty()) {
551 		g_warning(element_blank_value, get_line_number(), elem);
552 		g_message(fixed_ignore_msg);
553 		return EXIT_SUCCESS;
554 	}
555 	if(!ignore) {
556 		(norm_dict->dict_info.*set_item)(value.c_str());
557 	}
558 	return EXIT_SUCCESS;
559 }
560 
561 /* Return value:
562  * EXIT_FAILURE or EXIT_SUCCESS */
read_contents(void)563 int textual_dict_parser_t::read_contents(void)
564 {
565 	{
566 		char content_type;
567 		switch(read_att_type(content_type))
568 		{
569 		case lrOK:
570 			elem_stack.back().content_type = content_type;
571 			break;
572 		case lrError:
573 			return EXIT_FAILURE;
574 		case lrNotFound:
575 			break;
576 		}
577 	}
578 	static const char* exp_elems[] = {
579 		"contents",
580 		"article",
581 		NULL
582 	};
583 	while(true) {
584 		int ret = read_xml_element(exp_elems, elem_stack.back().name);
585 		if(ret == rrEOF) {
586 			unexpected_eof(exp_elems);
587 			return EXIT_FAILURE;
588 		}
589 		if(ret == rrError)
590 			return EXIT_FAILURE;
591 		if(ret == rrEndElement) {
592 			elem_stack.pop_back();
593 			break;
594 		}
595 		if(xmlTextReaderIsEmptyElement(xml_reader)) {
596 			unexpected_empty_element();
597 			return EXIT_FAILURE;
598 		}
599 		elem_stack.push_back(elem_data_t(exp_elems[ret]));
600 		switch(ret) {
601 		case 0:
602 			if(read_contents())
603 				return EXIT_FAILURE;
604 			break;
605 		case 1:
606 			if(read_article())
607 				return EXIT_FAILURE;
608 			break;
609 		default:
610 			g_assert_not_reached();
611 		}
612 	}
613 	return EXIT_SUCCESS;
614 }
615 
/* Read an 'article' element: an optional 'type' attribute followed by 'key',
 * 'synonym', 'definition' and 'definition-r' elements in any order.
 * Text of a definition is written through norm_dict->write_data; the article
 * is added to norm_dict only if it has at least one key and one definition,
 * otherwise it is skipped with a warning.
 *
 * Return value:
 * EXIT_FAILURE or EXIT_SUCCESS */
int textual_dict_parser_t::read_article(void)
{
	// remembered for messages that are issued after the whole article is read
	const line_number_t article_beg_line_number = get_line_number();
	{
		// content type of the article itself, kept in its stack entry
		char content_type;
		switch(read_att_type(content_type))
		{
		case lrOK:
			elem_stack.back().content_type = content_type;
			break;
		case lrError:
			return EXIT_FAILURE;
		case lrNotFound:
			break;
		}
	}
	static const char* exp_elems[] = {
		"key",
		"synonym",
		"definition",
		"definition-r",
		NULL
	};
	article_data_t article;
	/* for simplicity we do not check the order of elements */
	while(true) {
		int ret = read_xml_element(exp_elems, elem_stack.back().name);
		if(ret == rrEOF) {
			unexpected_eof(exp_elems);
			return EXIT_FAILURE;
		}
		if(ret == rrError)
			return EXIT_FAILURE;
		if(ret == rrEndElement) {
			// </article>
			elem_stack.pop_back();
			break;
		}
		if(xmlTextReaderIsEmptyElement(xml_reader)) {
			unexpected_empty_element();
			return EXIT_FAILURE;
		}
		const line_number_t article_item_line_number = get_line_number();
		// assigned and used only for the 'definition' element (ret == 2)
		char content_type;
		if(ret == 2) { // read 'type' attribute of the 'definition' element
			switch(read_att_type(content_type))
			{
			case lrOK:
				break;
			case lrError:
				return EXIT_FAILURE;
			case lrNotFound:
				// fall back to the type provided by get_effective_content_type()
				content_type = get_effective_content_type();
				break;
			}
			if(content_type == 0) {
				g_critical(attribute_content_type_absent_err,
					get_line_number());
				return EXIT_FAILURE;
			}
		}
		switch(ret) {
		// key, synonym and definition are leaf elements with text;
		// they are not pushed onto elem_stack
		case 0:
		case 1:
		case 2:
		{
			std::string value;
			if(read_xml_text_item(exp_elems[ret], value))
				return EXIT_FAILURE;
			if(value.empty()) {
				// a blank item is dropped, the article itself may still be valid
				g_warning(element_blank_value, get_line_number(), exp_elems[ret]);
				g_message(fixed_ignore_msg);
				break;
			}
			if(ret == 0) {
				if(check_article_key(value, article_item_line_number))
					return EXIT_FAILURE;
				// a non-zero result of add_key is reported as a duplicate key
				if(article.add_key(value)) {
					g_critical(duplicate_key_err, get_line_number(), value.c_str());
					return EXIT_FAILURE;
				}
			}
			if(ret == 1) {
				if(check_article_synonym(value, article_item_line_number))
					return EXIT_FAILURE;
				// a non-zero result of add_synonym is reported as a duplicate synonym
				if(article.add_synonym(value)) {
					g_critical(duplicate_synonym_err, get_line_number(), value.c_str());
					return EXIT_FAILURE;
				}
			}
			if(ret == 2) {
				// write_data fills in offset; the definition keeps (type, offset, size)
				size_t size = value.length(), offset;
				if(norm_dict->write_data(value.c_str(), size, offset)) {
					g_critical(save_into_temp_file_err);
					return EXIT_FAILURE;
				}
				if(article.add_definition(article_def_t(content_type, offset, size)))
					return EXIT_FAILURE;
			}
			break;
		}
		case 3:
		{
			// definition-r has child elements; read_contents_r pops this entry at the end tag
			elem_stack.push_back(elem_data_t(exp_elems[ret]));
			article_def_t def;
			if(read_contents_r(def))
				return EXIT_FAILURE;
			if(article.add_definition(def))
				return EXIT_FAILURE;
			break;
		}
		default:
			g_assert_not_reached();
		}
	}
	// an incomplete article is not an error: warn, skip it and go on
	if(article.key.empty()) {
		g_warning(article_key_list_empty_err, article_beg_line_number);
		g_message(fixed_ignore_msg);
		return EXIT_SUCCESS;
	}
	if(article.definitions.empty()) {
		g_warning(article_definition_list_empty_err, article_beg_line_number);
		g_message(fixed_ignore_msg);
		return EXIT_SUCCESS;
	}
	if(norm_dict->add_article(article))
		return EXIT_FAILURE;
	return EXIT_SUCCESS;
}
746 
747 /* Return value:
748  * EXIT_FAILURE or EXIT_SUCCESS */
read_contents_r(article_def_t & article_def)749 int textual_dict_parser_t::read_contents_r(article_def_t& article_def)
750 {
751 	const line_number_t contents_beg_line_number = get_line_number();
752 	article_def.type = 'r';
753 	static const char* exp_elems[] = {
754 		"resource",
755 		NULL
756 	};
757 	while(true) {
758 		int ret = read_xml_element(exp_elems, elem_stack.back().name);
759 		if(ret == rrEOF) {
760 			unexpected_eof(exp_elems);
761 			return EXIT_FAILURE;
762 		}
763 		if(ret == rrError)
764 			return EXIT_FAILURE;
765 		if(ret == rrEndElement) {
766 			elem_stack.pop_back();
767 			break;
768 		}
769 		if(!xmlTextReaderIsEmptyElement(xml_reader)) {
770 			unexpected_non_empty_element();
771 			return EXIT_FAILURE;
772 		}
773 		g_assert(ret == 0);
774 		const char* att_name = "key";
775 		std::string key;
776 		if(read_xml_attribute(att_name, key)) {
777 			g_critical(attribute_value_not_specified_err,
778 				get_line_number(), att_name);
779 			return EXIT_FAILURE;
780 		}
781 		if(key.empty()) {
782 			g_critical(attribute_value_empty_err,
783 				get_line_number(), att_name);
784 			return EXIT_FAILURE;
785 		}
786 		if(key.find_first_of('\\') != std::string::npos) {
787 			g_critical(incorrect_dir_sep_err, get_line_number());
788 			return EXIT_FAILURE;
789 		}
790 		att_name = "type";
791 		std::string type;
792 		if(read_xml_attribute(att_name, type)) {
793 			g_critical(attribute_value_not_specified_err,
794 				get_line_number(), att_name);
795 			return EXIT_FAILURE;
796 		}
797 		if(!is_known_resource_type(type.c_str())) {
798 			g_critical(attribute_value_unknown_err,
799 				get_line_number(), att_name, type.c_str());
800 			return EXIT_FAILURE;
801 		}
802 		article_def.resources.push_back(resource_t(type, key));
803 	}
804 	if(article_def.resources.empty()) {
805 		g_critical(resource_list_empty_err, contents_beg_line_number);
806 		return EXIT_FAILURE;
807 	}
808 	return EXIT_SUCCESS;
809 }
810 
811 
read_att_type(char & type)812 TLoadResult textual_dict_parser_t::read_att_type(char& type)
813 {
814 	std::string value;
815 	static const char* const att_name = "type";
816 	if(read_xml_attribute(att_name, value))
817 		return lrNotFound;
818 	if(value.length() != 1) {
819 		g_critical(unknown_content_type_str_err,
820 			get_line_number(), value.c_str());
821 		return lrError;
822 	}
823 	type = value[0];
824 	g_assert(type != 0);
825 	if(strchr(known_type_ids, type) == NULL) {
826 		g_warning(unknown_content_type_chr_err, get_line_number(), type);
827 		g_message(allow_unknown_content_type_msg);
828 	}
829 	if(type == 'r') {
830 		g_critical(content_type_r_in_definition_err,
831 			get_line_number());
832 		return lrError;
833 	}
834 	return lrOK;
835 }
836 
837 /* Return value:
838  * EXIT_FAILURE or EXIT_SUCCESS */
check_blank_info_items(void)839 int textual_dict_parser_t::check_blank_info_items(void)
840 {
841 	check_blank_info_item("version", &DictInfo::is_version,
842 		&DictInfo::get_version, &DictInfo::unset_version);
843 	check_blank_info_item("bookname", &DictInfo::is_bookname,
844 		&DictInfo::get_bookname, &DictInfo::unset_bookname);
845 	check_blank_info_item("author", &DictInfo::is_author,
846 		&DictInfo::get_author, &DictInfo::unset_author);
847 	check_blank_info_item("email", &DictInfo::is_email,
848 		&DictInfo::get_email, &DictInfo::unset_email);
849 	check_blank_info_item("website", &DictInfo::is_website,
850 		&DictInfo::get_website, &DictInfo::unset_website);
851 	check_blank_info_item("description", &DictInfo::is_description,
852 		&DictInfo::get_description, &DictInfo::unset_description);
853 	check_blank_info_item("date", &DictInfo::is_date,
854 		&DictInfo::get_date, &DictInfo::unset_date);
855 	check_blank_info_item("dicttype", &DictInfo::is_dicttype,
856 		&DictInfo::get_dicttype, &DictInfo::unset_dicttype);
857 	return EXIT_SUCCESS;
858 }
859 
860 /* Return value:
861  * EXIT_FAILURE or EXIT_SUCCESS */
check_new_line_info_items(void)862 int textual_dict_parser_t::check_new_line_info_items(void)
863 {
864 	if(check_new_line_info_item("version", &DictInfo::is_version,
865 			&DictInfo::get_version))
866 		return EXIT_FAILURE;
867 	if(check_new_line_info_item("bookname", &DictInfo::is_bookname,
868 			&DictInfo::get_bookname))
869 		return EXIT_FAILURE;
870 	if(check_new_line_info_item("author", &DictInfo::is_author,
871 			&DictInfo::get_author))
872 		return EXIT_FAILURE;
873 	if(check_new_line_info_item("email", &DictInfo::is_email,
874 			&DictInfo::get_email))
875 		return EXIT_FAILURE;
876 	if(check_new_line_info_item("website", &DictInfo::is_website,
877 			&DictInfo::get_website))
878 		return EXIT_FAILURE;
879 	if(check_new_line_info_item("date", &DictInfo::is_date,
880 			&DictInfo::get_date))
881 		return EXIT_FAILURE;
882 	if(check_new_line_info_item("dicttype", &DictInfo::is_dicttype,
883 			&DictInfo::get_dicttype))
884 		return EXIT_FAILURE;
885 	return EXIT_SUCCESS;
886 }
887 
888 /* Return value:
889  * EXIT_FAILURE or EXIT_SUCCESS */
check_mandatory_info_items(void)890 int textual_dict_parser_t::check_mandatory_info_items(void)
891 {
892 	if(!norm_dict->dict_info.is_version()) {
893 		g_critical(missing_req_info_item_msg, "version");
894 		return EXIT_FAILURE;
895 	}
896 	if(!norm_dict->dict_info.is_bookname()) {
897 		g_critical(missing_req_info_item_msg, "bookname");
898 		return EXIT_FAILURE;
899 	}
900 	return EXIT_SUCCESS;
901 }
902 
903 /* Return value:
904  * EXIT_FAILURE or EXIT_SUCCESS */
check_article_key(const std::string & value,line_number_t article_item_line_number)905 int textual_dict_parser_t::check_article_key(const std::string& value, line_number_t article_item_line_number)
906 {
907 	if(value.find_first_of(key_forbidden_chars) != std::string::npos) {
908 		g_critical(article_key_forbidden_chars_err,
909 			article_item_line_number, value.c_str());
910 		return EXIT_FAILURE;
911 	}
912 	if(value.length() >= (size_t)MAX_INDEX_KEY_SIZE) {
913 		g_critical(article_key_long_err,
914 			get_line_number(), value.c_str(), static_cast<unsigned>(value.length()), MAX_INDEX_KEY_SIZE-1);
915 		return EXIT_FAILURE;
916 	}
917 	return EXIT_SUCCESS;
918 }
919 
920 
921 /* Return value:
922  * EXIT_FAILURE or EXIT_SUCCESS */
check_article_synonym(const std::string & value,line_number_t article_item_line_number)923 int textual_dict_parser_t::check_article_synonym(const std::string& value, line_number_t article_item_line_number)
924 {
925 	if(value.find_first_of(key_forbidden_chars) != std::string::npos) {
926 		g_critical(article_synonym_forbidden_chars_err,
927 			article_item_line_number, value.c_str());
928 		return EXIT_FAILURE;
929 	}
930 	if(value.length() >= (size_t)MAX_INDEX_KEY_SIZE) {
931 		g_critical(article_synonym_long_err,
932 			get_line_number(), value.c_str(), static_cast<unsigned>(value.length()), MAX_INDEX_KEY_SIZE-1);
933 		return EXIT_FAILURE;
934 	}
935 	return EXIT_SUCCESS;
936 }
937 
938 /* Read the next element. It must be one of exp_elems.
939  * parameters:
940  * open_elem - expected end element, may be NULL
941  *
942  * return value:
943  * >= 0 - index in the exp_elems array
944  * rrEOF, rrError, rrEndElement
945  * */
read_xml_element(const char ** exp_elems,const char * open_elem)946 int textual_dict_parser_t::read_xml_element(const char** exp_elems, const char* open_elem)
947 {
948 	int ret = next_node();
949 	if(ret == 0)
950 		return rrEOF;
951 	if(ret < 0)
952 		return rrError;
953 	ret = xmlTextReaderNodeType(xml_reader);
954 	if(ret < 0) {
955 		g_critical(parser_err);
956 		return rrError;
957 	}
958 	const xmlReaderTypes node_type = static_cast<xmlReaderTypes>(ret);
959 	if(node_type == XML_READER_TYPE_END_ELEMENT) {
960 		xml::CharStr node_name(xmlTextReaderName(xml_reader));
961 		if(xmlStrEqual(get_impl(node_name), BAD_CAST open_elem))
962 			return rrEndElement;
963 		else {
964 			unmatched_end_element(open_elem, (const char*)get_impl(node_name));
965 			return rrError;
966 		}
967 	}
968 	if(node_type != XML_READER_TYPE_ELEMENT) {
969 		unexpected_node_type_element(exp_elems);
970 		return rrError;
971 	}
972 	xml::CharStr node_name(xmlTextReaderName(xml_reader));
973 	for(size_t i=0; exp_elems[i]; ++i)
974 		if(xmlStrEqual(get_impl(node_name), BAD_CAST exp_elems[i])) {
975 			return i;
976 		}
977 	unexpected_element(exp_elems);
978 	return rrError;
979 }
980 
981 /* Read the next node. It must be end element.
982  * parameters:
983  * open_elem - opened element, != NULL
984  *
985  * return value:
986  * rrEOF, rrError, rrEndElement
987  * */
read_xml_end_element(const char * open_elem)988 int textual_dict_parser_t::read_xml_end_element(const char* open_elem)
989 {
990 	int ret = next_node();
991 	if(ret == 0)
992 		return rrEOF;
993 	if(ret < 0)
994 		return rrError;
995 	ret = xmlTextReaderNodeType(xml_reader);
996 	if(ret < 0) {
997 		g_critical(parser_err);
998 		return rrError;
999 	}
1000 	const xmlReaderTypes node_type = static_cast<xmlReaderTypes>(ret);
1001 	if(node_type != XML_READER_TYPE_END_ELEMENT) {
1002 		unexpected_node_type_end_element(open_elem);
1003 		return rrError;
1004 	}
1005 	xml::CharStr node_name(xmlTextReaderName(xml_reader));
1006 	if(!xmlStrEqual(get_impl(node_name), BAD_CAST open_elem)) {
1007 		unmatched_end_element(open_elem, (const char*)get_impl(node_name));
1008 		return rrError;
1009 	}
1010 	return rrEndElement;
1011 }
1012 
1013 /* Return value:
1014  * EXIT_FAILURE or EXIT_SUCCESS */
read_xml_text_item(const char * elem,std::string & value)1015 int textual_dict_parser_t::read_xml_text_item(const char* elem, std::string& value)
1016 {
1017 	value.clear();
1018 	const line_number_t element_beg_line_number = get_line_number();
1019 	int ret = read_xml_text(elem);
1020 	if(ret == rrEOF) {
1021 		unexpected_eof();
1022 		return EXIT_FAILURE;
1023 	}
1024 	if(ret == rrError)
1025 		return EXIT_FAILURE;
1026 	if(ret == rrEndElement)
1027 		return EXIT_SUCCESS;
1028 	xml::CharStr tvalue(xmlTextReaderValue(xml_reader));
1029 	if(!g_utf8_validate((const char*)get_impl(tvalue), -1, NULL)) {
1030 		g_critical(element_invalid_utf8_value_err,
1031 			element_beg_line_number, elem, get_impl(tvalue));
1032 		return EXIT_FAILURE;
1033 	}
1034 	glib::CharStr t2value(g_utf8_normalize((const char*)get_impl(tvalue), -1, G_NORMALIZE_ALL_COMPOSE));
1035 	if(!t2value) {
1036 		g_critical(normalization_failed_err,
1037 			element_beg_line_number, get_impl(tvalue));
1038 		return EXIT_FAILURE;
1039 	}
1040 	std::string data_str;
1041 	{	// check for invalid chars
1042 		typedef std::list<const char*> str_list_t;
1043 		str_list_t invalid_chars;
1044 		if(check_stardict_string_chars(get_impl(t2value), invalid_chars)) {
1045 			g_message(element_invalid_text_err,
1046 				element_beg_line_number, elem, get_impl(t2value),
1047 				print_char_codes(invalid_chars).c_str());
1048 			fix_stardict_string_chars(get_impl(t2value), data_str);
1049 			g_message(fixed_drop_invalid_char_msg);
1050 		} else
1051 			data_str.assign(get_impl(t2value));
1052 	}
1053 	{
1054 		const char* beg;
1055 		size_t len;
1056 		trim_spaces(data_str.c_str(), beg, len);
1057 		value.assign(beg, len);
1058 	}
1059 	// read end element
1060 	ret = read_xml_end_element(elem);
1061 	if(ret == rrEOF) {
1062 		unexpected_eof();
1063 		return EXIT_FAILURE;
1064 	}
1065 	if(ret == rrError)
1066 		return EXIT_FAILURE;
1067 	return EXIT_SUCCESS;
1068 }
1069 
1070 /* Read the next node. It must be text.
1071  * parameters:
1072  * open_elem - opened element, != NULL
1073  *
1074  *  return value:
1075  * 0 - text node is found
1076  * rrEOF, rrError, rrEndElement
1077  * */
read_xml_text(const char * open_elem)1078 int textual_dict_parser_t::read_xml_text(const char* open_elem)
1079 {
1080 	int ret = next_node();
1081 	if(ret == 0)
1082 		return rrEOF;
1083 	if(ret < 0)
1084 		return rrError;
1085 	ret = xmlTextReaderNodeType(xml_reader);
1086 	if(ret < 0) {
1087 		g_critical(parser_err);
1088 		return rrError;
1089 	}
1090 	const xmlReaderTypes node_type = static_cast<xmlReaderTypes>(ret);
1091 	if(node_type == XML_READER_TYPE_END_ELEMENT) {
1092 		xml::CharStr node_name(xmlTextReaderName(xml_reader));
1093 		if(xmlStrEqual(get_impl(node_name), BAD_CAST open_elem))
1094 			return rrEndElement;
1095 		else {
1096 			unmatched_end_element(open_elem, (const char*)get_impl(node_name));
1097 			return rrError;
1098 		}
1099 	}
1100 	if(node_type != XML_READER_TYPE_TEXT) {
1101 		unexpected_node_type_text();
1102 		return rrError;
1103 	}
1104 	return 0;
1105 }
1106 
1107 /* Return value:
1108  * EXIT_FAILURE or EXIT_SUCCESS
1109  *
1110  * EXIT_FAILURE - attribute is missing,
1111  * EXIT_SUCCESS - attribute value read successfully */
read_xml_attribute(const char * att_name,std::string & value)1112 int textual_dict_parser_t::read_xml_attribute(const char* att_name, std::string& value)
1113 {
1114 	xmlChar * att_val = xmlTextReaderGetAttribute(xml_reader, (const xmlChar*) att_name);
1115 	if(!att_val)
1116 		return EXIT_FAILURE;
1117 	value.assign((const char*) att_val);
1118 	xmlFree(att_val);
1119 	return EXIT_SUCCESS;
1120 }
1121 
check_blank_info_item(const char * elem,bool (DictInfo::* is_item)(void)const,const std::string & (DictInfo::* get_item)(void)const,void (DictInfo::* unset_item)(void))1122 void textual_dict_parser_t::check_blank_info_item(const char* elem, bool (DictInfo::* is_item)(void) const,
1123 		const std::string& (DictInfo:: *get_item)(void) const, void (DictInfo:: *unset_item)(void))
1124 {
1125 	if((norm_dict->dict_info.*is_item)() && (norm_dict->dict_info.*get_item)().empty()) {
1126 		g_warning("info item %s is assigned an empty string.", elem);
1127 		g_message(fixed_msg2 "Unset the item.");
1128 		(norm_dict->dict_info.*unset_item)();
1129 	}
1130 }
1131 
check_new_line_info_item(const char * elem,bool (DictInfo::* is_item)(void)const,const std::string & (DictInfo::* get_item)(void)const)1132 int textual_dict_parser_t::check_new_line_info_item(const char* elem, bool (DictInfo::* is_item)(void) const,
1133 		const std::string& (DictInfo:: *get_item)(void) const)
1134 {
1135 	if((norm_dict->dict_info.*is_item)() && (norm_dict->dict_info.*get_item)().find_first_of("\n\r") != std::string::npos) {
1136 		g_critical("info item %s contains new line character.", elem);
1137 		return EXIT_FAILURE;
1138 	}
1139 	return EXIT_SUCCESS;
1140 }
1141 
1142 /* return effective content type for the current element.
1143  * return 0 if content type is not specified for all elements in the element stack. */
get_effective_content_type(void) const1144 char textual_dict_parser_t::get_effective_content_type(void) const
1145 {
1146 	for(size_t i=elem_stack.size()-1; i>=0; --i)
1147 		if(elem_stack[i].content_type)
1148 			return elem_stack[i].content_type;
1149 	return 0;
1150 }
1151 
unexpected_node_type_element(const char ** exp_elems)1152 void textual_dict_parser_t::unexpected_node_type_element(const char** exp_elems)
1153 {
1154 	std::string buf;
1155 	for(size_t i=0; exp_elems[i]; ++i) {
1156 		buf += " ";
1157 		buf += exp_elems[i];
1158 	}
1159 	g_warning("%u: Unexpected node type: %s, name: %s. Expecting elements:%s.",
1160 		get_line_number(),
1161 		note_type_str(xmlTextReaderNodeType(xml_reader)),
1162 		xmlTextReaderName(xml_reader),
1163 		buf.c_str()
1164 		);
1165 }
1166 
unexpected_node_type_text(void)1167 void textual_dict_parser_t::unexpected_node_type_text(void)
1168 {
1169 	g_warning("%u: Unexpected node type: %s, name: %s. Expecting text.",
1170 		get_line_number(),
1171 		note_type_str(xmlTextReaderNodeType(xml_reader)),
1172 		xmlTextReaderName(xml_reader)
1173 		);
1174 }
1175 
unexpected_node_type_end_element(const char * open_elem)1176 void textual_dict_parser_t::unexpected_node_type_end_element(const char* open_elem)
1177 {
1178 	g_warning("%u: Unexpected node type: %s, name: %s. Expecting end element: %s.",
1179 		get_line_number(),
1180 		note_type_str(xmlTextReaderNodeType(xml_reader)),
1181 		xmlTextReaderName(xml_reader),
1182 		open_elem
1183 		);
1184 }
1185 
unexpected_element(const char ** exp_elems)1186 void textual_dict_parser_t::unexpected_element(const char** exp_elems)
1187 {
1188 	std::string buf;
1189 	for(size_t i=0; exp_elems[i]; ++i) {
1190 		buf += " ";
1191 		buf += exp_elems[i];
1192 	}
1193 	g_warning("%u: Unexpected element: %s. Expecting elements:%s.",
1194 		get_line_number(),
1195 		xmlTextReaderName(xml_reader),
1196 		buf.c_str()
1197 		);
1198 }
1199 
unexpected_eof(const char ** exp_elems)1200 void textual_dict_parser_t::unexpected_eof(const char** exp_elems)
1201 {
1202 	std::string buf;
1203 	for(size_t i=0; exp_elems[i]; ++i) {
1204 		buf += " ";
1205 		buf += exp_elems[i];
1206 	}
1207 	g_warning("Unexpected end of file. Expecting: %s.", buf.c_str());
1208 }
1209 
unexpected_eof(void)1210 void textual_dict_parser_t::unexpected_eof(void)
1211 {
1212 	g_warning("Unexpected end of file.");
1213 }
1214 
unexpected_end_element(const char ** exp_elems,const char * open_elem)1215 void textual_dict_parser_t::unexpected_end_element(const char** exp_elems, const char* open_elem)
1216 {
1217 	std::string buf;
1218 	for(size_t i=0; exp_elems[i]; ++i) {
1219 		buf += " ";
1220 		buf += exp_elems[i];
1221 	}
1222 	g_warning("%u: Unexpected end element %s. Expecting:%s.",
1223 		get_line_number(),
1224 		open_elem,
1225 		buf.c_str());
1226 }
1227 
unmatched_end_element(const char * open_elem,const char * end_elem)1228 void textual_dict_parser_t::unmatched_end_element(const char* open_elem, const char* end_elem)
1229 {
1230 	g_warning("%u: open element %s, end element %s.",
1231 		get_line_number(),
1232 		open_elem,
1233 		end_elem
1234 	);
1235 }
1236 
unexpected_empty_element(void)1237 void textual_dict_parser_t::unexpected_empty_element(void)
1238 {
1239 	g_warning("%u: Unexpected empty element: %s.",
1240 		get_line_number(),
1241 		xmlTextReaderName(xml_reader)
1242 		);
1243 }
1244 
unexpected_non_empty_element(void)1245 void textual_dict_parser_t::unexpected_non_empty_element(void)
1246 {
1247 	g_warning("%u: Unexpected non-empty element: %s.",
1248 		get_line_number(),
1249 		xmlTextReaderName(xml_reader)
1250 		);
1251 }
1252 
get_line_number(void)1253 textual_dict_parser_t::line_number_t textual_dict_parser_t::get_line_number(void)
1254 {
1255 	xmlNodePtr node = xmlTextReaderCurrentNode(xml_reader);
1256 	return node ? node->line : 0;
1257 }
1258 
1259 /* Read next node, skipping comments and processing instructions.
1260  * Return value: see xmlTextReaderRead */
next_node(void)1261 int textual_dict_parser_t::next_node(void)
1262 {
1263 	while(true) {
1264 		int ret = xmlTextReaderRead(xml_reader);
1265 		if(ret != 1)
1266 			return ret;
1267 		ret = xmlTextReaderNodeType(xml_reader);
1268 		if(ret < 0)
1269 			return -1;
1270 		xmlReaderTypes reader_type = static_cast<xmlReaderTypes>(ret);
1271 		if(reader_type == XML_READER_TYPE_COMMENT
1272 			|| reader_type == XML_READER_TYPE_PROCESSING_INSTRUCTION) {
1273 			continue;
1274 		}
1275 		if(get_custom_include()) {
1276 			xmlNodePtr node = xmlTextReaderCurrentNode(xml_reader);
1277 			if(!node)
1278 				return -1;
1279 			// see xmlreader.c, xmlTextReaderRead function, #ifdef LIBXML_XINCLUDE_ENABLED block
1280 			if((node->type == XML_ELEMENT_NODE) &&
1281 				(node->ns != NULL) &&
1282 				((xmlStrEqual(node->ns->href, XINCLUDE_NS)) ||
1283 					(xmlStrEqual(node->ns->href, XINCLUDE_OLD_NS)))) {
1284 				if (xincctxt == NULL) {
1285 					xincctxt = xmlXIncludeNewContext(xmlTextReaderCurrentDoc(xml_reader));
1286 					xmlXIncludeSetFlags(xincctxt, reader_options);
1287 				}
1288 				const char* att_name = "href";
1289 				std::string href;
1290 				if(!read_xml_attribute(att_name, href)) {
1291 					g_message(xinclude_process_msg,
1292 						get_line_number(), href.c_str());
1293 				} else {
1294 					g_message(xinclude_process_msg,
1295 						get_line_number(), href.c_str());
1296 				}
1297 				/*
1298 				 * expand that node and process it
1299 				 */
1300 				if (xmlTextReaderExpand(xml_reader) == NULL)
1301 					return -1;
1302 				xmlXIncludeProcessNode(xincctxt, node);
1303 				continue;
1304 			}
1305 		}
1306 		break;
1307 	}
1308 	return 1;
1309 }
1310 
1311 
parse_textual_dict(const std::string & xmlfilename,common_dict_t * norm_dict,bool show_xincludes)1312 int parse_textual_dict(const std::string& xmlfilename, common_dict_t* norm_dict,
1313 		bool show_xincludes)
1314 {
1315 	textual_dict_parser_t parser;
1316 	parser.set_custom_include(show_xincludes);
1317 	return parser.parse(xmlfilename, norm_dict);
1318 }
1319