1 /* document.cc
2 * this file is part of libxml++
3 *
4 * copyright (C) 2003 by the libxml++ development team
5 *
6 * this file is covered by the GNU Lesser General Public License,
7 * which should be included with libxml++ as the file COPYING.
8 */
9
10 #include <libxml++/document.h>
11 #include <libxml++/dtd.h>
12 #include <libxml++/attribute.h>
13 #include <libxml++/nodes/element.h>
14 #include <libxml++/exceptions/internal_error.h>
15 #include <libxml++/keepblanks.h>
16 #include <libxml++/io/ostreamoutputbuffer.h>
17
18 #include <libxml/tree.h>
19 #include <libxml/xinclude.h>
20 #include <libxml/parser.h> // XML_PARSE_NOXINCNODE
21
22 #include <iostream>
23 #include <map>
24
25 namespace // anonymous
26 {
27 typedef std::map<xmlpp::Node*, xmlElementType> NodeMap;
28
29 // Find all C++ wrappers of 'node' and its descendants.
30 // Compare xmlpp::Node::free_wrappers().
find_wrappers(xmlNode * node,NodeMap & node_map)31 void find_wrappers(xmlNode* node, NodeMap& node_map)
32 {
33 if (!node)
34 return;
35
36 //If an entity declaration contains an entity reference, there can be cyclic
37 //references between entity declarations and entity references. (It's not
38 //a tree.) We must avoid an infinite recursion.
39 //Compare xmlFreeNode(), which frees the children of all node types except
40 //XML_ENTITY_REF_NODE.
41 if (node->type != XML_ENTITY_REF_NODE)
42 {
43 // Walk the children list.
44 for (auto child = node->children; child; child = child->next)
45 find_wrappers(child, node_map);
46 }
47
48 // Find the local one
49 bool has_attributes = true;
50 switch (node->type)
51 {
52 // Node types that have no attributes.
53 // These are not represented by struct xmlNode.
54 case XML_DTD_NODE:
55 case XML_ATTRIBUTE_NODE:
56 case XML_ELEMENT_DECL:
57 case XML_ATTRIBUTE_DECL:
58 case XML_ENTITY_DECL:
59 case XML_DOCUMENT_NODE:
60 has_attributes = false;
61 break;
62 default:
63 break;
64 }
65
66 if (node->_private)
67 node_map[static_cast<xmlpp::Node*>(node->_private)] = node->type;
68
69 if (!has_attributes)
70 return;
71
72 //Walk the attributes list.
73 //Note that some "derived" structs have a different layout, so
74 //_xmlNode::properties would be a nonsense value, leading to crashes
75 //(and shown as valgrind warnings), so we return above, to avoid
76 //checking it here.
77 for (auto attr = node->properties; attr; attr = attr->next)
78 find_wrappers(reinterpret_cast<xmlNode*>(attr), node_map);
79 }
80
81 // Remove from 'node_map' the pointers to the C++ wrappers that are found with
82 // unchanged type in 'node' and its descendants.
remove_found_wrappers(xmlNode * node,NodeMap & node_map)83 void remove_found_wrappers(xmlNode* node, NodeMap& node_map)
84 {
85 if (!node)
86 return;
87
88 if (node->type != XML_ENTITY_REF_NODE)
89 {
90 // Walk the children list.
91 for (auto child = node->children; child; child = child->next)
92 remove_found_wrappers(child, node_map);
93 }
94
95 // Find the local one
96 bool has_attributes = true;
97 switch (node->type)
98 {
99 // Node types that have no attributes
100 case XML_DTD_NODE:
101 case XML_ATTRIBUTE_NODE:
102 case XML_ELEMENT_DECL:
103 case XML_ATTRIBUTE_DECL:
104 case XML_ENTITY_DECL:
105 case XML_DOCUMENT_NODE:
106 has_attributes = false;
107 break;
108 default:
109 break;
110 }
111
112 if (node->_private)
113 {
114 const auto iter =
115 node_map.find(static_cast<xmlpp::Node*>(node->_private));
116 if (iter != node_map.end())
117 {
118 if (iter->second == node->type)
119 node_map.erase(iter);
120 else
121 node->_private = nullptr; // node->type has changed. The wrapper will be deleted.
122 }
123 }
124
125 if (!has_attributes)
126 return;
127
128 // Walk the attributes list.
129 for (auto attr = node->properties; attr; attr = attr->next)
130 remove_found_wrappers(reinterpret_cast<xmlNode*>(attr), node_map);
131
132 }
133 } // anonymous
134
135 namespace xmlpp
136 {
137
get_encoding_or_utf8(const Glib::ustring & encoding)138 static const char* get_encoding_or_utf8(const Glib::ustring& encoding)
139 {
140 if(encoding.empty())
141 {
142 //If we don't specify this to the xmlDocDump* functions (using nullptr instead),
143 //then some other encoding is used, causing them to fail on non-ASCII characters.
144 return "UTF-8";
145 }
146 else
147 return encoding.c_str();
148 }
149
150
Init()151 Document::Init::Init()
152 {
153 xmlInitParser(); //Not always necessary, but necessary for thread safety.
154 }
155
~Init()156 Document::Init::~Init()
157 {
158 //We don't call this because it breaks libxml generally and should only be
159 //called at the very end of a process, such as at the end of a main().
160 //libxml might still be used by the application, so we don't want to break
161 //that.
162 //This is important even here, which usually happens only when the library
163 //is unloaded, because that might happen during normal application use,
164 //if the application does dynamic library loading, for instance to load
165 //plugins.
166 //See http://xmlsoft.org/html/libxml-parser.html#xmlCleanupParser
167 //xmlCleanupParser(); //As per xmlInitParser(), or memory leak will happen.
168 }
169
170 Document::Init Document::init_;
171
Document(const Glib::ustring & version)172 Document::Document(const Glib::ustring& version)
173 : impl_(xmlNewDoc((const xmlChar*)version.c_str()))
174 {
175 if (!impl_)
176 throw internal_error("Could not create Document.");
177 impl_->_private = this;
178 }
179
Document(xmlDoc * doc)180 Document::Document(xmlDoc* doc)
181 : impl_(doc)
182 {
183 impl_->_private = this;
184 }
185
~Document()186 Document::~Document()
187 {
188 Node::free_wrappers(reinterpret_cast<xmlNode*>(impl_));
189 xmlFreeDoc(impl_);
190 }
191
get_encoding() const192 Glib::ustring Document::get_encoding() const
193 {
194 Glib::ustring encoding;
195 if(impl_->encoding)
196 encoding = (const char*)impl_->encoding;
197
198 return encoding;
199 }
200
get_internal_subset() const201 Dtd* Document::get_internal_subset() const
202 {
203 auto dtd = xmlGetIntSubset(impl_);
204 if(!dtd)
205 return nullptr;
206
207 if(!dtd->_private)
208 dtd->_private = new Dtd(dtd);
209
210 return reinterpret_cast<Dtd*>(dtd->_private);
211 }
212
set_internal_subset(const Glib::ustring & name,const Glib::ustring & external_id,const Glib::ustring & system_id)213 void Document::set_internal_subset(const Glib::ustring& name,
214 const Glib::ustring& external_id,
215 const Glib::ustring& system_id)
216 {
217 auto dtd = xmlCreateIntSubset(impl_,
218 (const xmlChar*)name.c_str(),
219 external_id.empty() ? nullptr : (const xmlChar*)external_id.c_str(),
220 system_id.empty() ? nullptr : (const xmlChar*)system_id.c_str());
221
222 if (dtd && !dtd->_private)
223 dtd->_private = new Dtd(dtd);
224 }
225
get_root_node() const226 Element* Document::get_root_node() const
227 {
228 auto root = xmlDocGetRootElement(impl_);
229 if(root == nullptr)
230 return nullptr;
231 else
232 {
233 Node::create_wrapper(root);
234 return reinterpret_cast<Element*>(root->_private);
235 }
236 }
237
create_root_node(const Glib::ustring & name,const Glib::ustring & ns_uri,const Glib::ustring & ns_prefix)238 Element* Document::create_root_node(const Glib::ustring& name,
239 const Glib::ustring& ns_uri,
240 const Glib::ustring& ns_prefix)
241 {
242 auto node = xmlNewDocNode(impl_, nullptr, (const xmlChar*)name.c_str(), nullptr);
243 if (!node)
244 throw internal_error("Could not create root element node " + name);
245
246 node = xmlDocSetRootElement(impl_, node);
247 if (node)
248 {
249 // An old root element node has been replaced.
250 Node::free_wrappers(node);
251 xmlFreeNode(node);
252 }
253
254 auto element = get_root_node();
255
256 if( !ns_uri.empty() && element )
257 {
258 element->set_namespace_declaration(ns_uri, ns_prefix);
259 element->set_namespace(ns_prefix);
260 }
261
262 return element;
263 }
264
create_root_node_by_import(const Node * node,bool recursive)265 Element* Document::create_root_node_by_import(const Node* node,
266 bool recursive)
267 {
268 if (!node)
269 return nullptr;
270
271 //Create the node, by copying:
272 auto imported_node = xmlDocCopyNode(const_cast<xmlNode*>(node->cobj()), impl_, recursive);
273 if (!imported_node)
274 {
275 throw exception("Unable to copy the node that shall be imported");
276 }
277
278 auto old_node = xmlDocSetRootElement(impl_, imported_node);
279 if (old_node)
280 {
281 // An old root element node has been replaced.
282 Node::free_wrappers(old_node);
283 xmlFreeNode(old_node);
284 }
285
286 return get_root_node();
287 }
288
add_comment(const Glib::ustring & content)289 CommentNode* Document::add_comment(const Glib::ustring& content)
290 {
291 auto child = xmlNewComment((const xmlChar*)content.c_str());
292
293 // Use the result, because child can be freed when merging text nodes:
294 auto node = xmlAddChild((xmlNode*)impl_, child);
295 if (!node)
296 {
297 xmlFreeNode(child);
298 throw internal_error("Could not add comment node \"" + content + "\"");
299 }
300 Node::create_wrapper(node);
301 return static_cast<CommentNode*>(node->_private);
302 }
303
add_processing_instruction(const Glib::ustring & name,const Glib::ustring & content)304 ProcessingInstructionNode* Document::add_processing_instruction(
305 const Glib::ustring& name, const Glib::ustring& content)
306 {
307 auto child = xmlNewDocPI(impl_, (const xmlChar*)name.c_str(), (const xmlChar*)content.c_str());
308 auto node = xmlAddChild((xmlNode*)impl_, child);
309 if (!node)
310 {
311 xmlFreeNode(child);
312 throw internal_error("Could not add processing instruction node " + name);
313 }
314 Node::create_wrapper(node);
315 return static_cast<ProcessingInstructionNode*>(node->_private);
316 }
317
write_to_file(const Glib::ustring & filename,const Glib::ustring & encoding)318 void Document::write_to_file(const Glib::ustring& filename, const Glib::ustring& encoding)
319 {
320 do_write_to_file(filename, encoding, false);
321 }
322
write_to_file_formatted(const Glib::ustring & filename,const Glib::ustring & encoding)323 void Document::write_to_file_formatted(const Glib::ustring& filename, const Glib::ustring& encoding)
324 {
325 do_write_to_file(filename, encoding, true);
326 }
327
// Forwards to do_write_to_string() with formatting switched off.
Glib::ustring Document::write_to_string(const Glib::ustring& encoding)
{
  const bool format = false;
  return do_write_to_string(encoding, format);
}
332
// Forwards to do_write_to_string() with formatting switched on.
Glib::ustring Document::write_to_string_formatted(const Glib::ustring& encoding)
{
  const bool format = true;
  return do_write_to_string(encoding, format);
}
337
write_to_stream(std::ostream & output,const Glib::ustring & encoding)338 void Document::write_to_stream(std::ostream& output, const Glib::ustring& encoding)
339 {
340 do_write_to_stream(output, encoding.empty()?get_encoding():encoding, false);
341 }
342
write_to_stream_formatted(std::ostream & output,const Glib::ustring & encoding)343 void Document::write_to_stream_formatted(std::ostream& output, const Glib::ustring& encoding)
344 {
345 do_write_to_stream(output, encoding.empty()?get_encoding():encoding, true);
346 }
347
do_write_to_file(const Glib::ustring & filename,const Glib::ustring & encoding,bool format)348 void Document::do_write_to_file(
349 const Glib::ustring& filename,
350 const Glib::ustring& encoding,
351 bool format)
352 {
353 KeepBlanks k(KeepBlanks::Default);
354 xmlIndentTreeOutput = format?1:0;
355 xmlResetLastError();
356 const int result = xmlSaveFormatFileEnc(filename.c_str(), impl_,
357 get_encoding_or_utf8(encoding), format?1:0);
358
359 if(result == -1)
360 {
361 throw exception("do_write_to_file() failed.\n" + format_xml_error());
362 }
363 }
364
// Dumps the document into memory with xmlDocDumpFormatMemoryEnc() and returns
// the result as a Glib::ustring.
// An empty 'encoding' means "UTF-8". 'format' selects formatted output and
// is also written to the global xmlIndentTreeOutput.
// Throws exception if libxml2 produces no buffer.
Glib::ustring Document::do_write_to_string(
  const Glib::ustring& encoding,
  bool format)
{
  // Owns the buffer allocated by libxml2 and releases it with xmlFree() on
  // every exit path, including an exception thrown while the result string
  // is being built.
  struct DumpGuard
  {
    xmlChar* data = nullptr;
    ~DumpGuard()
    {
      if (data)
        xmlFree(data);
    }
  };

  KeepBlanks k(KeepBlanks::Default);
  xmlIndentTreeOutput = format ? 1 : 0;
  DumpGuard dump;
  int length = 0;

  xmlResetLastError();
  xmlDocDumpFormatMemoryEnc(impl_, &dump.data, &length,
    get_encoding_or_utf8(encoding), format ? 1 : 0);

  if (!dump.data)
  {
    throw exception("do_write_to_string() failed.\n" + format_xml_error());
  }

  // Create a Glib::ustring copy of the buffer.
  // Here we force the use of Glib::ustring::ustring( InputIterator begin, InputIterator end )
  // instead of Glib::ustring::ustring( const char*, size_type ) because it
  // expects the length of the string in characters, not in bytes.
  const char* const first = reinterpret_cast<const char*>(dump.data);
  const char* const last = reinterpret_cast<const char*>(dump.data + length);
  return Glib::ustring(first, last);
}
395
do_write_to_stream(std::ostream & output,const Glib::ustring & encoding,bool format)396 void Document::do_write_to_stream(std::ostream& output, const Glib::ustring& encoding, bool format)
397 {
398 // TODO assert document encoding is UTF-8 if encoding is different than UTF-8
399 OStreamOutputBuffer buffer(output, encoding);
400 xmlResetLastError();
401 const int result = xmlSaveFormatFileTo(buffer.cobj(), impl_,
402 get_encoding_or_utf8(encoding), format ? 1 : 0);
403
404 if(result == -1)
405 {
406 throw exception("do_write_to_stream() failed.\n" + format_xml_error());
407 }
408 }
409
set_entity_declaration(const Glib::ustring & name,XmlEntityType type,const Glib::ustring & publicId,const Glib::ustring & systemId,const Glib::ustring & content)410 void Document::set_entity_declaration(const Glib::ustring& name, XmlEntityType type,
411 const Glib::ustring& publicId, const Glib::ustring& systemId,
412 const Glib::ustring& content)
413 {
414 auto entity = xmlAddDocEntity( impl_, (const xmlChar*) name.c_str(), type,
415 publicId.empty() ? nullptr : (const xmlChar*)publicId.c_str(),
416 systemId.empty() ? nullptr : (const xmlChar*)systemId.c_str(),
417 (const xmlChar*) content.c_str() );
418 if (!entity)
419 throw internal_error("Could not add entity declaration " + name);
420 }
421
process_xinclude(bool generate_xinclude_nodes)422 int Document::process_xinclude(bool generate_xinclude_nodes)
423 {
424 NodeMap node_map;
425
426 auto root = xmlDocGetRootElement(impl_);
427
428 find_wrappers(root, node_map);
429
430 xmlResetLastError();
431 const int n_substitutions = xmlXIncludeProcessTreeFlags(root,
432 generate_xinclude_nodes ? 0 : XML_PARSE_NOXINCNODE);
433
434 remove_found_wrappers(reinterpret_cast<xmlNode*>(impl_), node_map);
435
436 // Delete wrappers of nodes that have been deleted or have had their type changed.
437 for (auto& the_pair : node_map)
438 {
439 auto node = the_pair.first;
440
441 switch (the_pair.second)
442 {
443 case XML_DTD_NODE:
444 delete reinterpret_cast<Dtd*>(node);
445 break;
446 case XML_DOCUMENT_NODE:
447 delete reinterpret_cast<Document*>(node);
448 break;
449 default:
450 delete node; // Node*
451 break;
452 }
453 }
454
455 if (n_substitutions < 0)
456 {
457 throw exception("Couldn't process XInclude\n" + format_xml_error());
458 }
459
460 return n_substitutions;
461 }
462
get_entity(const Glib::ustring & name)463 _xmlEntity* Document::get_entity(const Glib::ustring& name)
464 {
465 return xmlGetDocEntity(impl_, (const xmlChar*) name.c_str());
466 }
467
cobj()468 _xmlDoc* Document::cobj()
469 {
470 return impl_;
471 }
472
cobj() const473 const _xmlDoc* Document::cobj() const
474 {
475 return impl_;
476 }
477
478 } //namespace xmlpp
479