1 /* document.cc
2  * this file is part of libxml++
3  *
4  * copyright (C) 2003 by the libxml++ development team
5  *
6  * this file is covered by the GNU Lesser General Public License,
7  * which should be included with libxml++ as the file COPYING.
8  */
9 
#include <libxml++/document.h>
#include <libxml++/dtd.h>
#include <libxml++/attribute.h>
#include <libxml++/nodes/element.h>
#include <libxml++/exceptions/internal_error.h>
#include <libxml++/keepblanks.h>
#include <libxml++/io/ostreamoutputbuffer.h>

#include <libxml/tree.h>
#include <libxml/xinclude.h>
#include <libxml/parser.h> // XML_PARSE_NOXINCNODE

#include <iostream>
#include <map>
#include <memory>
24 
25 namespace // anonymous
26 {
27 typedef std::map<xmlpp::Node*, xmlElementType> NodeMap;
28 
29 // Find all C++ wrappers of 'node' and its descendants.
30 // Compare xmlpp::Node::free_wrappers().
find_wrappers(xmlNode * node,NodeMap & node_map)31 void find_wrappers(xmlNode* node, NodeMap& node_map)
32 {
33   if (!node)
34     return;
35 
36   //If an entity declaration contains an entity reference, there can be cyclic
37   //references between entity declarations and entity references. (It's not
38   //a tree.) We must avoid an infinite recursion.
39   //Compare xmlFreeNode(), which frees the children of all node types except
40   //XML_ENTITY_REF_NODE.
41   if (node->type != XML_ENTITY_REF_NODE)
42   {
43     // Walk the children list.
44     for (auto child = node->children; child; child = child->next)
45       find_wrappers(child, node_map);
46   }
47 
48   // Find the local one
49   bool has_attributes = true;
50   switch (node->type)
51   {
52     // Node types that have no attributes.
53     // These are not represented by struct xmlNode.
54     case XML_DTD_NODE:
55     case XML_ATTRIBUTE_NODE:
56     case XML_ELEMENT_DECL:
57     case XML_ATTRIBUTE_DECL:
58     case XML_ENTITY_DECL:
59     case XML_DOCUMENT_NODE:
60       has_attributes = false;
61       break;
62     default:
63       break;
64   }
65 
66   if (node->_private)
67     node_map[static_cast<xmlpp::Node*>(node->_private)] = node->type;
68 
69   if (!has_attributes)
70     return;
71 
72   //Walk the attributes list.
73   //Note that some "derived" structs have a different layout, so
74   //_xmlNode::properties would be a nonsense value, leading to crashes
75   //(and shown as valgrind warnings), so we return above, to avoid
76   //checking it here.
77   for (auto attr = node->properties; attr; attr = attr->next)
78     find_wrappers(reinterpret_cast<xmlNode*>(attr), node_map);
79 }
80 
81 // Remove from 'node_map' the pointers to the C++ wrappers that are found with
82 // unchanged type in 'node' and its descendants.
remove_found_wrappers(xmlNode * node,NodeMap & node_map)83 void remove_found_wrappers(xmlNode* node, NodeMap& node_map)
84 {
85   if (!node)
86     return;
87 
88   if (node->type != XML_ENTITY_REF_NODE)
89   {
90     // Walk the children list.
91     for (auto child = node->children; child; child = child->next)
92       remove_found_wrappers(child, node_map);
93   }
94 
95   // Find the local one
96   bool has_attributes = true;
97   switch (node->type)
98   {
99     // Node types that have no attributes
100     case XML_DTD_NODE:
101     case XML_ATTRIBUTE_NODE:
102     case XML_ELEMENT_DECL:
103     case XML_ATTRIBUTE_DECL:
104     case XML_ENTITY_DECL:
105     case XML_DOCUMENT_NODE:
106       has_attributes = false;
107       break;
108     default:
109       break;
110   }
111 
112   if (node->_private)
113   {
114     const auto iter =
115       node_map.find(static_cast<xmlpp::Node*>(node->_private));
116     if (iter != node_map.end())
117     {
118       if (iter->second == node->type)
119         node_map.erase(iter);
120       else
121         node->_private = nullptr; // node->type has changed. The wrapper will be deleted.
122     }
123   }
124 
125   if (!has_attributes)
126     return;
127 
128   // Walk the attributes list.
129   for (auto attr = node->properties; attr; attr = attr->next)
130     remove_found_wrappers(reinterpret_cast<xmlNode*>(attr), node_map);
131 
132 }
133 } // anonymous
134 
135 namespace xmlpp
136 {
137 
get_encoding_or_utf8(const Glib::ustring & encoding)138 static const char* get_encoding_or_utf8(const Glib::ustring& encoding)
139 {
140   if(encoding.empty())
141   {
142     //If we don't specify this to the xmlDocDump* functions (using nullptr instead),
143     //then some other encoding is used, causing them to fail on non-ASCII characters.
144     return "UTF-8";
145   }
146   else
147     return encoding.c_str();
148 }
149 
150 
Init()151 Document::Init::Init()
152 {
153   xmlInitParser(); //Not always necessary, but necessary for thread safety.
154 }
155 
~Init()156 Document::Init::~Init()
157 {
158   //We don't call this because it breaks libxml generally and should only be
159   //called at the very end of a process, such as at the end of a main().
160   //libxml might still be used by the application, so we don't want to break
161   //that.
162   //This is important even here, which usually happens only when the library
163   //is unloaded, because that might happen during normal application use,
164   //if the application does dynamic library loading, for instance to load
165   //plugins.
166   //See http://xmlsoft.org/html/libxml-parser.html#xmlCleanupParser
167   //xmlCleanupParser(); //As per xmlInitParser(), or memory leak will happen.
168 }
169 
170 Document::Init Document::init_;
171 
Document(const Glib::ustring & version)172 Document::Document(const Glib::ustring& version)
173   : impl_(xmlNewDoc((const xmlChar*)version.c_str()))
174 {
175   if (!impl_)
176     throw internal_error("Could not create Document.");
177   impl_->_private = this;
178 }
179 
Document(xmlDoc * doc)180 Document::Document(xmlDoc* doc)
181   : impl_(doc)
182 {
183   impl_->_private = this;
184 }
185 
~Document()186 Document::~Document()
187 {
188   Node::free_wrappers(reinterpret_cast<xmlNode*>(impl_));
189   xmlFreeDoc(impl_);
190 }
191 
get_encoding() const192 Glib::ustring Document::get_encoding() const
193 {
194   Glib::ustring encoding;
195   if(impl_->encoding)
196     encoding = (const char*)impl_->encoding;
197 
198   return encoding;
199 }
200 
get_internal_subset() const201 Dtd* Document::get_internal_subset() const
202 {
203   auto dtd = xmlGetIntSubset(impl_);
204   if(!dtd)
205     return nullptr;
206 
207   if(!dtd->_private)
208     dtd->_private = new Dtd(dtd);
209 
210   return reinterpret_cast<Dtd*>(dtd->_private);
211 }
212 
set_internal_subset(const Glib::ustring & name,const Glib::ustring & external_id,const Glib::ustring & system_id)213 void Document::set_internal_subset(const Glib::ustring& name,
214                                    const Glib::ustring& external_id,
215                                    const Glib::ustring& system_id)
216 {
217   auto dtd = xmlCreateIntSubset(impl_,
218 				   (const xmlChar*)name.c_str(),
219 				   external_id.empty() ? nullptr : (const xmlChar*)external_id.c_str(),
220 				   system_id.empty() ? nullptr : (const xmlChar*)system_id.c_str());
221 
222   if (dtd && !dtd->_private)
223     dtd->_private = new Dtd(dtd);
224 }
225 
get_root_node() const226 Element* Document::get_root_node() const
227 {
228   auto root = xmlDocGetRootElement(impl_);
229   if(root == nullptr)
230     return nullptr;
231   else
232   {
233     Node::create_wrapper(root);
234     return reinterpret_cast<Element*>(root->_private);
235   }
236 }
237 
create_root_node(const Glib::ustring & name,const Glib::ustring & ns_uri,const Glib::ustring & ns_prefix)238 Element* Document::create_root_node(const Glib::ustring& name,
239                                     const Glib::ustring& ns_uri,
240                                     const Glib::ustring& ns_prefix)
241 {
242   auto node = xmlNewDocNode(impl_, nullptr, (const xmlChar*)name.c_str(), nullptr);
243   if (!node)
244     throw internal_error("Could not create root element node " + name);
245 
246   node = xmlDocSetRootElement(impl_, node);
247   if (node)
248   {
249     // An old root element node has been replaced.
250     Node::free_wrappers(node);
251     xmlFreeNode(node);
252   }
253 
254   auto element = get_root_node();
255 
256   if( !ns_uri.empty() && element )
257   {
258     element->set_namespace_declaration(ns_uri, ns_prefix);
259     element->set_namespace(ns_prefix);
260   }
261 
262   return element;
263 }
264 
create_root_node_by_import(const Node * node,bool recursive)265 Element* Document::create_root_node_by_import(const Node* node,
266 					      bool recursive)
267 {
268   if (!node)
269     return nullptr;
270 
271   //Create the node, by copying:
272   auto imported_node = xmlDocCopyNode(const_cast<xmlNode*>(node->cobj()), impl_, recursive);
273   if (!imported_node)
274   {
275     throw exception("Unable to copy the node that shall be imported");
276   }
277 
278   auto old_node = xmlDocSetRootElement(impl_, imported_node);
279   if (old_node)
280   {
281     // An old root element node has been replaced.
282     Node::free_wrappers(old_node);
283     xmlFreeNode(old_node);
284   }
285 
286   return get_root_node();
287 }
288 
add_comment(const Glib::ustring & content)289 CommentNode* Document::add_comment(const Glib::ustring& content)
290 {
291   auto child = xmlNewComment((const xmlChar*)content.c_str());
292 
293   // Use the result, because child can be freed when merging text nodes:
294   auto node = xmlAddChild((xmlNode*)impl_, child);
295   if (!node)
296   {
297     xmlFreeNode(child);
298     throw internal_error("Could not add comment node \"" + content + "\"");
299   }
300   Node::create_wrapper(node);
301   return static_cast<CommentNode*>(node->_private);
302 }
303 
add_processing_instruction(const Glib::ustring & name,const Glib::ustring & content)304 ProcessingInstructionNode* Document::add_processing_instruction(
305   const Glib::ustring& name, const Glib::ustring& content)
306 {
307   auto child = xmlNewDocPI(impl_, (const xmlChar*)name.c_str(), (const xmlChar*)content.c_str());
308   auto node = xmlAddChild((xmlNode*)impl_, child);
309   if (!node)
310   {
311     xmlFreeNode(child);
312     throw internal_error("Could not add processing instruction node " + name);
313   }
314   Node::create_wrapper(node);
315   return static_cast<ProcessingInstructionNode*>(node->_private);
316 }
317 
write_to_file(const Glib::ustring & filename,const Glib::ustring & encoding)318 void Document::write_to_file(const Glib::ustring& filename, const Glib::ustring& encoding)
319 {
320   do_write_to_file(filename, encoding, false);
321 }
322 
write_to_file_formatted(const Glib::ustring & filename,const Glib::ustring & encoding)323 void Document::write_to_file_formatted(const Glib::ustring& filename, const Glib::ustring& encoding)
324 {
325   do_write_to_file(filename, encoding, true);
326 }
327 
// Returns the document as a string, without formatting.
// Forwards to do_write_to_string() with format = false.
Glib::ustring Document::write_to_string(const Glib::ustring& encoding)
{
  const bool formatted = false;
  return do_write_to_string(encoding, formatted);
}
332 
// Returns the document as a string, with formatting.
// Forwards to do_write_to_string() with format = true.
Glib::ustring Document::write_to_string_formatted(const Glib::ustring& encoding)
{
  const bool formatted = true;
  return do_write_to_string(encoding, formatted);
}
337 
write_to_stream(std::ostream & output,const Glib::ustring & encoding)338 void Document::write_to_stream(std::ostream& output, const Glib::ustring& encoding)
339 {
340   do_write_to_stream(output, encoding.empty()?get_encoding():encoding, false);
341 }
342 
write_to_stream_formatted(std::ostream & output,const Glib::ustring & encoding)343 void Document::write_to_stream_formatted(std::ostream& output, const Glib::ustring& encoding)
344 {
345   do_write_to_stream(output, encoding.empty()?get_encoding():encoding, true);
346 }
347 
do_write_to_file(const Glib::ustring & filename,const Glib::ustring & encoding,bool format)348 void Document::do_write_to_file(
349     const Glib::ustring& filename,
350     const Glib::ustring& encoding,
351     bool format)
352 {
353   KeepBlanks k(KeepBlanks::Default);
354   xmlIndentTreeOutput = format?1:0;
355   xmlResetLastError();
356   const int result = xmlSaveFormatFileEnc(filename.c_str(), impl_,
357     get_encoding_or_utf8(encoding), format?1:0);
358 
359   if(result == -1)
360   {
361     throw exception("do_write_to_file() failed.\n" + format_xml_error());
362   }
363 }
364 
// Dumps the document to memory with xmlDocDumpFormatMemoryEnc() and returns
// the result as a Glib::ustring.
// An empty 'encoding' means "UTF-8". 'format' selects formatted output; it is
// passed to libxml2 and also written to the global xmlIndentTreeOutput.
// Throws exception if libxml2 does not deliver a buffer.
// The buffer allocated by libxml2 is released with xmlFree() on every path,
// including the one where building the returned string throws.
Glib::ustring Document::do_write_to_string(
    const Glib::ustring& encoding,
    bool format)
{
  // NOTE(review): KeepBlanks is declared in keepblanks.h; presumably it
  // restores the affected libxml2 global settings when it goes out of scope.
  KeepBlanks k(KeepBlanks::Default);
  xmlIndentTreeOutput = format?1:0;
  xmlChar* buffer = nullptr;
  int length = 0;

  // Clear old errors, so that format_xml_error() reports only this call.
  xmlResetLastError();
  xmlDocDumpFormatMemoryEnc(impl_, &buffer, &length,
    get_encoding_or_utf8(encoding), format?1:0);

  if(!buffer)
  {
    throw exception("do_write_to_string() failed.\n" + format_xml_error());
  }

  // Take ownership of the buffer. It is freed when this function is left,
  // whether by return or by an exception from the Glib::ustring constructor.
  const std::unique_ptr<xmlChar, xmlFreeFunc> buffer_owner(buffer, xmlFree);

  // Create a Glib::ustring copy of the buffer.
  // Here we force the use of Glib::ustring::ustring( InputIterator begin, InputIterator end )
  // instead of Glib::ustring::ustring( const char*, size_type ) because it
  // expects the length of the string in characters, not in bytes.
  const char* const first = reinterpret_cast<const char*>(buffer);
  const char* const last = reinterpret_cast<const char*>(buffer + length);
  return Glib::ustring(first, last);
}
395 
do_write_to_stream(std::ostream & output,const Glib::ustring & encoding,bool format)396 void Document::do_write_to_stream(std::ostream& output, const Glib::ustring& encoding, bool format)
397 {
398   // TODO assert document encoding is UTF-8 if encoding is different than UTF-8
399   OStreamOutputBuffer buffer(output, encoding);
400   xmlResetLastError();
401   const int result = xmlSaveFormatFileTo(buffer.cobj(), impl_,
402     get_encoding_or_utf8(encoding), format ? 1 : 0);
403 
404   if(result == -1)
405   {
406     throw exception("do_write_to_stream() failed.\n" + format_xml_error());
407   }
408 }
409 
set_entity_declaration(const Glib::ustring & name,XmlEntityType type,const Glib::ustring & publicId,const Glib::ustring & systemId,const Glib::ustring & content)410 void Document::set_entity_declaration(const Glib::ustring& name, XmlEntityType type,
411                               const Glib::ustring& publicId, const Glib::ustring& systemId,
412                               const Glib::ustring& content)
413 {
414   auto entity = xmlAddDocEntity( impl_, (const xmlChar*) name.c_str(), type,
415     publicId.empty() ? nullptr : (const xmlChar*)publicId.c_str(),
416     systemId.empty() ? nullptr : (const xmlChar*)systemId.c_str(),
417     (const xmlChar*) content.c_str() );
418   if (!entity)
419     throw internal_error("Could not add entity declaration " + name);
420 }
421 
process_xinclude(bool generate_xinclude_nodes)422 int Document::process_xinclude(bool generate_xinclude_nodes)
423 {
424   NodeMap node_map;
425 
426   auto root = xmlDocGetRootElement(impl_);
427 
428   find_wrappers(root, node_map);
429 
430   xmlResetLastError();
431   const int n_substitutions = xmlXIncludeProcessTreeFlags(root,
432     generate_xinclude_nodes ? 0 : XML_PARSE_NOXINCNODE);
433 
434   remove_found_wrappers(reinterpret_cast<xmlNode*>(impl_), node_map);
435 
436   // Delete wrappers of nodes that have been deleted or have had their type changed.
437   for (auto& the_pair : node_map)
438   {
439     auto node = the_pair.first;
440 
441     switch (the_pair.second)
442     {
443     case XML_DTD_NODE:
444       delete reinterpret_cast<Dtd*>(node);
445       break;
446     case XML_DOCUMENT_NODE:
447       delete reinterpret_cast<Document*>(node);
448       break;
449     default:
450       delete node; // Node*
451       break;
452     }
453   }
454 
455   if (n_substitutions < 0)
456   {
457     throw exception("Couldn't process XInclude\n" + format_xml_error());
458   }
459 
460   return n_substitutions;
461 }
462 
get_entity(const Glib::ustring & name)463 _xmlEntity* Document::get_entity(const Glib::ustring& name)
464 {
465   return xmlGetDocEntity(impl_, (const xmlChar*) name.c_str());
466 }
467 
cobj()468 _xmlDoc* Document::cobj()
469 {
470   return impl_;
471 }
472 
cobj() const473 const _xmlDoc* Document::cobj() const
474 {
475   return impl_;
476 }
477 
478 } //namespace xmlpp
479