// Copyright 2019 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef THIRD_PARTY_LIBXML_CHROMIUM_XML_READER_H_
#define THIRD_PARTY_LIBXML_CHROMIUM_XML_READER_H_

#include <map>
#include <string>

extern "C" {
struct _xmlTextReader;
}

// XmlReader is a wrapper class around libxml's xmlReader,
// providing a simplified C++ API.
class XmlReader {
 public:
  XmlReader();
  ~XmlReader();

  // Not copyable. The class holds a raw |reader_| pointer and declares a
  // destructor; an implicitly generated copy would duplicate that pointer so
  // that two objects refer to the same libxml reader (and, assuming the
  // destructor releases it, free it twice).
  XmlReader(const XmlReader&) = delete;
  XmlReader& operator=(const XmlReader&) = delete;

  // Load a document into the reader from memory.  |input| must be UTF-8 and
  // exist for the lifetime of this object.  Returns false on error.
  // TODO(evanm): handle encodings other than UTF-8?
  bool Load(const std::string& input);

  // Load a document into the reader from a file.  Returns false on error.
  bool LoadFile(const std::string& file_path);

  // Wrappers around libxml functions -----------------------------------------

  // Read() advances to the next node.  Returns false on EOF or error.
  bool Read();

  // Next(), when pointing at an opening tag, advances to the node after
  // the matching closing tag.  Returns false on EOF or error.
  bool Next();

  // Return the depth in the tree of the current node.
  int Depth();

  // Returns the "local" name of the current node.
  // For a tag like <foo:bar>, this is the string "bar".
  std::string NodeName();

  // Returns the name of the current node.
  // For a tag like <foo:bar>, this is the string "foo:bar".
  std::string NodeFullName();

  // When pointing at a tag, retrieves the value of an attribute.
  // Returns false on failure.
  // E.g. for <foo bar:baz="a">, NodeAttribute("bar:baz", &value)
  // returns true and |value| is set to "a".
  bool NodeAttribute(const char* name, std::string* value);

  // Populates |attributes| with all the attributes of the current tag and
  // returns true. Note that namespace declarations are not reported.
  // Returns false if there are no attributes in the current tag.
  bool GetAllNodeAttributes(std::map<std::string, std::string>* attributes);

  // Populates |namespaces| with all the namespaces (prefix/URI pairs) declared
  // in the current tag and returns true. Note that the default namespace, if
  // declared in the tag, is populated with an empty prefix.
  // Returns false if there are no namespaces declared in the current tag.
  bool GetAllDeclaredNamespaces(std::map<std::string, std::string>* namespaces);

  // Sets |content| to the content of the current node if it is a #text/#cdata
  // node.
  // Returns true if the current node is a #text/#cdata node, false otherwise.
  bool GetTextIfTextElement(std::string* content);
  bool GetTextIfCDataElement(std::string* content);

  // Returns true if the node is an element (e.g. <foo>). Note this returns
  // false for self-closing elements (e.g. <foo/>). Use IsEmptyElement() to
  // check for those.
  bool IsElement();

  // Returns true if the node is a closing element (e.g. </foo>).
  bool IsClosingElement();

  // Returns true if the current node is an empty (self-closing) element (e.g.
  // <foo/>).
  bool IsEmptyElement();

  // Helper functions not provided by libxml ----------------------------------

  // Return the string content within an element.
  // "<foo>bar</foo>" is a sequence of three nodes:
  // (1) open tag, (2) text, (3) close tag.
  // With the reader currently at (1), this returns the text of (2),
  // and advances past (3).
  // Returns false on error.
  bool ReadElementContent(std::string* content);

  // Skip to the next opening tag, returning false if we reach a closing
  // tag or EOF first.
  // If currently on an opening tag, doesn't advance at all.
  bool SkipToElement();

 private:
  // Returns the libxml node type of the current node.
  int NodeType();

  // The underlying libxml xmlTextReader.
  _xmlTextReader* reader_;
};

#endif  // THIRD_PARTY_LIBXML_CHROMIUM_XML_READER_H_