1 // Copyright 2019 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef THIRD_PARTY_LIBXML_CHROMIUM_XML_READER_H_
6 #define THIRD_PARTY_LIBXML_CHROMIUM_XML_READER_H_
7 
8 #include <map>
9 #include <string>
10 
11 extern "C" {
12 struct _xmlTextReader;
13 }
14 
// XmlReader is a wrapper class around libxml's xmlReader,
// providing a simplified C++ API.
//
// An XmlReader holds a raw pointer to the underlying libxml reader and
// declares a destructor, so it is neither copyable nor movable: a member-wise
// copy would leave two objects referring to the same libxml reader.
class XmlReader {
 public:
  XmlReader();
  ~XmlReader();

  // Copying is disabled (which also suppresses the implicit move operations);
  // see the class comment.
  XmlReader(const XmlReader&) = delete;
  XmlReader& operator=(const XmlReader&) = delete;

  // Load a document into the reader from memory.  |input| must be UTF-8 and
  // exist for the lifetime of this object.  Returns false on error.
  // TODO(evanm): handle encodings other than UTF-8?
  bool Load(const std::string& input);

  // Load a document into the reader from a file.  Returns false on error.
  bool LoadFile(const std::string& file_path);

  // Wrappers around libxml functions -----------------------------------------

  // Read() advances to the next node.  Returns false on EOF or error.
  bool Read();

  // Next(), when pointing at an opening tag, advances to the node after
  // the matching closing tag.  Returns false on EOF or error.
  bool Next();

  // Return the depth in the tree of the current node.
  int Depth();

  // Returns the "local" name of the current node.
  // For a tag like <foo:bar>, this is the string "bar".
  std::string NodeName();

  // Returns the name of the current node.
  // For a tag like <foo:bar>, this is the string "foo:bar".
  std::string NodeFullName();

  // When pointing at a tag, retrieves the value of an attribute.
  // Returns false on failure.
  // E.g. for <foo bar:baz="a">, NodeAttribute("bar:baz", &value)
  // returns true and |value| is set to "a".
  bool NodeAttribute(const char* name, std::string* value);

  // Populates |attributes| with all the attributes of the current tag and
  // returns true. Note that namespace declarations are not reported.
  // Returns false if there are no attributes in the current tag.
  bool GetAllNodeAttributes(std::map<std::string, std::string>* attributes);

  // Populates |namespaces| with all the namespaces (prefix/URI pairs) declared
  // in the current tag and returns true. Note that the default namespace, if
  // declared in the tag, is populated with an empty prefix.
  // Returns false if there are no namespaces declared in the current tag.
  bool GetAllDeclaredNamespaces(std::map<std::string, std::string>* namespaces);

  // Sets |content| to the content of the current node if it is a #text node.
  // Returns true if the current node is a #text node, false otherwise.
  bool GetTextIfTextElement(std::string* content);

  // Sets |content| to the content of the current node if it is a #cdata node.
  // Returns true if the current node is a #cdata node, false otherwise.
  bool GetTextIfCDataElement(std::string* content);

  // Returns true if the node is an element (e.g. <foo>). Note this returns
  // false for self-closing elements (e.g. <foo/>). Use IsEmptyElement() to
  // check for those.
  // NOTE(review): libxml's reader reports <foo/> with the same element node
  // type as <foo>; confirm against the implementation whether this really
  // returns false for self-closing elements.
  bool IsElement();

  // Returns true if the node is a closing element (e.g. </foo>).
  bool IsClosingElement();

  // Returns true if the current node is an empty (self-closing) element (e.g.
  // <foo/>).
  bool IsEmptyElement();

  // Helper functions not provided by libxml ----------------------------------

  // Return the string content within an element.
  // "<foo>bar</foo>" is a sequence of three nodes:
  // (1) open tag, (2) text, (3) close tag.
  // With the reader currently at (1), this returns the text of (2),
  // and advances past (3).
  // Returns false on error.
  bool ReadElementContent(std::string* content);

  // Skip to the next opening tag, returning false if we reach a closing
  // tag or EOF first.
  // If currently on an opening tag, doesn't advance at all.
  bool SkipToElement();

 private:
  // Returns the libxml node type of the current node.
  int NodeType();

  // The underlying libxml xmlTextReader.  Spelled with the |struct| keyword so
  // the member names the global C struct directly.
  struct _xmlTextReader* reader_;
};
107 
108 #endif  // THIRD_PARTY_LIBXML_CHROMIUM_XML_READER_H_
109