1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This Source Code Form is subject to the terms of the Mozilla Public
4  * License, v. 2.0. If a copy of the MPL was not distributed with this
5  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
6  */
7 
8 #ifndef INCLUDED_ORCUS_XML_STRUCTURE_TREE_HPP
9 #define INCLUDED_ORCUS_XML_STRUCTURE_TREE_HPP
10 
#include "env.hpp"
#include "types.hpp"

#include <functional>
#include <memory>
#include <ostream>
#include <string>
#include <vector>
17 
18 namespace orcus {
19 
20 class xmlns_context;
21 
22 struct ORCUS_DLLPUBLIC xml_table_range_t
23 {
24     std::vector<std::string> paths;
25     std::vector<std::string> row_groups;
26 
27     xml_table_range_t();
28     ~xml_table_range_t();
29 };
30 
31 /**
32  * Tree representing the structure of elements in XML content.  Recurring
33  * elements under the same parent are represented by a single element
34  * instance.  This tree only includes elements; no attributes and content
35  * nodes appear in this tree.
36  */
37 class ORCUS_DLLPUBLIC xml_structure_tree
38 {
39     struct impl;
40     std::unique_ptr<impl> mp_impl;
41 
42 public:
43     xml_structure_tree() = delete;
44     xml_structure_tree(const xml_structure_tree&) = delete;
45     xml_structure_tree& operator= (const xml_structure_tree&) = delete;
46 
47     struct ORCUS_DLLPUBLIC entity_name
48     {
49         xmlns_id_t ns;
50         pstring name;
51 
52         entity_name();
53         entity_name(xmlns_id_t _ns, const pstring& _name);
54 
55         bool operator< (const entity_name& r) const;
56         bool operator== (const entity_name& r) const;
57 
58         struct ORCUS_DLLPUBLIC hash
59         {
60             size_t operator ()(const entity_name& val) const;
61         };
62     };
63 
64     typedef std::vector<entity_name> entity_names_type;
65 
66     struct ORCUS_DLLPUBLIC element
67     {
68         entity_name name;
69         bool repeat;
70         bool has_content;
71 
72         element();
73         element(const entity_name& _name, bool _repeat, bool _has_content);
74     };
75 
76     struct walker_impl;
77 
78     /**
79      * This class allows client to traverse the tree.
80      */
81     class ORCUS_DLLPUBLIC walker
82     {
83         friend class xml_structure_tree;
84 
85         std::unique_ptr<walker_impl> mp_impl;
86 
87         walker(const xml_structure_tree::impl& parent_impl);
88     public:
89         walker() = delete;
90         walker(const walker& r);
91         ~walker();
92         walker& operator= (const walker& r);
93 
94         /**
95          * Set current position to the root element, and return the root
96          * element.
97          *
98          * @return root element.
99          */
100         element root();
101 
102         /**
103          * Descend into specified child element.
104          *
105          * @param ns namespace of child element
106          * @param name name of child element
107          *
108          * @return child element
109          */
110         element descend(const entity_name& name);
111 
112         /**
113          * Move up to the parent element.
114          */
115         element ascend();
116 
117         /**
118          * Move to the element specified by a path expression. The path
119          * expression may be generated by
120          * <code>xml_structure_tree::walker::get_path</code>.
121          *
122          * @param path a simple XPath like expression
123          *
124          * @return element pointed to by the path.
125          */
126         element move_to(const std::string& path);
127 
128         /**
129          * Get a list of names of all child elements at the current element
130          * position.  The list of names is in order of appearance.
131          *
132          * @return list of child element names in order of appearance.
133          */
134         entity_names_type get_children();
135 
136         /**
137          * Get a list of names of all attributes that belong to current
138          * element.  The list of names is in order of appearance.
139          *
140          * @return list of attribute names in order of appearance.
141          */
142         entity_names_type get_attributes();
143 
144         /**
145          * Get a numerical, 0-based index of given XML namespace.
146          *
147          * @param ns XML namespace ID.
148          *
149          * @return numeric, 0-based index of XML namespace if found, or
150          *         <code>xml_structure_tree::walker::index_not_found</code> if
151          *         the namespace is not found in this structure.
152          */
153         size_t get_xmlns_index(xmlns_id_t ns) const;
154 
155         std::string get_xmlns_short_name(xmlns_id_t ns) const;
156 
157         /**
158          * Convert an entity name to its proper string representation.
159          *
160          * @param name entity name to convert to string.
161          *
162          * @return string representation of the entity name, including the
163          *         namespace.
164          */
165         std::string to_string(const entity_name& name) const;
166 
167         /**
168          * Get a XPath like ID for the element inside of the XML tree.
169          *
170          */
171         std::string get_path() const;
172     };
173 
174     xml_structure_tree(xmlns_context& xmlns_cxt);
175     xml_structure_tree(xml_structure_tree&& other);
176     ~xml_structure_tree();
177 
178     void parse(const char* p, size_t n);
179 
180     void dump_compact(std::ostream& os) const;
181 
182     walker get_walker() const;
183 
184     using range_handler_type = std::function<void(xml_table_range_t&&)>;
185 
186     void process_ranges(range_handler_type rh) const;
187 
188     void swap(xml_structure_tree& other);
189 };
190 
191 }
192 
193 
194 
195 #endif
196 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
197