1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ 2 /* 3 * This Source Code Form is subject to the terms of the Mozilla Public 4 * License, v. 2.0. If a copy of the MPL was not distributed with this 5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. 6 */ 7 8 #ifndef INCLUDED_ORCUS_XML_STRUCTURE_TREE_HPP 9 #define INCLUDED_ORCUS_XML_STRUCTURE_TREE_HPP 10 11 #include "env.hpp" 12 #include "types.hpp" 13 14 #include <ostream> 15 #include <memory> 16 #include <functional> 17 18 namespace orcus { 19 20 class xmlns_context; 21 22 struct ORCUS_DLLPUBLIC xml_table_range_t 23 { 24 std::vector<std::string> paths; 25 std::vector<std::string> row_groups; 26 27 xml_table_range_t(); 28 ~xml_table_range_t(); 29 }; 30 31 /** 32 * Tree representing the structure of elements in XML content. Recurring 33 * elements under the same parent are represented by a single element 34 * instance. This tree only includes elements; no attributes and content 35 * nodes appear in this tree. 36 */ 37 class ORCUS_DLLPUBLIC xml_structure_tree 38 { 39 struct impl; 40 std::unique_ptr<impl> mp_impl; 41 42 public: 43 xml_structure_tree() = delete; 44 xml_structure_tree(const xml_structure_tree&) = delete; 45 xml_structure_tree& operator= (const xml_structure_tree&) = delete; 46 47 struct ORCUS_DLLPUBLIC entity_name 48 { 49 xmlns_id_t ns; 50 pstring name; 51 52 entity_name(); 53 entity_name(xmlns_id_t _ns, const pstring& _name); 54 55 bool operator< (const entity_name& r) const; 56 bool operator== (const entity_name& r) const; 57 58 struct ORCUS_DLLPUBLIC hash 59 { 60 size_t operator ()(const entity_name& val) const; 61 }; 62 }; 63 64 typedef std::vector<entity_name> entity_names_type; 65 66 struct ORCUS_DLLPUBLIC element 67 { 68 entity_name name; 69 bool repeat; 70 bool has_content; 71 72 element(); 73 element(const entity_name& _name, bool _repeat, bool _has_content); 74 }; 75 76 struct walker_impl; 77 78 /** 79 * This class allows client to traverse the tree. 80 */ 81 class ORCUS_DLLPUBLIC walker 82 { 83 friend class xml_structure_tree; 84 85 std::unique_ptr<walker_impl> mp_impl; 86 87 walker(const xml_structure_tree::impl& parent_impl); 88 public: 89 walker() = delete; 90 walker(const walker& r); 91 ~walker(); 92 walker& operator= (const walker& r); 93 94 /** 95 * Set current position to the root element, and return the root 96 * element. 97 * 98 * @return root element. 99 */ 100 element root(); 101 102 /** 103 * Descend into specified child element. 104 * 105 * @param ns namespace of child element 106 * @param name name of child element 107 * 108 * @return child element 109 */ 110 element descend(const entity_name& name); 111 112 /** 113 * Move up to the parent element. 114 */ 115 element ascend(); 116 117 /** 118 * Move to the element specified by a path expression. The path 119 * expression may be generated by 120 * <code>xml_structure_tree::walker::get_path</code>. 121 * 122 * @param path a simple XPath like expression 123 * 124 * @return element pointed to by the path. 125 */ 126 element move_to(const std::string& path); 127 128 /** 129 * Get a list of names of all child elements at the current element 130 * position. The list of names is in order of appearance. 131 * 132 * @return list of child element names in order of appearance. 133 */ 134 entity_names_type get_children(); 135 136 /** 137 * Get a list of names of all attributes that belong to current 138 * element. The list of names is in order of appearance. 139 * 140 * @return list of attribute names in order of appearance. 141 */ 142 entity_names_type get_attributes(); 143 144 /** 145 * Get a numerical, 0-based index of given XML namespace. 146 * 147 * @param ns XML namespace ID. 148 * 149 * @return numeric, 0-based index of XML namespace if found, or 150 * <code>xml_structure_tree::walker::index_not_found</code> if 151 * the namespace is not found in this structure. 152 */ 153 size_t get_xmlns_index(xmlns_id_t ns) const; 154 155 std::string get_xmlns_short_name(xmlns_id_t ns) const; 156 157 /** 158 * Convert an entity name to its proper string representation. 159 * 160 * @param name entity name to convert to string. 161 * 162 * @return string representation of the entity name, including the 163 * namespace. 164 */ 165 std::string to_string(const entity_name& name) const; 166 167 /** 168 * Get a XPath like ID for the element inside of the XML tree. 169 * 170 */ 171 std::string get_path() const; 172 }; 173 174 xml_structure_tree(xmlns_context& xmlns_cxt); 175 xml_structure_tree(xml_structure_tree&& other); 176 ~xml_structure_tree(); 177 178 void parse(const char* p, size_t n); 179 180 void dump_compact(std::ostream& os) const; 181 182 walker get_walker() const; 183 184 using range_handler_type = std::function<void(xml_table_range_t&&)>; 185 186 void process_ranges(range_handler_type rh) const; 187 188 void swap(xml_structure_tree& other); 189 }; 190 191 } 192 193 194 195 #endif 196 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */ 197