1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
6 */
7
8 #include "orcus/xml_namespace.hpp"
9 #include "orcus/exception.hpp"
10 #include "orcus/string_pool.hpp"
11 #include "orcus/global.hpp"
12
13 #include <unordered_map>
14 #include <vector>
15 #include <limits>
16 #include <sstream>
17 #include <algorithm>
18 #include <cassert>
19
20 #define ORCUS_DEBUG_XML_NAMESPACE 0
21
22 using namespace std;
23
24 #if ORCUS_DEBUG_XML_NAMESPACE
25 #include <cstdio>
26 #include <iostream>
27 #endif
28
29 namespace orcus {
30
31 namespace {
32
33 #if ORCUS_DEBUG_XML_NAMESPACE
/**
 * Debug helper: write every key of a map-like container to standard output
 * in the form "keys: ('k1' 'k2' ...)".  No trailing newline is written.
 *
 * @param map_store container whose elements expose their key as `first`.
 */
template<typename MapT>
void print_map_keys(const MapT& map_store)
{
    std::cout << "keys: (";

    // Empty before the first key, a single space before every later one.
    const char* separator = "";
    for (const auto& entry : map_store)
    {
        std::cout << separator << "'" << entry.first << "'";
        separator = " ";
    }

    std::cout << ")";
}
50 #endif
51
52 }
53
54 typedef std::unordered_map<pstring, size_t, pstring::hash> strid_map_type;
55
56 struct xmlns_repository::impl
57 {
58 size_t m_predefined_ns_size;
59 string_pool m_pool; /// storage of live string instances.
60 std::vector<pstring> m_identifiers; /// map strings to numerical identifiers.
61 strid_map_type m_strid_map; /// string-to-numerical identifiers map for quick lookup.
62
implorcus::xmlns_repository::impl63 impl() : m_predefined_ns_size(0) {}
64 };
65
xmlns_repository()66 xmlns_repository::xmlns_repository() : mp_impl(orcus::make_unique<impl>()) {}
~xmlns_repository()67 xmlns_repository::~xmlns_repository() {}
68
intern(const pstring & uri)69 xmlns_id_t xmlns_repository::intern(const pstring& uri)
70 {
71 // See if the uri is already registered.
72 strid_map_type::iterator it = mp_impl->m_strid_map.find(uri);
73 if (it != mp_impl->m_strid_map.end())
74 return it->first.get();
75
76 try
77 {
78 std::pair<pstring, bool> r = mp_impl->m_pool.intern(uri);
79 pstring uri_interned = r.first;
80 if (!uri_interned.empty())
81 {
82 // Intern successful.
83 if (r.second)
84 {
85 // This is a new instance. Assign a numerical identifier.
86 mp_impl->m_strid_map.insert(
87 strid_map_type::value_type(r.first, mp_impl->m_identifiers.size()));
88 #if ORCUS_DEBUG_XML_NAMESPACE
89 cout << "xmlns_repository::intern: uri='" << uri_interned << "' (" << mp_impl->m_identifiers.size() << ")" << endl;
90 #endif
91 mp_impl->m_identifiers.push_back(r.first);
92
93 #if ORCUS_DEBUG_XML_NAMESPACE
94 cout << "pool size=" << mp_impl->m_pool.size() << ", predefined ns size=" << mp_impl->m_predefined_ns_size <<
95 ", identifiers size=" << mp_impl->m_identifiers.size() << ", map size=" << mp_impl->m_strid_map.size() << endl;
96 #endif
97 assert(mp_impl->m_pool.size()+mp_impl->m_predefined_ns_size == mp_impl->m_identifiers.size());
98 assert(mp_impl->m_pool.size()+mp_impl->m_predefined_ns_size == mp_impl->m_strid_map.size());
99 }
100 return uri_interned.get();
101 }
102 }
103 catch (const general_error&)
104 {
105 }
106
107 return XMLNS_UNKNOWN_ID;
108 }
109
add_predefined_values(const xmlns_id_t * predefined_ns)110 void xmlns_repository::add_predefined_values(const xmlns_id_t* predefined_ns)
111 {
112 if (!predefined_ns)
113 return;
114
115 const xmlns_id_t* val = &predefined_ns[0];
116 for (; *val; ++val)
117 {
118 pstring s(*val);
119 mp_impl->m_strid_map.insert(
120 strid_map_type::value_type(s, mp_impl->m_identifiers.size()));
121 mp_impl->m_identifiers.push_back(s);
122
123 ++mp_impl->m_predefined_ns_size;
124
125 #if ORCUS_DEBUG_XML_NAMESPACE
126 cout << "xlmns_repository: predefined ns='" << s << "'" << endl;
127 #endif
128 }
129 }
130
create_context()131 xmlns_context xmlns_repository::create_context()
132 {
133 return xmlns_context(*this);
134 }
135
get_identifier(size_t index) const136 xmlns_id_t xmlns_repository::get_identifier(size_t index) const
137 {
138 if (index >= mp_impl->m_identifiers.size())
139 return XMLNS_UNKNOWN_ID;
140
141 // All identifier strings are interned which means they are all null-terminated.
142 return mp_impl->m_identifiers[index].get();
143 }
144
get_short_name(xmlns_id_t ns_id) const145 string xmlns_repository::get_short_name(xmlns_id_t ns_id) const
146 {
147 size_t index = get_index(ns_id);
148 return get_short_name(index);
149 }
150
get_short_name(size_t index) const151 string xmlns_repository::get_short_name(size_t index) const
152 {
153 if (index == index_not_found)
154 return string("???");
155
156 ostringstream os;
157 os << "ns" << index;
158 return os.str();
159 }
160
get_index(xmlns_id_t ns_id) const161 size_t xmlns_repository::get_index(xmlns_id_t ns_id) const
162 {
163 if (!ns_id)
164 return index_not_found;
165
166 strid_map_type::const_iterator it = mp_impl->m_strid_map.find(pstring(ns_id));
167 if (it == mp_impl->m_strid_map.end())
168 return index_not_found;
169
170 return it->second;
171 }
172
173 typedef std::vector<xmlns_id_t> xmlns_list_type;
174 typedef std::unordered_map<pstring, xmlns_list_type, pstring::hash> alias_map_type;
175
176 struct xmlns_context::impl
177 {
178 xmlns_repository& m_repo;
179 xmlns_list_type m_all_ns; /// all namespaces ever used in this context.
180 xmlns_list_type m_default;
181 alias_map_type m_map;
182
183 bool m_trim_all_ns;
184
implorcus::xmlns_context::impl185 impl(xmlns_repository& repo) : m_repo(repo), m_trim_all_ns(true) {}
implorcus::xmlns_context::impl186 impl(const impl& r) :
187 m_repo(r.m_repo), m_all_ns(r.m_all_ns), m_default(r.m_default), m_map(r.m_map), m_trim_all_ns(r.m_trim_all_ns) {}
188 };
189
/// Create an empty context bound to the given repository.
xmlns_context::xmlns_context(xmlns_repository& repo) :
    mp_impl(orcus::make_unique<impl>(repo))
{
}

/// Copy a context; the private state is duplicated rather than shared.
xmlns_context::xmlns_context(const xmlns_context& r) :
    mp_impl(orcus::make_unique<impl>(*r.mp_impl))
{
}

/// Defined in this translation unit, where impl is a complete type.
xmlns_context::~xmlns_context() = default;
193
push(const pstring & key,const pstring & uri)194 xmlns_id_t xmlns_context::push(const pstring& key, const pstring& uri)
195 {
196 #if ORCUS_DEBUG_XML_NAMESPACE
197 cout << "xmlns_context::push: key='" << key << "', uri='" << uri << "'" << endl;
198 #endif
199 mp_impl->m_trim_all_ns = true;
200
201 pstring uri_interned = mp_impl->m_repo.intern(uri);
202
203 if (key.empty())
204 {
205 // empty key value is associated with default namespace.
206 mp_impl->m_default.push_back(uri_interned.get());
207 mp_impl->m_all_ns.push_back(uri_interned.get());
208 return mp_impl->m_default.back();
209 }
210
211 // See if this key already exists.
212 alias_map_type::iterator it = mp_impl->m_map.find(key);
213 if (it == mp_impl->m_map.end())
214 {
215 // This is the first time this key is used.
216 xmlns_list_type nslist;
217 nslist.push_back(uri_interned.get());
218 mp_impl->m_all_ns.push_back(uri_interned.get());
219 std::pair<alias_map_type::iterator,bool> r =
220 mp_impl->m_map.insert(alias_map_type::value_type(key, nslist));
221
222 if (!r.second)
223 // insertion failed.
224 throw general_error("Failed to insert new namespace.");
225
226 return nslist.back();
227 }
228
229 // The key already exists.
230 xmlns_list_type& nslist = it->second;
231 nslist.push_back(uri_interned.get());
232 mp_impl->m_all_ns.push_back(uri_interned.get());
233 return nslist.back();
234 }
235
pop(const pstring & key)236 void xmlns_context::pop(const pstring& key)
237 {
238 #if ORCUS_DEBUG_XML_NAMESPACE
239 cout << "xmlns_context::pop: key='" << key << "'" << endl;
240 #endif
241 if (key.empty())
242 {
243 // empty key value is associated with default namespace.
244 if (mp_impl->m_default.empty())
245 throw general_error("default namespace stack is empty.");
246
247 mp_impl->m_default.pop_back();
248 return;
249 }
250
251 // See if this key really exists.
252 alias_map_type::iterator it = mp_impl->m_map.find(key);
253 if (it == mp_impl->m_map.end())
254 throw general_error("failed to find the key.");
255
256 xmlns_list_type& nslist = it->second;
257 if (nslist.empty())
258 throw general_error("namespace stack for this key is empty.");
259
260 nslist.pop_back();
261 }
262
get(const pstring & key) const263 xmlns_id_t xmlns_context::get(const pstring& key) const
264 {
265 #if ORCUS_DEBUG_XML_NAMESPACE
266 cout << "xmlns_context::get: alias='" << key << "', default ns stack size="
267 << mp_impl->m_default.size() << ", non-default alias count=" << mp_impl->m_map.size();
268 cout << ", ";
269 print_map_keys(mp_impl->m_map);
270 cout << endl;
271 #endif
272 if (key.empty())
273 return mp_impl->m_default.empty() ? XMLNS_UNKNOWN_ID : mp_impl->m_default.back();
274
275 alias_map_type::const_iterator it = mp_impl->m_map.find(key);
276 if (it == mp_impl->m_map.end())
277 {
278 #if ORCUS_DEBUG_XML_NAMESPACE
279 cout << "xmlns_context::get: alias not in this context" << endl;
280 #endif
281 return XMLNS_UNKNOWN_ID;
282 }
283
284 #if ORCUS_DEBUG_XML_NAMESPACE
285 cout << "xmlns_context::get: alias stack size=" << it->second.size() << endl;
286 #endif
287 return it->second.empty() ? XMLNS_UNKNOWN_ID : it->second.back();
288 }
289
get_index(xmlns_id_t ns_id) const290 size_t xmlns_context::get_index(xmlns_id_t ns_id) const
291 {
292 return mp_impl->m_repo.get_index(ns_id);
293 }
294
get_short_name(xmlns_id_t ns_id) const295 string xmlns_context::get_short_name(xmlns_id_t ns_id) const
296 {
297 return mp_impl->m_repo.get_short_name(ns_id);
298 }
299
get_alias(xmlns_id_t ns_id) const300 pstring xmlns_context::get_alias(xmlns_id_t ns_id) const
301 {
302 alias_map_type::const_iterator it = mp_impl->m_map.begin(), it_end = mp_impl->m_map.end();
303 for (; it != it_end; ++it)
304 {
305 const xmlns_list_type& lst = it->second;
306 if (lst.empty())
307 continue;
308
309 if (lst.back() == ns_id)
310 return it->first;
311 }
312
313 return pstring();
314 }
315
316 namespace {
317
318 #if ORCUS_DEBUG_XML_NAMESPACE
319 struct print_ns : std::unary_function<xmlns_id_t, void>
320 {
operator ()orcus::__anon87eaaa1f0211::print_ns321 void operator() (xmlns_id_t ns_id) const
322 {
323 const char* p = ns_id;
324 printf("%p: %s\n", p, p);
325 }
326 };
327 #endif
328
329 struct ns_item
330 {
331 size_t index;
332 xmlns_id_t ns;
333
ns_itemorcus::__anon87eaaa1f0211::ns_item334 ns_item(size_t _index, xmlns_id_t _ns) : index(_index), ns(_ns) {}
335 };
336
337 struct less_ns_by_index : binary_function<ns_item, ns_item, bool>
338 {
operator ()orcus::__anon87eaaa1f0211::less_ns_by_index339 bool operator() (const ns_item& left, const ns_item& right) const
340 {
341 return left.index < right.index;
342 }
343 };
344
345 class push_back_ns_to_item : unary_function<xmlns_id_t, void>
346 {
347 vector<ns_item>& m_store;
348 const xmlns_context& m_cxt;
349 public:
push_back_ns_to_item(vector<ns_item> & store,const xmlns_context & cxt)350 push_back_ns_to_item(vector<ns_item>& store, const xmlns_context& cxt) : m_store(store), m_cxt(cxt) {}
operator ()(xmlns_id_t ns)351 void operator() (xmlns_id_t ns)
352 {
353 size_t num_id = m_cxt.get_index(ns);
354 if (num_id != index_not_found)
355 m_store.push_back(ns_item(num_id, ns));
356 }
357 };
358
359 class push_back_item_to_ns : unary_function<ns_item, void>
360 {
361 std::vector<xmlns_id_t>& m_store;
362 public:
push_back_item_to_ns(std::vector<xmlns_id_t> & store)363 push_back_item_to_ns(std::vector<xmlns_id_t>& store) : m_store(store) {}
operator ()(const ns_item & item)364 void operator() (const ns_item& item)
365 {
366 m_store.push_back(item.ns);
367 }
368 };
369
370 }
371
get_all_namespaces() const372 std::vector<xmlns_id_t> xmlns_context::get_all_namespaces() const
373 {
374 #if ORCUS_DEBUG_XML_NAMESPACE
375 cout << "xmlns_context::get_all_namespaces: count=" << mp_impl->m_all_ns.size() << endl;
376 std::for_each(mp_impl->m_all_ns.begin(), mp_impl->m_all_ns.end(), print_ns());
377 #endif
378
379 std::vector<xmlns_id_t> nslist;
380
381 if (mp_impl->m_trim_all_ns)
382 {
383 xmlns_list_type& all_ns = mp_impl->m_all_ns;
384
385 nslist.assign(mp_impl->m_all_ns.begin(), mp_impl->m_all_ns.end());
386
387 // Sort it and remove duplicate.
388 std::sort(all_ns.begin(), all_ns.end());
389 xmlns_list_type::iterator it_unique_end =
390 std::unique(all_ns.begin(), all_ns.end());
391 all_ns.erase(it_unique_end, all_ns.end());
392
393 // Now, sort by indices.
394 vector<ns_item> items;
395 std::for_each(all_ns.begin(), all_ns.end(), push_back_ns_to_item(items, *this));
396 std::sort(items.begin(), items.end(), less_ns_by_index());
397
398 all_ns.clear();
399 std::for_each(items.begin(), items.end(), push_back_item_to_ns(all_ns));
400
401 mp_impl->m_trim_all_ns = false;
402 }
403
404 nslist.assign(mp_impl->m_all_ns.begin(), mp_impl->m_all_ns.end());
405 return nslist;
406 }
407
dump(std::ostream & os) const408 void xmlns_context::dump(std::ostream& os) const
409 {
410 vector<xmlns_id_t> nslist = get_all_namespaces();
411 vector<xmlns_id_t>::const_iterator it = nslist.begin(), it_end = nslist.end();
412 for (; it != it_end; ++it)
413 {
414 xmlns_id_t ns_id = *it;
415 size_t num_id = get_index(ns_id);
416 if (num_id == index_not_found)
417 continue;
418
419 os << "ns" << num_id << "=\"" << ns_id << '"' << endl;
420 }
421 }
422
423 }
424
425 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
426