1 /**
2  * Licensed to the University Corporation for Advanced Internet
3  * Development, Inc. (UCAID) under one or more contributor license
4  * agreements. See the NOTICE file distributed with this work for
5  * additional information regarding copyright ownership.
6  *
7  * UCAID licenses this file to you under the Apache License,
8  * Version 2.0 (the "License"); you may not use this file except
9  * in compliance with the License. You may obtain a copy of the
10  * License at
11  *
12  * http://www.apache.org/licenses/LICENSE-2.0
13  *
14  * Unless required by applicable law or agreed to in writing,
15  * software distributed under the License is distributed on an
16  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
17  * either express or implied. See the License for the specific
18  * language governing permissions and limitations under the License.
19  */
20 
21 /**
22  * @file xmltooling/util/XMLHelper.h
23  *
24  * A helper class for working with W3C DOM objects.
25  */
26 
27 #ifndef __xmltooling_xmlhelper_h__
28 #define __xmltooling_xmlhelper_h__
29 
30 #include <xmltooling/unicode.h>
31 
32 #include <map>
33 #include <iostream>
34 #include <xercesc/dom/DOM.hpp>
35 
36 namespace xmltooling {
37 
38     class XMLTOOL_API QName;
39     class XMLTOOL_API XMLObject;
40 
/**
 * Scoped holder for a Xerces-style resource.
 *
 * <p>Stores a raw pointer and, unless the pointer has been handed back to
 * the caller via release(), invokes <code>release()</code> on the pointee
 * when the janitor is destroyed. Janitors cannot be copied or assigned.
 *
 * @tparam T    resource type; must expose a <code>release()</code> member
 */
template<class T> class XercesJanitor
{
    // Copying is disabled: two janitors holding the same pointer would
    // both call release() on it.
    XercesJanitor(const XercesJanitor&) = delete;
    XercesJanitor& operator=(const XercesJanitor&) = delete;

    T* m_held;
public:
    /**
     * Constructor
     *
     * <p>Deliberately left implicit so existing call sites keep compiling.
     *
     * @param resource  object to release when leaving scope (may be nullptr)
     */
    XercesJanitor(T* resource) : m_held(resource) {}

    /**
     * Calls <code>release()</code> on the held resource, if there still is one.
     */
    ~XercesJanitor() {
        if (m_held)
            m_held->release();
    }

    /**
     * Returns resource held by this object without giving up ownership.
     *
     * @return  the resource held or nullptr
     */
    T* get() const {
        return m_held;
    }

    /**
     * Provides member access to the held resource.
     *
     * <p>No null check is performed; the result must not be dereferenced
     * when nothing is held.
     *
     * @return  the resource held or nullptr
     */
    T* operator->() const {
        return m_held;
    }

    /**
     * Returns resource held by this object and releases it to the caller.
     * Afterwards the janitor holds nothing and its destructor is a no-op.
     *
     * @return  the resource held or nullptr
     */
    T* release() {
        T* ret = m_held;
        m_held = nullptr;
        return ret;
    }
};
90 
91     /**
92      * A helper class for working with W3C DOM objects.
93      */
94     class XMLTOOL_API XMLHelper
95     {
96     public:
97         /**
98          * Checks if the given element has an xsi:type defined for it
99          *
100          * @param e the DOM element
101          * @return true if there is a type, false if not
102          */
103         static bool hasXSIType(const xercesc::DOMElement* e);
104 
105         /**
106          * Gets the XSI type for a given element if it has one.
107          * <p>The caller is responsible for freeing the result.
108          *
109          * @param e the element
110          * @return the type or null
111          */
112         static QName* getXSIType(const xercesc::DOMElement* e);
113 
114         /**
115          * Gets the ID attribute of a DOM element.
116          *
117          * @param domElement the DOM element
118          * @return the ID attribute or null if there isn't one
119          */
120         static xercesc::DOMAttr* getIdAttribute(const xercesc::DOMElement* domElement);
121 
122         /**
123          * Attempts to locate an XMLObject from this point downward in the tree whose
124          * XML ID matches the supplied value.
125          *
126          * @param tree  root of tree to search
127          * @param id    ID value to locate
128          * @return XMLObject in the tree with a matching ID value, or nullptr
129          */
130         static const XMLObject* getXMLObjectById(const XMLObject& tree, const XMLCh* id);
131 
132         /**
133          * Attempts to locate an XMLObject from this point downward in the tree whose
134          * XML ID matches the supplied value.
135          *
136          * @param tree  root of tree to search
137          * @param id    ID value to locate
138          * @return XMLObject in the tree with a matching ID value, or nullptr
139          */
140         static XMLObject* getXMLObjectById(XMLObject& tree, const XMLCh* id);
141 
142         /**
143          * Returns the set of non-visibly-used namespace declarations found in a tree.
144          * <p>Each member of the set is a prefix/URI pair.
145          *
146          * @param tree      root of tree to search
147          * @param prefixes  container to store declarations
148          */
149         static void getNonVisiblyUsedPrefixes(const XMLObject& tree, std::map<xstring,xstring>& prefixes);
150 
151         /**
152          * Gets the QName for the given DOM node.
153          *
154          * @param domNode the DOM node
155          * @return the QName for the element or null if the element was null
156          */
157         static QName* getNodeQName(const xercesc::DOMNode* domNode);
158 
159         /**
160          * Constructs a QName from a node's value.
161          * <p>The caller is responsible for freeing the result.
162          *
163          * @param domNode the DOM node with a QName value
164          * @return a QName from a node's value, or null if the given node has no value
165          */
166         static QName* getNodeValueAsQName(const xercesc::DOMNode* domNode);
167 
168         /**
169          * Returns a boolean based on a node's value.
170          *
171          * @param domNode   the DOM node with a boolean (1/0/true/false) value
172          * @param def       value to return if the node is null/missing
173          * @return a bool value based on the node's value, or a default value
174          */
175         static bool getNodeValueAsBool(const xercesc::DOMNode* domNode, bool def);
176 
177         /**
178          * Appends the child Element to the parent Element,
179          * importing the child Element into the parent's Document if needed.
180          *
181          * @param parentElement the parent Element
182          * @param childElement the child Element
183          * @return the child Element that was added (may be an imported copy)
184          */
185         static xercesc::DOMElement* appendChildElement(xercesc::DOMElement* parentElement, xercesc::DOMElement* childElement);
186 
187         /**
188          * Checks the qualified name of a node.
189          *
190          * @param n     node to check
191          * @param ns    namespace to compare with
192          * @param local local name to compare with
193          * @return  true iff the node's qualified name matches the other parameters
194          */
195         static bool isNodeNamed(const xercesc::DOMNode* n, const XMLCh* ns, const XMLCh* local);
196 
197         /**
198          * Returns the first matching child element of the node if any.
199          *
200          * @param n         node to check
201          * @param localName local name to compare with or nullptr for any match
202          * @return  the first matching child node of type Element, or nullptr
203          */
204         static xercesc::DOMElement* getFirstChildElement(const xercesc::DOMNode* n, const XMLCh* localName=nullptr);
205 
206         /**
207          * Returns the last matching child element of the node if any.
208          *
209          * @param n     node to check
210          * @param localName local name to compare with or nullptr for any match
211          * @return  the last matching child node of type Element, or nullptr
212          */
213         static xercesc::DOMElement* getLastChildElement(const xercesc::DOMNode* n, const XMLCh* localName=nullptr);
214 
215         /**
216          * Returns the next matching sibling element of the node if any.
217          *
218          * @param n     node to check
219          * @param localName local name to compare with or nullptr for any match
220          * @return  the next matching sibling node of type Element, or nullptr
221          */
222         static xercesc::DOMElement* getNextSiblingElement(const xercesc::DOMNode* n, const XMLCh* localName=nullptr);
223 
224         /**
225          * Returns the previous matching sibling element of the node if any.
226          *
227          * @param n     node to check
228          * @param localName local name to compare with or nullptr for any match
229          * @return  the previous matching sibling node of type Element, or nullptr
230          */
231         static xercesc::DOMElement* getPreviousSiblingElement(const xercesc::DOMNode* n, const XMLCh* localName=nullptr);
232 
233         /**
234          * Returns the first matching child element of the node if any.
235          *
236          * @param n         node to check
237          * @param ns        namespace to compare with
238          * @param localName local name to compare with
239          * @return  the first matching child node of type Element, or nullptr
240          */
241         static xercesc::DOMElement* getFirstChildElement(const xercesc::DOMNode* n, const XMLCh* ns, const XMLCh* localName);
242 
243         /**
244          * Returns the last matching child element of the node if any.
245          *
246          * @param n         node to check
247          * @param ns        namespace to compare with
248          * @param localName local name to compare with
249          * @return  the last matching child node of type Element, or nullptr
250          */
251         static xercesc::DOMElement* getLastChildElement(const xercesc::DOMNode* n, const XMLCh* ns, const XMLCh* localName);
252 
253         /**
254          * Returns the next matching sibling element of the node if any.
255          *
256          * @param n         node to check
257          * @param ns        namespace to compare with
258          * @param localName local name to compare with
259          * @return  the next matching sibling node of type Element, or nullptr
260          */
261         static xercesc::DOMElement* getNextSiblingElement(const xercesc::DOMNode* n, const XMLCh* ns, const XMLCh* localName);
262 
263         /**
264          * Returns the previous matching sibling element of the node if any.
265          *
266          * @param n         node to check
267          * @param ns        namespace to compare with
268          * @param localName local name to compare with
269          * @return  the previous matching sibling node of type Element, or nullptr
270          */
271         static xercesc::DOMElement* getPreviousSiblingElement(const xercesc::DOMNode* n, const XMLCh* ns, const XMLCh* localName);
272 
273         /**
274         * Returns all text content inside an element, regardless of the number of
275         * child nodes involved, up to the first child element encountered if any.
276 
277         * Because this may require merging data, the text is returned in a separately
278         * allocated buffer the caller must free using delete[].
279         *
280         * @param e   the element
281         * @return an array allocated with new[] containing the text
282         */
283         static XMLCh* getWholeTextContent(const xercesc::DOMElement* e);
284 
285         /**
286          * Returns the content of the first Text node found in the element, if any,
287          * acting on the assumption there can be no embedded comment, CDATA, or other
288          * interfering node types interrupting the text.
289          *
290          * @param e     element to examine
291          * @return the content of the first Text node found, or nullptr
292          */
293         static const XMLCh* getTextContent(const xercesc::DOMElement* e);
294 
295         /**
296          * Returns the content of the specified attribute node as a string,
297          * or the default value, if the attribute is not present.
298          *
299          * @param e         element to examine (may be nullptr)
300          * @param defValue  default value to return
301          * @param localName local name of attribute
302          * @param ns        namespace of attribute
303          * @return  the specified attribute's value, or the specified default
304          */
305         static std::string getAttrString(
306             const xercesc::DOMElement* e, const char* defValue, const XMLCh* localName, const XMLCh* ns=nullptr
307             );
308 
309         /**
310          * Returns the content of the specified attribute node as an integer,
311          * or the default value, if the attribute is not present.
312          *
313          * @param e         element to examine (may be nullptr)
314          * @param defValue  default value to return
315          * @param localName local name of attribute
316          * @param ns        namespace of attribute
317          * @return  the specified attribute's value, or the specified default
318          */
319         static int getAttrInt(
320             const xercesc::DOMElement* e, int defValue, const XMLCh* localName, const XMLCh* ns=nullptr
321             );
322 
323         /**
324          * Returns the content of the specified attribute node as a boolean,
325          * or the default value, if the attribute is not present.
326          *
327          * @param e         element to examine (may be nullptr)
328          * @param defValue  default value to return
329          * @param localName local name of attribute
330          * @param ns        namespace of attribute
331          * @return  the specified attribute's value, or the specified default
332          */
333         static bool getAttrBool(
334             const xercesc::DOMElement* e, bool defValue, const XMLCh* localName, const XMLCh* ns=nullptr
335             );
336 
337         /**
338          *
339          * Returns the value of the attribute "caseSensitive" (if present).  Also interogates
340          * the (deprecated) "ignoreCase" attribute, warning if it is encountered.
341          *
342          * @param e         element to examine (may be nullptr)
343          * @param defValue  default value to return
344          * @param ns        namespace of attribute
345          * @return whatever "caseSensitive" or "ignoreCase" specifies, or the specified default
346          */
347         static bool getCaseSensitive(
348             const xercesc::DOMElement* e, bool defValue, const XMLCh* ns=nullptr
349             );
350 
351         /**
352          *
353          * Perform XMLEncoding on the input string into the provided stream.
354          * Symbols handled: " (&quot;) < (&lt;) > (&gt;) & (&amp;)
355          *
356          * @param os  where to put the encoded string
357          * @param str what to encode
358          */
359         static void encode(std::ostream& os, const char* str);
360 
361         /**
362         *
363         * Perform XMLEncoding on the input string.
364         * Symbols handled: " (&quot;) < (&lt;) > (&gt;) & (&amp;)
365         *
366         * @param  str what to encode
367         * @return the encoded input
368         */
369         static std::string encode(const char* str);
370 
371         /**
372          * Serializes the DOM node provided into a buffer using UTF-8 encoding and
373          * the default XML serializer available. No manipulation or formatting is applied.
374          *
375          * @param n         node to serialize
376          * @param buf       buffer to serialize element into
377          * @param pretty    enable pretty printing if supported
378          */
379         static void serialize(const xercesc::DOMNode* n, std::string& buf, bool pretty=false);
380 
381         /**
382          * Serializes the DOM node provided to a stream using UTF-8 encoding and
383          * the default XML serializer available. No manipulation or formatting is applied.
384          *
385          * @param n         node to serialize
386          * @param out       stream to serialize element into
387          * @param pretty    enable pretty printing if supported
388          * @return reference to output stream
389          */
390         static std::ostream& serialize(const xercesc::DOMNode* n, std::ostream& out, bool pretty=false);
391 
392         /**
393         * Deflates data in accordance with RFC1951. The caller must free the
394         * resulting buffer using delete[]
395         *
396         * @param in        the data to compress
397         * @param in_len    length of input data
398         * @param out_len   will contain the length of the resulting data
399         * @return  allocated buffer of out_len bytes containing deflated data
400         */
401         static char* deflate(char* in, unsigned int in_len, unsigned int* out_len);
402 
403         /**
404         * Inflates data compressed in accordance with RFC1951 and sends the
405         * results to an output stream.
406         *
407         * @param in        the data to inflate
408         * @param in_len    length of input data
409         * @param out       reference to output stream to receive data
410         * @return  number of bytes written to stream
411         */
412         static unsigned int inflate(char* in, unsigned int in_len, std::ostream& out);
413    };
414 
415     /**
416      * Serializes the DOM node provided to a stream using UTF-8 encoding and
417      * the default XML serializer available. No manipulation or formatting is applied.
418      *
419      * @param n      node to serialize
420      * @param ostr   stream to serialize element into
421      * @return reference to output stream
422      */
423     extern XMLTOOL_API std::ostream& operator<<(std::ostream& ostr, const xercesc::DOMNode& n);
424 
425     /**
426      * Marshalls and serializes the XMLObject provided to a stream using UTF-8 encoding and
427      * the default XML serializer available. No manipulation or formatting is applied.
428      *
429      * <p>The marshaller operation takes no parameters.
430      *
431      * @param obj    object to serialize
432      * @param ostr   stream to serialize object into
433      * @return reference to output stream
434      */
435     extern XMLTOOL_API std::ostream& operator<<(std::ostream& ostr, const XMLObject& obj);
436 };
437 
438 #endif /* __xmltooling_xmlhelper_h__ */
439