1 /*
2  * pxml.h
3  *
4  * XML parser support
5  *
6  * Portable Windows Library
7  *
8  * Copyright (c) 2002 Equivalence Pty. Ltd.
9  *
10  * The contents of this file are subject to the Mozilla Public License
11  * Version 1.0 (the "License"); you may not use this file except in
12  * compliance with the License. You may obtain a copy of the License at
13  * http://www.mozilla.org/MPL/
14  *
15  * Software distributed under the License is distributed on an "AS IS"
16  * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
17  * the License for the specific language governing rights and limitations
18  * under the License.
19  *
20  * The Original Code is Portable Windows Library.
21  *
22  * The Initial Developer of the Original Code is Equivalence Pty. Ltd.
23  *
24  * Contributor(s): ______________________________________.
25  *
26  * $Revision: 28994 $
27  * $Author: rjongbloed $
28  * $Date: 2013-01-25 02:06:35 -0600 (Fri, 25 Jan 2013) $
29  */
30 
31 #ifndef PTLIB_PXML_H
32 #define PTLIB_PXML_H
33 
34 #ifdef P_USE_PRAGMA
35 #pragma interface
36 #endif
37 
38 #include <ptlib.h>
39 
40 #include <ptbuildopts.h>
41 
42 #ifndef P_EXPAT
43 
44 namespace PXML {
45 extern PString EscapeSpecialChars(const PString & str);
46 };
47 
48 #else
49 
50 #include <ptclib/http.h>
51 
52 ////////////////////////////////////////////////////////////
53 
// Forward declarations for the node classes defined further down; each
// class is named exactly once.
class PXMLObject;
class PXMLElement;
class PXMLData;
61 
62 ////////////////////////////////////////////////////////////
63 
64 class PXMLBase : public PObject
65 {
66   public:
67     enum {
68       DEFAULT_MAX_ENTITY_LENGTH = 4096
69     };
70 
71     enum Options {
72       NoOptions           = 0x0000,
73       Indent              = 0x0001,
74       NewLineAfterElement = 0x0002,
75       NoIgnoreWhiteSpace  = 0x0004,   ///< ignored
76       CloseExtended       = 0x0008,   ///< ignored
77       WithNS              = 0x0010,
78       FragmentOnly        = 0x0020,   ///< XML fragment, not complete document.
79       AllOptions          = 0xffff
80     };
81     __inline friend Options operator|(Options o1, Options o2) { return (Options)(((unsigned)o1) | ((unsigned)o2)); }
82     __inline friend Options operator&(Options o1, Options o2) { return (Options)(((unsigned)o1) & ((unsigned)o2)); }
83 
84     enum StandAloneType {
85       UninitialisedStandAlone = -2,
86       UnknownStandAlone = -1,
87       NotStandAlone,
88       IsStandAlone
89     };
90 
91     PXMLBase(int opts = NoOptions);
92 
SetOptions(int opts)93     void SetOptions(int opts)
94       { m_options = opts; }
95 
GetOptions()96     int GetOptions() const { return m_options; }
97 
IsNoIndentElement(const PString &)98     virtual PBoolean IsNoIndentElement(
99       const PString & /*elementName*/
100     ) const
101     {
102       return false;
103     }
104 
SetMaxEntityLength(unsigned len)105     void SetMaxEntityLength(unsigned len) { m_maxEntityLength = len; }
GetMaxEntityLength()106     unsigned GetMaxEntityLength() const { return m_maxEntityLength; }
107 
108   protected:
109     int m_options;
110     unsigned m_maxEntityLength;
111 };
112 
113 
114 class PXML : public PXMLBase
115 {
116     PCLASSINFO(PXML, PObject);
117   public:
118     PXML(
119       int options = NoOptions,
120       const char * noIndentElements = NULL
121     );
122     PXML(
123       const PString & data,
124       int options = NoOptions,
125       const char * noIndentElements = NULL
126     );
127 
128     PXML(const PXML & xml);
129 
130     ~PXML();
131 
IsLoaded()132     bool IsLoaded() const { return rootElement != NULL; }
133     bool IsDirty() const;
134 
135     bool Load(const PString & data, Options options = NoOptions);
136     bool LoadFile(const PFilePath & fn, Options options = NoOptions);
137 
OnLoaded()138     virtual void OnLoaded() { }
139 
140     bool Save(Options options = NoOptions);
141     bool Save(PString & data, Options options = NoOptions);
142     bool SaveFile(const PFilePath & fn, Options options = NoOptions);
143 
144     void RemoveAll();
145 
146     PBoolean IsNoIndentElement(
147       const PString & elementName
148     ) const;
149 
150     PString AsString() const;
151     void PrintOn(ostream & strm) const;
152     void ReadFrom(istream & strm);
153 
154 
155     PXMLElement * GetElement(const PCaselessString & name, const PCaselessString & attr, const PString & attrval) const;
156     PXMLElement * GetElement(const PCaselessString & name, PINDEX idx = 0) const;
157     PXMLElement * GetElement(PINDEX idx) const;
158     PINDEX        GetNumElements() const;
GetRootElement()159     PXMLElement * GetRootElement() const { return rootElement; }
160     PXMLElement * SetRootElement(PXMLElement * p);
161     PXMLElement * SetRootElement(const PString & documentType);
162     bool          RemoveElement(PINDEX idx);
163 
164     PCaselessString GetDocumentType() const;
165 
166 
167     enum ValidationOp {
168       EndOfValidationList,
169       DocType,
170       ElementName,
171       RequiredAttribute,
172       RequiredNonEmptyAttribute,
173       RequiredAttributeWithValue,
174       RequiredElement,
175       Subtree,
176       RequiredAttributeWithValueMatching,
177       RequiredElementWithBodyMatching,
178       OptionalElement,
179       OptionalAttribute,
180       OptionalNonEmptyAttribute,
181       OptionalAttributeWithValue,
182       OptionalAttributeWithValueMatching,
183       OptionalElementWithBodyMatching,
184       SetDefaultNamespace,
185       SetNamespace,
186 
187       RequiredAttributeWithValueMatchingEx = RequiredAttributeWithValueMatching + 0x8000,
188       OptionalAttributeWithValueMatchingEx = OptionalAttributeWithValueMatching + 0x8000,
189       RequiredElementWithBodyMatchingEx    = RequiredElementWithBodyMatching    + 0x8000,
190       OptionalElementWithBodyMatchingEx    = OptionalElementWithBodyMatching    + 0x8000
191     };
192 
193     struct ValidationContext {
194       PString m_defaultNameSpace;
195       PStringToString m_nameSpaces;
196     };
197 
198     struct ValidationInfo {
199       ValidationOp m_op;
200       const char * m_name;
201 
202       union {
203         const void     * m_placeHolder;
204         const char     * m_attributeValues;
205         ValidationInfo * m_subElement;
206         const char     * m_namespace;
207       };
208 
209       PINDEX m_minCount;
210       PINDEX m_maxCount;
211     };
212 
213     bool Validate(const ValidationInfo * validator);
214     bool ValidateElements(ValidationContext & context, PXMLElement * baseElement, const ValidationInfo * elements);
215     bool ValidateElement(ValidationContext & context, PXMLElement * element, const ValidationInfo * elements);
216     bool LoadAndValidate(const PString & body, const PXML::ValidationInfo * validator, PString & error, int options = NoOptions);
217 
GetErrorString()218     PString  GetErrorString() const { return m_errorString; }
GetErrorColumn()219     unsigned GetErrorColumn() const { return m_errorColumn; }
GetErrorLine()220     unsigned GetErrorLine() const   { return m_errorLine; }
221 
GetDocType()222     PString GetDocType() const         { return docType; }
SetDocType(const PString & v)223     void SetDocType(const PString & v) { docType = v; }
224 
GetMutex()225     PMutex & GetMutex() { return rootMutex; }
226 
227     // static methods to create XML tags
228     static PString CreateStartTag (const PString & text);
229     static PString CreateEndTag (const PString & text);
230     static PString CreateTagNoData (const PString & text);
231     static PString CreateTag (const PString & text, const PString & data);
232 
233     static PString EscapeSpecialChars(const PString & string);
234 
235   protected:
236     void Construct(int options, const char * noIndentElements);
237     PXMLElement * rootElement;
238     PMutex rootMutex;
239 
240     bool loadFromFile;
241     PFilePath loadFilename;
242     PString version, encoding;
243     StandAloneType m_standAlone;
244 
245     PStringStream m_errorString;
246     unsigned      m_errorLine;
247     unsigned      m_errorColumn;
248 
249     PSortedStringList noIndentElements;
250 
251     PString docType;
252     PString m_defaultNameSpace;
253 };
254 
255 
256 #if P_HTTP
257 class PXML_HTTP : public PXML
258 {
259     PCLASSINFO(PXML_HTTP, PXML);
260   public:
261     PXML_HTTP(
262       int options = NoOptions,
263       const char * noIndentElements = NULL
264     );
265 
266     bool StartAutoReloadURL(
267       const PURL & url,
268       const PTimeInterval & timeout,
269       const PTimeInterval & refreshTime,
270       Options options = NoOptions
271     );
272     bool StopAutoReloadURL();
GetAutoReloadStatus()273     PString GetAutoReloadStatus() { PWaitAndSignal m(autoLoadMutex); PString str = autoLoadError; return str; }
274     bool AutoLoadURL();
275     virtual void OnAutoLoad(PBoolean ok);
276 
277     bool LoadURL(const PURL & url);
278     bool LoadURL(const PURL & url, const PTimeInterval & timeout, Options options = NoOptions);
279 
280   protected:
281     PDECLARE_NOTIFIER(PTimer,  PXML_HTTP, AutoReloadTimeout);
282     PDECLARE_NOTIFIER(PThread, PXML_HTTP, AutoReloadThread);
283 
284     PTimer autoLoadTimer;
285     PURL autoloadURL;
286     PTimeInterval autoLoadWaitTime;
287     PMutex autoLoadMutex;
288     PString autoLoadError;
289 };
290 #endif // P_HTTP
291 
292 ////////////////////////////////////////////////////////////
293 
294 PARRAY(PXMLObjectArray, PXMLObject);
295 
296 class PXMLObject : public PObject {
297   PCLASSINFO(PXMLObject, PObject);
298   public:
PXMLObject(PXMLElement * par)299     PXMLObject(PXMLElement * par)
300       : parent(par) { dirty = false; }
301 
GetParent()302     PXMLElement * GetParent() const
303       { return parent; }
304 
305     PXMLObject * GetNextObject() const;
306 
SetParent(PXMLElement * newParent)307     void SetParent(PXMLElement * newParent)
308     {
309       PAssert(parent == NULL, "Cannot reparent PXMLElement");
310       parent = newParent;
311     }
312 
313     PString AsString() const;
314 
315     virtual void Output(ostream & strm, const PXMLBase & xml, int indent) const = 0;
316 
317     virtual PBoolean IsElement() const = 0;
318 
319     void SetDirty();
IsDirty()320     bool IsDirty() const { return dirty; }
321 
322     virtual PXMLObject * Clone(PXMLElement * parent) const = 0;
323 
324   protected:
325     PXMLElement * parent;
326     bool dirty;
327 };
328 
329 ////////////////////////////////////////////////////////////
330 
331 class PXMLData : public PXMLObject {
332   PCLASSINFO(PXMLData, PXMLObject);
333   public:
334     PXMLData(PXMLElement * parent, const PString & data);
335     PXMLData(PXMLElement * parent, const char * data, int len);
336 
IsElement()337     PBoolean IsElement() const    { return false; }
338 
339     void SetString(const PString & str, bool dirty = true);
340 
GetString()341     PString GetString() const           { return value; }
342 
343     void Output(ostream & strm, const PXMLBase & xml, int indent) const;
344 
345     PXMLObject * Clone(PXMLElement * parent) const;
346 
347   protected:
348     PString value;
349 };
350 
351 ////////////////////////////////////////////////////////////
352 
353 class PXMLElement : public PXMLObject {
354   PCLASSINFO(PXMLElement, PXMLObject);
355   public:
356     PXMLElement(PXMLElement * parent, const char * name = NULL);
357     PXMLElement(PXMLElement * parent, const PString & name, const PString & data);
358 
IsElement()359     PBoolean IsElement() const { return true; }
360 
361     void PrintOn(ostream & strm) const;
362     void Output(ostream & strm, const PXMLBase & xml, int indent) const;
363 
GetName()364     PCaselessString GetName() const
365       { return name; }
366 
367     /**
368         Get the completely qualified name for the element inside the
369         XML tree, for example "root:trunk:branch:subbranch:leaf".
370      */
371     PCaselessString GetPathName() const;
372 
SetName(const PString & v)373     void SetName(const PString & v)
374     { name = v; }
375 
GetSize()376     PINDEX GetSize() const
377       { return subObjects.GetSize(); }
378 
379     PXMLObject  * AddSubObject(PXMLObject * elem, bool dirty = true);
380 
381     PXMLElement * AddChild    (PXMLElement * elem, bool dirty = true);
382     PXMLData    * AddChild    (PXMLData    * elem, bool dirty = true);
383 
384     PXMLElement * AddElement(const char * name);
385     PXMLElement * AddElement(const PString & name, const PString & data);
386     PXMLElement * AddElement(const PString & name, const PString & attrName, const PString & attrVal);
387 
388     void SetAttribute(const PCaselessString & key,
389                       const PString & value,
390                       bool setDirty = true);
391 
392     PString GetAttribute(const PCaselessString & key) const;
393     PString GetKeyAttribute(PINDEX idx) const;
394     PString GetDataAttribute(PINDEX idx) const;
395     bool HasAttribute(const PCaselessString & key) const;
HasAttributes()396     bool HasAttributes() const      { return attributes.GetSize() > 0; }
GetNumAttributes()397     PINDEX GetNumAttributes() const { return attributes.GetSize(); }
398 
399     PXMLElement * GetElement(const PCaselessString & name, const PCaselessString & attr, const PString & attrval) const;
400     PXMLElement * GetElement(const PCaselessString & name, PINDEX idx = 0) const;
401     PXMLObject  * GetElement(PINDEX idx = 0) const;
402     bool          RemoveElement(PINDEX idx);
403 
404     PINDEX FindObject(const PXMLObject * ptr) const;
405 
HasSubObjects()406     bool HasSubObjects() const
407       { return subObjects.GetSize() != 0; }
408 
GetSubObjects()409     PXMLObjectArray  GetSubObjects() const
410       { return subObjects; }
411 
412     PXMLObject  * GetSubObject(PINDEX idx = 0) const
413       { return GetElement(idx); }
414 
415     PString GetData() const;
416     void SetData(const PString & data);
417     void AddData(const PString & data);
418 
419     PXMLObject * Clone(PXMLElement * parent) const;
420 
GetFilePosition(unsigned & col,unsigned & line)421     void GetFilePosition(unsigned & col, unsigned & line) const { col = column; line = lineNumber; }
SetFilePosition(unsigned col,unsigned line)422     void SetFilePosition(unsigned col,   unsigned line)         { column = col; lineNumber = line; }
423 
424     void AddNamespace(const PString & prefix, const PString & uri);
425     void RemoveNamespace(const PString & prefix);
426 
427     bool GetDefaultNamespace(PCaselessString & str) const;
428     bool GetNamespace(const PCaselessString & prefix, PCaselessString & str) const;
429     PCaselessString PrependNamespace(const PCaselessString & name) const;
430     bool GetURIForNamespace(const PCaselessString & prefix, PCaselessString & uri);
431 
432   protected:
433     PCaselessString name;
434     PStringToString attributes;
435     PXMLObjectArray subObjects;
436     bool dirty;
437     unsigned column;
438     unsigned lineNumber;
439     PStringToString m_nameSpaces;
440     PCaselessString m_defaultNamespace;
441 };
442 
443 ////////////////////////////////////////////////////////////
444 
445 class PConfig;      // stupid gcc 4 does not recognize PConfig as a class
446 
447 class PXMLSettings : public PXML
448 {
449   PCLASSINFO(PXMLSettings, PXML);
450   public:
451     PXMLSettings(Options options = NewLineAfterElement);
452     PXMLSettings(const PString & data, Options options = NewLineAfterElement);
453     PXMLSettings(const PConfig & data, Options options = NewLineAfterElement);
454 
455     bool Load(const PString & data);
456     bool LoadFile(const PFilePath & fn);
457 
458     bool Save();
459     bool Save(PString & data);
460     bool SaveFile(const PFilePath & fn);
461 
462     void SetAttribute(const PCaselessString & section, const PString & key, const PString & value);
463 
464     PString GetAttribute(const PCaselessString & section, const PString & key) const;
465     bool    HasAttribute(const PCaselessString & section, const PString & key) const;
466 
467     void ToConfig(PConfig & cfg) const;
468 };
469 
470 
471 ////////////////////////////////////////////////////////////
472 
473 class PXMLParser : public PXMLBase
474 {
475   PCLASSINFO(PXMLParser, PXMLBase);
476   public:
477     PXMLParser(int options = NoOptions);
478     ~PXMLParser();
479     bool Parse(const char * data, int dataLen, bool final);
480     void GetErrorInfo(PString & errorString, unsigned & errorCol, unsigned & errorLine);
481 
482     virtual void StartElement(const char * name, const char **attrs);
483     virtual void EndElement(const char * name);
484     virtual void AddCharacterData(const char * data, int len);
485     virtual void XmlDecl(const char * version, const char * encoding, int standAlone);
486     virtual void StartDocTypeDecl(const char * docTypeName,
487                                   const char * sysid,
488                                   const char * pubid,
489                                   int hasInternalSubSet);
490     virtual void EndDocTypeDecl();
491     virtual void StartNamespaceDeclHandler(const char * prefix, const char * uri);
492     virtual void EndNamespaceDeclHandler(const char * prefix);
493 
GetVersion()494     PString GetVersion() const  { return version; }
GetEncoding()495     PString GetEncoding() const { return encoding; }
496 
GetStandAlone()497     StandAloneType GetStandAlone() const { return m_standAlone; }
498 
499     PXMLElement * GetXMLTree() const;
500     PXMLElement * SetXMLTree(PXMLElement * newRoot);
501 
502   protected:
503     void * expat;
504     PXMLElement * rootElement;
505     bool rootOpen;
506     PXMLElement * currentElement;
507     PXMLData * lastElement;
508     PString version, encoding;
509     StandAloneType m_standAlone;
510     PStringToString m_tempNamespaceList;
511 };
512 
513 ////////////////////////////////////////////////////////////
514 
515 class PXMLStreamParser : public PXMLParser
516 {
517   PCLASSINFO(PXMLStreamParser, PXMLParser);
518   public:
519     PXMLStreamParser();
520 
521     virtual void EndElement(const char * name);
522     virtual PXML * Read(PChannel * channel);
523 
524   protected:
525     PQueue<PXML> messages;
526 };
527 
528 
529 #endif // P_EXPAT
530 
531 #endif // PTLIB_PXML_H
532 
533 
534 // End Of File ///////////////////////////////////////////////////////////////
535