1 /* 2 * pxml.h 3 * 4 * XML parser support 5 * 6 * Portable Windows Library 7 * 8 * Copyright (c) 2002 Equivalence Pty. Ltd. 9 * 10 * The contents of this file are subject to the Mozilla Public License 11 * Version 1.0 (the "License"); you may not use this file except in 12 * compliance with the License. You may obtain a copy of the License at 13 * http://www.mozilla.org/MPL/ 14 * 15 * Software distributed under the License is distributed on an "AS IS" 16 * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See 17 * the License for the specific language governing rights and limitations 18 * under the License. 19 * 20 * The Original Code is Portable Windows Library. 21 * 22 * The Initial Developer of the Original Code is Equivalence Pty. Ltd. 23 * 24 * Contributor(s): ______________________________________. 25 * 26 * $Revision: 28994 $ 27 * $Author: rjongbloed $ 28 * $Date: 2013-01-25 02:06:35 -0600 (Fri, 25 Jan 2013) $ 29 */ 30 31 #ifndef PTLIB_PXML_H 32 #define PTLIB_PXML_H 33 34 #ifdef P_USE_PRAGMA 35 #pragma interface 36 #endif 37 38 #include <ptlib.h> 39 40 #include <ptbuildopts.h> 41 42 #ifndef P_EXPAT 43 44 namespace PXML { 45 extern PString EscapeSpecialChars(const PString & str); 46 }; 47 48 #else 49 50 #include <ptclib/http.h> 51 52 //////////////////////////////////////////////////////////// 53 54 class PXMLElement; 55 class PXMLData; 56 57 58 class PXMLObject; 59 class PXMLElement; 60 class PXMLData; 61 62 //////////////////////////////////////////////////////////// 63 64 class PXMLBase : public PObject 65 { 66 public: 67 enum { 68 DEFAULT_MAX_ENTITY_LENGTH = 4096 69 }; 70 71 enum Options { 72 NoOptions = 0x0000, 73 Indent = 0x0001, 74 NewLineAfterElement = 0x0002, 75 NoIgnoreWhiteSpace = 0x0004, ///< ignored 76 CloseExtended = 0x0008, ///< ignored 77 WithNS = 0x0010, 78 FragmentOnly = 0x0020, ///< XML fragment, not complete document. 79 AllOptions = 0xffff 80 }; 81 __inline friend Options operator|(Options o1, Options o2) { return (Options)(((unsigned)o1) | ((unsigned)o2)); } 82 __inline friend Options operator&(Options o1, Options o2) { return (Options)(((unsigned)o1) & ((unsigned)o2)); } 83 84 enum StandAloneType { 85 UninitialisedStandAlone = -2, 86 UnknownStandAlone = -1, 87 NotStandAlone, 88 IsStandAlone 89 }; 90 91 PXMLBase(int opts = NoOptions); 92 SetOptions(int opts)93 void SetOptions(int opts) 94 { m_options = opts; } 95 GetOptions()96 int GetOptions() const { return m_options; } 97 IsNoIndentElement(const PString &)98 virtual PBoolean IsNoIndentElement( 99 const PString & /*elementName*/ 100 ) const 101 { 102 return false; 103 } 104 SetMaxEntityLength(unsigned len)105 void SetMaxEntityLength(unsigned len) { m_maxEntityLength = len; } GetMaxEntityLength()106 unsigned GetMaxEntityLength() const { return m_maxEntityLength; } 107 108 protected: 109 int m_options; 110 unsigned m_maxEntityLength; 111 }; 112 113 114 class PXML : public PXMLBase 115 { 116 PCLASSINFO(PXML, PObject); 117 public: 118 PXML( 119 int options = NoOptions, 120 const char * noIndentElements = NULL 121 ); 122 PXML( 123 const PString & data, 124 int options = NoOptions, 125 const char * noIndentElements = NULL 126 ); 127 128 PXML(const PXML & xml); 129 130 ~PXML(); 131 IsLoaded()132 bool IsLoaded() const { return rootElement != NULL; } 133 bool IsDirty() const; 134 135 bool Load(const PString & data, Options options = NoOptions); 136 bool LoadFile(const PFilePath & fn, Options options = NoOptions); 137 OnLoaded()138 virtual void OnLoaded() { } 139 140 bool Save(Options options = NoOptions); 141 bool Save(PString & data, Options options = NoOptions); 142 bool SaveFile(const PFilePath & fn, Options options = NoOptions); 143 144 void RemoveAll(); 145 146 PBoolean IsNoIndentElement( 147 const PString & elementName 148 ) const; 149 150 PString AsString() const; 151 void PrintOn(ostream & strm) const; 152 void ReadFrom(istream & strm); 153 154 155 PXMLElement * GetElement(const PCaselessString & name, const PCaselessString & attr, const PString & attrval) const; 156 PXMLElement * GetElement(const PCaselessString & name, PINDEX idx = 0) const; 157 PXMLElement * GetElement(PINDEX idx) const; 158 PINDEX GetNumElements() const; GetRootElement()159 PXMLElement * GetRootElement() const { return rootElement; } 160 PXMLElement * SetRootElement(PXMLElement * p); 161 PXMLElement * SetRootElement(const PString & documentType); 162 bool RemoveElement(PINDEX idx); 163 164 PCaselessString GetDocumentType() const; 165 166 167 enum ValidationOp { 168 EndOfValidationList, 169 DocType, 170 ElementName, 171 RequiredAttribute, 172 RequiredNonEmptyAttribute, 173 RequiredAttributeWithValue, 174 RequiredElement, 175 Subtree, 176 RequiredAttributeWithValueMatching, 177 RequiredElementWithBodyMatching, 178 OptionalElement, 179 OptionalAttribute, 180 OptionalNonEmptyAttribute, 181 OptionalAttributeWithValue, 182 OptionalAttributeWithValueMatching, 183 OptionalElementWithBodyMatching, 184 SetDefaultNamespace, 185 SetNamespace, 186 187 RequiredAttributeWithValueMatchingEx = RequiredAttributeWithValueMatching + 0x8000, 188 OptionalAttributeWithValueMatchingEx = OptionalAttributeWithValueMatching + 0x8000, 189 RequiredElementWithBodyMatchingEx = RequiredElementWithBodyMatching + 0x8000, 190 OptionalElementWithBodyMatchingEx = OptionalElementWithBodyMatching + 0x8000 191 }; 192 193 struct ValidationContext { 194 PString m_defaultNameSpace; 195 PStringToString m_nameSpaces; 196 }; 197 198 struct ValidationInfo { 199 ValidationOp m_op; 200 const char * m_name; 201 202 union { 203 const void * m_placeHolder; 204 const char * m_attributeValues; 205 ValidationInfo * m_subElement; 206 const char * m_namespace; 207 }; 208 209 PINDEX m_minCount; 210 PINDEX m_maxCount; 211 }; 212 213 bool Validate(const ValidationInfo * validator); 214 bool ValidateElements(ValidationContext & context, PXMLElement * baseElement, const ValidationInfo * elements); 215 bool ValidateElement(ValidationContext & context, PXMLElement * element, const ValidationInfo * elements); 216 bool LoadAndValidate(const PString & body, const PXML::ValidationInfo * validator, PString & error, int options = NoOptions); 217 GetErrorString()218 PString GetErrorString() const { return m_errorString; } GetErrorColumn()219 unsigned GetErrorColumn() const { return m_errorColumn; } GetErrorLine()220 unsigned GetErrorLine() const { return m_errorLine; } 221 GetDocType()222 PString GetDocType() const { return docType; } SetDocType(const PString & v)223 void SetDocType(const PString & v) { docType = v; } 224 GetMutex()225 PMutex & GetMutex() { return rootMutex; } 226 227 // static methods to create XML tags 228 static PString CreateStartTag (const PString & text); 229 static PString CreateEndTag (const PString & text); 230 static PString CreateTagNoData (const PString & text); 231 static PString CreateTag (const PString & text, const PString & data); 232 233 static PString EscapeSpecialChars(const PString & string); 234 235 protected: 236 void Construct(int options, const char * noIndentElements); 237 PXMLElement * rootElement; 238 PMutex rootMutex; 239 240 bool loadFromFile; 241 PFilePath loadFilename; 242 PString version, encoding; 243 StandAloneType m_standAlone; 244 245 PStringStream m_errorString; 246 unsigned m_errorLine; 247 unsigned m_errorColumn; 248 249 PSortedStringList noIndentElements; 250 251 PString docType; 252 PString m_defaultNameSpace; 253 }; 254 255 256 #if P_HTTP 257 class PXML_HTTP : public PXML 258 { 259 PCLASSINFO(PXML_HTTP, PXML); 260 public: 261 PXML_HTTP( 262 int options = NoOptions, 263 const char * noIndentElements = NULL 264 ); 265 266 bool StartAutoReloadURL( 267 const PURL & url, 268 const PTimeInterval & timeout, 269 const PTimeInterval & refreshTime, 270 Options options = NoOptions 271 ); 272 bool StopAutoReloadURL(); GetAutoReloadStatus()273 PString GetAutoReloadStatus() { PWaitAndSignal m(autoLoadMutex); PString str = autoLoadError; return str; } 274 bool AutoLoadURL(); 275 virtual void OnAutoLoad(PBoolean ok); 276 277 bool LoadURL(const PURL & url); 278 bool LoadURL(const PURL & url, const PTimeInterval & timeout, Options options = NoOptions); 279 280 protected: 281 PDECLARE_NOTIFIER(PTimer, PXML_HTTP, AutoReloadTimeout); 282 PDECLARE_NOTIFIER(PThread, PXML_HTTP, AutoReloadThread); 283 284 PTimer autoLoadTimer; 285 PURL autoloadURL; 286 PTimeInterval autoLoadWaitTime; 287 PMutex autoLoadMutex; 288 PString autoLoadError; 289 }; 290 #endif // P_HTTP 291 292 //////////////////////////////////////////////////////////// 293 294 PARRAY(PXMLObjectArray, PXMLObject); 295 296 class PXMLObject : public PObject { 297 PCLASSINFO(PXMLObject, PObject); 298 public: PXMLObject(PXMLElement * par)299 PXMLObject(PXMLElement * par) 300 : parent(par) { dirty = false; } 301 GetParent()302 PXMLElement * GetParent() const 303 { return parent; } 304 305 PXMLObject * GetNextObject() const; 306 SetParent(PXMLElement * newParent)307 void SetParent(PXMLElement * newParent) 308 { 309 PAssert(parent == NULL, "Cannot reparent PXMLElement"); 310 parent = newParent; 311 } 312 313 PString AsString() const; 314 315 virtual void Output(ostream & strm, const PXMLBase & xml, int indent) const = 0; 316 317 virtual PBoolean IsElement() const = 0; 318 319 void SetDirty(); IsDirty()320 bool IsDirty() const { return dirty; } 321 322 virtual PXMLObject * Clone(PXMLElement * parent) const = 0; 323 324 protected: 325 PXMLElement * parent; 326 bool dirty; 327 }; 328 329 //////////////////////////////////////////////////////////// 330 331 class PXMLData : public PXMLObject { 332 PCLASSINFO(PXMLData, PXMLObject); 333 public: 334 PXMLData(PXMLElement * parent, const PString & data); 335 PXMLData(PXMLElement * parent, const char * data, int len); 336 IsElement()337 PBoolean IsElement() const { return false; } 338 339 void SetString(const PString & str, bool dirty = true); 340 GetString()341 PString GetString() const { return value; } 342 343 void Output(ostream & strm, const PXMLBase & xml, int indent) const; 344 345 PXMLObject * Clone(PXMLElement * parent) const; 346 347 protected: 348 PString value; 349 }; 350 351 //////////////////////////////////////////////////////////// 352 353 class PXMLElement : public PXMLObject { 354 PCLASSINFO(PXMLElement, PXMLObject); 355 public: 356 PXMLElement(PXMLElement * parent, const char * name = NULL); 357 PXMLElement(PXMLElement * parent, const PString & name, const PString & data); 358 IsElement()359 PBoolean IsElement() const { return true; } 360 361 void PrintOn(ostream & strm) const; 362 void Output(ostream & strm, const PXMLBase & xml, int indent) const; 363 GetName()364 PCaselessString GetName() const 365 { return name; } 366 367 /** 368 Get the completely qualified name for the element inside the 369 XML tree, for example "root:trunk:branch:subbranch:leaf". 370 */ 371 PCaselessString GetPathName() const; 372 SetName(const PString & v)373 void SetName(const PString & v) 374 { name = v; } 375 GetSize()376 PINDEX GetSize() const 377 { return subObjects.GetSize(); } 378 379 PXMLObject * AddSubObject(PXMLObject * elem, bool dirty = true); 380 381 PXMLElement * AddChild (PXMLElement * elem, bool dirty = true); 382 PXMLData * AddChild (PXMLData * elem, bool dirty = true); 383 384 PXMLElement * AddElement(const char * name); 385 PXMLElement * AddElement(const PString & name, const PString & data); 386 PXMLElement * AddElement(const PString & name, const PString & attrName, const PString & attrVal); 387 388 void SetAttribute(const PCaselessString & key, 389 const PString & value, 390 bool setDirty = true); 391 392 PString GetAttribute(const PCaselessString & key) const; 393 PString GetKeyAttribute(PINDEX idx) const; 394 PString GetDataAttribute(PINDEX idx) const; 395 bool HasAttribute(const PCaselessString & key) const; HasAttributes()396 bool HasAttributes() const { return attributes.GetSize() > 0; } GetNumAttributes()397 PINDEX GetNumAttributes() const { return attributes.GetSize(); } 398 399 PXMLElement * GetElement(const PCaselessString & name, const PCaselessString & attr, const PString & attrval) const; 400 PXMLElement * GetElement(const PCaselessString & name, PINDEX idx = 0) const; 401 PXMLObject * GetElement(PINDEX idx = 0) const; 402 bool RemoveElement(PINDEX idx); 403 404 PINDEX FindObject(const PXMLObject * ptr) const; 405 HasSubObjects()406 bool HasSubObjects() const 407 { return subObjects.GetSize() != 0; } 408 GetSubObjects()409 PXMLObjectArray GetSubObjects() const 410 { return subObjects; } 411 412 PXMLObject * GetSubObject(PINDEX idx = 0) const 413 { return GetElement(idx); } 414 415 PString GetData() const; 416 void SetData(const PString & data); 417 void AddData(const PString & data); 418 419 PXMLObject * Clone(PXMLElement * parent) const; 420 GetFilePosition(unsigned & col,unsigned & line)421 void GetFilePosition(unsigned & col, unsigned & line) const { col = column; line = lineNumber; } SetFilePosition(unsigned col,unsigned line)422 void SetFilePosition(unsigned col, unsigned line) { column = col; lineNumber = line; } 423 424 void AddNamespace(const PString & prefix, const PString & uri); 425 void RemoveNamespace(const PString & prefix); 426 427 bool GetDefaultNamespace(PCaselessString & str) const; 428 bool GetNamespace(const PCaselessString & prefix, PCaselessString & str) const; 429 PCaselessString PrependNamespace(const PCaselessString & name) const; 430 bool GetURIForNamespace(const PCaselessString & prefix, PCaselessString & uri); 431 432 protected: 433 PCaselessString name; 434 PStringToString attributes; 435 PXMLObjectArray subObjects; 436 bool dirty; 437 unsigned column; 438 unsigned lineNumber; 439 PStringToString m_nameSpaces; 440 PCaselessString m_defaultNamespace; 441 }; 442 443 //////////////////////////////////////////////////////////// 444 445 class PConfig; // stupid gcc 4 does not recognize PConfig as a class 446 447 class PXMLSettings : public PXML 448 { 449 PCLASSINFO(PXMLSettings, PXML); 450 public: 451 PXMLSettings(Options options = NewLineAfterElement); 452 PXMLSettings(const PString & data, Options options = NewLineAfterElement); 453 PXMLSettings(const PConfig & data, Options options = NewLineAfterElement); 454 455 bool Load(const PString & data); 456 bool LoadFile(const PFilePath & fn); 457 458 bool Save(); 459 bool Save(PString & data); 460 bool SaveFile(const PFilePath & fn); 461 462 void SetAttribute(const PCaselessString & section, const PString & key, const PString & value); 463 464 PString GetAttribute(const PCaselessString & section, const PString & key) const; 465 bool HasAttribute(const PCaselessString & section, const PString & key) const; 466 467 void ToConfig(PConfig & cfg) const; 468 }; 469 470 471 //////////////////////////////////////////////////////////// 472 473 class PXMLParser : public PXMLBase 474 { 475 PCLASSINFO(PXMLParser, PXMLBase); 476 public: 477 PXMLParser(int options = NoOptions); 478 ~PXMLParser(); 479 bool Parse(const char * data, int dataLen, bool final); 480 void GetErrorInfo(PString & errorString, unsigned & errorCol, unsigned & errorLine); 481 482 virtual void StartElement(const char * name, const char **attrs); 483 virtual void EndElement(const char * name); 484 virtual void AddCharacterData(const char * data, int len); 485 virtual void XmlDecl(const char * version, const char * encoding, int standAlone); 486 virtual void StartDocTypeDecl(const char * docTypeName, 487 const char * sysid, 488 const char * pubid, 489 int hasInternalSubSet); 490 virtual void EndDocTypeDecl(); 491 virtual void StartNamespaceDeclHandler(const char * prefix, const char * uri); 492 virtual void EndNamespaceDeclHandler(const char * prefix); 493 GetVersion()494 PString GetVersion() const { return version; } GetEncoding()495 PString GetEncoding() const { return encoding; } 496 GetStandAlone()497 StandAloneType GetStandAlone() const { return m_standAlone; } 498 499 PXMLElement * GetXMLTree() const; 500 PXMLElement * SetXMLTree(PXMLElement * newRoot); 501 502 protected: 503 void * expat; 504 PXMLElement * rootElement; 505 bool rootOpen; 506 PXMLElement * currentElement; 507 PXMLData * lastElement; 508 PString version, encoding; 509 StandAloneType m_standAlone; 510 PStringToString m_tempNamespaceList; 511 }; 512 513 //////////////////////////////////////////////////////////// 514 515 class PXMLStreamParser : public PXMLParser 516 { 517 PCLASSINFO(PXMLStreamParser, PXMLParser); 518 public: 519 PXMLStreamParser(); 520 521 virtual void EndElement(const char * name); 522 virtual PXML * Read(PChannel * channel); 523 524 protected: 525 PQueue<PXML> messages; 526 }; 527 528 529 #endif // P_EXPAT 530 531 #endif // PTLIB_PXML_H 532 533 534 // End Of File /////////////////////////////////////////////////////////////// 535