1 // Written in the D programming language.
2 
3 /**
4 $(RED Warning: This module is considered out-dated and not up to Phobos'
5       current standards. It will remain until we have a suitable replacement,
6       but be aware that it will not remain long term.)
7 
8 Classes and functions for creating and parsing XML
9 
10 The basic architecture of this module is that there are standalone functions,
11 classes for constructing an XML document from scratch (Tag, Element and
12 Document), and also classes for parsing a pre-existing XML file (ElementParser
13 and DocumentParser). The parsing classes <i>may</i> be used to build a
14 Document, but that is not their primary purpose. The handling capabilities of
15 DocumentParser and ElementParser are sufficiently customizable that you can
16 make them do pretty much whatever you want.
17 
18 Example: This example creates a DOM (Document Object Model) tree
19     from an XML file.
20 ------------------------------------------------------------------------------
21 import std.xml;
22 import std.stdio;
23 import std.string;
24 import std.file;
25 
26 // books.xml is used in various samples throughout the Microsoft XML Core
27 // Services (MSXML) SDK.
28 //
29 // See http://msdn2.microsoft.com/en-us/library/ms762271(VS.85).aspx
30 
31 void main()
32 {
33     string s = cast(string) std.file.read("books.xml");
34 
35     // Check for well-formedness
36     check(s);
37 
38     // Make a DOM tree
39     auto doc = new Document(s);
40 
41     // Plain-print it
42     writeln(doc);
43 }
44 ------------------------------------------------------------------------------
45 
46 Example: This example does much the same thing, except that the file is
47     deconstructed and reconstructed by hand. This is more work, but the
48     techniques involved offer vastly more power.
49 ------------------------------------------------------------------------------
import std.xml;
import std.stdio;
import std.string;
import std.file;
53 
54 struct Book
55 {
56     string id;
57     string author;
58     string title;
59     string genre;
60     string price;
61     string pubDate;
62     string description;
63 }
64 
65 void main()
66 {
67     string s = cast(string) std.file.read("books.xml");
68 
69     // Check for well-formedness
70     check(s);
71 
72     // Take it apart
73     Book[] books;
74 
75     auto xml = new DocumentParser(s);
76     xml.onStartTag["book"] = (ElementParser xml)
77     {
78         Book book;
79         book.id = xml.tag.attr["id"];
80 
81         xml.onEndTag["author"]       = (in Element e) { book.author      = e.text(); };
82         xml.onEndTag["title"]        = (in Element e) { book.title       = e.text(); };
83         xml.onEndTag["genre"]        = (in Element e) { book.genre       = e.text(); };
84         xml.onEndTag["price"]        = (in Element e) { book.price       = e.text(); };
85         xml.onEndTag["publish-date"] = (in Element e) { book.pubDate     = e.text(); };
86         xml.onEndTag["description"]  = (in Element e) { book.description = e.text(); };
87 
88         xml.parse();
89 
90         books ~= book;
91     };
92     xml.parse();
93 
94     // Put it back together again;
95     auto doc = new Document(new Tag("catalog"));
96     foreach (book;books)
97     {
98         auto element = new Element("book");
99         element.tag.attr["id"] = book.id;
100 
101         element ~= new Element("author",      book.author);
102         element ~= new Element("title",       book.title);
103         element ~= new Element("genre",       book.genre);
104         element ~= new Element("price",       book.price);
105         element ~= new Element("publish-date",book.pubDate);
106         element ~= new Element("description", book.description);
107 
108         doc ~= element;
109     }
110 
111     // Pretty-print it
    writeln(join(doc.pretty(3),"\n"));
113 }
114 -------------------------------------------------------------------------------
115 Copyright: Copyright Janice Caron 2008 - 2009.
116 License:   $(HTTP www.boost.org/LICENSE_1_0.txt, Boost License 1.0).
117 Authors:   Janice Caron
118 Source:    $(PHOBOSSRC std/_xml.d)
119 */
120 /*
121          Copyright Janice Caron 2008 - 2009.
122 Distributed under the Boost Software License, Version 1.0.
123    (See accompanying file LICENSE_1_0.txt or copy at
124          http://www.boost.org/LICENSE_1_0.txt)
125 */
126 module std.xml;
127 
// Literal character sequence that opens a CDATA section.
enum string cdata = "<![CDATA[";
129 
130 /**
131  * Returns true if the character is a character according to the XML standard
132  *
133  * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
134  *
135  * Params:
136  *    c = the character to be tested
137  */
bool isChar(dchar c) @safe @nogc pure nothrow // rule 2
{
    // Control range: only tab, line feed and carriage return are accepted.
    if (c < 0x20)
        return c == 0x9 || c == 0xA || c == 0xD;

    // Everything from space up to the last code point before the surrogates.
    if (c <= 0xD7FF)
        return true;

    // Surrogates (0xD800 .. 0xDFFF) and values past 0x10FFFF are rejected.
    if (c < 0xE000 || c > 0x10FFFF)
        return false;

    // Within the remaining range only U+FFFE and U+FFFF are excluded.
    return (c & 0x1FFFFE) != 0xFFFE;
}
161 
@safe @nogc nothrow pure unittest
{
    // Boundary values on either side of each range isChar distinguishes.
    static immutable uint[] accepted = [
        0x9, 0xA, 0xD, 0x20, 0xD7FF, 0xE000, 0xFFFD, 0x10000, 0x10FFFF,
    ];
    static immutable uint[] rejected = [
        0x8, 0xB, 0xC, 0xE, 0x1F, 0xD800, 0xDFFF, 0xFFFE, 0xFFFF, 0x110000,
    ];

    foreach (value; accepted)
        assert( isChar(cast(dchar) value));
    foreach (value; rejected)
        assert(!isChar(cast(dchar) value));

    assert( isChar('J'));

    // Optional exhaustive comparison of the hard-coded check with the table.
    debug (stdxml_TestHardcodedChecks)
    {
        foreach (c; 0 .. dchar.max + 1)
            assert(isChar(c) == lookup(CharTable, c));
    }
}
191 
192 /**
193  * Returns true if the character is whitespace according to the XML standard
194  *
195  * Only the following characters are considered whitespace in XML - space, tab,
196  * carriage return and linefeed
197  *
198  * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
199  *
200  * Params:
201  *    c = the character to be tested
202  */
bool isSpace(dchar c) @safe @nogc pure nothrow
{
    // Exactly four code points count: space, tab, line feed, carriage return.
    switch (c)
    {
    case '\u0020':
    case '\u0009':
    case '\u000A':
    case '\u000D':
        return true;
    default:
        return false;
    }
}
207 
208 /**
209  * Returns true if the character is a digit according to the XML standard
210  *
211  * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
212  *
213  * Params:
214  *    c = the character to be tested
215  */
bool isDigit(dchar c) @safe @nogc pure nothrow
{
    // '0' .. '9' are answered directly; any other value is looked up in
    // DigitTable.
    return (c >= 0x0030 && c <= 0x0039) || lookup(DigitTable, c);
}
223 
@safe @nogc nothrow pure unittest
{
    // Only compiled with -debug=stdxml_TestHardcodedChecks: compares isDigit
    // against the table for every value in 0 .. dchar.max.
    debug (stdxml_TestHardcodedChecks)
    {
        foreach (c; 0 .. dchar.max + 1)
        {
            assert(isDigit(c) == lookup(DigitTable, c));
        }
    }
}
232 
233 /**
234  * Returns true if the character is a letter according to the XML standard
235  *
236  * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
237  *
238  * Params:
239  *    c = the character to be tested
240  */
bool isLetter(dchar c) @safe @nogc nothrow pure // rule 84
{
    // A letter is either an ideographic character or a base character; the
    // ideographic check is tried first.
    if (isIdeographic(c))
        return true;
    return isBaseChar(c);
}
245 
246 /**
247  * Returns true if the character is an ideographic character according to the
248  * XML standard
249  *
250  * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
251  *
252  * Params:
253  *    c = the character to be tested
254  */
bool isIdeographic(dchar c) @safe @nogc nothrow pure
{
    // One isolated code point plus two closed ranges.
    return c == 0x3007
        || (c >= 0x3021 && c <= 0x3029)
        || (c >= 0x4E00 && c <= 0x9FA5);
}
265 
@safe @nogc nothrow pure unittest
{
    // The isolated code point and both ends of each accepted range.
    static immutable dchar[] samples = [
        '\u4E00', '\u9FA5', '\u3007', '\u3021', '\u3029',
    ];
    foreach (sample; samples)
        assert(isIdeographic(sample));

    // Optional exhaustive comparison of the hard-coded check with the table.
    debug (stdxml_TestHardcodedChecks)
    {
        foreach (c; 0 .. dchar.max + 1)
            assert(isIdeographic(c) == lookup(IdeographicTable, c));
    }
}
280 
281 /**
282  * Returns true if the character is a base character according to the XML
283  * standard
284  *
285  * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
286  *
287  * Params:
288  *    c = the character to be tested
289  */
bool isBaseChar(dchar c) @safe @nogc nothrow pure
{
    // Membership is decided entirely by BaseCharTable.
    immutable found = lookup(BaseCharTable, c);
    return found;
}
294 
295 /**
296  * Returns true if the character is a combining character according to the
297  * XML standard
298  *
299  * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
300  *
301  * Params:
302  *    c = the character to be tested
303  */
bool isCombiningChar(dchar c) @safe @nogc nothrow pure
{
    // Membership is decided entirely by CombiningCharTable.
    immutable found = lookup(CombiningCharTable, c);
    return found;
}
308 
309 /**
310  * Returns true if the character is an extender according to the XML standard
311  *
312  * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
313  *
314  * Params:
315  *    c = the character to be tested
316  */
bool isExtender(dchar c) @safe @nogc nothrow pure
{
    // Membership is decided entirely by ExtenderTable.
    immutable found = lookup(ExtenderTable, c);
    return found;
}
321 
322 /**
323  * Encodes a string by replacing all characters which need to be escaped with
324  * appropriate predefined XML entities.
325  *
326  * encode() escapes certain characters (ampersand, quote, apostrophe, less-than
327  * and greater-than), and similarly, decode() unescapes them. These functions
328  * are provided for convenience only. You do not need to use them when using
329  * the std.xml classes, because then all the encoding and decoding will be done
330  * for you automatically.
331  *
332  * If the string is not modified, the original will be returned.
333  *
334  * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
335  *
336  * Params:
337  *      s = The string to be encoded
338  *
339  * Returns: The encoded string
340  *
341  * Example:
342  * --------------
343  * writefln(encode("a > b")); // writes "a &gt; b"
344  * --------------
345  */
S encode(S)(S s)
{
    import std.array : appender;

    auto output = appender!S();
    size_t copiedUpTo; // index just past the most recently escaped character

    foreach (pos, ch; s)
    {
        // Pick the entity for this character; characters that need no
        // escaping are skipped and copied later as part of a larger slice.
        string entity;
        switch (ch)
        {
        case '&':  entity = "&amp;";  break;
        case '"':  entity = "&quot;"; break;
        case '\'': entity = "&apos;"; break;
        case '<':  entity = "&lt;";   break;
        case '>':  entity = "&gt;";   break;
        default:   continue;
        }

        // Flush the untouched run preceding this character, then the entity.
        output.put(s[copiedUpTo .. pos]);
        output.put(entity);
        copiedUpTo = pos + 1;
    }

    // Nothing was ever written, so no character needed escaping: hand back
    // the caller's string itself rather than a copy.
    if (!output.data.ptr) return s;

    output.put(s[copiedUpTo .. $]);
    return output.data;
}
375 
@safe pure unittest
{
    // A string with nothing to escape comes back as the very same slice.
    auto untouched = "hello";
    assert(encode(untouched) is untouched);

    // Each pair is (input, expected output).
    string[2][] cases = [
        ["a > b",     "a &gt; b"],
        ["a < b",     "a &lt; b"],
        ["don't",     "don&apos;t"],
        ["\"hi\"",    "&quot;hi&quot;"],
        ["cat & dog", "cat &amp; dog"],
    ];
    foreach (pair; cases)
    {
        string input = pair[0];
        string expected = pair[1];
        assert(encode(input) == expected, encode(input));
    }
}
386 
387 /**
388  * Mode to use for decoding.
389  *
390  * $(DDOC_ENUM_MEMBERS NONE) Do not decode
391  * $(DDOC_ENUM_MEMBERS LOOSE) Decode, but ignore errors
392  * $(DDOC_ENUM_MEMBERS STRICT) Decode, and throw exception on error
393  */
enum DecodeMode
{
    NONE,   // do not decode
    LOOSE,  // decode, but ignore errors
    STRICT  // decode, and throw exception on error
}
398 
399 /**
400  * Decodes a string by unescaping all predefined XML entities.
401  *
402  * encode() escapes certain characters (ampersand, quote, apostrophe, less-than
403  * and greater-than), and similarly, decode() unescapes them. These functions
404  * are provided for convenience only. You do not need to use them when using
405  * the std.xml classes, because then all the encoding and decoding will be done
406  * for you automatically.
407  *
 * This function decodes the entities &amp;amp;, &amp;quot;, &amp;apos;,
 * &amp;lt; and &amp;gt;,
 * as well as decimal and hexadecimal entities such as &amp;#x20AC;.
411  *
412  * If the string does not contain an ampersand, the original will be returned.
413  *
414  * Note that the "mode" parameter can be one of DecodeMode.NONE (do not
415  * decode), DecodeMode.LOOSE (decode, but ignore errors), or DecodeMode.STRICT
416  * (decode, and throw a DecodeException in the event of an error).
417  *
418  * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
419  *
420  * Params:
421  *      s = The string to be decoded
422  *      mode = (optional) Mode to use for decoding. (Defaults to LOOSE).
423  *
424  * Throws: DecodeException if mode == DecodeMode.STRICT and decode fails
425  *
426  * Returns: The decoded string
427  *
428  * Example:
429  * --------------
430  * writefln(decode("a &gt; b")); // writes "a > b"
431  * --------------
432  */
string decode(string s, DecodeMode mode=DecodeMode.LOOSE) @safe pure
{
    import std.algorithm.searching : startsWith;

    if (mode == DecodeMode.NONE) return s;

    // Stays empty until the first '&' is met, so input without any
    // ampersand is returned as-is at the end.
    string decoded;

    for (size_t pos = 0; pos < s.length; ++pos)
    {
        immutable char ch = s[pos];

        if (ch != '&')
        {
            // Plain characters are copied only once a buffer exists.
            if (decoded.length != 0) decoded ~= ch;
            continue;
        }

        // First ampersand: seed the buffer with everything before it.
        if (decoded.length == 0)
        {
            decoded = s[0 .. pos].dup;
        }

        string rest = s[pos .. $];

        if (startsWith(rest, "&#"))
        {
            // Numeric character reference.
            try
            {
                dchar d;
                string t = rest;
                checkCharRef(t, d);
                char[4] temp;
                import std.utf : encode;
                decoded ~= temp[0 .. encode(temp, d)];
                // t is what remains after the reference; move pos onto the
                // reference's last character so that ++pos steps past it.
                pos = s.length - t.length - 1;
            }
            catch (Err e)
            {
                if (mode == DecodeMode.STRICT)
                    throw new DecodeException("Unescaped &");
                decoded ~= '&';
            }
        }
        else if (startsWith(rest, "&amp;"))
        {
            decoded ~= '&';
            pos += 4;
        }
        else if (startsWith(rest, "&quot;"))
        {
            decoded ~= '"';
            pos += 5;
        }
        else if (startsWith(rest, "&apos;"))
        {
            decoded ~= '\'';
            pos += 5;
        }
        else if (startsWith(rest, "&lt;"))
        {
            decoded ~= '<';
            pos += 3;
        }
        else if (startsWith(rest, "&gt;"))
        {
            decoded ~= '>';
            pos += 3;
        }
        else
        {
            // A bare ampersand: an error in STRICT mode, kept literally
            // otherwise.
            if (mode == DecodeMode.STRICT)
                throw new DecodeException("Unescaped &");
            decoded ~= '&';
        }
    }

    return (decoded.length == 0) ? s : decoded;
}
487 
@safe pure unittest
{
    // Passes only if strict decoding of s raises DecodeException.
    void assertNot(string s) pure
    {
        bool threw = false;
        try { decode(s, DecodeMode.STRICT); }
        catch (DecodeException e) { threw = true; }
        assert(threw, s);
    }

    // Assert that things that should work, do
    auto plain = "hello";
    assert(decode(plain, DecodeMode.STRICT) is plain);

    // Each pair is (input, expected strict-mode output).
    string[2][] wellFormed = [
        ["a &gt; b",       "a > b"],
        ["a &lt; b",       "a < b"],
        ["don&apos;t",     "don't"],
        ["&quot;hi&quot;", "\"hi\""],
        ["cat &amp; dog",  "cat & dog"],
        ["&#42;",          "*"],
        ["&#x2A;",         "*"],
    ];
    foreach (pair; wellFormed)
    {
        string input = pair[0];
        string expected = pair[1];
        assert(decode(input, DecodeMode.STRICT) == expected);
    }

    // Malformed input passes through unchanged in LOOSE mode...
    string[] malformed = [
        "cat & dog", "a &gt b", "&#;", "&#x;", "&#2G;", "&#x2G;",
    ];
    foreach (text; malformed)
        assert(decode(text, DecodeMode.LOOSE) == text);

    // Assert that things that shouldn't work, don't
    foreach (text; malformed)
        assertNot(text);
}
523 
524 /**
525  * Class representing an XML document.
526  *
527  * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
528  *
529  */
class Document : Element
{
    /**
     * All text found ahead of the root element.
     * Defaults to &lt;?xml version="1.0"?&gt;
     */
    string prolog = "<?xml version=\"1.0\"?>";

    /**
     * All text found behind the root element.
     * Defaults to the empty string
     */
    string epilog;

    /**
     * Constructs a Document by parsing XML text, producing a complete DOM
     * (Document Object Model) tree.
     *
     * The input to this function MUST be valid XML.
     * This is enforced by DocumentParser's in contract.
     *
     * Params:
     *      s = the complete XML text; must not be empty.
     */
    this(string s)
    in
    {
        assert(s.length != 0);
    }
    body
    {
        auto parser = new DocumentParser(s);
        // Captured before parsing starts. The pointer arithmetic below
        // assumes the root tag's text is a slice of s.
        string rootTagText = parser.tag.tagString;

        this(parser.tag);

        // Everything in s ahead of the root tag is the prolog.
        prolog = s[0 .. rootTagText.ptr - s.ptr];

        parse(parser);

        // Whatever the parser's string holds once parsing has finished.
        epilog = *parser.s;
    }

    /**
     * Constructs a Document from a Tag.
     *
     * Params:
     *      tag = the start tag of the document.
     */
    this(const(Tag) tag)
    {
        super(tag);
    }

    const
    {
        /**
         * Compares two Documents for equality: prolog, then the Element
         * part, then epilog.
         *
         * Example:
         * --------------
         * Document d1,d2;
         * if (d1 == d2) { }
         * --------------
         */
        override bool opEquals(scope const Object o) const
        {
            const other = toType!(const Document)(o);

            if (prolog != other.prolog)
                return false;
            if (!(cast(const) this).Element.opEquals(cast(const) other))
                return false;
            return epilog == other.epilog;
        }

        /**
         * Compares two Documents, ordering by prolog, then by the Element
         * part, then by epilog.
         *
         * You should rarely need to call this function. It exists so that
         * Documents can be used as associative array keys.
         *
         * Example:
         * --------------
         * Document d1,d2;
         * if (d1 < d2) { }
         * --------------
         */
        override int opCmp(scope const Object o) scope const
        {
            const other = toType!(const Document)(o);

            if (prolog != other.prolog)
            {
                if (prolog < other.prolog)
                    return -1;
                return 1;
            }

            immutable int elementOrder = this.Element.opCmp(other);
            if (elementOrder != 0)
                return elementOrder;

            if (epilog != other.epilog)
            {
                if (epilog < other.epilog)
                    return -1;
                return 1;
            }

            return 0;
        }

        /**
         * Returns the hash of a Document, combining the Element hash with
         * the epilog and then the prolog.
         *
         * You should rarely need to call this function. It exists so that
         * Documents can be used as associative array keys.
         */
        override size_t toHash() scope const @trusted
        {
            immutable elementHash = (cast() this).Element.toHash();
            immutable withEpilog = hash(epilog, elementHash);
            return hash(prolog, withEpilog);
        }

        /**
         * Returns the string representation of a Document (that is, the
         * complete XML of a document): prolog, element text, epilog.
         */
        override string toString() scope const @safe
        {
            immutable elementText = super.toString();
            return prolog ~ elementText ~ epilog;
        }
    }
}
645 
@system unittest
{
    // https://issues.dlang.org/show_bug.cgi?id=14966
    auto source = `<?xml version="1.0" encoding="UTF-8"?><foo></foo>`;

    // Two documents parsed from the same text are equal, unordered, and
    // interchangeable as associative array keys.
    auto first = new Document(source);
    auto second = new Document(source);
    assert(first == second);
    assert(!(first < second));

    int[Document] table;
    table[first] = 1;
    assert(table[second] == 1);

    // Appending a child makes the second document sort after the first.
    second ~= new Element("b");
    assert(first < second);
    assert(second > first);
}
663 
664 /**
665  * Class representing an XML element.
666  *
667  * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
668  */
class Element : Item
{
    Tag tag; /// The start tag of the element
    Item[] items; /// The element's items
    Text[] texts; /// The element's text items
    CData[] cdatas; /// The element's CData items
    Comment[] comments; /// The element's comments
    ProcessingInstruction[] pis; /// The element's processing instructions
    Element[] elements; /// The element's child elements

    /**
     * Constructs an Element from a name and, optionally, a string that
     * becomes its single Text child.
     *
     * Params:
     *      name = the name of the element.
     *      interior = (optional) the string interior. When empty, no Text
     *          child is added.
     *
     * Example:
     * -------------------------------------------------------
     * auto element = new Element("title","Serenity")
     *     // constructs the element <title>Serenity</title>
     * -------------------------------------------------------
     */
    this(string name, string interior=null) @safe pure
    {
        this(new Tag(name));
        if (interior.length != 0)
        {
            opCatAssign(new Text(interior));
        }
    }

    /**
     * Constructs an Element from a Tag.
     *
     * The element gets its own Tag: name, attributes and tag string are
     * copied from tag_, and the type starts out as TagType.EMPTY.
     *
     * Params:
     *      tag_ = the start or empty tag of the element.
     */
    this(const(Tag) tag_) @safe pure
    {
        auto copy = new Tag(tag_.name);
        copy.type = TagType.EMPTY;
        foreach (key, value; tag_.attr)
        {
            copy.attr[key] = value;
        }
        copy.tagString = tag_.tagString;
        this.tag = copy;
    }

    /**
     * Append a text item to the interior of this element
     *
     * Params:
     *      item = the item you wish to append.
     *
     * Example:
     * --------------
     * Element element;
     * element ~= new Text("hello");
     * --------------
     */
    void opCatAssign(Text item) @safe pure
    {
        // Recorded in the typed list first, then in the general item list.
        texts ~= item;
        appendItem(item);
    }

    /**
     * Append a CData item to the interior of this element
     *
     * Params:
     *      item = the item you wish to append.
     *
     * Example:
     * --------------
     * Element element;
     * element ~= new CData("hello");
     * --------------
     */
    void opCatAssign(CData item) @safe pure
    {
        cdatas ~= item;
        appendItem(item);
    }

    /**
     * Append a comment to the interior of this element
     *
     * Params:
     *      item = the item you wish to append.
     *
     * Example:
     * --------------
     * Element element;
     * element ~= new Comment("hello");
     * --------------
     */
    void opCatAssign(Comment item) @safe pure
    {
        comments ~= item;
        appendItem(item);
    }

    /**
     * Append a processing instruction to the interior of this element
     *
     * Params:
     *      item = the item you wish to append.
     *
     * Example:
     * --------------
     * Element element;
     * element ~= new ProcessingInstruction("hello");
     * --------------
     */
    void opCatAssign(ProcessingInstruction item) @safe pure
    {
        pis ~= item;
        appendItem(item);
    }

    /**
     * Append a complete element to the interior of this element
     *
     * Params:
     *      item = the item you wish to append.
     *
     * Example:
     * --------------
     * Element element;
     * Element other = new Element("br");
     * element ~= other;
     *    // appends element representing <br />
     * --------------
     */
    void opCatAssign(Element item) @safe pure
    {
        elements ~= item;
        appendItem(item);
    }

    // Adds item to the general item list. An EMPTY tag is promoted to START
    // as soon as an item that is not empty XML is appended.
    private void appendItem(Item item) @safe pure
    {
        items ~= item;
        if (tag.type == TagType.EMPTY)
        {
            if (!item.isEmptyXML)
                tag.type = TagType.START;
        }
    }

    // Fills this element from the parser: every text, CDATA, comment and
    // processing instruction is appended as it is reported, and every start
    // tag spawns a child Element that is parsed recursively before being
    // appended.
    private void parse(ElementParser xml)
    {
        xml.onText = (string s) { opCatAssign(new Text(s)); };
        xml.onCData = (string s) { opCatAssign(new CData(s)); };
        xml.onComment = (string s) { opCatAssign(new Comment(s)); };
        xml.onPI = (string s) { opCatAssign(new ProcessingInstruction(s)); };

        xml.onStartTag[null] = (ElementParser childParser)
        {
            auto child = new Element(childParser.tag);
            child.parse(childParser);
            opCatAssign(child);
        };

        xml.parse();
    }

    /**
     * Compares two Elements for equality
     *
     * Example:
     * --------------
     * Element e1,e2;
     * if (e1 == e2) { }
     * --------------
     */
    override bool opEquals(scope const Object o) const
    {
        // NOTE(review): only the item lists are compared; the tag (name and
        // attributes) does not take part, although toHash mixes in
        // tag.toHash(). Confirm whether that is intended.
        const other = toType!(const Element)(o);

        if (items.length != other.items.length)
            return false;

        foreach (i, item; items)
        {
            if (!item.opEquals(other.items[i]))
                return false;
        }
        return true;
    }

    /**
     * Compares two Elements
     *
     * Items are compared pairwise; the first unequal pair decides. If one
     * item list is a prefix of the other, the shorter one sorts first.
     *
     * You should rarely need to call this function. It exists so that Elements
     * can be used as associative array keys.
     *
     * Example:
     * --------------
     * Element e1,e2;
     * if (e1 < e2) { }
     * --------------
     */
    override int opCmp(scope const Object o) @safe const
    {
        const other = toType!(const Element)(o);
        immutable mine = items.length;
        immutable theirs = other.items.length;
        immutable common = mine < theirs ? mine : theirs;

        foreach (i; 0 .. common)
        {
            if (!items[i].opEquals(other.items[i]))
                return items[i].opCmp(other.items[i]);
        }

        if (mine == theirs)
            return 0;
        return mine < theirs ? -1 : 1;
    }

    /**
     * Returns the hash of an Element: the tag's hash plus the hash of every
     * item.
     *
     * You should rarely need to call this function. It exists so that Elements
     * can be used as associative array keys.
     */
    override size_t toHash() scope const @safe
    {
        size_t total = tag.toHash();
        foreach (item; items)
        {
            total += item.toHash();
        }
        return total;
    }

    const
    {
        /**
         * Returns the decoded interior of an element.
         *
         * The element is assumed to contain text <i>only</i>. So, for
         * example, given XML such as "&lt;title&gt;Good &amp;amp;
         * Bad&lt;/title&gt;", will return "Good &amp; Bad".
         *
         * Params:
         *      mode = (optional) Mode to use for decoding. (Defaults to LOOSE).
         *
         * Throws: DecodeException if decode fails, or if any item of the
         *      element is not a Text
         */
        string text(DecodeMode mode=DecodeMode.LOOSE)
        {
            string result;
            foreach (item; items)
            {
                auto textItem = cast(Text) item;
                if (textItem is null)
                    throw new DecodeException(item.toString());
                result ~= decode(textItem.toString(), mode);
            }
            return result;
        }

        /**
         * Returns an indented string representation of this item
         *
         * An empty element yields its empty tag, an element holding a single
         * Text yields one line, and anything else yields the start tag, the
         * indented lines of every item, and the end tag.
         *
         * Params:
         *      indent = (optional) number of spaces by which to indent this
         *          element. Defaults to 2.
         */
        override string[] pretty(uint indent=2) scope
        {
            import std.algorithm.searching : count;
            import std.string : rightJustify;

            if (isEmptyXML) return [ tag.toEmptyString() ];

            // A lone Text child is kept on the same line as its tags.
            if (items.length == 1)
            {
                if (auto onlyText = cast(const(Text))(items[0]))
                {
                    return [tag.toStartString() ~ onlyText.toString() ~ tag.toEndString()];
                }
            }

            string[] lines = [ tag.toStartString() ];
            foreach (item; items)
            {
                foreach (line; item.pretty(indent))
                {
                    // Padding to (character count + indent) prefixes the
                    // line with indent spaces.
                    lines ~= rightJustify(line, count(line) + indent);
                }
            }
            lines ~= tag.toEndString();
            return lines;
        }

        /**
         * Returns the string representation of an Element
         *
         * Example:
         * --------------
         * auto element = new Element("br");
         * writeln(element.toString()); // writes "<br />"
         * --------------
         */
        override string toString() scope @safe
        {
            if (isEmptyXML) return tag.toEmptyString();

            string result = tag.toStartString();
            foreach (item; items)
            {
                result ~= item.toString();
            }
            result ~= tag.toEndString();
            return result;
        }

        // True when the element has no items at all.
        override @property @safe pure @nogc nothrow bool isEmptyXML() const scope
        {
            return items.length == 0;
        }
    }
}
972 
973 /**
974  * Tag types.
975  *
976  * $(DDOC_ENUM_MEMBERS START) Used for start tags
977  * $(DDOC_ENUM_MEMBERS END) Used for end tags
978  * $(DDOC_ENUM_MEMBERS EMPTY) Used for empty tags
979  *
980  */
enum TagType
{
    START,  // start tag
    END,    // end tag
    EMPTY   // empty tag
}
982 
983 /**
984  * Class representing an XML tag.
985  *
986  * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
987  *
988  * The class invariant guarantees
989  * <ul>
990  * <li> that $(B type) is a valid enum TagType value</li>
991  * <li> that $(B name) consists of valid characters</li>
992  * <li> that each attribute name consists of valid characters</li>
993  * </ul>
994  */
class Tag
{
    TagType type = TagType.START;   /// Type of tag
    string name;                    /// Tag name
    string[string] attr;            /// Associative array of attributes
    private string tagString;       // source text consumed by the parsing constructor

    // Enforces the guarantees listed in the class documentation: a valid
    // TagType, and a tag name and attribute names that checkName accepts.
    invariant()
    {
        string s;
        string t;

        assert(type == TagType.START
            || type == TagType.END
            || type == TagType.EMPTY);

        s = name;
        try { checkName(s,t); }
        catch (Err e) { assert(false,"Invalid tag name:" ~ e.toString()); }

        foreach (k,v;attr)
        {
            s = k;
            try { checkName(s,t); }
            catch (Err e)
                { assert(false,"Invalid atrribute name:" ~ e.toString()); }
        }
    }

    /**
     * Constructs an instance of Tag with a specified name and type
     *
     * The constructor does not initialize the attributes. To initialize the
     * attributes, you access the $(B attr) member variable.
     *
     * Params:
     *      name = the Tag's name
     *      type = (optional) the Tag's type. If omitted, defaults to
     *          TagType.START.
     *
     * Example:
     * --------------
     * auto tag = new Tag("img",TagType.EMPTY);
     * tag.attr["src"] = "http://example.com/example.jpg";
     * --------------
     */
    this(string name, TagType type=TagType.START) @safe pure
    {
        this.name = name;
        this.type = type;
    }

    /* Private constructor (so don't ddoc this!)
     *
     * Constructs a Tag by parsing the string representation, e.g. "<html>".
     *
     * The string is passed by reference, and is advanced over all characters
     * consumed.
     *
     * The second parameter is a dummy parameter only, required solely to
     * distinguish this constructor from the public one.
     *
     * Any XMLException raised while parsing is rethrown as a TagException
     * carrying the text consumed so far. Truncated input (a missing '>',
     * '=' or closing quote) takes the same path: a failed search is treated
     * as "end of input" rather than used as a slice bound.
     */
    private this(ref string s, bool dummy) @safe pure
    {
        import std.algorithm.searching : countUntil;
        import std.utf : byCodeUnit;

        tagString = s;
        try
        {
            reqc(s,'<');
            if (optc(s,'/')) type = TagType.END;

            // The name runs up to the first delimiter or whitespace.
            size_t i = indexOrEnd(
                s.byCodeUnit.countUntil(">", "/>", " ", "\t", "\v", "\r", "\n", "\f"),
                s.length);
            name = s[0 .. i];
            s = s[i .. $];
            dropLeadingWhite(s);

            // Attributes: key, optional space, '=', optional space, quoted value.
            while (s.length > 0 && s[0] != '>' && s[0] != '/')
            {
                i = indexOrEnd(
                    s.byCodeUnit.countUntil("=", " ", "\t", "\v", "\r", "\n", "\f"),
                    s.length);
                string key = s[0 .. i];
                s = s[i .. $];

                dropLeadingWhite(s);
                reqc(s,'=');
                dropLeadingWhite(s);

                // The value ends at the next occurrence of its opening quote.
                immutable char quote = requireOneOf(s,"'\"");
                i = indexOrEnd(s.byCodeUnit.countUntil(quote), s.length);
                string val = decode(s[0 .. i], DecodeMode.LOOSE);
                s = s[i .. $];
                reqc(s,quote);

                dropLeadingWhite(s);
                attr[key] = val;
            }
            if (optc(s,'/'))
            {
                // "</name/>" is neither an end tag nor an empty tag.
                if (type == TagType.END) throw new TagException("");
                type = TagType.EMPTY;
            }
            reqc(s,'>');
            // Keep only the part of the input that made up this tag.
            tagString.length = tagString.length - s.length;
        }
        catch (XMLException e)
        {
            tagString.length = tagString.length - s.length;
            throw new TagException(tagString);
        }
    }

    // Maps countUntil's "not found" result (-1) to the end of the input so
    // that it can be used directly as a slice bound.
    private static size_t indexOrEnd(ptrdiff_t index, size_t length) @safe @nogc pure nothrow
    {
        return index < 0 ? length : cast(size_t) index;
    }

    // Advances s past leading ASCII whitespace (all of s if nothing else is left).
    private static void dropLeadingWhite(ref string s) @safe pure
    {
        import std.algorithm.searching : countUntil;
        import std.ascii : isWhite;
        import std.utf : byCodeUnit;

        immutable n = indexOrEnd(s.byCodeUnit.countUntil!(a => !isWhite(a)), s.length);
        s = s[n .. $];
    }

    // Orders two attribute tables by content: the (key, value) pairs are
    // compared in ascending key order, and when one table is a prefix of the
    // other the shorter one sorts first. Returns -1, 0 or 1.
    private static int compareAttr(const string[string] lhs, const string[string] rhs)
    {
        import std.algorithm.sorting : sort;

        static string[] sortedKeys(const string[string] aa)
        {
            string[] ks;
            foreach (k, v; aa) ks ~= k;
            sort(ks);
            return ks;
        }

        const lk = sortedKeys(lhs);
        const rk = sortedKeys(rhs);
        immutable common = lk.length < rk.length ? lk.length : rk.length;

        foreach (n; 0 .. common)
        {
            if (lk[n] != rk[n]) return lk[n] < rk[n] ? -1 : 1;
            const lv = lhs[lk[n]];
            const rv = rhs[rk[n]];
            if (lv != rv) return lv < rv ? -1 : 1;
        }
        if (lk.length != rk.length) return lk.length < rk.length ? -1 : 1;
        return 0;
    }

    const
    {
        /**
         * Compares two Tags for equality
         *
         * Two Tags are equal when their name, attributes and type all match.
         *
         * You should rarely need to call this function. It exists so that Tags
         * can be used as associative array keys.
         *
         * Example:
         * --------------
         * Tag tag1,tag2;
         * if (tag1 == tag2) { }
         * --------------
         */
        override bool opEquals(scope Object o)
        {
            const tag = toType!(const Tag)(o);
            return
                (name != tag.name) ? false : (
                (attr != tag.attr) ? false : (
                (type != tag.type) ? false : (
            true )));
        }

        /**
         * Compares two Tags
         *
         * Tags are ordered by name, then by attribute content (key/value
         * pairs in ascending key order), then by type.
         *
         * Returns: -1, 0 or 1
         *
         * Example:
         * --------------
         * Tag tag1,tag2;
         * if (tag1 < tag2) { }
         * --------------
         */
        override int opCmp(Object o)
        {
            const tag = toType!(const Tag)(o);
            if (name != tag.name) return name < tag.name ? -1 : 1;
            // Compare attribute content; the AA's address carries no meaning
            // and would make the order vary between otherwise equal runs.
            if (attr != tag.attr) return compareAttr(attr, tag.attr);
            if (type != tag.type) return type < tag.type ? -1 : 1;
            return 0;
        }

        /**
         * Returns the hash of a Tag
         *
         * Only the tag name contributes to the hash.
         *
         * You should rarely need to call this function. It exists so that Tags
         * can be used as associative array keys.
         */
        override size_t toHash()
        {
            return typeid(name).getHash(&name);
        }

        /**
         * Returns the string representation of a Tag
         *
         * Example:
         * --------------
         * auto tag = new Tag("book",TagType.START);
         * writefln(tag.toString()); // writes "<book>"
         * --------------
         */
        override string toString() @safe
        {
            if (isEmpty) return toEmptyString();
            return (isEnd) ? toEndString() : toStartString();
        }

        private
        {
            // Builds "<name key="value" ..." without the closing delimiter.
            // Values are passed through encode(); attributes appear in the
            // order the associative array yields them.
            string toNonEndString() @safe
            {
                import std.format : format;

                string s = "<" ~ name;
                foreach (key,val;attr)
                    s ~= format(" %s=\"%s\"",key,encode(val));
                return s;
            }

            // "<name ...>"
            string toStartString() @safe { return toNonEndString() ~ ">"; }

            // "</name>" (attributes are not written on end tags)
            string toEndString() @safe { return "</" ~ name ~ ">"; }

            // "<name ... />"
            string toEmptyString() @safe { return toNonEndString() ~ " />"; }
        }

        /**
         * Returns true if the Tag is a start tag
         *
         * Example:
         * --------------
         * if (tag.isStart) { }
         * --------------
         */
        @property bool isStart() @safe @nogc pure nothrow { return type == TagType.START; }

        /**
         * Returns true if the Tag is an end tag
         *
         * Example:
         * --------------
         * if (tag.isEnd) { }
         * --------------
         */
        @property bool isEnd() @safe @nogc pure nothrow { return type == TagType.END;   }

        /**
         * Returns true if the Tag is an empty tag
         *
         * Example:
         * --------------
         * if (tag.isEmpty) { }
         * --------------
         */
        @property bool isEmpty() @safe @nogc pure nothrow { return type == TagType.EMPTY; }
    }
}
1232 
1233 /**
1234  * Class representing a comment
1235  */
class Comment : Item
{
    private string content;

    /**
     * Construct a comment
     *
     * Params:
     *      content = the body of the comment
     *
     * Throws: CommentException if the comment body is illegal (contains "--"
     * or exactly equals "-")
     *
     * Example:
     * --------------
     * auto item = new Comment("This is a comment");
     *    // constructs <!--This is a comment-->
     * --------------
     */
    this(string content) @safe pure
    {
        import std.string : indexOf;

        if (content == "-" || content.indexOf("--") != -1)
            throw new CommentException(content);
        this.content = content;
    }

    /**
     * Compares two comments for equality
     *
     * Returns: true only if o is a Comment with the same body
     *
     * Example:
     * --------------
     * Comment item1,item2;
     * if (item1 == item2) { }
     * --------------
     */
    override bool opEquals(scope const Object o) const
    {
        const item = toType!(const Item)(o);
        const t = cast(const Comment) item;
        return t !is null && content == t.content;
    }

    /**
     * Compares two comments
     *
     * You should rarely need to call this function. It exists so that Comments
     * can be used as associative array keys.
     *
     * Returns: -1, 0 or 1 according to the ordering of the two bodies; 0 if
     * o is an Item of a different kind
     *
     * Example:
     * --------------
     * Comment item1,item2;
     * if (item1 < item2) { }
     * --------------
     */
    override int opCmp(scope const Object o) scope const
    {
        const item = toType!(const Item)(o);
        const t = cast(const Comment) item;
        // An Item of another kind has no defined order relative to a Comment;
        // report 0, exactly as the previous implementation did.
        if (t is null) return 0;
        if (content == t.content) return 0;
        // Written as explicit branches: folding this into `t !is null && ...`
        // converts the result to bool, so -1 would come out as 1.
        return content < t.content ? -1 : 1;
    }

    /**
     * Returns the hash of a Comment
     *
     * You should rarely need to call this function. It exists so that Comments
     * can be used as associative array keys.
     */
    override size_t toHash() scope const nothrow { return hash(content); }

    /**
     * Returns a string representation of this comment
     */
    override string toString() scope const @safe pure nothrow { return "<!--" ~ content ~ "-->"; }

    override @property @safe @nogc pure nothrow scope bool isEmptyXML() const { return false; } /// Returns false always
}
1315 
@safe unittest // issue 16241
{
    import std.exception : assertThrown;

    // A body without "--" is accepted and stored unchanged.
    auto accepted = new Comment("==");
    assert(accepted.content == "==");

    // A body containing "--" must be rejected.
    assertThrown!CommentException(new Comment("--"));
}
1323 
1324 /**
1325  * Class representing a Character Data section
1326  */
class CData : Item
{
    private string content;

    /**
     * Construct a character data section
     *
     * Params:
     *      content = the body of the character data segment
     *
     * Throws: CDataException if the segment body is illegal (contains "]]>")
     *
     * Example:
     * --------------
     * auto item = new CData("<b>hello</b>");
     *    // constructs <![CDATA[<b>hello</b>]]>
     * --------------
     */
    this(string content) @safe pure
    {
        import std.string : indexOf;
        if (content.indexOf("]]>") != -1) throw new CDataException(content);
        this.content = content;
    }

    /**
     * Compares two CDatas for equality
     *
     * Returns: true only if o is a CData with the same body
     *
     * Example:
     * --------------
     * CData item1,item2;
     * if (item1 == item2) { }
     * --------------
     */
    override bool opEquals(scope const Object o) const
    {
        const item = toType!(const Item)(o);
        const t = cast(const CData) item;
        return t !is null && content == t.content;
    }

    /**
     * Compares two CDatas
     *
     * You should rarely need to call this function. It exists so that CDatas
     * can be used as associative array keys.
     *
     * Returns: -1, 0 or 1 according to the ordering of the two bodies; 0 if
     * o is an Item of a different kind
     *
     * Example:
     * --------------
     * CData item1,item2;
     * if (item1 < item2) { }
     * --------------
     */
    override int opCmp(scope const Object o) scope const
    {
        const item = toType!(const Item)(o);
        const t = cast(const CData) item;
        // An Item of another kind has no defined order relative to a CData;
        // report 0, exactly as the previous implementation did.
        if (t is null) return 0;
        if (content == t.content) return 0;
        // Written as explicit branches: folding this into `t !is null && ...`
        // converts the result to bool, so -1 would come out as 1.
        return content < t.content ? -1 : 1;
    }

    /**
     * Returns the hash of a CData
     *
     * You should rarely need to call this function. It exists so that CDatas
     * can be used as associative array keys.
     */
    override size_t toHash() scope const nothrow { return hash(content); }

    /**
     * Returns a string representation of this CData section
     *
     * The body is wrapped between the module-level `cdata` prefix (defined
     * elsewhere in this file; presumably "<![CDATA[") and "]]>".
     */
    override string toString() scope const @safe pure nothrow { return cdata ~ content ~ "]]>"; }

    override @property @safe @nogc pure nothrow scope bool isEmptyXML() const { return false; } /// Returns false always
}
1403 
1404 /**
1405  * Class representing a text (aka Parsed Character Data) section
1406  */
class Text : Item
{
    private string content;

    /**
     * Construct a text (aka PCData) section
     *
     * Params:
     *      content = the text. This function encodes the text before
     *      insertion, so it is safe to insert any text
     *
     * Example:
     * --------------
     * auto item = new Text("a < b");
     *    // constructs a &lt; b
     * --------------
     */
    this(string content) @safe pure
    {
        this.content = encode(content);
    }

    /**
     * Compares two text sections for equality
     *
     * Returns: true only if o is a Text with the same (encoded) content
     *
     * Example:
     * --------------
     * Text item1,item2;
     * if (item1 == item2) { }
     * --------------
     */
    override bool opEquals(scope const Object o) const
    {
        const item = toType!(const Item)(o);
        const t = cast(const Text) item;
        return t !is null && content == t.content;
    }

    /**
     * Compares two text sections
     *
     * You should rarely need to call this function. It exists so that Texts
     * can be used as associative array keys.
     *
     * Returns: -1, 0 or 1 according to the ordering of the two (encoded)
     * contents; 0 if o is an Item of a different kind
     *
     * Example:
     * --------------
     * Text item1,item2;
     * if (item1 < item2) { }
     * --------------
     */
    override int opCmp(scope const Object o) scope const
    {
        const item = toType!(const Item)(o);
        const t = cast(const Text) item;
        // An Item of another kind has no defined order relative to a Text;
        // report 0, exactly as the previous implementation did.
        if (t is null) return 0;
        if (content == t.content) return 0;
        // Written as explicit branches: folding this into `t !is null && ...`
        // converts the result to bool, so -1 would come out as 1.
        return content < t.content ? -1 : 1;
    }

    /**
     * Returns the hash of a text section
     *
     * You should rarely need to call this function. It exists so that Texts
     * can be used as associative array keys.
     */
    override size_t toHash() scope const nothrow { return hash(content); }

    /**
     * Returns a string representation of this Text section
     *
     * This is the content as stored, i.e. already encoded by the constructor.
     */
    override string toString() scope const @safe @nogc pure nothrow { return content; }

    /**
     * Returns true if the content is the empty string
     */
    override @property @safe @nogc pure nothrow scope bool isEmptyXML() const { return content.length == 0; }
}
1483 
1484 /**
1485  * Class representing an XML Instruction section
1486  */
class XMLInstruction : Item
{
    private string content;

    /**
     * Construct an XML Instruction section
     *
     * Params:
     *      content = the body of the instruction segment
     *
     * Throws: XIException if the segment body is illegal (contains ">")
     *
     * Example:
     * --------------
     * auto item = new XMLInstruction("ATTLIST");
     *    // constructs <!ATTLIST>
     * --------------
     */
    this(string content) @safe pure
    {
        import std.string : indexOf;
        if (content.indexOf(">") != -1) throw new XIException(content);
        this.content = content;
    }

    /**
     * Compares two XML instructions for equality
     *
     * Returns: true only if o is an XMLInstruction with the same body
     *
     * Example:
     * --------------
     * XMLInstruction item1,item2;
     * if (item1 == item2) { }
     * --------------
     */
    override bool opEquals(scope const Object o) const
    {
        const item = toType!(const Item)(o);
        const t = cast(const XMLInstruction) item;
        return t !is null && content == t.content;
    }

    /**
     * Compares two XML instructions
     *
     * You should rarely need to call this function. It exists so that
     * XmlInstructions can be used as associative array keys.
     *
     * Returns: -1, 0 or 1 according to the ordering of the two bodies; 0 if
     * o is an Item of a different kind
     *
     * Example:
     * --------------
     * XMLInstruction item1,item2;
     * if (item1 < item2) { }
     * --------------
     */
    override int opCmp(scope const Object o) scope const
    {
        const item = toType!(const Item)(o);
        const t = cast(const XMLInstruction) item;
        // An Item of another kind has no defined order relative to an
        // XMLInstruction; report 0, exactly as the previous implementation did.
        if (t is null) return 0;
        if (content == t.content) return 0;
        // Written as explicit branches: folding this into `t !is null && ...`
        // converts the result to bool, so -1 would come out as 1.
        return content < t.content ? -1 : 1;
    }

    /**
     * Returns the hash of an XMLInstruction
     *
     * You should rarely need to call this function. It exists so that
     * XmlInstructions can be used as associative array keys.
     */
    override size_t toHash() scope const nothrow { return hash(content); }

    /**
     * Returns a string representation of this XmlInstruction
     */
    override string toString() scope const @safe pure nothrow { return "<!" ~ content ~ ">"; }

    override @property @safe @nogc pure nothrow scope bool isEmptyXML() const { return false; } /// Returns false always
}
1563 
1564 /**
1565  * Class representing a Processing Instruction section
1566  */
class ProcessingInstruction : Item
{
    private string content;

    /**
     * Construct a Processing Instruction section
     *
     * Params:
     *      content = the body of the instruction segment
     *
     * Throws: PIException if the segment body is illegal (contains "?>")
     *
     * Example:
     * --------------
     * auto item = new ProcessingInstruction("php");
     *    // constructs <?php?>
     * --------------
     */
    this(string content) @safe pure
    {
        import std.string : indexOf;
        if (content.indexOf("?>") != -1) throw new PIException(content);
        this.content = content;
    }

    /**
     * Compares two processing instructions for equality
     *
     * Returns: true only if o is a ProcessingInstruction with the same body
     *
     * Example:
     * --------------
     * ProcessingInstruction item1,item2;
     * if (item1 == item2) { }
     * --------------
     */
    override bool opEquals(scope const Object o) const
    {
        const item = toType!(const Item)(o);
        const t = cast(const ProcessingInstruction) item;
        return t !is null && content == t.content;
    }

    /**
     * Compares two processing instructions
     *
     * You should rarely need to call this function. It exists so that
     * ProcessingInstructions can be used as associative array keys.
     *
     * Returns: -1, 0 or 1 according to the ordering of the two bodies; 0 if
     * o is an Item of a different kind
     *
     * Example:
     * --------------
     * ProcessingInstruction item1,item2;
     * if (item1 < item2) { }
     * --------------
     */
    override int opCmp(scope const Object o) scope const
    {
        const item = toType!(const Item)(o);
        const t = cast(const ProcessingInstruction) item;
        // An Item of another kind has no defined order relative to a
        // ProcessingInstruction; report 0, exactly as the previous
        // implementation did.
        if (t is null) return 0;
        if (content == t.content) return 0;
        // Written as explicit branches: folding this into `t !is null && ...`
        // converts the result to bool, so -1 would come out as 1.
        return content < t.content ? -1 : 1;
    }

    /**
     * Returns the hash of a ProcessingInstruction
     *
     * You should rarely need to call this function. It exists so that
     * ProcessingInstructions can be used as associative array keys.
     */
    override size_t toHash() scope const nothrow { return hash(content); }

    /**
     * Returns a string representation of this ProcessingInstruction
     */
    override string toString() scope const @safe pure nothrow { return "<?" ~ content ~ "?>"; }

    override @property @safe @nogc pure nothrow bool isEmptyXML() scope const { return false; } /// Returns false always
}
1643 
1644 /**
1645  * Abstract base class for XML items
1646  */
abstract class Item
{
    /// Tests for equality against another Item of the same type
    abstract override bool opEquals(scope const Object o) @safe const;

    /// Orders this item relative to another Item of the same type
    abstract override int opCmp(scope const Object o) @safe const;

    /// Computes the hash of this item
    abstract override size_t toHash() @safe scope const;

    /// Produces the string representation of this item
    abstract override string toString() @safe scope const;

    /**
     * Returns an indented string representation of this item
     *
     * The default implementation yields the whitespace-stripped result of
     * toString() as a single line, or no lines at all when nothing remains
     * after stripping. It does not use indent itself.
     *
     * Params:
     *      indent = number of spaces by which to indent child elements
     */
    string[] pretty(uint indent) @safe scope const
    {
        import std.string : strip;

        string trimmed = toString().strip();
        if (trimmed.length == 0)
            return [];
        return [ trimmed ];
    }

    /// True when the item represents empty XML text
    abstract @property @safe @nogc pure nothrow bool isEmptyXML() scope const;
}
1677 
1678 /**
1679  * Class for parsing an XML Document.
1680  *
1681  * This is a subclass of ElementParser. Most of the useful functions are
1682  * documented there.
1683  *
1684  * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
1685  *
1686  * Bugs:
1687  *      Currently only supports UTF documents.
1688  *
1689  *      If there is an encoding attribute in the prolog, it is ignored.
1690  *
1691  */
class DocumentParser : ElementParser
{
    // The document text; the inherited cursor `s` is pointed at this field
    // by the constructor.
    string xmlText;

    /**
     * Constructs a DocumentParser.
     *
     * The input to this function MUST be valid XML.
     * This is enforced by the function's in contract.
     *
     * Params:
     *      xmlText_ = the entire XML document as text
     *
     */
    this(string xmlText_)
    in
    {
        assert(xmlText_.length != 0);
        try
        {
            // Confirm that the input is valid XML
            check(xmlText_);
        }
        catch (CheckException e)
        {
            // And if it's not, tell the user why not
            assert(false, "\n" ~ e.toString());
        }
    }
    do
    {
        xmlText = xmlText_;
        // s must point at the text before super() runs: the base-class
        // constructor dereferences it.
        s = &xmlText;
        super();    // Initialize everything
        parse();    // Parse through the root tag (but not beyond)
    }
}
1729 
@system unittest
{
    // Building a Document exposes the root's children, not the root itself.
    auto document = new Document("<root><child><grandchild/></child></root>");
    auto children = document.elements;

    assert(children.length == 1);
    assert(children[0].tag.name == "child");
    // With only element children, the item list and element list agree.
    assert(document.items == children);
}
1737 
1738 /**
1739  * Class for parsing an XML element.
1740  *
1741  * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
1742  *
1743  * Note that you cannot construct instances of this class directly. You can
1744  * construct a DocumentParser (which is a subclass of ElementParser), but
1745  * otherwise, Instances of ElementParser will be created for you by the
1746  * library, and passed your way via onStartTag handlers.
1747  *
1748  */
1749 class ElementParser
1750 {
1751     alias Handler = void delegate(string);
1752     alias ElementHandler = void delegate(in Element element);
1753     alias ParserHandler = void delegate(ElementParser parser);
1754 
1755     private
1756     {
1757         Tag tag_;
1758         string elementStart;
1759         string* s;
1760 
1761         Handler commentHandler = null;
1762         Handler cdataHandler = null;
1763         Handler xiHandler = null;
1764         Handler piHandler = null;
1765         Handler rawTextHandler = null;
1766         Handler textHandler = null;
1767 
1768         // Private constructor for start tags
        this(ElementParser parent) @safe @nogc pure nothrow
        {
            // Share the parent's input cursor. It has to be assigned before
            // delegating, because this() dereferences s.
            s = parent.s;
            this();
            // Start out describing the same tag as the parent parser.
            tag_ = parent.tag_;
        }
1775 
1776         // Private constructor for empty tags
        this(Tag tag, string* t) @safe @nogc pure nothrow
        {
            // Adopt the supplied input cursor. It has to be assigned before
            // delegating, because this() dereferences s.
            s = t;
            this();
            // Remember the tag this parser was created for.
            tag_ = tag;
        }
1783     }
1784 
1785     /**
1786      * The Tag at the start of the element being parsed. You can read this to
1787      * determine the tag's name and attributes.
1788      */
tag()1789     @property @safe @nogc pure nothrow const(Tag) tag() const { return tag_; }
1790 
1791     /**
1792      * Register a handler which will be called whenever a start tag is
1793      * encountered which matches the specified name. You can also pass null as
1794      * the name, in which case the handler will be called for any unmatched
1795      * start tag.
1796      *
1797      * Example:
1798      * --------------
1799      * // Call this function whenever a <podcast> start tag is encountered
1800      * onStartTag["podcast"] = (ElementParser xml)
1801      * {
1802      *     // Your code here
1803      *     //
     *     // This is a closure, so code here may reference
1805      *     // variables which are outside of this scope
1806      * };
1807      *
1808      * // call myEpisodeStartHandler (defined elsewhere) whenever an <episode>
1809      * // start tag is encountered
1810      * onStartTag["episode"] = &myEpisodeStartHandler;
1811      *
1812      * // call delegate dg for all other start tags
1813      * onStartTag[null] = dg;
1814      * --------------
1815      *
     * This library will supply your function with a new instance of
     * ElementParser, which may be used to parse inside the element whose
     * start tag was just found, or to identify the tag attributes of the
     * element, etc.
1820      *
1821      * Note that your function will be called for both start tags and empty
1822      * tags. That is, we make no distinction between &lt;br&gt;&lt;/br&gt;
1823      * and &lt;br/&gt;.
1824      */
1825     ParserHandler[string] onStartTag;
1826 
1827     /**
1828      * Register a handler which will be called whenever an end tag is
1829      * encountered which matches the specified name. You can also pass null as
1830      * the name, in which case the handler will be called for any unmatched
1831      * end tag.
1832      *
1833      * Example:
1834      * --------------
1835      * // Call this function whenever a </podcast> end tag is encountered
1836      * onEndTag["podcast"] = (in Element e)
1837      * {
1838      *     // Your code here
1839      *     //
     *     // This is a closure, so code here may reference
1841      *     // variables which are outside of this scope
1842      * };
1843      *
1844      * // call myEpisodeEndHandler (defined elsewhere) whenever an </episode>
1845      * // end tag is encountered
1846      * onEndTag["episode"] = &myEpisodeEndHandler;
1847      *
1848      * // call delegate dg for all other end tags
1849      * onEndTag[null] = dg;
1850      * --------------
1851      *
     * Note that your function will be called for both end tags and empty
     * tags. That is, we make no distinction between &lt;br&gt;&lt;/br&gt;
     * and &lt;br/&gt;.
1855      */
1856     ElementHandler[string] onEndTag;
1857 
    // Common initialization shared by every constructor: snapshots the text
    // that s currently points at into elementStart. s must already be set.
    protected this() @safe @nogc pure nothrow
    {
        elementStart = *s;
    }
1862 
1863     /**
1864      * Register a handler which will be called whenever text is encountered.
1865      *
1866      * Example:
1867      * --------------
1868      * // Call this function whenever text is encountered
1869      * onText = (string s)
1870      * {
1871      *     // Your code here
1872      *
1873      *     // The passed parameter s will have been decoded by the time you see
1874      *     // it, and so may contain any character.
1875      *     //
     *     // This is a closure, so code here may reference
1877      *     // variables which are outside of this scope
1878      * };
1879      * --------------
1880      */
onText(Handler handler)1881     @property @safe @nogc pure nothrow void onText(Handler handler) { textHandler = handler; }
1882 
/**
 * Register an alternative handler which will be called whenever text
 * is encountered. This differs from onText in that onText will decode
 * the text, whereas onTextRaw will not. This allows you to make design
 * choices, since onText will be more accurate, but slower, while
 * onTextRaw will be faster, but less accurate. Of course, you can
 * still call decode() within your handler, if you want, but you'd
 * probably want to use onTextRaw only in circumstances where you
 * know that decoding is unnecessary.
 *
 * When a raw handler is registered, parse() calls it instead of the
 * handler registered through onText.
 *
 * Example:
 * --------------
 * // Call this function whenever text is encountered
 * onTextRaw = (string s)
 * {
 *     // Your code here
 *
 *     // The passed parameter s will NOT have been decoded.
 *     //
 *     // This is a closure, so code here may reference
 *     // variables which are outside of this scope
 * };
 * --------------
 *
 * Params:
 *      handler = the delegate to store as the raw text handler
 */
// Marked @property like every sibling setter (onText, onCData, ...), so
// that the assignment form shown in the example is an explicit part of
// the interface; calling it as onTextRaw(handler) keeps working.
@property void onTextRaw(Handler handler) @safe @nogc pure nothrow
{
    rawTextHandler = handler;
}
1908 
/**
 * Register a handler which will be called whenever a character data
 * (CDATA) section is encountered.
 *
 * Example:
 * --------------
 * // Call this function whenever a CData section is encountered
 * onCData = (string s)
 * {
 *     // Your code here
 *
 *     // The passed parameter s does not include the opening <![CDATA[
 *     // nor closing ]]>
 *     //
 *     // This is a closure, so code here may reference
 *     // variables which are outside of this scope
 * };
 * --------------
 *
 * Params:
 *      handler = the delegate to store as the CDATA handler
 */
@property void onCData(Handler handler) @safe @nogc pure nothrow
{
    this.cdataHandler = handler;
}
1929 
/**
 * Register a handler which will be called whenever a comment is
 * encountered.
 *
 * Example:
 * --------------
 * // Call this function whenever a comment is encountered
 * onComment = (string s)
 * {
 *     // Your code here
 *
 *     // The passed parameter s does not include the opening <!-- nor
 *     // closing -->
 *     //
 *     // This is a closure, so code here may reference
 *     // variables which are outside of this scope
 * };
 * --------------
 *
 * Params:
 *      handler = the delegate to store as the comment handler
 */
@property void onComment(Handler handler) @safe @nogc pure nothrow
{
    this.commentHandler = handler;
}
1950 
/**
 * Register a handler which will be called whenever a processing
 * instruction is encountered.
 *
 * Example:
 * --------------
 * // Call this function whenever a processing instruction is encountered
 * onPI = (string s)
 * {
 *     // Your code here
 *
 *     // The passed parameter s does not include the opening <? nor
 *     // closing ?>
 *     //
 *     // This is a closure, so code here may reference
 *     // variables which are outside of this scope
 * };
 * --------------
 *
 * Params:
 *      handler = the delegate to store as the processing instruction
 *                handler
 */
@property void onPI(Handler handler) @safe @nogc pure nothrow
{
    this.piHandler = handler;
}
1971 
/**
 * Register a handler which will be called whenever an XML instruction is
 * encountered.
 *
 * Example:
 * --------------
 * // Call this function whenever an XML instruction is encountered
 * // (Note: XML instructions may only occur preceding the root tag of a
 * // document).
 * onXI = (string s)
 * {
 *     // Your code here
 *
 *     // The passed parameter s does not include the opening <! nor
 *     // closing >
 *     //
 *     // This is a closure, so code here may reference
 *     // variables which are outside of this scope
 * };
 * --------------
 *
 * Params:
 *      handler = the delegate to store as the XML instruction handler
 */
@property void onXI(Handler handler) @safe @nogc pure nothrow
{
    this.xiHandler = handler;
}
1994 
/**
 * Parse an XML element.
 *
 * Parsing will continue until the end of the current element. Any items
 * encountered for which a handler has been registered will invoke that
 * handler.
 *
 * Start and end tag handlers are looked up by tag name first; if no
 * handler is registered under that name, the one registered under the
 * null key (if any) is used. An empty tag triggers both the start and
 * the end handler. For text, the raw text handler takes precedence over
 * the decoding text handler.
 *
 * Throws: various kinds of XMLException
 */
void parse()
{
    import std.algorithm.searching : startsWith;
    import std.string : indexOf;

    const Tag root = tag_;
    Tag[string] startTags;
    if (tag_ !is null) startTags[tag_.name] = tag_;

    // Consumes one "<opener ... closer" construct: drops the opener,
    // hands everything before the closer to the handler (if one is set)
    // and only then drops the closer itself.
    void consumeDelimited(size_t openerLength, string closer, Handler handler)
    {
        chop(*s, openerLength);
        string payload = chop(*s, indexOf(*s, closer));
        if (handler.funcptr !is null) handler(payload);
        chop(*s, closer.length);
    }

    // Calls the start handler registered for `name`, falling back to the
    // catch-all handler stored under the null key.
    void dispatchStart(string name, ElementParser child)
    {
        auto handler = name in onStartTag;
        if (handler is null) handler = null in onStartTag;
        if (handler !is null) (*handler)(child);
    }

    // Calls the end handler registered for `name`, falling back to the
    // catch-all handler stored under the null key.
    void dispatchEnd(string name, Element element)
    {
        auto handler = name in onEndTag;
        if (handler is null) handler = null in onEndTag;
        if (handler !is null) (*handler)(element);
    }

    while (s.length != 0)
    {
        if (startsWith(*s, "<!--"))
        {
            consumeDelimited(4, "-->", commentHandler);
        }
        else if (startsWith(*s, "<![CDATA["))
        {
            consumeDelimited(9, "]]>", cdataHandler);
        }
        else if (startsWith(*s, "<!"))
        {
            consumeDelimited(2, ">", xiHandler);
        }
        else if (startsWith(*s, "<?"))
        {
            consumeDelimited(2, "?>", piHandler);
        }
        else if (startsWith(*s, "<"))
        {
            tag_ = new Tag(*s, true);
            if (root is null)
                return; // Return to constructor of derived class

            if (tag_.isStart)
            {
                // Remember the start tag so the matching end tag can
                // recover the text that lies between the two.
                startTags[tag_.name] = tag_;

                auto child = new ElementParser(this);
                dispatchStart(tag_.name, child);
            }
            else if (tag_.isEnd)
            {
                const opening = startTags[tag_.name];

                if (opening.tagString.length == 0)
                    assert(0);

                // The element's raw content is the stretch of input
                // between the end of the start tag and the first
                // character of the end tag.
                immutable(char)* contentBegin = opening.tagString.ptr
                    + opening.tagString.length;
                immutable(char)* contentEnd = &tag_.tagString[0];
                string text = decode(
                    contentBegin[0 .. (contentEnd - contentBegin)],
                    DecodeMode.LOOSE);

                auto element = new Element(opening);
                if (text.length != 0) element ~= new Text(text);

                dispatchEnd(tag_.name, element);

                // Closing the element this parser was created for ends
                // the parse.
                if (tag_.name == root.name) return;
            }
            else if (tag_.isEmpty)
            {
                Tag pseudoStart = new Tag(tag_.name);

                // FIX by hed010gy, for bug 2979
                // http://d.puremagic.com/issues/show_bug.cgi?id=2979
                if (tag_.attr.length > 0)
                    foreach (tn, tv; tag_.attr) pseudoStart.attr[tn] = tv;
                // END FIX

                // Handle the pretend start tag; the child parser gets an
                // empty string to parse.
                string emptyBody;
                auto child = new ElementParser(pseudoStart, &emptyBody);
                dispatchStart(pseudoStart.name, child);

                // Handle the pretend end tag
                auto element = new Element(pseudoStart);
                dispatchEnd(tag_.name, element);
            }
        }
        else
        {
            // Plain text up to the next markup
            string raw = chop(*s, indexOf(*s, "<"));
            if (rawTextHandler.funcptr !is null)
            {
                rawTextHandler(raw);
            }
            else if (textHandler.funcptr !is null)
            {
                textHandler(decode(raw, DecodeMode.LOOSE));
            }
        }
    }
}
2132 
/**
 * Returns that part of the element which has already been parsed,
 * i.e. the leading portion of elementStart that is no longer part of
 * the remaining input.
 */
override string toString() const @nogc @safe pure nothrow
{
    assert(elementStart.length >= s.length);
    immutable size_t consumed = elementStart.length - s.length;
    return elementStart[0 .. consumed];
}
2141 
2142 }
2143 
2144 private
2145 {
/*
 * Mixed into every checker function. It expects a variable `s` (the
 * remaining input) to be in scope at the point of the mixin.
 *
 * It snapshots `s` into `old` and provides three `fail` overloads, all of
 * which restore `s` to that snapshot before throwing an Err labelled with
 * `msg`, so a failed check leaves the input where it started.
 */
template Check(string msg)
{
    // Input position on entry; fail() rewinds to it.
    string old = s;

    // Fail with just this checker's label.
    void fail() @safe pure
    {
        s = old;
        throw new Err(s, msg);
    }

    // Fail with this checker's label, chaining the error `e` raised by a
    // nested check.
    void fail(Err e) @safe pure
    {
        s = old;
        throw new Err(s, msg, e);
    }

    // Fail with an explanatory message. The inner error is created
    // first, so it records the current position rather than `old`.
    void fail(string msg2) @safe pure
    {
        auto detail = new Err(s, msg2);
        fail(detail);
    }
}
2167 
/*
 * Checks one Misc item at the front of s: a comment, a processing
 * instruction, or whitespace. On success s is advanced past it; on
 * failure an Err labelled "Misc" is thrown.
 */
void checkMisc(ref string s) @safe pure // rule 27
{
    import std.algorithm.searching : startsWith;

    mixin Check!("Misc");

    try
    {
        if (s.startsWith("<!--"))
            checkComment(s);
        else if (s.startsWith("<?"))
            checkPI(s);
        else
            checkSpace(s);
    }
    catch (Err cause)
    {
        fail(cause);
    }
}
2182 
/*
 * Checks a whole document: the prolog, the root element, then any
 * number of trailing Misc items. Failures are rethrown as an Err
 * labelled "Document".
 */
void checkDocument(ref string s) @safe pure // rule 1
{
    mixin Check!("Document");

    try
    {
        checkProlog(s);
        checkElement(s);
        star!checkMisc(s);
    }
    catch (Err cause)
    {
        fail(cause);
    }
}
2194 
/*
 * Checks that every code point in s satisfies isChar. On the first
 * offender, s is moved to that code point so that the error records its
 * position, and an Err naming the code point is thrown. s is left
 * untouched when all characters are acceptable.
 */
void checkChars(ref string s) @safe pure // rule 2
{
    // TO DO - Fix std.utf stride and decode functions, then use those
    // instead
    import std.format : format;

    mixin Check!("Chars");

    // The index must not be smaller than size_t because size_t is used
    // internally in aApply.d and it will be cast e.g to (int *) which fails
    // on BigEndian targets.
    foreach (size_t offset, dchar ch; s)
    {
        if (isChar(ch)) continue;

        // fail() always throws, so the loop ends at the first bad
        // character.
        s = s[offset .. $];
        fail(format("invalid character: U+%04X", ch));
    }
}
2222 
/*
 * Consumes the run of whitespace (as judged by std.ascii.isWhite) at the
 * front of s. Fails with an Err labelled "Whitespace" when s does not
 * start with at least one whitespace character.
 */
void checkSpace(ref string s) @safe pure // rule 3
{
    import std.ascii : isWhite;

    mixin Check!("Whitespace");

    size_t width = 0;
    while (width < s.length && isWhite(s[width]))
        ++width;
    s = s[width .. $];

    // Nothing consumed means there was no leading whitespace.
    if (s is old) fail();
}
2237 
/*
 * Consumes a Name from the front of s.
 *
 * The first character must be '_', ':' or a letter; subsequent
 * characters may additionally be '-', '.', digits, combining characters
 * or extenders.
 *
 * Params:
 *      s    = remaining input; advanced past the name on success
 *      name = receives the name that was consumed
 *
 * Throws: Err labelled "Name" if s is empty or does not start with a
 *         valid first character.
 */
void checkName(ref string s, out string name) @safe pure // rule 5
{
    mixin Check!("Name");

    if (s.length == 0) fail();

    // Start from "the name runs to the end of the input". Previously this
    // started at 0, so input consisting solely of name characters yielded
    // an empty name and consumed nothing.
    size_t n = s.length;

    // 'i' must not be smaller than size_t because size_t is used internally in
    // aApply.d and it will be cast e.g to (int *) which fails on BigEndian targets.
    foreach (size_t i, dchar c; s)
    {
        if (c == '_' || c == ':' || isLetter(c)) continue;
        if (i == 0) fail();
        if (c == '-' || c == '.' || isDigit(c)
            || isCombiningChar(c) || isExtender(c)) continue;
        // First character that cannot be part of a name
        n = i;
        break;
    }
    name = s[0 .. n];
    s = s[n..$];
}
2258 
/*
 * Consumes a quoted attribute value from the front of s.
 *
 * The value must start with a single or double quote and end with the
 * same quote. In between, '<' is rejected and every '&' must start a
 * valid reference.
 *
 * Throws: Err labelled "AttValue" when the value is unquoted,
 *         unterminated, contains '<', or contains a malformed reference.
 */
void checkAttValue(ref string s) @safe pure // rule 10
{
    mixin Check!("AttValue");

    if (s.length == 0) fail();
    immutable char quote = s[0];
    if (quote != '\u0022' && quote != '\u0027')
        fail("attribute value requires quotes");
    s = s[1..$];
    for (;;)
    {
        // Skip ordinary characters up to the next '<', '&' or closing
        // quote. The previous code searched for the closing quote only,
        // which made the '<' and reference checks below unreachable and
        // sliced with -1 (a range error) when the quote was missing.
        size_t i = 0;
        while (i < s.length && s[i] != quote && s[i] != '<' && s[i] != '&')
            ++i;
        s = s[i .. $];

        if (s.length == 0) fail("unterminated attribute value");
        if (s[0] == '<') fail("< found in attribute value");
        if (s[0] == quote) break;
        // Only '&' remains: it must begin a well-formed reference.
        try { checkReference(s); } catch (Err e) { fail(e); }
    }
    // Drop the closing quote
    s = s[1..$];
}
2281 
/*
 * Consumes character data from the front of s, stopping (without
 * consuming) at the first '&' or '<', or at the end of the input.
 * Fails with an Err labelled "CharData" if "]]>" is met first.
 */
void checkCharData(ref string s) @safe pure // rule 14
{
    import std.algorithm.searching : startsWith;

    mixin Check!("CharData");

    for (; s.length != 0; s = s[1 .. $])
    {
        if (s.startsWith("&") || s.startsWith("<"))
            break;
        if (s.startsWith("]]>"))
            fail("]]> found within char data");
    }
}
2296 
/*
 * Consumes a comment from the front of s: "<!--", then everything up to
 * the first "--", which must be the start of the closing "-->".
 * Failures are reported as an Err labelled "Comment".
 */
void checkComment(ref string s) @safe pure // rule 15
{
    import std.string : indexOf;

    mixin Check!("Comment");

    try
    {
        checkLiteral("<!--", s);
    }
    catch (Err cause)
    {
        fail(cause);
    }

    immutable ptrdiff_t dashes = s.indexOf("--");
    if (dashes == -1)
        fail("unterminated comment");
    s = s[dashes .. $];

    // The first "--" after the opener has to belong to "-->".
    try
    {
        checkLiteral("-->", s);
    }
    catch (Err cause)
    {
        fail(cause);
    }
}
2309 
/*
 * Consumes a processing instruction from the front of s: "<?" followed
 * by everything up to and including the next "?>". Failures are
 * reported as an Err labelled "PI".
 */
void checkPI(ref string s) @safe pure // rule 16
{
    mixin Check!("PI");

    try
    {
        checkLiteral("<?", s);
        checkEnd("?>", s);
    }
    catch (Err cause)
    {
        fail(cause);
    }
}
2321 
/*
 * Consumes a CDATA section from the front of s: the opener held in the
 * module-level `cdata` constant, followed by everything up to and
 * including the next "]]>". Failures are reported as an Err labelled
 * "CDSect".
 */
void checkCDSect(ref string s) @safe pure // rule 18
{
    mixin Check!("CDSect");

    try
    {
        checkLiteral(cdata, s);
        checkEnd("]]>", s);
    }
    catch (Err cause)
    {
        fail(cause);
    }
}
2333 
/*
 * Consumes the document prolog from the front of s: an optional XML
 * declaration, any number of Misc items, and optionally a DOCTYPE
 * declaration followed by more Misc items. Failures are reported as an
 * Err labelled "Prolog".
 */
void checkProlog(ref string s) @safe pure // rule 22
{
    mixin Check!("Prolog");

    alias miscItems = star!checkMisc;
    alias doctypeThenMisc = seq!(checkDocTypeDecl, miscItems);

    try
    {
        /* The XML declaration is optional
         * http://www.w3.org/TR/2008/REC-xml-20081126/#NT-prolog
         */
        opt!checkXMLDecl(s);

        miscItems(s);
        opt!doctypeThenMisc(s);
    }
    catch (Err cause)
    {
        fail(cause);
    }
}
2350 
/*
 * Consumes an XML declaration from the front of s: "<?xml", the
 * mandatory version info, then optional encoding declaration, optional
 * standalone declaration, optional whitespace, and the closing "?>".
 * Failures are reported as an Err labelled "XMLDecl".
 */
void checkXMLDecl(ref string s) @safe pure // rule 23
{
    mixin Check!("XMLDecl");

    try
    {
        checkLiteral("<?xml", s);
        checkVersionInfo(s);
        opt!checkEncodingDecl(s);
        opt!checkSDDecl(s);
        opt!checkSpace(s);
        checkLiteral("?>", s);
    }
    catch (Err cause)
    {
        fail(cause);
    }
}
2366 
/*
 * Consumes the version information of an XML declaration: whitespace,
 * the word "version", an equals sign, and a quoted version number.
 * Failures are reported as an Err labelled "VersionInfo".
 */
void checkVersionInfo(ref string s) @safe pure // rule 24
{
    mixin Check!("VersionInfo");

    try
    {
        checkSpace(s);
        checkLiteral("version", s);
        checkEq(s);
        quoted!checkVersionNum(s);
    }
    catch (Err cause)
    {
        fail(cause);
    }
}
2380 
/*
 * Consumes an equals sign, optionally surrounded by whitespace, from
 * the front of s. Failures are reported as an Err labelled "Eq".
 */
void checkEq(ref string s) @safe pure // rule 25
{
    mixin Check!("Eq");

    try
    {
        opt!checkSpace(s);
        checkLiteral("=", s);
        opt!checkSpace(s);
    }
    catch (Err cause)
    {
        fail(cause);
    }
}
2393 
/*
 * Consumes a version number from the front of s: one or more characters
 * drawn from ASCII letters, digits, '_', '.', ':' and '-'. The
 * surrounding quotes are handled by the caller.
 *
 * Throws: Err labelled "VersionNum" if no such character is present.
 */
void checkVersionNum(ref string s) @safe pure // rule 26
{
    import std.ascii : isAlphaNum;

    mixin Check!("VersionNum");

    // Scan the version characters themselves instead of searching for a
    // double quote. The old search mis-handled single-quoted values
    // (version='1.0') and sliced with -1, a range error, when the input
    // contained no double quote at all.
    size_t n = 0;
    while (n < s.length)
    {
        immutable char ch = s[n];
        if (!isAlphaNum(ch) && ch != '_' && ch != '.' && ch != ':' && ch != '-')
            break;
        ++n;
    }
    s = s[n .. $];

    // At least one version character is required.
    if (s is old) fail();
}
2404 
/*
 * Consumes a DOCTYPE declaration from the front of s: "<!DOCTYPE"
 * followed by everything up to and including the next ">". The
 * declaration's contents are not inspected. Failures are reported as
 * an Err labelled "DocTypeDecl".
 */
void checkDocTypeDecl(ref string s) @safe pure // rule 28
{
    mixin Check!("DocTypeDecl");

    try
    {
        checkLiteral("<!DOCTYPE", s);
        //
        // TO DO -- ensure DOCTYPE is well formed
        // (But not yet. That's one of our "future directions")
        //
        checkEnd(">", s);
    }
    catch (Err cause)
    {
        fail(cause);
    }
}
2420 
/*
 * Consumes a standalone declaration from the front of s: whitespace,
 * the word "standalone", an equals sign, and one of 'yes', "yes", 'no'
 * or "no". Failures are reported as an Err labelled "SDDecl".
 */
void checkSDDecl(ref string s) @safe pure // rule 32
{
    import std.algorithm.searching : startsWith;

    mixin Check!("SDDecl");

    try
    {
        checkSpace(s);
        checkLiteral("standalone", s);
        checkEq(s);
    }
    catch (Err cause)
    {
        fail(cause);
    }

    // Length of the quoted value, quotes included
    size_t valueLength = 0;
    if (s.startsWith("'yes'") || s.startsWith("\"yes\""))
    {
        valueLength = 5;
    }
    else if (s.startsWith("'no'") || s.startsWith("\"no\""))
    {
        valueLength = 4;
    }
    else
    {
        fail("standalone attribute value must be 'yes', \"yes\", 'no' or \"no\"");
    }
    s = s[valueLength .. $];
}
2442 
/*
 * Consumes one element from the front of s. When checkTag reports a
 * start tag, the element's content and its end tag are consumed as
 * well, and the end tag's name must equal the start tag's name. Any
 * other tag kind is complete once the tag itself has been consumed.
 * Failures are reported as an Err labelled "Element".
 */
void checkElement(ref string s) @safe pure // rule 39
{
    mixin Check!("Element");

    string startName, endName, kind;
    try
    {
        checkTag(s, kind, startName);
    }
    catch (Err cause)
    {
        fail(cause);
    }

    if (kind != "STag")
        return;

    // Where the end tag begins, so a name mismatch is reported there.
    string endTagStart;
    try
    {
        checkContent(s);
        endTagStart = s;
        checkETag(s, endName);
    }
    catch (Err cause)
    {
        fail(cause);
    }

    if (startName != endName)
    {
        s = endTagStart;
        fail("end tag name \"" ~ endName
            ~ "\" differs from start tag name \"" ~ startName ~ "\"");
    }
}
2468 
// rules 40 and 44
/*
 * Consumes a start tag or an empty-element tag from the front of s:
 * "<", a name, any number of whitespace-separated attributes, optional
 * whitespace, an optional "/", and ">".
 *
 * Params:
 *      s    = remaining input; advanced past the tag on success
 *      type = set to "STag", or to "ETag" when the tag ends in "/>"
 *      name = receives the tag name
 *
 * Throws: Err labelled "Tag" on any failure.
 */
void checkTag(ref string s, out string type, out string name) @safe pure
{
    mixin Check!("Tag");

    alias attributes = star!(seq!(checkSpace, checkAttribute));

    try
    {
        type = "STag";
        checkLiteral("<", s);
        checkName(s, name);
        attributes(s);
        opt!checkSpace(s);

        immutable bool selfClosing = s.length != 0 && s[0] == '/';
        if (selfClosing)
        {
            s = s[1 .. $];
            type = "ETag";
        }
        checkLiteral(">", s);
    }
    catch (Err cause)
    {
        fail(cause);
    }
}
2490 
/*
 * Consumes one attribute from the front of s: a name, an equals sign,
 * and a quoted value. The name itself is discarded. Failures are
 * reported as an Err labelled "Attribute".
 */
void checkAttribute(ref string s) @safe pure // rule 41
{
    mixin Check!("Attribute");

    try
    {
        string ignoredName;
        checkName(s, ignoredName);
        checkEq(s);
        checkAttValue(s);
    }
    catch (Err cause)
    {
        fail(cause);
    }
}
2504 
/*
 * Consumes an end tag from the front of s: "</", a name, optional
 * whitespace, and ">".
 *
 * Params:
 *      s    = remaining input; advanced past the tag on success
 *      name = receives the tag name
 *
 * Throws: Err labelled "ETag" on any failure.
 */
void checkETag(ref string s, out string name) @safe pure // rule 42
{
    mixin Check!("ETag");

    try
    {
        checkLiteral("</", s);
        checkName(s, name);
        opt!checkSpace(s);
        checkLiteral(">", s);
    }
    catch (Err cause)
    {
        fail(cause);
    }
}
2518 
/*
 * Consumes element content from the front of s: any mixture of
 * references, comments, processing instructions, CDATA sections, child
 * elements and character data. Stops (without consuming) at the first
 * "</" or at the end of the input. Failures are reported as an Err
 * labelled "Content".
 */
void checkContent(ref string s) @safe pure // rule 43
{
    import std.algorithm.searching : startsWith;

    mixin Check!("Content");

    try
    {
        while (s.length > 0)
        {
            // Move the rollback point forward, so a failure rewinds s to
            // the start of the current item rather than of the whole
            // content.
            old = s;

            if (s.startsWith("&"))
                checkReference(s);
            else if (s.startsWith("<!--"))
                checkComment(s);
            else if (s.startsWith("<?"))
                checkPI(s);
            else if (s.startsWith(cdata))
                checkCDSect(s);
            else if (s.startsWith("</"))
                break;
            else if (s.startsWith("<"))
                checkElement(s);
            else
                checkCharData(s);
        }
    }
    catch (Err cause)
    {
        fail(cause);
    }
}
2541 
/*
 * Consumes a numeric character reference from the front of s: "&#"
 * followed by decimal digits, or "&#x" followed by hexadecimal digits,
 * and a terminating ';'.
 *
 * Params:
 *      s = remaining input; advanced past the reference on success
 *      c = receives the referenced code point
 *
 * Throws: Err labelled "CharRef" when the reference is unterminated, has
 *         no digits, is out of range, or names a code point for which
 *         isChar is false.
 */
void checkCharRef(ref string s, out dchar c) @safe pure // rule 66
{
    import std.format : format;

    mixin Check!("CharRef");

    c = 0;
    try { checkLiteral("&#",s); } catch (Err e) { fail(e); }
    int radix = 10;
    if (s.length != 0 && s[0] == 'x')
    {
        s = s[1..$];
        radix = 16;
    }
    if (s.length == 0) fail("unterminated character reference");
    if (s[0] == ';')
        fail("character reference must have at least one digit");
    while (s.length != 0)
    {
        immutable char d = s[0];

        // Numeric value of the digit; anything else ends the number.
        int digit;
        if (d >= '0' && d <= '9')      digit = d - '0';
        else if (d >= 'a' && d <= 'f') digit = d - 'a' + 10;
        else if (d >= 'A' && d <= 'F') digit = d - 'A' + 10;
        else break;
        if (digit >= radix) break;

        c *= radix;
        c += digit;

        // Stop as soon as the value leaves the Unicode range. Without
        // this, a long digit run silently wrapped the 32-bit accumulator
        // and could land on a legal character. c never exceeds 0x10FFFF
        // before the multiplication, so the step above cannot overflow.
        if (c > 0x10FFFF)
            fail("character reference is out of range (maximum is U+10FFFF)");

        s = s[1..$];
    }
    if (!isChar(c)) fail(format("U+%04X is not a legal character",c));
    if (s.length == 0 || s[0] != ';') fail("expected ;");
    else s = s[1..$];
}
2592 
/*
 * Consumes a reference from the front of s: a character reference when
 * s starts with "&#", otherwise an entity reference. Failures are
 * reported as an Err labelled "Reference".
 */
void checkReference(ref string s) @safe pure // rule 67
{
    import std.algorithm.searching : startsWith;

    mixin Check!("Reference");

    try
    {
        if (s.startsWith("&#"))
        {
            // The decoded code point is not needed here.
            dchar discarded;
            checkCharRef(s, discarded);
        }
        else
        {
            checkEntityRef(s);
        }
    }
    catch (Err cause)
    {
        fail(cause);
    }
}
2607 
/*
 * Consumes an entity reference from the front of s: "&", a name, and
 * ";". The name is not looked up. Failures are reported as an Err
 * labelled "EntityRef".
 */
void checkEntityRef(ref string s) @safe pure // rule 68
{
    mixin Check!("EntityRef");

    try
    {
        string entityName;
        checkLiteral("&", s);
        checkName(s, entityName);
        checkLiteral(";", s);
    }
    catch (Err cause)
    {
        fail(cause);
    }
}
2621 
/*
 * Consumes an encoding name from the front of s: an ASCII letter
 * followed by any number of ASCII letters, digits, '.', '_' or '-'.
 * The surrounding quotes are handled by the caller.
 *
 * Throws: Err labelled "EncName" if s does not start with an ASCII
 *         letter.
 */
void checkEncName(ref string s) @safe pure // rule 81
{
    import std.ascii : isAlpha, isAlphaNum;

    mixin Check!("EncName");

    // The name has to begin with a letter.
    if (s.length == 0 || !isAlpha(s[0])) fail();

    // Scan the permitted name characters directly. The old code searched
    // for the next quote character instead, which accepted arbitrary
    // characters inside the name and sliced with -1 (a range error) when
    // no quote followed.
    size_t n = 1;
    while (n < s.length)
    {
        immutable char ch = s[n];
        if (!isAlphaNum(ch) && ch != '.' && ch != '_' && ch != '-')
            break;
        ++n;
    }
    s = s[n .. $];
}
2634 
/*
 * Consumes an encoding declaration from the front of s: whitespace, the
 * word "encoding", an equals sign, and a quoted encoding name. Failures
 * are reported as an Err labelled "EncodingDecl".
 */
void checkEncodingDecl(ref string s) @safe pure // rule 80
{
    mixin Check!("EncodingDecl");

    try
    {
        checkSpace(s);
        checkLiteral("encoding", s);
        checkEq(s);
        quoted!checkEncName(s);
    }
    catch (Err cause)
    {
        fail(cause);
    }
}
2648 
2649     // Helper functions
2650 
/*
 * Consumes the exact text `literal` from the front of s.
 *
 * Throws: Err labelled "Literal", with a message naming the expected
 *         text, if s does not start with `literal`.
 */
void checkLiteral(string literal,ref string s) @safe pure
{
    import std.string : startsWith;

    mixin Check!("Literal");

    if (s.startsWith(literal))
    {
        s = s[literal.length .. $];
        return;
    }
    fail("Expected literal \""~literal~"\"");
}
2660 
/*
 * Skips forward to the first occurrence of `end` in s and consumes it.
 *
 * Throws: Err if `end` does not occur in s. In that case s is left
 *         unchanged, as nothing has been consumed yet.
 */
void checkEnd(string end,ref string s) @safe pure
{
    import std.string : indexOf;
    // Deliberately no mixin Check here.

    immutable position = s.indexOf(end);
    if (position == -1)
        throw new Err(s,"Unable to find terminating \""~end~"\"");

    s = s[position .. $];
    checkLiteral(end, s);
}
2671 
2672     // Metafunctions -- none of these use mixin Check
2673 
/*
 * Optional item: runs checker f on s and ignores an Err thrown by it.
 * Any other exception propagates.
 */
void opt(alias f)(ref string s)
{
    try
    {
        f(s);
    }
    catch (Err)
    {
        // Absence of an optional item is not an error.
    }
}
2678 
/*
 * One or more: runs checker f once, letting its failure propagate, then
 * repeats it through star until it fails or the input is exhausted.
 */
void plus(alias f)(ref string s)
{
    f(s);
    star!f(s);
}
2684 
/*
 * Zero or more: runs checker f on s repeatedly until it throws an Err
 * or s becomes empty. The terminating Err is swallowed.
 */
void star(alias f)(ref string s)
{
    while (s.length != 0)
    {
        try
        {
            f(s);
        }
        catch (Err)
        {
            break;
        }
    }
}
2693 
/*
 * Runs checker f on a quoted item: an opening quote, whatever f
 * consumes, and a closing quote of the same kind. Single quotes are
 * used when s starts with one; otherwise double quotes are required.
 */
void quoted(alias f)(ref string s)
{
    import std.string : startsWith;

    immutable string mark = s.startsWith("'") ? "'" : "\"";

    checkLiteral(mark, s);
    f(s);
    checkLiteral(mark, s);
}
2711 
/*
 * Sequence: runs checker f on s and then checker g on what f left
 * behind. Exceptions from either are propagated unchanged.
 */
void seq(alias f, alias g)(ref string s)
{
    f(s);   // first item
    g(s);   // second item, starting where the first one stopped
}
2717 }
2718 
/**
 * Check an entire XML document for well-formedness
 *
 * The characters are validated first, then the document structure, and
 * finally anything left over after the document is rejected.
 *
 * Params:
 *      s = the document to be checked, passed as a string
 *
 * Throws: CheckException if the document is not well formed
 *
 * CheckException's toString() method will yield the complete hierarchy of
 * parse failure (the XML equivalent of a stack trace), giving the line and
 * column number of every failure at every level.
 */
void check(string s) @safe pure
{
    try
    {
        checkChars(s);
        checkDocument(s);
        if (s.length > 0)
            throw new Err(s, "Junk found after document");
    }
    catch (Err failure)
    {
        // Finish the error off with the input before passing it on.
        failure.complete(s);
        throw failure;
    }
}
2745 
// check() must reject a document whose end tag name differs from its
// start tag name (<genre> ... </genres> in the second book) and name both
// tags in the error text.
@system pure unittest
{
    import std.string : indexOf;

    bool rejected = false;
    try
    {
        check(q"[<?xml version="1.0"?>
        <catalog>
           <book id="bk101">
              <author>Gambardella, Matthew</author>
              <title>XML Developer's Guide</title>
              <genre>Computer</genre>
              <price>44.95</price>
              <publish_date>2000-10-01</publish_date>
              <description>An in-depth look at creating applications
              with XML.</description>
           </book>
           <book id="bk102">
              <author>Ralls, Kim</author>
              <title>Midnight Rain</title>
              <genre>Fantasy</genres>
              <price>5.95</price>
              <publish_date>2000-12-16</publish_date>
              <description>A former architect battles corporate zombies,
              an evil sorceress, and her own childhood to become queen
              of the world.</description>
           </book>
           <book id="bk103">
              <author>Corets, Eva</author>
              <title>Maeve Ascendant</title>
              <genre>Fantasy</genre>
              <price>5.95</price>
              <publish_date>2000-11-17</publish_date>
              <description>After the collapse of a nanotechnology
              society in England, the young survivors lay the
              foundation for a new society.</description>
           </book>
        </catalog>
        ]");
    }
    catch (CheckException e)
    {
        rejected = true;
        immutable position = e.toString().indexOf(
            "end tag name \"genres\" differs"~
            " from start tag name \"genre\"");
        assert(position != -1);
    }
    // Reaching this point without an exception is a failure.
    assert(rejected);
}
2794 
// check() must accept a small well-formed document containing a comment.
@system unittest
{
    string document = q"EOS
<?xml version="1.0"?>
<set>
    <one>A</one>
    <!-- comment -->
    <two>B</two>
</set>
EOS";
    try
    {
        check(document);
    }
    catch (CheckException failure)
    {
        // Surface the full failure hierarchy in the assertion message.
        assert(0, failure.toString());
    }
}
2814 
// Attribute values may contain the other kind of quote character; the
// start tag handler must see them unchanged.
@system unittest
{
    string test_xml = `<?xml version="1.0" encoding='UTF-8'?><r><stream:stream
                        xmlns:stream="http://etherx.'jabber'.org/streams"
                        xmlns="jabber:'client'" from='jid.pl' id="587a5767"
                        xml:lang="en" version="1.0" attr='a"b"c'>
                        </stream:stream></r>`;

    auto parser = new DocumentParser(test_xml);
    bool handlerRan = false;

    parser.onStartTag["stream:stream"] = (ElementParser p)
    {
        auto attributes = p.tag.attr;
        assert(attributes["xmlns"] == "jabber:'client'");
        assert(attributes["from"] == "jid.pl");
        assert(attributes["attr"] == "a\"b\"c");
        handlerRan = true;
    };

    parser.parse();
    assert(handlerRan);
}
2834 
@system unittest
{
    // The &amp; entity must be decoded both in attribute values and in
    // element text by the time the handlers observe them.
    string source = q"EOS
<?xml version="1.0" encoding="utf-8"?> <Tests>
    <Test thing="What &amp; Up">What &amp; Up Second</Test>
</Tests>
EOS";

    auto parser = new DocumentParser(source);

    // Start tag: check the decoded attribute value.
    parser.onStartTag["Test"] = (ElementParser elem) {
        assert(elem.tag.attr["thing"] == "What & Up");
    };

    // End tag: check the decoded text content of the element.
    parser.onEndTag["Test"] = (in Element element) {
        assert(element.text() == "What & Up Second");
    };

    parser.parse();
}
2853 
@system unittest
{
    // Round trip: a document built from a tag whose attribute contains
    // entity references must serialise back to exactly the same text.
    string source = `<tag attr="&quot;value&gt;" />`;
    auto document = new Document(source);
    assert(document.toString() == source);
}
2860 
/**
 * The base class for exceptions thrown by this module
 */
class XMLException : Exception
{
    /// Creates the exception, forwarding `msg` to the `Exception` constructor.
    this(string msg) @safe pure
    {
        super(msg);
    }
}
2863 
2864 // Other exceptions
2865 
/// Thrown during Comment constructor
class CommentException : XMLException
{
    // Private constructor: forwards `msg` unchanged to XMLException.
    private this(string msg) @safe pure
    {
        super(msg);
    }
}
2869 
/// Thrown during CData constructor
class CDataException : XMLException
{
    // Private constructor: forwards `msg` unchanged to XMLException.
    private this(string msg) @safe pure
    {
        super(msg);
    }
}
2873 
/// Thrown during XMLInstruction constructor
class XIException : XMLException
{
    // Private constructor: forwards `msg` unchanged to XMLException.
    private this(string msg) @safe pure
    {
        super(msg);
    }
}
2877 
/// Thrown during ProcessingInstruction constructor
class PIException : XMLException
{
    // Private constructor: forwards `msg` unchanged to XMLException.
    private this(string msg) @safe pure
    {
        super(msg);
    }
}
2881 
/// Thrown during Text constructor
class TextException : XMLException
{
    // Private constructor: forwards `msg` unchanged to XMLException.
    private this(string msg) @safe pure
    {
        super(msg);
    }
}
2885 
/// Thrown during decode()
class DecodeException : XMLException
{
    // Private constructor: forwards `msg` unchanged to XMLException.
    private this(string msg) @safe pure
    {
        super(msg);
    }
}
2889 
/// Thrown if comparing with wrong type
class InvalidTypeException : XMLException
{
    // Private constructor: forwards `msg` unchanged to XMLException.
    private this(string msg) @safe pure
    {
        super(msg);
    }
}
2893 
/// Thrown when parsing for Tags
class TagException : XMLException
{
    // Private constructor: forwards `msg` unchanged to XMLException.
    private this(string msg) @safe pure
    {
        super(msg);
    }
}
2897 
/**
 * Thrown during check()
 */
class CheckException : XMLException
{
    CheckException err; /// Parent in hierarchy
    private string tail;
    /**
     * Name of production rule which failed to parse,
     * or specific error message
     */
    string msg;
    size_t line = 0; /// Line number at which parse failure occurred
    size_t column = 0; /// Column number at which parse failure occurred

    // Stores the three arguments in the fields of the same name. The base
    // class is given a null message; the text lives in this class's own
    // `msg` field instead.
    private this(string tail,string msg,Err err=null) @safe pure
    {
        super(null);
        this.err = err;
        this.msg = msg;
        this.tail = tail;
    }

    // Fills in `line` and `column` (both 1-based) from `entire`: everything
    // in `entire` except its last `tail.length` code units is treated as the
    // text preceding the failure. Then does the same for `err`, if any.
    private void complete(string entire) @safe pure
    {
        import std.string : count, lastIndexOf;
        import std.utf : toUTF32;

        string consumed = entire[0 .. $ - tail.length];

        // Index just past the last newline, or 0 when there is none
        // (lastIndexOf yields -1 in that case).
        ptrdiff_t lineStart = consumed.lastIndexOf('\n') + 1;

        line = consumed.count("\n") + 1;

        // The column is counted in code points, not UTF-8 code units.
        column = toUTF32(consumed[lineStart .. $]).length + 1;

        if (err !is null)
            err.complete(entire);
    }

    // Renders "Line L, column C: msg\n" (the position prefix is omitted
    // while `line` is still 0), preceded by the rendering of `err` if set.
    override string toString() const @safe pure
    {
        import std.format : format;

        string own = (line != 0)
            ? format("Line %d, column %d: ", line, column)
            : null;
        own ~= msg;
        own ~= '\n';

        return (err is null) ? own : err.toString() ~ own;
    }
}
2946 
// Short module-private alias for CheckException; the CheckException
// constructor uses it as the type of its optional `err` parameter.
private alias Err = CheckException;
2948 
2949 // Private helper functions
2950 
2951 private
2952 {
toType(T)2953     inout(T) toType(T)(inout Object o)
2954     {
2955         T t = cast(T)(o);
2956         if (t is null)
2957         {
2958             throw new InvalidTypeException("Attempt to compare a "
2959                 ~ T.stringof ~ " with an instance of another type");
2960         }
2961         return t;
2962     }
2963 
chop(ref string s,size_t n)2964     string chop(ref string s, size_t n) @safe pure nothrow
2965     {
2966         if (n == -1) n = s.length;
2967         string t = s[0 .. n];
2968         s = s[n..$];
2969         return t;
2970     }
2971 
optc(ref string s,char c)2972     bool optc(ref string s, char c) @safe pure nothrow
2973     {
2974         immutable bool b = s.length != 0 && s[0] == c;
2975         if (b) s = s[1..$];
2976         return b;
2977     }
2978 
reqc(ref string s,char c)2979     void reqc(ref string s, char c) @safe pure
2980     {
2981         if (s.length == 0 || s[0] != c) throw new TagException("");
2982         s = s[1..$];
2983     }
2984 
requireOneOf(ref string s,string chars)2985     char requireOneOf(ref string s, string chars) @safe pure
2986     {
2987         import std.string : indexOf;
2988 
2989         if (s.length == 0 || indexOf(chars,s[0]) == -1)
2990             throw new TagException("");
2991         immutable char ch = s[0];
2992         s = s[1..$];
2993         return ch;
2994     }
2995 
2996     size_t hash(string s,size_t h=0) @trusted nothrow
2997     {
2998         return typeid(s).getHash(&s) + h;
2999     }
3000 
    // Definitions from the XML specification
    //
    // Each table below is a flat array of code point pairs: element 2*i is
    // the first code point of a range and element 2*i+1 is its last
    // (inclusive); a single code point is written as a pair of equal values.
    // This is the layout that lookup() in this block searches.
    // NOTE(review): the callers that pass these tables to lookup() are not
    // in this part of the file - confirm.

    // Ranges matching the XML 1.0 `Char` production: tab, line feed,
    // carriage return, and the three ranges 0x20-0xD7FF, 0xE000-0xFFFD and
    // 0x10000-0x10FFFF.
    immutable CharTable=[0x9,0x9,0xA,0xA,0xD,0xD,0x20,0xD7FF,0xE000,0xFFFD,
        0x10000,0x10FFFF];
    // Inclusive (first, last) code point pairs, stored flat; by its name
    // this is the XML 1.0 `BaseChar` character class (Appendix B of the
    // specification). The values look strictly ascending, which a binary
    // search over the pairs would need - keep them that way when editing.
    immutable BaseCharTable=[0x0041,0x005A,0x0061,0x007A,0x00C0,0x00D6,0x00D8,
        0x00F6,0x00F8,0x00FF,0x0100,0x0131,0x0134,0x013E,0x0141,0x0148,0x014A,
        0x017E,0x0180,0x01C3,0x01CD,0x01F0,0x01F4,0x01F5,0x01FA,0x0217,0x0250,
        0x02A8,0x02BB,0x02C1,0x0386,0x0386,0x0388,0x038A,0x038C,0x038C,0x038E,
        0x03A1,0x03A3,0x03CE,0x03D0,0x03D6,0x03DA,0x03DA,0x03DC,0x03DC,0x03DE,
        0x03DE,0x03E0,0x03E0,0x03E2,0x03F3,0x0401,0x040C,0x040E,0x044F,0x0451,
        0x045C,0x045E,0x0481,0x0490,0x04C4,0x04C7,0x04C8,0x04CB,0x04CC,0x04D0,
        0x04EB,0x04EE,0x04F5,0x04F8,0x04F9,0x0531,0x0556,0x0559,0x0559,0x0561,
        0x0586,0x05D0,0x05EA,0x05F0,0x05F2,0x0621,0x063A,0x0641,0x064A,0x0671,
        0x06B7,0x06BA,0x06BE,0x06C0,0x06CE,0x06D0,0x06D3,0x06D5,0x06D5,0x06E5,
        0x06E6,0x0905,0x0939,0x093D,0x093D,0x0958,0x0961,0x0985,0x098C,0x098F,
        0x0990,0x0993,0x09A8,0x09AA,0x09B0,0x09B2,0x09B2,0x09B6,0x09B9,0x09DC,
        0x09DD,0x09DF,0x09E1,0x09F0,0x09F1,0x0A05,0x0A0A,0x0A0F,0x0A10,0x0A13,
        0x0A28,0x0A2A,0x0A30,0x0A32,0x0A33,0x0A35,0x0A36,0x0A38,0x0A39,0x0A59,
        0x0A5C,0x0A5E,0x0A5E,0x0A72,0x0A74,0x0A85,0x0A8B,0x0A8D,0x0A8D,0x0A8F,
        0x0A91,0x0A93,0x0AA8,0x0AAA,0x0AB0,0x0AB2,0x0AB3,0x0AB5,0x0AB9,0x0ABD,
        0x0ABD,0x0AE0,0x0AE0,0x0B05,0x0B0C,0x0B0F,0x0B10,0x0B13,0x0B28,0x0B2A,
        0x0B30,0x0B32,0x0B33,0x0B36,0x0B39,0x0B3D,0x0B3D,0x0B5C,0x0B5D,0x0B5F,
        0x0B61,0x0B85,0x0B8A,0x0B8E,0x0B90,0x0B92,0x0B95,0x0B99,0x0B9A,0x0B9C,
        0x0B9C,0x0B9E,0x0B9F,0x0BA3,0x0BA4,0x0BA8,0x0BAA,0x0BAE,0x0BB5,0x0BB7,
        0x0BB9,0x0C05,0x0C0C,0x0C0E,0x0C10,0x0C12,0x0C28,0x0C2A,0x0C33,0x0C35,
        0x0C39,0x0C60,0x0C61,0x0C85,0x0C8C,0x0C8E,0x0C90,0x0C92,0x0CA8,0x0CAA,
        0x0CB3,0x0CB5,0x0CB9,0x0CDE,0x0CDE,0x0CE0,0x0CE1,0x0D05,0x0D0C,0x0D0E,
        0x0D10,0x0D12,0x0D28,0x0D2A,0x0D39,0x0D60,0x0D61,0x0E01,0x0E2E,0x0E30,
        0x0E30,0x0E32,0x0E33,0x0E40,0x0E45,0x0E81,0x0E82,0x0E84,0x0E84,0x0E87,
        0x0E88,0x0E8A,0x0E8A,0x0E8D,0x0E8D,0x0E94,0x0E97,0x0E99,0x0E9F,0x0EA1,
        0x0EA3,0x0EA5,0x0EA5,0x0EA7,0x0EA7,0x0EAA,0x0EAB,0x0EAD,0x0EAE,0x0EB0,
        0x0EB0,0x0EB2,0x0EB3,0x0EBD,0x0EBD,0x0EC0,0x0EC4,0x0F40,0x0F47,0x0F49,
        0x0F69,0x10A0,0x10C5,0x10D0,0x10F6,0x1100,0x1100,0x1102,0x1103,0x1105,
        0x1107,0x1109,0x1109,0x110B,0x110C,0x110E,0x1112,0x113C,0x113C,0x113E,
        0x113E,0x1140,0x1140,0x114C,0x114C,0x114E,0x114E,0x1150,0x1150,0x1154,
        0x1155,0x1159,0x1159,0x115F,0x1161,0x1163,0x1163,0x1165,0x1165,0x1167,
        0x1167,0x1169,0x1169,0x116D,0x116E,0x1172,0x1173,0x1175,0x1175,0x119E,
        0x119E,0x11A8,0x11A8,0x11AB,0x11AB,0x11AE,0x11AF,0x11B7,0x11B8,0x11BA,
        0x11BA,0x11BC,0x11C2,0x11EB,0x11EB,0x11F0,0x11F0,0x11F9,0x11F9,0x1E00,
        0x1E9B,0x1EA0,0x1EF9,0x1F00,0x1F15,0x1F18,0x1F1D,0x1F20,0x1F45,0x1F48,
        0x1F4D,0x1F50,0x1F57,0x1F59,0x1F59,0x1F5B,0x1F5B,0x1F5D,0x1F5D,0x1F5F,
        0x1F7D,0x1F80,0x1FB4,0x1FB6,0x1FBC,0x1FBE,0x1FBE,0x1FC2,0x1FC4,0x1FC6,
        0x1FCC,0x1FD0,0x1FD3,0x1FD6,0x1FDB,0x1FE0,0x1FEC,0x1FF2,0x1FF4,0x1FF6,
        0x1FFC,0x2126,0x2126,0x212A,0x212B,0x212E,0x212E,0x2180,0x2182,0x3041,
        0x3094,0x30A1,0x30FA,0x3105,0x312C,0xAC00,0xD7A3];
    // Inclusive (first, last) code point pairs, ascending; by its name this
    // is the XML 1.0 `Ideographic` character class: 0x3007, 0x3021-0x3029
    // and 0x4E00-0x9FA5.
    immutable IdeographicTable=[0x3007,0x3007,0x3021,0x3029,0x4E00,0x9FA5];
    // Inclusive (first, last) code point pairs, stored flat; by its name
    // this is the XML 1.0 `CombiningChar` character class (Appendix B of
    // the specification). The values look strictly ascending, which a
    // binary search over the pairs would need - keep them that way when
    // editing.
    immutable CombiningCharTable=[0x0300,0x0345,0x0360,0x0361,0x0483,0x0486,
        0x0591,0x05A1,0x05A3,0x05B9,0x05BB,0x05BD,0x05BF,0x05BF,0x05C1,0x05C2,
        0x05C4,0x05C4,0x064B,0x0652,0x0670,0x0670,0x06D6,0x06DC,0x06DD,0x06DF,
        0x06E0,0x06E4,0x06E7,0x06E8,0x06EA,0x06ED,0x0901,0x0903,0x093C,0x093C,
        0x093E,0x094C,0x094D,0x094D,0x0951,0x0954,0x0962,0x0963,0x0981,0x0983,
        0x09BC,0x09BC,0x09BE,0x09BE,0x09BF,0x09BF,0x09C0,0x09C4,0x09C7,0x09C8,
        0x09CB,0x09CD,0x09D7,0x09D7,0x09E2,0x09E3,0x0A02,0x0A02,0x0A3C,0x0A3C,
        0x0A3E,0x0A3E,0x0A3F,0x0A3F,0x0A40,0x0A42,0x0A47,0x0A48,0x0A4B,0x0A4D,
        0x0A70,0x0A71,0x0A81,0x0A83,0x0ABC,0x0ABC,0x0ABE,0x0AC5,0x0AC7,0x0AC9,
        0x0ACB,0x0ACD,0x0B01,0x0B03,0x0B3C,0x0B3C,0x0B3E,0x0B43,0x0B47,0x0B48,
        0x0B4B,0x0B4D,0x0B56,0x0B57,0x0B82,0x0B83,0x0BBE,0x0BC2,0x0BC6,0x0BC8,
        0x0BCA,0x0BCD,0x0BD7,0x0BD7,0x0C01,0x0C03,0x0C3E,0x0C44,0x0C46,0x0C48,
        0x0C4A,0x0C4D,0x0C55,0x0C56,0x0C82,0x0C83,0x0CBE,0x0CC4,0x0CC6,0x0CC8,
        0x0CCA,0x0CCD,0x0CD5,0x0CD6,0x0D02,0x0D03,0x0D3E,0x0D43,0x0D46,0x0D48,
        0x0D4A,0x0D4D,0x0D57,0x0D57,0x0E31,0x0E31,0x0E34,0x0E3A,0x0E47,0x0E4E,
        0x0EB1,0x0EB1,0x0EB4,0x0EB9,0x0EBB,0x0EBC,0x0EC8,0x0ECD,0x0F18,0x0F19,
        0x0F35,0x0F35,0x0F37,0x0F37,0x0F39,0x0F39,0x0F3E,0x0F3E,0x0F3F,0x0F3F,
        0x0F71,0x0F84,0x0F86,0x0F8B,0x0F90,0x0F95,0x0F97,0x0F97,0x0F99,0x0FAD,
        0x0FB1,0x0FB7,0x0FB9,0x0FB9,0x20D0,0x20DC,0x20E1,0x20E1,0x302A,0x302F,
        0x3099,0x3099,0x309A,0x309A];
    // Inclusive (first, last) code point pairs, ascending; by its name this
    // is the XML 1.0 `Digit` character class. Every pair here spans a run
    // of consecutive code points (ten in all but one case: 0x0BE7-0x0BEF
    // has nine).
    immutable DigitTable=[0x0030,0x0039,0x0660,0x0669,0x06F0,0x06F9,0x0966,
        0x096F,0x09E6,0x09EF,0x0A66,0x0A6F,0x0AE6,0x0AEF,0x0B66,0x0B6F,0x0BE7,
        0x0BEF,0x0C66,0x0C6F,0x0CE6,0x0CEF,0x0D66,0x0D6F,0x0E50,0x0E59,0x0ED0,
        0x0ED9,0x0F20,0x0F29];
    // Inclusive (first, last) code point pairs, ascending; by its name this
    // is the XML 1.0 `Extender` character class. Most entries are single
    // code points, written as a pair of equal values.
    immutable ExtenderTable=[0x00B7,0x00B7,0x02D0,0x02D0,0x02D1,0x02D1,0x0387,
        0x0387,0x0640,0x0640,0x0E46,0x0E46,0x0EC6,0x0EC6,0x3005,0x3005,0x3031,
        0x3035,0x309D,0x309E,0x30FC,0x30FE];
3073 
lookup(const (int)[]table,int c)3074     bool lookup(const(int)[] table, int c) @safe @nogc nothrow pure
3075     {
3076         while (table.length != 0)
3077         {
3078             auto m = (table.length >> 1) & ~1;
3079             if (c < table[m])
3080             {
3081                 table = table[0 .. m];
3082             }
3083             else if (c > table[m+1])
3084             {
3085                 table = table[m+2..$];
3086             }
3087             else return true;
3088         }
3089         return false;
3090     }
3091 
startOf(string s)3092     string startOf(string s) @safe nothrow pure
3093     {
3094         string r;
3095         foreach (char c;s)
3096         {
3097             r ~= (c < 0x20 || c > 0x7F) ? '.' : c;
3098             if (r.length >= 40) { r ~= "___"; break; }
3099         }
3100         return r;
3101     }
3102 
3103     void exit(string s=null)
3104     {
3105         throw new XMLException(s);
3106     }
3107 }
3108