xref: /netbsd/external/gpl3/gcc/dist/libphobos/src/std/xml.d (revision f0fbc68b)
1 // Written in the D programming language.
2 
3 /**
4 $(RED Warning: This module is considered out-dated and not up to Phobos'
5       current standards. It will be removed from Phobos in 2.101.0.
6       If you still need it, go to $(LINK https://github.com/DigitalMars/undeaD))
7  */
8 
9 /*
10 Classes and functions for creating and parsing XML
11 
12 The basic architecture of this module is that there are standalone functions,
13 classes for constructing an XML document from scratch (Tag, Element and
14 Document), and also classes for parsing a pre-existing XML file (ElementParser
15 and DocumentParser). The parsing classes <i>may</i> be used to build a
16 Document, but that is not their primary purpose. The handling capabilities of
17 DocumentParser and ElementParser are sufficiently customizable that you can
18 make them do pretty much whatever you want.
19 
20 Example: This example creates a DOM (Document Object Model) tree
21     from an XML file.
22 ------------------------------------------------------------------------------
23 import std.xml;
24 import std.stdio;
25 import std.string;
26 import std.file;
27 
28 // books.xml is used in various samples throughout the Microsoft XML Core
29 // Services (MSXML) SDK.
30 //
31 // See http://msdn2.microsoft.com/en-us/library/ms762271(VS.85).aspx
32 
33 void main()
34 {
35     string s = cast(string) std.file.read("books.xml");
36 
37     // Check for well-formedness
38     check(s);
39 
40     // Make a DOM tree
41     auto doc = new Document(s);
42 
43     // Plain-print it
44     writeln(doc);
45 }
46 ------------------------------------------------------------------------------
47 
48 Example: This example does much the same thing, except that the file is
49     deconstructed and reconstructed by hand. This is more work, but the
50     techniques involved offer vastly more power.
51 ------------------------------------------------------------------------------
52 import std.xml;
53 import std.stdio;
import std.string;
import std.file;
55 
56 struct Book
57 {
58     string id;
59     string author;
60     string title;
61     string genre;
62     string price;
63     string pubDate;
64     string description;
65 }
66 
67 void main()
68 {
69     string s = cast(string) std.file.read("books.xml");
70 
71     // Check for well-formedness
72     check(s);
73 
74     // Take it apart
75     Book[] books;
76 
77     auto xml = new DocumentParser(s);
78     xml.onStartTag["book"] = (ElementParser xml)
79     {
80         Book book;
81         book.id = xml.tag.attr["id"];
82 
83         xml.onEndTag["author"]       = (in Element e) { book.author      = e.text(); };
84         xml.onEndTag["title"]        = (in Element e) { book.title       = e.text(); };
85         xml.onEndTag["genre"]        = (in Element e) { book.genre       = e.text(); };
86         xml.onEndTag["price"]        = (in Element e) { book.price       = e.text(); };
87         xml.onEndTag["publish-date"] = (in Element e) { book.pubDate     = e.text(); };
88         xml.onEndTag["description"]  = (in Element e) { book.description = e.text(); };
89 
90         xml.parse();
91 
92         books ~= book;
93     };
94     xml.parse();
95 
    // Put it back together again
97     auto doc = new Document(new Tag("catalog"));
98     foreach (book;books)
99     {
100         auto element = new Element("book");
101         element.tag.attr["id"] = book.id;
102 
103         element ~= new Element("author",      book.author);
104         element ~= new Element("title",       book.title);
105         element ~= new Element("genre",       book.genre);
106         element ~= new Element("price",       book.price);
107         element ~= new Element("publish-date",book.pubDate);
108         element ~= new Element("description", book.description);
109 
110         doc ~= element;
111     }
112 
113     // Pretty-print it
    writeln(join(doc.pretty(3),"\n"));
115 }
116 -------------------------------------------------------------------------------
117 Copyright: Copyright Janice Caron 2008 - 2009.
118 License:   $(HTTP www.boost.org/LICENSE_1_0.txt, Boost License 1.0).
119 Authors:   Janice Caron
120 Source:    $(PHOBOSSRC std/xml.d)
121 */
122 /*
123          Copyright Janice Caron 2008 - 2009.
124 Distributed under the Boost Software License, Version 1.0.
125    (See accompanying file LICENSE_1_0.txt or copy at
126          http://www.boost.org/LICENSE_1_0.txt)
127 */
128 deprecated("Will be removed from Phobos in 2.101.0. If you still need it, go to https://github.com/DigitalMars/undeaD")
129 module std.xml;
130 
// The literal character sequence that opens an XML CDATA section.
enum string cdata = "<![CDATA[";
132 
/*
 * Tests whether a code point is a legal XML character (production 2, "Char").
 *
 * Accepted are TAB, LF and CR, everything from U+0020 up to U+D7FF, and
 * everything from U+E000 up to U+10FFFF except U+FFFE and U+FFFF.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: true if c is an XML character, false otherwise
 */
bool isChar(dchar c) @safe @nogc pure nothrow // rule 2
{
    // Below the surrogate block: anything from space upwards, plus the three
    // permitted control characters.
    if (c <= 0xD7FF)
        return c >= 0x20 || c == 0x9 || c == 0xA || c == 0xD;

    // The surrogate block and anything beyond U+10FFFF is never a character.
    if (c < 0xE000 || c > 0x10FFFF)
        return false;

    // Clearing the lowest bit maps both U+FFFE and U+FFFF onto 0xFFFE, so a
    // single comparison excludes exactly those two code points.
    return (c & 0x1FFFFE) != 0xFFFE;
}
164 
@safe @nogc nothrow pure unittest
{
    // Boundary values on either side of every range isChar distinguishes.
    static immutable uint[] accepted = [
        0x9, 0xA, 0xD, 0x20, 'J', 0xD7FF, 0xE000, 0xFFFD, 0x10000, 0x10FFFF,
    ];
    static immutable uint[] rejected = [
        0x8, 0xB, 0xC, 0xE, 0x1F, 0xD800, 0xDFFF, 0xFFFE, 0xFFFF, 0x110000,
    ];

    foreach (code; accepted)
        assert( isChar(cast(dchar) code));
    foreach (code; rejected)
        assert(!isChar(cast(dchar) code));

    // Optional exhaustive cross-check of the hard-coded test against the
    // lookup table; only compiled when the debug identifier is enabled.
    debug (stdxml_TestHardcodedChecks)
    {
        foreach (c; 0 .. dchar.max + 1)
            assert(isChar(c) == lookup(CharTable, c));
    }
}
194 
/*
 * Tests whether a code point is XML whitespace.
 *
 * Exactly four characters qualify: space (U+0020), tab (U+0009), line feed
 * (U+000A) and carriage return (U+000D).
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: true if c is one of the four whitespace characters
 */
bool isSpace(dchar c) @safe @nogc pure nothrow
{
    switch (c)
    {
    case 0x09: // tab
    case 0x0A: // line feed
    case 0x0D: // carriage return
    case 0x20: // space
        return true;
    default:
        return false;
    }
}
210 
/*
 * Tests whether a code point is a digit according to the XML standard.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: true if c is in U+0030 .. U+0039 or is found in DigitTable
 */
bool isDigit(dchar c) @safe @nogc pure nothrow
{
    // The ASCII digits are answered directly; the short-circuit means the
    // table is only consulted for everything else.
    immutable isAsciiDigit = c >= 0x0030 && c <= 0x0039;
    return isAsciiDigit || lookup(DigitTable, c);
}
226 
@safe @nogc nothrow pure unittest
{
    // Exhaustive cross-check of isDigit against DigitTable; only compiled
    // when the debug identifier is enabled.
    debug (stdxml_TestHardcodedChecks)
    {
        foreach (c; 0 .. dchar.max + 1)
        {
            assert(isDigit(c) == lookup(DigitTable, c));
        }
    }
}
235 
/*
 * Tests whether a code point is a letter according to the XML standard,
 * i.e. either an ideographic character or a base character.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: true if isIdeographic(c) or isBaseChar(c) holds
 */
bool isLetter(dchar c) @safe @nogc nothrow pure // rule 84
{
    // The ideographic test is a few range comparisons, so it goes first.
    if (isIdeographic(c))
        return true;
    return isBaseChar(c);
}
248 
/*
 * Tests whether a code point is an ideographic character according to the
 * XML standard.
 *
 * The accepted code points are U+3007, U+3021 .. U+3029 and
 * U+4E00 .. U+9FA5.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: true if c lies in one of the ranges above
 */
bool isIdeographic(dchar c) @safe @nogc nothrow pure
{
    return c == 0x3007
        || (c >= 0x3021 && c <= 0x3029)
        || (c >= 0x4E00 && c <= 0x9FA5);
}
268 
@safe @nogc nothrow pure unittest
{
    // The single code point plus both ends of each accepted range.
    static immutable uint[] ideographs = [0x4E00, 0x9FA5, 0x3007, 0x3021, 0x3029];
    foreach (code; ideographs)
        assert(isIdeographic(cast(dchar) code));

    // Optional exhaustive cross-check against IdeographicTable; only
    // compiled when the debug identifier is enabled.
    debug (stdxml_TestHardcodedChecks)
    {
        foreach (c; 0 .. dchar.max + 1)
            assert(isIdeographic(c) == lookup(IdeographicTable, c));
    }
}
283 
/*
 * Tests whether a code point is a base character according to the XML
 * standard, by looking it up in BaseCharTable.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: the result of the table lookup for c
 */
bool isBaseChar(dchar c) @safe @nogc nothrow pure
{
    immutable found = lookup(BaseCharTable, c);
    return found;
}
297 
/*
 * Tests whether a code point is a combining character according to the XML
 * standard, by looking it up in CombiningCharTable.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: the result of the table lookup for c
 */
bool isCombiningChar(dchar c) @safe @nogc nothrow pure
{
    immutable found = lookup(CombiningCharTable, c);
    return found;
}
311 
/*
 * Tests whether a code point is an extender according to the XML standard,
 * by looking it up in ExtenderTable.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: the result of the table lookup for c
 */
bool isExtender(dchar c) @safe @nogc nothrow pure
{
    immutable found = lookup(ExtenderTable, c);
    return found;
}
324 
/*
 * Escapes the five characters that have predefined XML entities.
 *
 * Ampersand, double quote, apostrophe, less-than and greater-than are
 * replaced by &amp;amp;, &amp;quot;, &amp;apos;, &amp;lt; and &amp;gt;
 * respectively; decode() performs the reverse mapping. Every other code
 * unit is copied through untouched.
 *
 * If the string contains none of those characters, the original string is
 * returned as is (no copy is made).
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *      s = The string to be encoded
 *
 * Returns: The encoded string
 *
 * Example:
 * --------------
 * writeln(encode("a > b")); // writes "a &gt; b"
 * --------------
 */
S encode(S)(S s)
{
    import std.array : appender;

    auto output = appender!S();
    size_t copiedUpTo;  // index just past the last code unit already emitted
    bool escapedAny;    // stays false while the input needs no change

    foreach (idx, unit; s)
    {
        string entity;
        if      (unit == '&')  entity = "&amp;";
        else if (unit == '"')  entity = "&quot;";
        else if (unit == '\'') entity = "&apos;";
        else if (unit == '<')  entity = "&lt;";
        else if (unit == '>')  entity = "&gt;";
        else continue;

        // Flush the untouched run before this character, then the entity.
        output.put(s[copiedUpTo .. idx]);
        output.put(entity);
        copiedUpTo = idx + 1;
        escapedAny = true;
    }

    // Nothing was escaped: hand back the caller's own string.
    if (!escapedAny) return s;

    output.put(s[copiedUpTo .. $]);
    return output.data;
}
378 
@safe pure unittest
{
    // A string with nothing to escape must come back as the same slice.
    auto untouched = "hello";
    assert(encode(untouched) is untouched);

    // One case per predefined entity.
    assert(encode("a > b") == "a &gt; b", encode("a > b"));
    assert(encode("a < b") == "a &lt; b");
    assert(encode("don't") == "don&apos;t");
    assert(encode("\"hi\"") == "&quot;hi&quot;", encode("\"hi\""));
    assert(encode("cat & dog") == "cat &amp; dog");
}
389 
/*
 * Selects how decode() treats its input.
 *
 * $(DDOC_ENUM_MEMBERS NONE) Do not decode
 * $(DDOC_ENUM_MEMBERS LOOSE) Decode, but ignore errors
 * $(DDOC_ENUM_MEMBERS STRICT) Decode, and throw exception on error
 */
enum DecodeMode
{
    NONE,   // return the input unchanged
    LOOSE,  // decode what can be decoded, leave the rest as is
    STRICT  // decode, throwing on anything that cannot be decoded
}
401 
/*
 * Unescapes the predefined XML entities and numeric character references
 * in a string.
 *
 * The entities &amp;amp;, &amp;quot;, &amp;apos;, &amp;lt; and &amp;gt;
 * are replaced by the characters they stand for, and decimal or
 * hexadecimal character references such as &amp;#x20AC; are replaced by
 * the UTF-8 encoding of the referenced character. encode() performs the
 * reverse mapping for the five named entities.
 *
 * If the string does not contain an ampersand, the original will be
 * returned.
 *
 * The mode parameter selects the behavior: DecodeMode.NONE returns the
 * input unchanged, DecodeMode.LOOSE copies any ampersand that does not
 * start a recognized entity through as is, and DecodeMode.STRICT throws a
 * DecodeException for such an ampersand.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *      s = The string to be decoded
 *      mode = (optional) Mode to use for decoding. (Defaults to LOOSE).
 *
 * Throws: DecodeException if mode == DecodeMode.STRICT and decode fails
 *
 * Returns: The decoded string
 *
 * Example:
 * --------------
 * writeln(decode("a &gt; b")); // writes "a > b"
 * --------------
 */
string decode(string s, DecodeMode mode=DecodeMode.LOOSE) @safe pure
{
    import std.algorithm.searching : startsWith;

    if (mode == DecodeMode.NONE) return s;

    // Each predefined entity paired with the text it stands for.
    static immutable string[2][] predefined = [
        ["&amp;",  "&"],
        ["&quot;", "\""],
        ["&apos;", "'"],
        ["&lt;",   "<"],
        ["&gt;",   ">"],
    ];

    // Stays empty until the first ampersand is met, so that input without
    // any ampersand can be returned without copying.
    string decoded;

    for (size_t pos = 0; pos < s.length; ++pos)
    {
        immutable char unit = s[pos];
        if (unit != '&')
        {
            if (decoded.length != 0) decoded ~= unit;
            continue;
        }

        // First ampersand: start the output with everything seen so far.
        if (decoded.length == 0)
            decoded = s[0 .. pos].dup;

        string rest = s[pos .. $];

        if (startsWith(rest, "&#"))
        {
            try
            {
                dchar codePoint;
                string remaining = rest;
                checkCharRef(remaining, codePoint);
                char[4] utf8;
                import std.utf : encode;
                decoded ~= utf8[0 .. encode(utf8, codePoint)];
                // Resume after the reference; the loop's ++pos supplies the
                // final step.
                pos = s.length - remaining.length - 1;
            }
            catch (Err e)
            {
                if (mode == DecodeMode.STRICT)
                    throw new DecodeException("Unescaped &");
                decoded ~= '&';
            }
            continue;
        }

        bool matched = false;
        foreach (ref pair; predefined)
        {
            if (startsWith(rest, pair[0]))
            {
                decoded ~= pair[1];
                // Skip the rest of the entity; ++pos steps over its ';'.
                pos += pair[0].length - 1;
                matched = true;
                break;
            }
        }

        if (!matched)
        {
            if (mode == DecodeMode.STRICT)
                throw new DecodeException("Unescaped &");
            decoded ~= '&';
        }
    }

    return (decoded.length == 0) ? s : decoded;
}
490 
@safe pure unittest
{
    // Asserts that strict decoding of s is rejected with a DecodeException.
    void assertNot(string s) pure
    {
        bool rejected = false;
        try
        {
            decode(s, DecodeMode.STRICT);
        }
        catch (DecodeException e)
        {
            rejected = true;
        }
        assert(rejected, s);
    }

    // Input without an ampersand is returned as the very same slice.
    auto plain = "hello";
    assert(decode(plain, DecodeMode.STRICT) is plain);

    // Well-formed input and the text it must decode to in strict mode.
    static immutable string[2][] wellFormed = [
        ["a &gt; b",       "a > b"],
        ["a &lt; b",       "a < b"],
        ["don&apos;t",     "don't"],
        ["&quot;hi&quot;", "\"hi\""],
        ["cat &amp; dog",  "cat & dog"],
        ["&#42;",          "*"],
        ["&#x2A;",         "*"],
    ];
    foreach (pair; wellFormed)
        assert(decode(pair[0], DecodeMode.STRICT) == pair[1]);

    // Malformed input: passed through unchanged in loose mode, rejected in
    // strict mode.
    static immutable string[] malformed = [
        "cat & dog",
        "a &gt b",
        "&#;",
        "&#x;",
        "&#2G;",
        "&#x2G;",
    ];
    foreach (text; malformed)
        assert(decode(text, DecodeMode.LOOSE) == text);
    foreach (text; malformed)
        assertNot(text);
}
526 
/*
 * Class representing an XML document: a root Element together with the
 * text found before it (the prolog) and after it (the epilog).
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 */
class Document : Element
{
    /*
     * All text which occurs before the root element.
     * Defaults to &lt;?xml version="1.0"?&gt;
     */
    string prolog = "<?xml version=\"1.0\"?>";

    /*
     * All text which occurs after the root element.
     * Defaults to the empty string
     */
    string epilog;

    /*
     * Constructs a Document by parsing XML text into a complete DOM
     * (Document Object Model) tree.
     *
     * The input MUST be valid XML. This constructor's own contract only
     * asserts that the text is non-empty; the original documentation states
     * that validity is enforced by DocumentParser's in contract.
     *
     * Params:
     *      s = the complete XML text.
     */
    this(string s)
    in
    {
        assert(s.length != 0);
    }
    do
    {
        auto parser = new DocumentParser(s);
        string rootTagText = parser.tag.tagString;

        this(parser.tag);
        // The prolog is whatever lies in s before the root tag's text.
        prolog = s[0 .. rootTagText.ptr - s.ptr];
        parse(parser);
        // Whatever the parser has left over after the root element.
        epilog = *parser.s;
    }

    /*
     * Constructs a Document from a Tag.
     *
     * Params:
     *      tag = the start tag of the document.
     */
    this(const(Tag) tag)
    {
        super(tag);
    }

    const
    {
        /*
         * Compares two Documents for equality: prolog, element content and
         * epilog must all match.
         *
         * Example:
         * --------------
         * Document d1,d2;
         * if (d1 == d2) { }
         * --------------
         */
        override bool opEquals(scope const Object o) const
        {
            const other = toType!(const Document)(o);
            if (prolog != other.prolog) return false;
            if (!this.Element.opEquals(other)) return false;
            return epilog == other.epilog;
        }

        /*
         * Orders two Documents by prolog, then by element content, then by
         * epilog.
         *
         * You should rarely need to call this function. It exists so that
         * Documents can be used as associative array keys.
         *
         * Example:
         * --------------
         * Document d1,d2;
         * if (d1 < d2) { }
         * --------------
         */
        override int opCmp(scope const Object o) scope const
        {
            // Three-way comparison of two strings as -1, 0 or 1.
            static int compareText(string lhs, string rhs)
            {
                if (lhs == rhs) return 0;
                return lhs < rhs ? -1 : 1;
            }

            const other = toType!(const Document)(o);
            if (immutable byProlog = compareText(prolog, other.prolog))
                return byProlog;
            if (immutable byContent = this.Element.opCmp(other))
                return byContent;
            return compareText(epilog, other.epilog);
        }

        /*
         * Returns the hash of a Document, combining the prolog, the epilog
         * and the hash of the element content.
         *
         * You should rarely need to call this function. It exists so that
         * Documents can be used as associative array keys.
         */
        override size_t toHash() scope const @trusted
        {
            immutable contentHash = this.Element.toHash();
            immutable withEpilog = hash(epilog, contentHash);
            return hash(prolog, withEpilog);
        }

        /*
         * Returns the string representation of a Document: the prolog, the
         * root element's XML and the epilog, in that order.
         */
        override string toString() scope const @safe
        {
            immutable rootXml = super.toString();
            return prolog ~ rootXml ~ epilog;
        }
    }
}
648 
@system unittest
{
    // https://issues.dlang.org/show_bug.cgi?id=14966
    string source = `<?xml version="1.0" encoding="UTF-8"?><foo></foo>`;

    // Two documents parsed from the same text compare equal, are not
    // ordered before one another, and are interchangeable as AA keys.
    auto first = new Document(source);
    auto second = new Document(source);
    assert(first == second);
    assert(!(first < second));

    int[Document] byDocument;
    byDocument[first] = 1;
    assert(byDocument[second] == 1);

    // Giving one of them an extra child makes it compare greater.
    second ~= new Element("b");
    assert(first < second);
    assert(second > first);
}
666 
/*
 * Class representing an XML element: a tag plus the ordered list of items
 * (text, CDATA, comments, processing instructions and child elements) that
 * make up its interior.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 */
class Element : Item
{
    Tag tag; // The start tag of the element
    Item[] items; // Every item of the element, in the order appended
    Text[] texts; // The element's text items
    CData[] cdatas; // The element's CData items
    Comment[] comments; // The element's comments
    ProcessingInstruction[] pis; // The element's processing instructions
    Element[] elements; // The element's child elements

    /*
     * Constructs an Element given a name and a string to be used as a Text
     * interior.
     *
     * Params:
     *      name = the name of the element.
     *      interior = (optional) the string interior. When empty or null,
     *          no Text item is added.
     *
     * Example:
     * -------------------------------------------------------
     * auto element = new Element("title","Serenity")
     *     // constructs the element <title>Serenity</title>
     * -------------------------------------------------------
     */
    this(string name, string interior=null) @safe pure
    {
        this(new Tag(name));
        if (interior.length != 0) opOpAssign!("~")(new Text(interior));
    }

    /*
     * Constructs an Element from a Tag.
     *
     * The tag is copied (name, attributes and tag string) rather than
     * shared. The copy starts out with type EMPTY; appendItem switches it
     * to START once a non-empty item is added.
     *
     * Params:
     *      tag_ = the start or empty tag of the element.
     */
    this(const(Tag) tag_) @safe pure
    {
        this.tag = new Tag(tag_.name);
        tag.type = TagType.EMPTY;
        foreach (key, value; tag_.attr)
            tag.attr[key] = value;
        tag.tagString = tag_.tagString;
    }

    /*
     * Append a text item to the interior of this element
     *
     * Params:
     *      item = the item you wish to append.
     *
     * Example:
     * --------------
     * Element element;
     * element ~= new Text("hello");
     * --------------
     */
    void opOpAssign(string op)(Text item) @safe pure
        if (op == "~")
    {
        texts ~= item;
        appendItem(item);
    }

    /*
     * Append a CData item to the interior of this element
     *
     * Params:
     *      item = the item you wish to append.
     *
     * Example:
     * --------------
     * Element element;
     * element ~= new CData("hello");
     * --------------
     */
    void opOpAssign(string op)(CData item) @safe pure
        if (op == "~")
    {
        cdatas ~= item;
        appendItem(item);
    }

    /*
     * Append a comment to the interior of this element
     *
     * Params:
     *      item = the item you wish to append.
     *
     * Example:
     * --------------
     * Element element;
     * element ~= new Comment("hello");
     * --------------
     */
    void opOpAssign(string op)(Comment item) @safe pure
        if (op == "~")
    {
        comments ~= item;
        appendItem(item);
    }

    /*
     * Append a processing instruction to the interior of this element
     *
     * Params:
     *      item = the item you wish to append.
     *
     * Example:
     * --------------
     * Element element;
     * element ~= new ProcessingInstruction("hello");
     * --------------
     */
    void opOpAssign(string op)(ProcessingInstruction item) @safe pure
        if (op == "~")
    {
        pis ~= item;
        appendItem(item);
    }

    /*
     * Append a complete element to the interior of this element
     *
     * Params:
     *      item = the item you wish to append.
     *
     * Example:
     * --------------
     * Element element;
     * Element other = new Element("br");
     * element ~= other;
     *    // appends element representing <br />
     * --------------
     */
    void opOpAssign(string op)(Element item) @safe pure
        if (op == "~")
    {
        elements ~= item;
        appendItem(item);
    }

    // Records item in the ordered item list and promotes the tag from EMPTY
    // to START as soon as the element holds an item that is not empty XML.
    private void appendItem(Item item) @safe pure
    {
        items ~= item;
        if (tag.type == TagType.EMPTY && !item.isEmptyXML)
            tag.type = TagType.START;
    }

    // Fills this element from a parser: installs handlers that append each
    // text, CDATA, comment and processing instruction, and that build child
    // elements recursively, then runs the parser.
    private void parse(ElementParser xml)
    {
        xml.onText = (string s) { opOpAssign!("~")(new Text(s)); };
        xml.onCData = (string s) { opOpAssign!("~")(new CData(s)); };
        xml.onComment = (string s) { opOpAssign!("~")(new Comment(s)); };
        xml.onPI = (string s) { opOpAssign!("~")(new ProcessingInstruction(s)); };

        // The null key registers the handler used for start tags.
        xml.onStartTag[null] = (ElementParser child)
        {
            auto e = new Element(child.tag);
            e.parse(child);
            opOpAssign!("~")(e);
        };

        xml.parse();
    }

    /*
     * Compares two Elements for equality.
     *
     * Two elements are equal when their tags have the same name and the
     * same attributes and their items are pairwise equal. The tag is part
     * of the comparison because toHash() mixes in the tag's hash: objects
     * that compare equal must produce equal hashes for associative arrays
     * to work, and elements that differ only in their tag do not.
     *
     * Example:
     * --------------
     * Element e1,e2;
     * if (e1 == e2) { }
     * --------------
     */
    override bool opEquals(scope const Object o) const
    {
        const other = toType!(const Element)(o);

        if (tag.name != other.tag.name || tag.attr != other.tag.attr)
            return false;

        if (items.length != other.items.length) return false;
        foreach (i, item; items)
        {
            if (!item.opEquals(other.items[i])) return false;
        }
        return true;
    }

    /*
     * Compares two Elements
     *
     * The ordering looks at the items only: the first pair of unequal items
     * decides, and when one item list is a prefix of the other the shorter
     * one sorts first. The tag does not take part in the ordering.
     *
     * You should rarely need to call this function. It exists so that Elements
     * can be used as associative array keys.
     *
     * Example:
     * --------------
     * Element e1,e2;
     * if (e1 < e2) { }
     * --------------
     */
    override int opCmp(scope const Object o) @safe const
    {
        const other = toType!(const Element)(o);
        // size_t matches the type of .length, so the index cannot wrap
        // before the lengths are reached.
        for (size_t i = 0; ; ++i)
        {
            immutable thisDone = i == items.length;
            immutable otherDone = i == other.items.length;
            if (thisDone && otherDone) return 0;
            if (thisDone) return -1;
            if (otherDone) return 1;
            if (!items[i].opEquals(other.items[i]))
                return items[i].opCmp(other.items[i]);
        }
    }

    /*
     * Returns the hash of an Element: the tag's hash plus the hash of every
     * item.
     *
     * You should rarely need to call this function. It exists so that Elements
     * can be used as associative array keys.
     */
    override size_t toHash() scope const @safe
    {
        size_t result = tag.toHash();
        foreach (item; items)
            result += item.toHash();
        return result;
    }

    const
    {
        /*
         * Returns the decoded interior of an element.
         *
         * The element is assumed to contain text <i>only</i>. So, for
         * example, given XML such as "&lt;title&gt;Good &amp;amp;
         * Bad&lt;/title&gt;", will return "Good &amp; Bad".
         *
         * Params:
         *      mode = (optional) Mode to use for decoding. (Defaults to LOOSE).
         *
         * Throws: DecodeException if any item is not a Text, or if decode
         *      fails
         */
        string text(DecodeMode mode=DecodeMode.LOOSE)
        {
            string buffer;
            foreach (item; items)
            {
                // Keep the items const, as pretty() does, instead of
                // casting the qualifier away.
                auto t = cast(const(Text)) item;
                if (t is null) throw new DecodeException(item.toString());
                buffer ~= decode(t.toString(), mode);
            }
            return buffer;
        }

        /*
         * Returns an indented string representation of this item, one array
         * entry per output line.
         *
         * An element without items yields its empty tag; an element whose
         * only item is a Text yields a single line; otherwise the start and
         * end tags enclose the lines of every item, each shifted right by
         * indent.
         *
         * Params:
         *      indent = (optional) number of spaces by which to indent this
         *          element. Defaults to 2.
         */
        override string[] pretty(uint indent=2) scope
        {
            import std.algorithm.searching : count;
            import std.string : rightJustify;

            if (isEmptyXML) return [ tag.toEmptyString() ];

            if (items.length == 1)
            {
                auto t = cast(const(Text))(items[0]);
                if (t !is null)
                    return [tag.toStartString() ~ t.toString() ~ tag.toEndString()];
            }

            string[] lines = [ tag.toStartString() ];
            foreach (item; items)
            {
                foreach (line; item.pretty(indent))
                {
                    // Padding to the current width plus indent shifts the
                    // line right by indent columns.
                    lines ~= rightJustify(line, count(line) + indent);
                }
            }
            lines ~= tag.toEndString();
            return lines;
        }

        /*
         * Returns the string representation of an Element
         *
         * Example:
         * --------------
         * auto element = new Element("br");
         * writeln(element.toString()); // writes "<br />"
         * --------------
         */
        override string toString() scope @safe
        {
            if (isEmptyXML) return tag.toEmptyString();

            string buffer = tag.toStartString();
            foreach (item; items)
                buffer ~= item.toString();
            buffer ~= tag.toEndString();
            return buffer;
        }

        // True when the element has no items at all.
        override @property @safe pure @nogc nothrow bool isEmptyXML() const scope { return items.length == 0; }
    }
}
980 
981 /*
982  * Tag types.
983  *
984  * $(DDOC_ENUM_MEMBERS START) Used for start tags
985  * $(DDOC_ENUM_MEMBERS END) Used for end tags
986  * $(DDOC_ENUM_MEMBERS EMPTY) Used for empty tags
987  *
988  */
// No explicit values are given, so the members take the default enum
// numbering in declaration order.
enum TagType { START, END, EMPTY }
990 
991 /*
992  * Class representing an XML tag.
993  *
994  * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
995  *
996  * The class invariant guarantees
997  * <ul>
998  * <li> that $(B type) is a valid enum TagType value</li>
999  * <li> that $(B name) consists of valid characters</li>
1000  * <li> that each attribute name consists of valid characters</li>
1001  * </ul>
1002  */
class Tag
{
    TagType type = TagType.START;   // Type of tag
    string name;                    // Tag name
    string[string] attr;            // Associative array of attributes
    private string tagString;       // Source text; assigned only by the parsing constructor

    // Verifies the guarantees listed in the class description: a valid type,
    // a valid tag name, and valid attribute names.
    invariant()
    {
        string s;
        string t;

        assert(type == TagType.START
            || type == TagType.END
            || type == TagType.EMPTY);

        s = name;
        try { checkName(s,t); }
        catch (Err e) { assert(false,"Invalid tag name:" ~ e.toString()); }

        foreach (k,v;attr)
        {
            s = k;
            try { checkName(s,t); }
            catch (Err e)
                { assert(false,"Invalid attribute name:" ~ e.toString()); }
        }
    }

    /*
     * Constructs an instance of Tag with a specified name and type
     *
     * The constructor does not initialize the attributes. To initialize the
     * attributes, you access the $(B attr) member variable.
     *
     * Params:
     *      name = the Tag's name
     *      type = (optional) the Tag's type. If omitted, defaults to
     *          TagType.START.
     *
     * Example:
     * --------------
     * auto tag = new Tag("img",TagType.EMPTY);
     * tag.attr["src"] = "http://example.com/example.jpg";
     * --------------
     */
    this(string name, TagType type=TagType.START) @safe pure
    {
        this.name = name;
        this.type = type;
    }

    /* Private constructor (so don't ddoc this!)
     *
     * Constructs a Tag by parsing the string representation, e.g. "<html>".
     *
     * The string is passed by reference, and is advanced over all characters
     * consumed.
     *
     * The second parameter is a dummy parameter only, required solely to
     * distinguish this constructor from the public one.
     *
     * Throws: TagException if the text is not a well-formed tag. This
     * includes text that ends before an expected delimiter is found.
     */
    private this(ref string s, bool dummy) @safe pure
    {
        import std.algorithm.searching : countUntil;
        import std.ascii : isWhite;
        import std.utf : byCodeUnit;

        // countUntil reports "no match" as -1. Using that value as a slice
        // bound would fail a bounds check instead of reporting malformed
        // input, so reject it here. The exception is raised in the same way
        // as the END/EMPTY conflict further down.
        static size_t found(ptrdiff_t pos) @safe pure
        {
            if (pos < 0) throw new TagException("");
            return cast(size_t) pos;
        }

        tagString = s;
        try
        {
            reqc(s,'<');
            if (optc(s,'/')) type = TagType.END;

            // The name runs up to the first delimiter.
            size_t i = found(s.byCodeUnit.countUntil(">", "/>", " ", "\t", "\v", "\r", "\n", "\f"));
            name = s[0 .. i];
            s = s[i .. $];

            // Skip whitespace after the name.
            i = found(s.byCodeUnit.countUntil!(a => !isWhite(a)));
            s = s[i .. $];

            // One iteration per attribute: key, '=', quoted value.
            while (s.length > 0 && s[0] != '>' && s[0] != '/')
            {
                i = found(s.byCodeUnit.countUntil("=", " ", "\t", "\v", "\r", "\n", "\f"));
                string key = s[0 .. i];
                s = s[i .. $];

                i = found(s.byCodeUnit.countUntil!(a => !isWhite(a)));
                s = s[i .. $];
                reqc(s,'=');
                i = found(s.byCodeUnit.countUntil!(a => !isWhite(a)));
                s = s[i .. $];

                // The value ends at the same quote character that opened it.
                immutable char quote = requireOneOf(s,"'\"");
                i = found(s.byCodeUnit.countUntil(quote));
                string val = decode(s[0 .. i], DecodeMode.LOOSE);
                s = s[i .. $];
                reqc(s,quote);

                i = found(s.byCodeUnit.countUntil!(a => !isWhite(a)));
                s = s[i .. $];
                attr[key] = val;
            }
            if (optc(s,'/'))
            {
                // "</name/>" is not a legal tag
                if (type == TagType.END) throw new TagException("");
                type = TagType.EMPTY;
            }
            reqc(s,'>');
            // Keep only the consumed prefix as this tag's source text.
            tagString.length = tagString.length - s.length;
        }
        catch (XMLException e)
        {
            tagString.length = tagString.length - s.length;
            throw new TagException(tagString);
        }
    }

    const
    {
        /*
         * Compares two Tags for equality
         *
         * You should rarely need to call this function. It exists so that Tags
         * can be used as associative array keys.
         *
         * Example:
         * --------------
         * Tag tag1,tag2;
         * if (tag1 == tag2) { }
         * --------------
         */
        override bool opEquals(scope Object o)
        {
            const tag = toType!(const Tag)(o);
            return name == tag.name
                && attr == tag.attr
                && type == tag.type;
        }

        /*
         * Compares two Tags
         *
         * Tags are ordered by name first, then by attributes, then by type.
         *
         * Example:
         * --------------
         * Tag tag1,tag2;
         * if (tag1 < tag2) { }
         * --------------
         */
        override int opCmp(Object o)
        {
            const tag = toType!(const Tag)(o);

            if (name != tag.name)
                return name < tag.name ? -1 : 1;

            // Note that attr is an AA, so the comparison is nonsensical (bug 10381)
            if (attr != tag.attr)
                return cast(void *) attr < cast(void*) tag.attr ? -1 : 1;

            if (type != tag.type)
                return type < tag.type ? -1 : 1;

            return 0;
        }

        /*
         * Returns the hash of a Tag
         *
         * You should rarely need to call this function. It exists so that Tags
         * can be used as associative array keys.
         *
         * Only the name contributes to the hash.
         */
        override size_t toHash()
        {
            return .hashOf(name);
        }

        /*
         * Returns the string representation of a Tag
         *
         * Example:
         * --------------
         * auto tag = new Tag("book",TagType.START);
         * writefln(tag.toString()); // writes "<book>"
         * --------------
         */
        override string toString() @safe
        {
            if (isEmpty) return toEmptyString();
            return (isEnd) ? toEndString() : toStartString();
        }

        private
        {
            // Builds "<name key="value" ..." without the closing bracket.
            // Attribute values are passed through encode().
            string toNonEndString() @safe
            {
                import std.format : format;

                string s = "<" ~ name;
                foreach (key,val;attr)
                    s ~= format(" %s=\"%s\"",key,encode(val));
                return s;
            }

            // "<name ...>"
            string toStartString() @safe { return toNonEndString() ~ ">"; }

            // "</name>" (attributes are not written on end tags)
            string toEndString() @safe { return "</" ~ name ~ ">"; }

            // "<name ... />"
            string toEmptyString() @safe { return toNonEndString() ~ " />"; }
        }

        /*
         * Returns true if the Tag is a start tag
         *
         * Example:
         * --------------
         * if (tag.isStart) { }
         * --------------
         */
        @property bool isStart() @safe @nogc pure nothrow { return type == TagType.START; }

        /*
         * Returns true if the Tag is an end tag
         *
         * Example:
         * --------------
         * if (tag.isEnd) { }
         * --------------
         */
        @property bool isEnd() @safe @nogc pure nothrow { return type == TagType.END;   }

        /*
         * Returns true if the Tag is an empty tag
         *
         * Example:
         * --------------
         * if (tag.isEmpty) { }
         * --------------
         */
        @property bool isEmpty() @safe @nogc pure nothrow { return type == TagType.EMPTY; }
    }
}
1240 
1241 /*
1242  * Class representing a comment
1243  */
class Comment : Item
{
    private string content;

    /*
     * Construct a comment
     *
     * Params:
     *      content = the body of the comment
     *
     * Throws: CommentException if the comment body is illegal (contains "--"
     * or exactly equals "-")
     *
     * Example:
     * --------------
     * auto item = new Comment("This is a comment");
     *    // constructs <!--This is a comment-->
     * --------------
     */
    this(string content) @safe pure
    {
        import std.string : indexOf;

        if (content == "-" || content.indexOf("--") != -1)
            throw new CommentException(content);
        this.content = content;
    }

    /*
     * Compares two comments for equality
     *
     * Returns false when the other item is not a Comment.
     *
     * Example:
     * --------------
     * Comment item1,item2;
     * if (item1 == item2) { }
     * --------------
     */
    override bool opEquals(scope const Object o) const
    {
        const item = toType!(const Item)(o);
        const t = cast(const Comment) item;
        return t !is null && content == t.content;
    }

    /*
     * Compares two comments
     *
     * You should rarely need to call this function. It exists so that Comments
     * can be used as associative array keys.
     *
     * Returns: -1, 0 or 1 according to how the two bodies compare. An item
     * that is not a Comment yields 0.
     *
     * Example:
     * --------------
     * Comment item1,item2;
     * if (item1 < item2) { }
     * --------------
     */
    override int opCmp(scope const Object o) scope const
    {
        const item = toType!(const Item)(o);
        const t = cast(const Comment) item;

        // The ordering is only defined between two Comments.
        if (t is null) return 0;

        // Branch explicitly: folding the result into a boolean expression
        // would turn -1 into 1 and lose the "less than" answer.
        if (content == t.content) return 0;
        return content < t.content ? -1 : 1;
    }

    /*
     * Returns the hash of a Comment
     *
     * You should rarely need to call this function. It exists so that Comments
     * can be used as associative array keys.
     */
    override size_t toHash() scope const nothrow { return hash(content); }

    /*
     * Returns a string representation of this comment
     */
    override string toString() scope const @safe pure nothrow { return "<!--" ~ content ~ "-->"; }

    override @property @safe @nogc pure nothrow scope bool isEmptyXML() const { return false; } // Returns false always
}
1323 
1324 // https://issues.dlang.org/show_bug.cgi?id=16241
@safe unittest
{
    import std.exception : assertThrown;

    // A body without "--" is accepted and stored unchanged ...
    auto accepted = new Comment("==");
    assert(accepted.content == "==");

    // ... while "--" must be rejected.
    assertThrown!CommentException(new Comment("--"));
}
1332 
1333 /*
1334  * Class representing a Character Data section
1335  */
class CData : Item
{
    private string content;

    /*
     * Construct a character data section
     *
     * Params:
     *      content = the body of the character data segment
     *
     * Throws: CDataException if the segment body is illegal (contains "]]>")
     *
     * Example:
     * --------------
     * auto item = new CData("<b>hello</b>");
     *    // constructs <![CDATA[<b>hello</b>]]>
     * --------------
     */
    this(string content) @safe pure
    {
        import std.string : indexOf;
        if (content.indexOf("]]>") != -1) throw new CDataException(content);
        this.content = content;
    }

    /*
     * Compares two CDatas for equality
     *
     * Returns false when the other item is not a CData.
     *
     * Example:
     * --------------
     * CData item1,item2;
     * if (item1 == item2) { }
     * --------------
     */
    override bool opEquals(scope const Object o) const
    {
        const item = toType!(const Item)(o);
        const t = cast(const CData) item;
        return t !is null && content == t.content;
    }

    /*
     * Compares two CDatas
     *
     * You should rarely need to call this function. It exists so that CDatas
     * can be used as associative array keys.
     *
     * Returns: -1, 0 or 1 according to how the two bodies compare. An item
     * that is not a CData yields 0.
     *
     * Example:
     * --------------
     * CData item1,item2;
     * if (item1 < item2) { }
     * --------------
     */
    override int opCmp(scope const Object o) scope const
    {
        const item = toType!(const Item)(o);
        const t = cast(const CData) item;

        // The ordering is only defined between two CDatas.
        if (t is null) return 0;

        // Branch explicitly: folding the result into a boolean expression
        // would turn -1 into 1 and lose the "less than" answer.
        if (content == t.content) return 0;
        return content < t.content ? -1 : 1;
    }

    /*
     * Returns the hash of a CData
     *
     * You should rarely need to call this function. It exists so that CDatas
     * can be used as associative array keys.
     */
    override size_t toHash() scope const nothrow { return hash(content); }

    /*
     * Returns a string representation of this CData section
     */
    override string toString() scope const @safe pure nothrow { return cdata ~ content ~ "]]>"; }

    override @property @safe @nogc pure nothrow scope bool isEmptyXML() const { return false; } // Returns false always
}
1412 
1413 /*
1414  * Class representing a text (aka Parsed Character Data) section
1415  */
class Text : Item
{
    private string content;     // stored already encoded

    /*
     * Construct a text (aka PCData) section
     *
     * Params:
     *      content = the text. This function encodes the text before
     *      insertion, so it is safe to insert any text
     *
     * Example:
     * --------------
     * auto item = new Text("a < b");
     *    // constructs a &lt; b
     * --------------
     */
    this(string content) @safe pure
    {
        this.content = encode(content);
    }

    /*
     * Compares two text sections for equality
     *
     * Returns false when the other item is not a Text.
     *
     * Example:
     * --------------
     * Text item1,item2;
     * if (item1 == item2) { }
     * --------------
     */
    override bool opEquals(scope const Object o) const
    {
        const item = toType!(const Item)(o);
        const t = cast(const Text) item;
        return t !is null && content == t.content;
    }

    /*
     * Compares two text sections
     *
     * You should rarely need to call this function. It exists so that Texts
     * can be used as associative array keys.
     *
     * Returns: -1, 0 or 1 according to how the two (encoded) contents
     * compare. An item that is not a Text yields 0.
     *
     * Example:
     * --------------
     * Text item1,item2;
     * if (item1 < item2) { }
     * --------------
     */
    override int opCmp(scope const Object o) scope const
    {
        const item = toType!(const Item)(o);
        const t = cast(const Text) item;

        // The ordering is only defined between two Texts.
        if (t is null) return 0;

        // Branch explicitly: folding the result into a boolean expression
        // would turn -1 into 1 and lose the "less than" answer.
        if (content == t.content) return 0;
        return content < t.content ? -1 : 1;
    }

    /*
     * Returns the hash of a text section
     *
     * You should rarely need to call this function. It exists so that Texts
     * can be used as associative array keys.
     */
    override size_t toHash() scope const nothrow { return hash(content); }

    /*
     * Returns a string representation of this Text section
     */
    override string toString() scope const @safe @nogc pure nothrow { return content; }

    /*
     * Returns true if the content is the empty string
     */
    override @property @safe @nogc pure nothrow scope bool isEmptyXML() const { return content.length == 0; }
}
1492 
1493 /*
1494  * Class representing an XML Instruction section
1495  */
class XMLInstruction : Item
{
    private string content;

    /*
     * Construct an XML Instruction section
     *
     * Params:
     *      content = the body of the instruction segment
     *
     * Throws: XIException if the segment body is illegal (contains ">")
     *
     * Example:
     * --------------
     * auto item = new XMLInstruction("ATTLIST");
     *    // constructs <!ATTLIST>
     * --------------
     */
    this(string content) @safe pure
    {
        import std.string : indexOf;
        if (content.indexOf(">") != -1) throw new XIException(content);
        this.content = content;
    }

    /*
     * Compares two XML instructions for equality
     *
     * Returns false when the other item is not an XMLInstruction.
     *
     * Example:
     * --------------
     * XMLInstruction item1,item2;
     * if (item1 == item2) { }
     * --------------
     */
    override bool opEquals(scope const Object o) const
    {
        const item = toType!(const Item)(o);
        const t = cast(const XMLInstruction) item;
        return t !is null && content == t.content;
    }

    /*
     * Compares two XML instructions
     *
     * You should rarely need to call this function. It exists so that
     * XmlInstructions can be used as associative array keys.
     *
     * Returns: -1, 0 or 1 according to how the two bodies compare. An item
     * that is not an XMLInstruction yields 0.
     *
     * Example:
     * --------------
     * XMLInstruction item1,item2;
     * if (item1 < item2) { }
     * --------------
     */
    override int opCmp(scope const Object o) scope const
    {
        const item = toType!(const Item)(o);
        const t = cast(const XMLInstruction) item;

        // The ordering is only defined between two XMLInstructions.
        if (t is null) return 0;

        // Branch explicitly: folding the result into a boolean expression
        // would turn -1 into 1 and lose the "less than" answer.
        if (content == t.content) return 0;
        return content < t.content ? -1 : 1;
    }

    /*
     * Returns the hash of an XMLInstruction
     *
     * You should rarely need to call this function. It exists so that
     * XmlInstructions can be used as associative array keys.
     */
    override size_t toHash() scope const nothrow { return hash(content); }

    /*
     * Returns a string representation of this XmlInstruction
     */
    override string toString() scope const @safe pure nothrow { return "<!" ~ content ~ ">"; }

    override @property @safe @nogc pure nothrow scope bool isEmptyXML() const { return false; } // Returns false always
}
1572 
1573 /*
1574  * Class representing a Processing Instruction section
1575  */
class ProcessingInstruction : Item
{
    private string content;

    /*
     * Construct a Processing Instruction section
     *
     * Params:
     *      content = the body of the instruction segment
     *
     * Throws: PIException if the segment body is illegal (contains "?>")
     *
     * Example:
     * --------------
     * auto item = new ProcessingInstruction("php");
     *    // constructs <?php?>
     * --------------
     */
    this(string content) @safe pure
    {
        import std.string : indexOf;
        if (content.indexOf("?>") != -1) throw new PIException(content);
        this.content = content;
    }

    /*
     * Compares two processing instructions for equality
     *
     * Returns false when the other item is not a ProcessingInstruction.
     *
     * Example:
     * --------------
     * ProcessingInstruction item1,item2;
     * if (item1 == item2) { }
     * --------------
     */
    override bool opEquals(scope const Object o) const
    {
        const item = toType!(const Item)(o);
        const t = cast(const ProcessingInstruction) item;
        return t !is null && content == t.content;
    }

    /*
     * Compares two processing instructions
     *
     * You should rarely need to call this function. It exists so that
     * ProcessingInstructions can be used as associative array keys.
     *
     * Returns: -1, 0 or 1 according to how the two bodies compare. An item
     * that is not a ProcessingInstruction yields 0.
     *
     * Example:
     * --------------
     * ProcessingInstruction item1,item2;
     * if (item1 < item2) { }
     * --------------
     */
    override int opCmp(scope const Object o) scope const
    {
        const item = toType!(const Item)(o);
        const t = cast(const ProcessingInstruction) item;

        // The ordering is only defined between two ProcessingInstructions.
        if (t is null) return 0;

        // Branch explicitly: folding the result into a boolean expression
        // would turn -1 into 1 and lose the "less than" answer.
        if (content == t.content) return 0;
        return content < t.content ? -1 : 1;
    }

    /*
     * Returns the hash of a ProcessingInstruction
     *
     * You should rarely need to call this function. It exists so that
     * ProcessingInstructions can be used as associative array keys.
     */
    override size_t toHash() scope const nothrow { return hash(content); }

    /*
     * Returns a string representation of this ProcessingInstruction
     */
    override string toString() scope const @safe pure nothrow { return "<?" ~ content ~ "?>"; }

    override @property @safe @nogc pure nothrow bool isEmptyXML() scope const { return false; } // Returns false always
}
1652 
1653 /*
1654  * Abstract base class for XML items
1655  */
abstract class Item
{
    // Equality test against another Item of the same type
    abstract override bool opEquals(scope const Object o) @safe const;

    // Ordering against another Item of the same type
    abstract override int opCmp(scope const Object o) @safe const;

    // Hash value of this item
    abstract override size_t toHash() @safe scope const;

    // Textual form of this item
    abstract override string toString() @safe scope const;

    /*
     * Returns an indented string representation of this item
     *
     * This default produces at most one line: the item's string form with
     * surrounding whitespace stripped, or no lines at all when nothing is
     * left after stripping.
     *
     * Params:
     *      indent = number of spaces by which to indent child elements
     *          (not used by this default implementation)
     */
    string[] pretty(uint indent) @safe scope const
    {
        import std.string : strip;

        string trimmed = strip(toString());
        if (trimmed.length == 0)
            return [];
        return [ trimmed ];
    }

    // True when the item represents empty XML text
    abstract @property @safe @nogc pure nothrow bool isEmptyXML() scope const;
}
1686 
1687 /*
1688  * Class for parsing an XML Document.
1689  *
1690  * This is a subclass of ElementParser. Most of the useful functions are
1691  * documented there.
1692  *
1693  * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
1694  *
1695  * Bugs:
1696  *      Currently only supports UTF documents.
1697  *
1698  *      If there is an encoding attribute in the prolog, it is ignored.
1699  *
1700  */
class DocumentParser : ElementParser
{
    // The document text. The constructor points the inherited `s` pointer
    // at this field.
    string xmlText;

    /*
     * Constructs a DocumentParser.
     *
     * The input to this function MUST be valid XML.
     * This is enforced by the function's in contract, which asserts that
     * the text is non-empty and that check() accepts it. Being a contract,
     * the validation only runs in builds that compile contracts in.
     *
     * Params:
     *      xmlText_ = the entire XML document as text
     *
     */
    this(string xmlText_)
    in
    {
        assert(xmlText_.length != 0);
        try
        {
            // Confirm that the input is valid XML
            check(xmlText_);
        }
        catch (CheckException e)
        {
            // And if it's not, tell the user why not
            assert(false, "\n" ~ e.toString());
        }
    }
    do
    {
        xmlText = xmlText_;
        // s has to be set before super() runs: the base constructor
        // dereferences it to record where the element starts.
        s = &xmlText;
        super();    // Initialize everything
        parse();    // Parse through the root tag (but not beyond)
    }
}
1738 
@system unittest
{
    // Parse a small document with one child that itself has one child.
    auto parsed = new Document("<root><child><grandchild/></child></root>");

    // Only the direct child is listed, and it is the <child> element.
    assert(parsed.elements.length == 1);
    assert(parsed.elements[0].tag.name == "child");

    // With no non-element content, items and elements hold the same entries.
    assert(parsed.items == parsed.elements);
}
1746 
1747 /*
1748  * Class for parsing an XML element.
1749  *
1750  * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
1751  *
1752  * Note that you cannot construct instances of this class directly. You can
1753  * construct a DocumentParser (which is a subclass of ElementParser), but
 * otherwise, instances of ElementParser will be created for you by the
1755  * library, and passed your way via onStartTag handlers.
1756  *
1757  */
1758 class ElementParser
1759 {
1760     alias Handler = void delegate(string);
1761     alias ElementHandler = void delegate(in Element element);
1762     alias ParserHandler = void delegate(ElementParser parser);
1763 
1764     private
1765     {
1766         Tag tag_;
1767         string elementStart;
1768         string* s;
1769 
1770         Handler commentHandler = null;
1771         Handler cdataHandler = null;
1772         Handler xiHandler = null;
1773         Handler piHandler = null;
1774         Handler rawTextHandler = null;
1775         Handler textHandler = null;
1776 
1777         // Private constructor for start tags
        this(ElementParser parent) @safe @nogc pure nothrow
        {
            // Share the parent's text pointer. This must happen before the
            // call to this(), which dereferences s.
            s = parent.s;
            this();
            // The new parser describes the same tag as its parent.
            tag_ = parent.tag_;
        }
1784 
1785         // Private constructor for empty tags
        this(Tag tag, string* t) @safe @nogc pure nothrow
        {
            // Adopt the supplied text pointer. This must happen before the
            // call to this(), which dereferences s.
            s = t;
            this();
            tag_ = tag;
        }
1792     }
1793 
1794     /*
1795      * The Tag at the start of the element being parsed. You can read this to
1796      * determine the tag's name and attributes.
1797      */
tag()1798     @property @safe @nogc pure nothrow const(Tag) tag() const { return tag_; }
1799 
1800     /*
1801      * Register a handler which will be called whenever a start tag is
1802      * encountered which matches the specified name. You can also pass null as
1803      * the name, in which case the handler will be called for any unmatched
1804      * start tag.
1805      *
1806      * Example:
1807      * --------------
1808      * // Call this function whenever a <podcast> start tag is encountered
1809      * onStartTag["podcast"] = (ElementParser xml)
1810      * {
1811      *     // Your code here
1812      *     //
     *     // This is a closure, so code here may reference
1814      *     // variables which are outside of this scope
1815      * };
1816      *
1817      * // call myEpisodeStartHandler (defined elsewhere) whenever an <episode>
1818      * // start tag is encountered
1819      * onStartTag["episode"] = &myEpisodeStartHandler;
1820      *
1821      * // call delegate dg for all other start tags
1822      * onStartTag[null] = dg;
1823      * --------------
1824      *
1825      * This library will supply your function with a new instance of
     * ElementParser, which may be used to parse inside the element whose
1827      * start tag was just found, or to identify the tag attributes of the
1828      * element, etc.
1829      *
1830      * Note that your function will be called for both start tags and empty
1831      * tags. That is, we make no distinction between &lt;br&gt;&lt;/br&gt;
1832      * and &lt;br/&gt;.
1833      */
1834     ParserHandler[string] onStartTag;
1835 
1836     /*
1837      * Register a handler which will be called whenever an end tag is
1838      * encountered which matches the specified name. You can also pass null as
1839      * the name, in which case the handler will be called for any unmatched
1840      * end tag.
1841      *
1842      * Example:
1843      * --------------
1844      * // Call this function whenever a </podcast> end tag is encountered
1845      * onEndTag["podcast"] = (in Element e)
1846      * {
1847      *     // Your code here
1848      *     //
     *     // This is a closure, so code here may reference
1850      *     // variables which are outside of this scope
1851      * };
1852      *
1853      * // call myEpisodeEndHandler (defined elsewhere) whenever an </episode>
1854      * // end tag is encountered
1855      * onEndTag["episode"] = &myEpisodeEndHandler;
1856      *
1857      * // call delegate dg for all other end tags
1858      * onEndTag[null] = dg;
1859      * --------------
1860      *
1861      * Note that your function will be called for both start tags and empty
1862      * tags. That is, we make no distinction between &lt;br&gt;&lt;/br&gt;
1863      * and &lt;br/&gt;.
1864      */
1865     ElementHandler[string] onEndTag;
1866 
    // Base initialisation shared by every constructor of this class.
    protected this() @safe @nogc pure nothrow
    {
        // Record the text that s currently points at. s is dereferenced
        // here, so it must already have been assigned by the caller.
        elementStart = *s;
    }
1871 
1872     /*
1873      * Register a handler which will be called whenever text is encountered.
1874      *
1875      * Example:
1876      * --------------
1877      * // Call this function whenever text is encountered
1878      * onText = (string s)
1879      * {
1880      *     // Your code here
1881      *
1882      *     // The passed parameter s will have been decoded by the time you see
1883      *     // it, and so may contain any character.
1884      *     //
     *     // This is a closure, so code here may reference
1886      *     // variables which are outside of this scope
1887      * };
1888      * --------------
1889      */
onText(Handler handler)1890     @property @safe @nogc pure nothrow void onText(Handler handler) { textHandler = handler; }
1891 
1892     /*
1893      * Register an alternative handler which will be called whenever text
1894      * is encountered. This differs from onText in that onText will decode
1895      * the text, whereas onTextRaw will not. This allows you to make design
1896      * choices, since onText will be more accurate, but slower, while
1897      * onTextRaw will be faster, but less accurate. Of course, you can
1898      * still call decode() within your handler, if you want, but you'd
1899      * probably want to use onTextRaw only in circumstances where you
1900      * know that decoding is unnecessary.
1901      *
1902      * Example:
1903      * --------------
1904      * // Call this function whenever text is encountered
     * onTextRaw = (string s)
1906      * {
1907      *     // Your code here
1908      *
1909      *     // The passed parameter s will NOT have been decoded.
1910      *     //
     *     // This is a closure, so code here may reference
1912      *     // variables which are outside of this scope
1913      * };
1914      * --------------
1915      */
onTextRaw(Handler handler)1916     @safe @nogc pure nothrow void onTextRaw(Handler handler) { rawTextHandler = handler; }
1917 
1918     /*
1919      * Register a handler which will be called whenever a character data
1920      * segment is encountered.
1921      *
1922      * Example:
1923      * --------------
1924      * // Call this function whenever a CData section is encountered
1925      * onCData = (string s)
1926      * {
1927      *     // Your code here
1928      *
1929      *     // The passed parameter s does not include the opening <![CDATA[
1930      *     // nor closing ]]>
1931      *     //
     *     // This is a closure, so code here may reference
1933      *     // variables which are outside of this scope
1934      * };
1935      * --------------
1936      */
onCData(Handler handler)1937     @property @safe @nogc pure nothrow void onCData(Handler handler) { cdataHandler = handler; }
1938 
1939     /*
1940      * Register a handler which will be called whenever a comment is
1941      * encountered.
1942      *
1943      * Example:
1944      * --------------
1945      * // Call this function whenever a comment is encountered
1946      * onComment = (string s)
1947      * {
1948      *     // Your code here
1949      *
1950      *     // The passed parameter s does not include the opening <!-- nor
1951      *     // closing -->
1952      *     //
     *     // This is a closure, so code here may reference
1954      *     // variables which are outside of this scope
1955      * };
1956      * --------------
1957      */
onComment(Handler handler)1958     @property @safe @nogc pure nothrow void onComment(Handler handler) { commentHandler = handler; }
1959 
1960     /*
1961      * Register a handler which will be called whenever a processing
1962      * instruction is encountered.
1963      *
1964      * Example:
1965      * --------------
1966      * // Call this function whenever a processing instruction is encountered
1967      * onPI = (string s)
1968      * {
1969      *     // Your code here
1970      *
1971      *     // The passed parameter s does not include the opening <? nor
1972      *     // closing ?>
1973      *     //
     *     // This is a closure, so code here may reference
1975      *     // variables which are outside of this scope
1976      * };
1977      * --------------
1978      */
onPI(Handler handler)1979     @property @safe @nogc pure nothrow void onPI(Handler handler) { piHandler = handler; }
1980 
1981     /*
1982      * Register a handler which will be called whenever an XML instruction is
1983      * encountered.
1984      *
1985      * Example:
1986      * --------------
1987      * // Call this function whenever an XML instruction is encountered
1988      * // (Note: XML instructions may only occur preceding the root tag of a
1989      * // document).
     * onXI = (string s)
1991      * {
1992      *     // Your code here
1993      *
1994      *     // The passed parameter s does not include the opening <! nor
1995      *     // closing >
1996      *     //
     *     // This is a closure, so code here may reference
1998      *     // variables which are outside of this scope
1999      * };
2000      * --------------
2001      */
onXI(Handler handler)2002     @property @safe @nogc pure nothrow void onXI(Handler handler) { xiHandler = handler; }
2003 
2004     /*
2005      * Parse an XML element.
2006      *
2007      * Parsing will continue until the end of the current element. Any items
2008      * encountered for which a handler has been registered will invoke that
2009      * handler.
2010      *
2011      * Throws: various kinds of XMLException
2012      */
    void parse()
    {
        import std.algorithm.searching : startsWith;
        import std.string : indexOf;

        // Scratch variable for the payload handed to the simple handlers.
        string t;
        // Tag this parser was created for; null means no tag has been read
        // yet (see the early return in the "<" branch below).
        const Tag root = tag_;
        // Most recent start tag seen for each tag name, used to pair an end
        // tag with its start tag.
        Tag[string] startTags;
        if (tag_ !is null) startTags[tag_.name] = tag_;

        // Consume the input one item at a time. The order of the branches
        // matters: longer prefixes ("<!--", "<![CDATA[") are tested before
        // the shorter ones they contain ("<!", "<").
        while (s.length != 0)
        {
            if (startsWith(*s,"<!--"))
            {
                // Comment: drop "<!--", hand over the body, drop "-->".
                // NOTE(review): chop is defined elsewhere; presumably it
                // removes and returns the first n characters -- confirm.
                chop(*s,4);
                t = chop(*s,indexOf(*s,"-->"));
                if (commentHandler.funcptr !is null) commentHandler(t);
                chop(*s,3);
            }
            else if (startsWith(*s,"<![CDATA["))
            {
                // CDATA section: the handler receives the body without the
                // "<![CDATA[" and "]]>" delimiters.
                chop(*s,9);
                t = chop(*s,indexOf(*s,"]]>"));
                if (cdataHandler.funcptr !is null) cdataHandler(t);
                chop(*s,3);
            }
            else if (startsWith(*s,"<!"))
            {
                // Any other "<!...>" item is passed to the XI handler.
                chop(*s,2);
                t = chop(*s,indexOf(*s,">"));
                if (xiHandler.funcptr !is null) xiHandler(t);
                chop(*s,1);
            }
            else if (startsWith(*s,"<?"))
            {
                // Processing instruction: body without "<?" and "?>".
                chop(*s,2);
                t = chop(*s,indexOf(*s,"?>"));
                if (piHandler.funcptr !is null) piHandler(t);
                chop(*s,2);
            }
            else if (startsWith(*s,"<"))
            {
                // A start, end or empty tag. The Tag constructor is given
                // the input by reference; presumably it consumes the tag
                // text from it -- confirm against Tag's constructor.
                tag_ = new Tag(*s,true);
                if (root is null)
                    return; // Return to constructor of derived class

                if (tag_.isStart)
                {
                    startTags[tag_.name] = tag_;

                    // Child parser constructed from this parser.
                    auto parser = new ElementParser(this);

                    // Prefer the handler registered for this tag name and
                    // fall back to the catch-all registered under null.
                    auto handler = tag_.name in onStartTag;
                    if (handler !is null) (*handler)(parser);
                    else
                    {
                        handler = null in onStartTag;
                        if (handler !is null) (*handler)(parser);
                    }
                }
                else if (tag_.isEnd)
                {
                    // NOTE(review): indexing throws if no start tag with
                    // this name was recorded; well-formed input is assumed.
                    const startTag = startTags[tag_.name];
                    string text;

                    if (startTag.tagString.length == 0)
                        assert(0);

                    // The element text is taken as everything between the
                    // end of the start tag's string and the beginning of
                    // the end tag's string. This assumes both tagString
                    // slices point into the same underlying buffer.
                    immutable(char)* p = startTag.tagString.ptr
                        + startTag.tagString.length;
                    immutable(char)* q = &tag_.tagString[0];
                    text = decode(p[0..(q-p)], DecodeMode.LOOSE);

                    auto element = new Element(startTag);
                    if (text.length != 0) element ~= new Text(text);

                    // Name-specific end handler first, then the catch-all.
                    auto handler = tag_.name in onEndTag;
                    if (handler !is null) (*handler)(element);
                    else
                    {
                        handler = null in onEndTag;
                        if (handler !is null) (*handler)(element);
                    }

                    // Stop once the end tag of this parser's own element
                    // has been reached.
                    if (tag_.name == root.name) return;
                }
                else if (tag_.isEmpty)
                {
                    // An empty tag is reported as a start tag immediately
                    // followed by an end tag.
                    Tag startTag = new Tag(tag_.name);

                    // FIX by hed010gy
                    // https://issues.dlang.org/show_bug.cgi?id=2979
                    if (tag_.attr.length > 0)
                          foreach (tn,tv; tag_.attr) startTag.attr[tn]=tv;
                    // END FIX

                    // Handle the pretend start tag
                    // The child parser is given an empty string as input.
                    string s2;
                    auto parser = new ElementParser(startTag,&s2);
                    auto handler1 = startTag.name in onStartTag;
                    if (handler1 !is null) (*handler1)(parser);
                    else
                    {
                        handler1 = null in onStartTag;
                        if (handler1 !is null) (*handler1)(parser);
                    }

                    // Handle the pretend end tag
                    auto element = new Element(startTag);
                    auto handler2 = tag_.name in onEndTag;
                    if (handler2 !is null) (*handler2)(element);
                    else
                    {
                        handler2 = null in onEndTag;
                        if (handler2 !is null) (*handler2)(element);
                    }
                }
            }
            else
            {
                // Plain text up to the next "<". A raw handler, when set,
                // wins over the decoding handler; only one of them runs.
                t = chop(*s,indexOf(*s,"<"));
                if (rawTextHandler.funcptr !is null)
                    rawTextHandler(t);
                else if (textHandler.funcptr !is null)
                    textHandler(decode(t,DecodeMode.LOOSE));
            }
        }
    }
2141 
2142     /*
2143      * Returns that part of the element which has already been parsed
2144      */
toString()2145     override string toString() const @nogc @safe pure nothrow
2146     {
2147         assert(elementStart.length >= s.length);
2148         return elementStart[0 .. elementStart.length - s.length];
2149     }
2150 
2151 }
2152 
2153 private
2154 {
Check(string msg)2155     template Check(string msg)
2156     {
2157         string old = s;
2158 
2159         void fail() @safe pure
2160         {
2161             s = old;
2162             throw new Err(s,msg);
2163         }
2164 
2165         void fail(Err e) @safe pure
2166         {
2167             s = old;
2168             throw new Err(s,msg,e);
2169         }
2170 
2171         void fail(string msg2) @safe pure
2172         {
2173             fail(new Err(s,msg2));
2174         }
2175     }
2176 
checkMisc(ref string s)2177     void checkMisc(ref string s) @safe pure // rule 27
2178     {
2179         import std.algorithm.searching : startsWith;
2180 
2181         mixin Check!("Misc");
2182 
2183         try
2184         {
2185                  if (s.startsWith("<!--")) { checkComment(s); }
2186             else if (s.startsWith("<?"))   { checkPI(s); }
2187             else                           { checkSpace(s); }
2188         }
2189         catch (Err e) { fail(e); }
2190     }
2191 
checkDocument(ref string s)2192     void checkDocument(ref string s) @safe pure // rule 1
2193     {
2194         mixin Check!("Document");
2195         try
2196         {
2197             checkProlog(s);
2198             checkElement(s);
2199             star!(checkMisc)(s);
2200         }
2201         catch (Err e) { fail(e); }
2202     }
2203 
checkChars(ref string s)2204     void checkChars(ref string s) @safe pure // rule 2
2205     {
2206         // TO DO - Fix std.utf stride and decode functions, then use those
2207         // instead
2208         import std.format : format;
2209 
2210         mixin Check!("Chars");
2211 
2212         dchar c;
2213         ptrdiff_t n = -1;
2214         // 'i' must not be smaller than size_t because size_t is used internally in
2215         // aApply.d and it will be cast e.g to (int *) which fails on BigEndian targets.
2216         foreach (size_t i, dchar d; s)
2217         {
2218             if (!isChar(d))
2219             {
2220                 c = d;
2221                 n = i;
2222                 break;
2223             }
2224         }
2225         if (n != -1)
2226         {
2227             s = s[n..$];
2228             fail(format("invalid character: U+%04X",c));
2229         }
2230     }
2231 
checkSpace(ref string s)2232     void checkSpace(ref string s) @safe pure // rule 3
2233     {
2234         import std.algorithm.searching : countUntil;
2235         import std.ascii : isWhite;
2236         import std.utf : byCodeUnit;
2237 
2238         mixin Check!("Whitespace");
2239         ptrdiff_t i = s.byCodeUnit.countUntil!(a => !isWhite(a));
2240         if (i == -1 && s.length > 0 && isWhite(s[0]))
2241             s = s[$ .. $];
2242         else if (i > -1)
2243             s = s[i .. $];
2244         if (s is old) fail();
2245     }
2246 
checkName(ref string s,out string name)2247     void checkName(ref string s, out string name) @safe pure // rule 5
2248     {
2249         mixin Check!("Name");
2250 
2251         if (s.length == 0) fail();
2252         ptrdiff_t n;
2253         // 'i' must not be smaller than size_t because size_t is used internally in
2254         // aApply.d and it will be cast e.g to (int *) which fails on BigEndian targets.
2255         foreach (size_t i, dchar c; s)
2256         {
2257             if (c == '_' || c == ':' || isLetter(c)) continue;
2258             if (i == 0) fail();
2259             if (c == '-' || c == '.' || isDigit(c)
2260                 || isCombiningChar(c) || isExtender(c)) continue;
2261             n = i;
2262             break;
2263         }
2264         name = s[0 .. n];
2265         s = s[n..$];
2266     }
2267 
checkAttValue(ref string s)2268     void checkAttValue(ref string s) @safe pure // rule 10
2269     {
2270         import std.algorithm.searching : countUntil;
2271         import std.utf : byCodeUnit;
2272 
2273         mixin Check!("AttValue");
2274 
2275         if (s.length == 0) fail();
2276         char c = s[0];
2277         if (c != '\u0022' && c != '\u0027')
2278             fail("attribute value requires quotes");
2279         s = s[1..$];
2280         for (;;)
2281         {
2282             s = s[s.byCodeUnit.countUntil(c) .. $];
2283             if (s.length == 0) fail("unterminated attribute value");
2284             if (s[0] == '<') fail("< found in attribute value");
2285             if (s[0] == c) break;
2286             try { checkReference(s); } catch (Err e) { fail(e); }
2287         }
2288         s = s[1..$];
2289     }
2290 
checkCharData(ref string s)2291     void checkCharData(ref string s) @safe pure // rule 14
2292     {
2293         import std.algorithm.searching : startsWith;
2294 
2295         mixin Check!("CharData");
2296 
2297         while (s.length != 0)
2298         {
2299             if (s.startsWith("&")) break;
2300             if (s.startsWith("<")) break;
2301             if (s.startsWith("]]>")) fail("]]> found within char data");
2302             s = s[1..$];
2303         }
2304     }
2305 
checkComment(ref string s)2306     void checkComment(ref string s) @safe pure // rule 15
2307     {
2308         import std.string : indexOf;
2309 
2310         mixin Check!("Comment");
2311 
2312         try { checkLiteral("<!--",s); } catch (Err e) { fail(e); }
2313         ptrdiff_t n = s.indexOf("--");
2314         if (n == -1) fail("unterminated comment");
2315         s = s[n..$];
2316         try { checkLiteral("-->",s); } catch (Err e) { fail(e); }
2317     }
2318 
checkPI(ref string s)2319     void checkPI(ref string s) @safe pure // rule 16
2320     {
2321         mixin Check!("PI");
2322 
2323         try
2324         {
2325             checkLiteral("<?",s);
2326             checkEnd("?>",s);
2327         }
2328         catch (Err e) { fail(e); }
2329     }
2330 
checkCDSect(ref string s)2331     void checkCDSect(ref string s) @safe pure // rule 18
2332     {
2333         mixin Check!("CDSect");
2334 
2335         try
2336         {
2337             checkLiteral(cdata,s);
2338             checkEnd("]]>",s);
2339         }
2340         catch (Err e) { fail(e); }
2341     }
2342 
checkProlog(ref string s)2343     void checkProlog(ref string s) @safe pure // rule 22
2344     {
2345         mixin Check!("Prolog");
2346 
2347         try
2348         {
2349             /* The XML declaration is optional
2350              * http://www.w3.org/TR/2008/REC-xml-20081126/#NT-prolog
2351              */
2352             opt!(checkXMLDecl)(s);
2353 
2354             star!(checkMisc)(s);
2355             opt!(seq!(checkDocTypeDecl,star!(checkMisc)))(s);
2356         }
2357         catch (Err e) { fail(e); }
2358     }
2359 
checkXMLDecl(ref string s)2360     void checkXMLDecl(ref string s) @safe pure // rule 23
2361     {
2362         mixin Check!("XMLDecl");
2363 
2364         try
2365         {
2366             checkLiteral("<?xml",s);
2367             checkVersionInfo(s);
2368             opt!(checkEncodingDecl)(s);
2369             opt!(checkSDDecl)(s);
2370             opt!(checkSpace)(s);
2371             checkLiteral("?>",s);
2372         }
2373         catch (Err e) { fail(e); }
2374     }
2375 
checkVersionInfo(ref string s)2376     void checkVersionInfo(ref string s) @safe pure // rule 24
2377     {
2378         mixin Check!("VersionInfo");
2379 
2380         try
2381         {
2382             checkSpace(s);
2383             checkLiteral("version",s);
2384             checkEq(s);
2385             quoted!(checkVersionNum)(s);
2386         }
2387         catch (Err e) { fail(e); }
2388     }
2389 
checkEq(ref string s)2390     void checkEq(ref string s) @safe pure // rule 25
2391     {
2392         mixin Check!("Eq");
2393 
2394         try
2395         {
2396             opt!(checkSpace)(s);
2397             checkLiteral("=",s);
2398             opt!(checkSpace)(s);
2399         }
2400         catch (Err e) { fail(e); }
2401     }
2402 
checkVersionNum(ref string s)2403     void checkVersionNum(ref string s) @safe pure // rule 26
2404     {
2405         import std.algorithm.searching : countUntil;
2406         import std.utf : byCodeUnit;
2407 
2408         mixin Check!("VersionNum");
2409 
2410         s = s[s.byCodeUnit.countUntil('\"') .. $];
2411         if (s is old) fail();
2412     }
2413 
checkDocTypeDecl(ref string s)2414     void checkDocTypeDecl(ref string s) @safe pure // rule 28
2415     {
2416         mixin Check!("DocTypeDecl");
2417 
2418         try
2419         {
2420             checkLiteral("<!DOCTYPE",s);
2421             //
2422             // TO DO -- ensure DOCTYPE is well formed
2423             // (But not yet. That's one of our "future directions")
2424             //
2425             checkEnd(">",s);
2426         }
2427         catch (Err e) { fail(e); }
2428     }
2429 
checkSDDecl(ref string s)2430     void checkSDDecl(ref string s) @safe pure // rule 32
2431     {
2432         import std.algorithm.searching : startsWith;
2433 
2434         mixin Check!("SDDecl");
2435 
2436         try
2437         {
2438             checkSpace(s);
2439             checkLiteral("standalone",s);
2440             checkEq(s);
2441         }
2442         catch (Err e) { fail(e); }
2443 
2444         int n = 0;
2445              if (s.startsWith("'yes'") || s.startsWith("\"yes\"")) n = 5;
2446         else if (s.startsWith("'no'" ) || s.startsWith("\"no\"" )) n = 4;
2447         else fail("standalone attribute value must be 'yes', \"yes\","~
2448             " 'no' or \"no\"");
2449         s = s[n..$];
2450     }
2451 
checkElement(ref string s)2452     void checkElement(ref string s) @safe pure // rule 39
2453     {
2454         mixin Check!("Element");
2455 
2456         string sname,ename,t;
2457         try { checkTag(s,t,sname); } catch (Err e) { fail(e); }
2458 
2459         if (t == "STag")
2460         {
2461             try
2462             {
2463                 checkContent(s);
2464                 t = s;
2465                 checkETag(s,ename);
2466             }
2467             catch (Err e) { fail(e); }
2468 
2469             if (sname != ename)
2470             {
2471                 s = t;
2472                 fail("end tag name \"" ~ ename
2473                     ~ "\" differs from start tag name \""~sname~"\"");
2474             }
2475         }
2476     }
2477 
2478     // rules 40 and 44
checkTag(ref string s,out string type,out string name)2479     void checkTag(ref string s, out string type, out string name) @safe pure
2480     {
2481         mixin Check!("Tag");
2482 
2483         try
2484         {
2485             type = "STag";
2486             checkLiteral("<",s);
2487             checkName(s,name);
2488             star!(seq!(checkSpace,checkAttribute))(s);
2489             opt!(checkSpace)(s);
2490             if (s.length != 0 && s[0] == '/')
2491             {
2492                 s = s[1..$];
2493                 type = "ETag";
2494             }
2495             checkLiteral(">",s);
2496         }
2497         catch (Err e) { fail(e); }
2498     }
2499 
checkAttribute(ref string s)2500     void checkAttribute(ref string s) @safe pure // rule 41
2501     {
2502         mixin Check!("Attribute");
2503 
2504         try
2505         {
2506             string name;
2507             checkName(s,name);
2508             checkEq(s);
2509             checkAttValue(s);
2510         }
2511         catch (Err e) { fail(e); }
2512     }
2513 
checkETag(ref string s,out string name)2514     void checkETag(ref string s, out string name) @safe pure // rule 42
2515     {
2516         mixin Check!("ETag");
2517 
2518         try
2519         {
2520             checkLiteral("</",s);
2521             checkName(s,name);
2522             opt!(checkSpace)(s);
2523             checkLiteral(">",s);
2524         }
2525         catch (Err e) { fail(e); }
2526     }
2527 
checkContent(ref string s)2528     void checkContent(ref string s) @safe pure // rule 43
2529     {
2530         import std.algorithm.searching : startsWith;
2531 
2532         mixin Check!("Content");
2533 
2534         try
2535         {
2536             while (s.length != 0)
2537             {
2538                 old = s;
2539                      if (s.startsWith("&"))        { checkReference(s); }
2540                 else if (s.startsWith("<!--"))     { checkComment(s); }
2541                 else if (s.startsWith("<?"))       { checkPI(s); }
2542                 else if (s.startsWith(cdata)) { checkCDSect(s); }
2543                 else if (s.startsWith("</"))       { break; }
2544                 else if (s.startsWith("<"))        { checkElement(s); }
2545                 else                               { checkCharData(s); }
2546             }
2547         }
2548         catch (Err e) { fail(e); }
2549     }
2550 
checkCharRef(ref string s,out dchar c)2551     void checkCharRef(ref string s, out dchar c) @safe pure // rule 66
2552     {
2553         import std.format : format;
2554 
2555         mixin Check!("CharRef");
2556 
2557         c = 0;
2558         try { checkLiteral("&#",s); } catch (Err e) { fail(e); }
2559         int radix = 10;
2560         if (s.length != 0 && s[0] == 'x')
2561         {
2562             s = s[1..$];
2563             radix = 16;
2564         }
2565         if (s.length == 0) fail("unterminated character reference");
2566         if (s[0] == ';')
2567             fail("character reference must have at least one digit");
2568         while (s.length != 0)
2569         {
2570             immutable char d = s[0];
2571             int n = 0;
2572             switch (d)
2573             {
2574                 case 'F','f': ++n;      goto case;
2575                 case 'E','e': ++n;      goto case;
2576                 case 'D','d': ++n;      goto case;
2577                 case 'C','c': ++n;      goto case;
2578                 case 'B','b': ++n;      goto case;
2579                 case 'A','a': ++n;      goto case;
2580                 case '9':     ++n;      goto case;
2581                 case '8':     ++n;      goto case;
2582                 case '7':     ++n;      goto case;
2583                 case '6':     ++n;      goto case;
2584                 case '5':     ++n;      goto case;
2585                 case '4':     ++n;      goto case;
2586                 case '3':     ++n;      goto case;
2587                 case '2':     ++n;      goto case;
2588                 case '1':     ++n;      goto case;
2589                 case '0':     break;
2590                 default: n = 100; break;
2591             }
2592             if (n >= radix) break;
2593             c *= radix;
2594             c += n;
2595             s = s[1..$];
2596         }
2597         if (!isChar(c)) fail(format("U+%04X is not a legal character",c));
2598         if (s.length == 0 || s[0] != ';') fail("expected ;");
2599         else s = s[1..$];
2600     }
2601 
checkReference(ref string s)2602     void checkReference(ref string s) @safe pure // rule 67
2603     {
2604         import std.algorithm.searching : startsWith;
2605 
2606         mixin Check!("Reference");
2607 
2608         try
2609         {
2610             dchar c;
2611             if (s.startsWith("&#")) checkCharRef(s,c);
2612             else checkEntityRef(s);
2613         }
2614         catch (Err e) { fail(e); }
2615     }
2616 
checkEntityRef(ref string s)2617     void checkEntityRef(ref string s) @safe pure // rule 68
2618     {
2619         mixin Check!("EntityRef");
2620 
2621         try
2622         {
2623             string name;
2624             checkLiteral("&",s);
2625             checkName(s,name);
2626             checkLiteral(";",s);
2627         }
2628         catch (Err e) { fail(e); }
2629     }
2630 
checkEncName(ref string s)2631     void checkEncName(ref string s) @safe pure // rule 81
2632     {
2633         import std.algorithm.searching : countUntil;
2634         import std.ascii : isAlpha;
2635         import std.utf : byCodeUnit;
2636 
2637         mixin Check!("EncName");
2638 
2639         s = s[s.byCodeUnit.countUntil!(a => !isAlpha(a)) .. $];
2640         if (s is old) fail();
2641         s = s[s.byCodeUnit.countUntil('\"', '\'') .. $];
2642     }
2643 
checkEncodingDecl(ref string s)2644     void checkEncodingDecl(ref string s) @safe pure // rule 80
2645     {
2646         mixin Check!("EncodingDecl");
2647 
2648         try
2649         {
2650             checkSpace(s);
2651             checkLiteral("encoding",s);
2652             checkEq(s);
2653             quoted!(checkEncName)(s);
2654         }
2655         catch (Err e) { fail(e); }
2656     }
2657 
2658     // Helper functions
2659 
checkLiteral(string literal,ref string s)2660     void checkLiteral(string literal,ref string s) @safe pure
2661     {
2662         import std.string : startsWith;
2663 
2664         mixin Check!("Literal");
2665 
2666         if (!s.startsWith(literal)) fail("Expected literal \""~literal~"\"");
2667         s = s[literal.length..$];
2668     }
2669 
checkEnd(string end,ref string s)2670     void checkEnd(string end,ref string s) @safe pure
2671     {
2672         import std.string : indexOf;
2673         // Deliberately no mixin Check here.
2674 
2675         auto n = s.indexOf(end);
2676         if (n == -1) throw new Err(s,"Unable to find terminating \""~end~"\"");
2677         s = s[n..$];
2678         checkLiteral(end,s);
2679     }
2680 
2681     // Metafunctions -- none of these use mixin Check
2682 
opt(alias f)2683     void opt(alias f)(ref string s)
2684     {
2685         try { f(s); } catch (Err e) {}
2686     }
2687 
plus(alias f)2688     void plus(alias f)(ref string s)
2689     {
2690         f(s);
2691         star!(f)(s);
2692     }
2693 
star(alias f)2694     void star(alias f)(ref string s)
2695     {
2696         while (s.length != 0)
2697         {
2698             try { f(s); }
2699             catch (Err e) { return; }
2700         }
2701     }
2702 
quoted(alias f)2703     void quoted(alias f)(ref string s)
2704     {
2705         import std.string : startsWith;
2706 
2707         if (s.startsWith("'"))
2708         {
2709             checkLiteral("'",s);
2710             f(s);
2711             checkLiteral("'",s);
2712         }
2713         else
2714         {
2715             checkLiteral("\"",s);
2716             f(s);
2717             checkLiteral("\"",s);
2718         }
2719     }
2720 
seq(alias f,alias g)2721     void seq(alias f,alias g)(ref string s)
2722     {
2723         f(s);
2724         g(s);
2725     }
2726 }
2727 
2728 /*
2729  * Check an entire XML document for well-formedness
2730  *
2731  * Params:
2732  *      s = the document to be checked, passed as a string
2733  *
2734  * Throws: CheckException if the document is not well formed
2735  *
2736  * CheckException's toString() method will yield the complete hierarchy of
2737  * parse failure (the XML equivalent of a stack trace), giving the line and
2738  * column number of every failure at every level.
2739  */
void check(string s) @safe pure
{
    // Keep the whole document. On a parse failure the Check helpers rewind
    // `s` to the start, but in the "junk" case below `s` is only the
    // unparsed tail, which must not be handed to complete() in place of
    // the document.
    // NOTE(review): complete() is defined elsewhere; presumably it derives
    // line and column numbers from the entire text -- confirm.
    immutable string entire = s;
    try
    {
        checkChars(s);
        checkDocument(s);
        if (s.length != 0) throw new Err(s,"Junk found after document");
    }
    catch (Err e)
    {
        e.complete(entire);
        throw e;
    }
}
2754 
// check() must reject a document whose end tag name (</genres>) differs
// from its start tag name (<genre>), and the failure report must say so.
@system pure unittest
{
    import std.string : indexOf;

    try
    {
        check(q"[<?xml version="1.0"?>
        <catalog>
           <book id="bk101">
              <author>Gambardella, Matthew</author>
              <title>XML Developer's Guide</title>
              <genre>Computer</genre>
              <price>44.95</price>
              <publish_date>2000-10-01</publish_date>
              <description>An in-depth look at creating applications
              with XML.</description>
           </book>
           <book id="bk102">
              <author>Ralls, Kim</author>
              <title>Midnight Rain</title>
              <genre>Fantasy</genres>
              <price>5.95</price>
              <publish_date>2000-12-16</publish_date>
              <description>A former architect battles corporate zombies,
              an evil sorceress, and her own childhood to become queen
              of the world.</description>
           </book>
           <book id="bk103">
              <author>Corets, Eva</author>
              <title>Maeve Ascendant</title>
              <genre>Fantasy</genre>
              <price>5.95</price>
              <publish_date>2000-11-17</publish_date>
              <description>After the collapse of a nanotechnology
              society in England, the young survivors lay the
              foundation for a new society.</description>
           </book>
        </catalog>
        ]");
        // Reaching this line means the malformed document was accepted.
        assert(false);
    }
    catch (CheckException e)
    {
        // The mismatch must be named somewhere in the failure hierarchy.
        auto n = e.toString().indexOf("end tag name \"genres\" differs"~
                                      " from start tag name \"genre\"");
        assert(n != -1);
    }
}
2803 
// check() must accept a well-formed document that has a comment between
// child elements.
@system unittest
{
    string s = q"EOS
<?xml version="1.0"?>
<set>
    <one>A</one>
    <!-- comment -->
    <two>B</two>
</set>
EOS";
    try
    {
        check(s);
    }
    catch (CheckException e)
    {
        // Any failure is a test failure; show the full report.
        assert(0, e.toString());
    }
}
2823 
// Attribute values may contain the other kind of quote character, and a
// tag may span several lines; the parser must report the values intact.
@system unittest
{
    string test_xml = `<?xml version="1.0" encoding='UTF-8'?><r><stream:stream
                        xmlns:stream="http://etherx.'jabber'.org/streams"
                        xmlns="jabber:'client'" from='jid.pl' id="587a5767"
                        xml:lang="en" version="1.0" attr='a"b"c'>
                        </stream:stream></r>`;

    DocumentParser parser = new DocumentParser(test_xml);
    // Set by the handler, so the test fails if the handler never runs.
    bool tested = false;
    parser.onStartTag["stream:stream"] = (ElementParser p) {
        assert(p.tag.attr["xmlns"] == "jabber:'client'");
        assert(p.tag.attr["from"] == "jid.pl");
        assert(p.tag.attr["attr"] == "a\"b\"c");
        tested = true;
    };
    parser.parse();
    assert(tested);
}
2843 
// The &amp; entity must be decoded both in attribute values and in
// element text.
@system unittest
{
    string s = q"EOS
<?xml version="1.0" encoding="utf-8"?> <Tests>
    <Test thing="What &amp; Up">What &amp; Up Second</Test>
</Tests>
EOS";
    auto docParser = new DocumentParser(s);

    docParser.onStartTag["Test"] = (ElementParser elem) {
        assert(elem.tag.attr["thing"] == "What & Up");
    };
    docParser.onEndTag["Test"] = (in Element closed) {
        assert(closed.text() == "What & Up Second");
    };

    docParser.parse();
}
2862 
// A tag whose attribute value holds escaped characters must come back
// unchanged after being parsed into a Document and printed again.
@system unittest
{
    immutable string markup = `<tag attr="&quot;value&gt;" />`;
    auto doc = new Document(markup);
    assert(doc.toString() == markup);
}
2869 
/* The base class for exceptions thrown by this module */
class XMLException : Exception
{
    // Passes the message straight through to Exception.
    this(string msg) @safe pure
    {
        super(msg);
    }
}
2872 
2873 // Other exceptions
2874 
// Thrown during Comment constructor
class CommentException : XMLException
{
    // Private: instances can only be created from inside this module.
    private this(string msg) @safe pure
    {
        super(msg);
    }
}
2878 
// Thrown during CData constructor
class CDataException : XMLException
{
    // Private: instances can only be created from inside this module.
    private this(string msg) @safe pure
    {
        super(msg);
    }
}
2882 
// Thrown during XMLInstruction constructor
class XIException : XMLException
{
    // Private: instances can only be created from inside this module.
    private this(string msg) @safe pure
    {
        super(msg);
    }
}
2886 
// Thrown during ProcessingInstruction constructor
class PIException : XMLException
{
    // Private: instances can only be created from inside this module.
    private this(string msg) @safe pure
    {
        super(msg);
    }
}
2890 
// Thrown during Text constructor
class TextException : XMLException
{
    // Private: instances can only be created from inside this module.
    private this(string msg) @safe pure
    {
        super(msg);
    }
}
2894 
// Thrown during decode()
class DecodeException : XMLException
{
    // Private: instances can only be created from inside this module.
    private this(string msg) @safe pure
    {
        super(msg);
    }
}
2898 
// Thrown if comparing with wrong type
class InvalidTypeException : XMLException
{
    // Private: instances can only be created from inside this module.
    private this(string msg) @safe pure
    {
        super(msg);
    }
}
2902 
// Thrown when parsing for Tags
class TagException : XMLException
{
    // Private: instances can only be created from inside this module.
    private this(string msg) @safe pure
    {
        super(msg);
    }
}
2906 
/*
 * Thrown during check()
 *
 * Instances can be chained through `err`; toString() prints the chained
 * exception's text first and this exception's own line after it.
 */
class CheckException : XMLException
{
    CheckException err; // Parent in hierarchy
    // Slice recorded at construction; complete() uses only its length, to
    // work out how much of the whole input precedes the failure point.
    private string tail;
    /*
     * Name of production rule which failed to parse,
     * or specific error message
     */
    string msg;
    size_t line = 0; // Line number at which parse failure occurred
    size_t column = 0; // Column number at which parse failure occurred

    // Stores the three arguments; the base class is given a null message
    // because this class keeps its text in its own `msg` field.
    private this(string tail,string msg,Err err=null) @safe pure
    {
        super(null);
        this.tail = tail;
        this.msg = msg;
        this.err = err;
    }

    /*
     * Fills in `line` and `column` (both 1-based) from `entire`, the full
     * input text, then does the same for every exception chained via `err`.
     */
    private void complete(string entire) @safe pure
    {
        import std.string : count, lastIndexOf;
        import std.utf : toUTF32;

        // Everything in front of the failure position.
        immutable string consumed = entire[0 .. $ - tail.length];

        // lastIndexOf yields -1 when there is no newline, so adding 1 gives
        // 0, i.e. the start of the text.
        immutable ptrdiff_t lineStart = consumed.lastIndexOf('\n') + 1;

        line = consumed.count("\n") + 1;

        // Converting to UTF-32 makes the column count code points rather
        // than UTF-8 code units.
        column = toUTF32(consumed[lineStart .. $]).length + 1;

        if (err !is null)
            err.complete(entire);
    }

    /*
     * Returns "Line L, column C: msg\n" (the position prefix is omitted while
     * `line` is still 0), preceded by the text of the chained exception when
     * there is one.
     */
    override string toString() const @safe pure
    {
        import std.format : format;

        string own;
        if (line != 0)
            own = format("Line %d, column %d: ", line, column);
        own = own ~ msg ~ '\n';

        return (err is null) ? own : err.toString() ~ own;
    }
}

// Short private name for CheckException, used in its constructor signature.
private alias Err = CheckException;
2957 
2958 // Private helper functions
2959 
2960 private
2961 {
toType(T)2962     inout(T) toType(T)(inout return scope Object o)
2963     {
2964         T t = cast(T)(o);
2965         if (t is null)
2966         {
2967             throw new InvalidTypeException("Attempt to compare a "
2968                 ~ T.stringof ~ " with an instance of another type");
2969         }
2970         return t;
2971     }
2972 
chop(ref string s,size_t n)2973     string chop(ref string s, size_t n) @safe pure nothrow
2974     {
2975         if (n == -1) n = s.length;
2976         string t = s[0 .. n];
2977         s = s[n..$];
2978         return t;
2979     }
2980 
optc(ref string s,char c)2981     bool optc(ref string s, char c) @safe pure nothrow
2982     {
2983         immutable bool b = s.length != 0 && s[0] == c;
2984         if (b) s = s[1..$];
2985         return b;
2986     }
2987 
reqc(ref string s,char c)2988     void reqc(ref string s, char c) @safe pure
2989     {
2990         if (s.length == 0 || s[0] != c) throw new TagException("");
2991         s = s[1..$];
2992     }
2993 
requireOneOf(ref string s,string chars)2994     char requireOneOf(ref string s, string chars) @safe pure
2995     {
2996         import std.string : indexOf;
2997 
2998         if (s.length == 0 || indexOf(chars,s[0]) == -1)
2999             throw new TagException("");
3000         immutable char ch = s[0];
3001         s = s[1..$];
3002         return ch;
3003     }
3004 
3005     alias hash = .hashOf;
3006 
3007     // Definitions from the XML specification
3008     immutable CharTable=[0x9,0x9,0xA,0xA,0xD,0xD,0x20,0xD7FF,0xE000,0xFFFD,
3009         0x10000,0x10FFFF];
3010     immutable BaseCharTable=[0x0041,0x005A,0x0061,0x007A,0x00C0,0x00D6,0x00D8,
3011         0x00F6,0x00F8,0x00FF,0x0100,0x0131,0x0134,0x013E,0x0141,0x0148,0x014A,
3012         0x017E,0x0180,0x01C3,0x01CD,0x01F0,0x01F4,0x01F5,0x01FA,0x0217,0x0250,
3013         0x02A8,0x02BB,0x02C1,0x0386,0x0386,0x0388,0x038A,0x038C,0x038C,0x038E,
3014         0x03A1,0x03A3,0x03CE,0x03D0,0x03D6,0x03DA,0x03DA,0x03DC,0x03DC,0x03DE,
3015         0x03DE,0x03E0,0x03E0,0x03E2,0x03F3,0x0401,0x040C,0x040E,0x044F,0x0451,
3016         0x045C,0x045E,0x0481,0x0490,0x04C4,0x04C7,0x04C8,0x04CB,0x04CC,0x04D0,
3017         0x04EB,0x04EE,0x04F5,0x04F8,0x04F9,0x0531,0x0556,0x0559,0x0559,0x0561,
3018         0x0586,0x05D0,0x05EA,0x05F0,0x05F2,0x0621,0x063A,0x0641,0x064A,0x0671,
3019         0x06B7,0x06BA,0x06BE,0x06C0,0x06CE,0x06D0,0x06D3,0x06D5,0x06D5,0x06E5,
3020         0x06E6,0x0905,0x0939,0x093D,0x093D,0x0958,0x0961,0x0985,0x098C,0x098F,
3021         0x0990,0x0993,0x09A8,0x09AA,0x09B0,0x09B2,0x09B2,0x09B6,0x09B9,0x09DC,
3022         0x09DD,0x09DF,0x09E1,0x09F0,0x09F1,0x0A05,0x0A0A,0x0A0F,0x0A10,0x0A13,
3023         0x0A28,0x0A2A,0x0A30,0x0A32,0x0A33,0x0A35,0x0A36,0x0A38,0x0A39,0x0A59,
3024         0x0A5C,0x0A5E,0x0A5E,0x0A72,0x0A74,0x0A85,0x0A8B,0x0A8D,0x0A8D,0x0A8F,
3025         0x0A91,0x0A93,0x0AA8,0x0AAA,0x0AB0,0x0AB2,0x0AB3,0x0AB5,0x0AB9,0x0ABD,
3026         0x0ABD,0x0AE0,0x0AE0,0x0B05,0x0B0C,0x0B0F,0x0B10,0x0B13,0x0B28,0x0B2A,
3027         0x0B30,0x0B32,0x0B33,0x0B36,0x0B39,0x0B3D,0x0B3D,0x0B5C,0x0B5D,0x0B5F,
3028         0x0B61,0x0B85,0x0B8A,0x0B8E,0x0B90,0x0B92,0x0B95,0x0B99,0x0B9A,0x0B9C,
3029         0x0B9C,0x0B9E,0x0B9F,0x0BA3,0x0BA4,0x0BA8,0x0BAA,0x0BAE,0x0BB5,0x0BB7,
3030         0x0BB9,0x0C05,0x0C0C,0x0C0E,0x0C10,0x0C12,0x0C28,0x0C2A,0x0C33,0x0C35,
3031         0x0C39,0x0C60,0x0C61,0x0C85,0x0C8C,0x0C8E,0x0C90,0x0C92,0x0CA8,0x0CAA,
3032         0x0CB3,0x0CB5,0x0CB9,0x0CDE,0x0CDE,0x0CE0,0x0CE1,0x0D05,0x0D0C,0x0D0E,
3033         0x0D10,0x0D12,0x0D28,0x0D2A,0x0D39,0x0D60,0x0D61,0x0E01,0x0E2E,0x0E30,
3034         0x0E30,0x0E32,0x0E33,0x0E40,0x0E45,0x0E81,0x0E82,0x0E84,0x0E84,0x0E87,
3035         0x0E88,0x0E8A,0x0E8A,0x0E8D,0x0E8D,0x0E94,0x0E97,0x0E99,0x0E9F,0x0EA1,
3036         0x0EA3,0x0EA5,0x0EA5,0x0EA7,0x0EA7,0x0EAA,0x0EAB,0x0EAD,0x0EAE,0x0EB0,
3037         0x0EB0,0x0EB2,0x0EB3,0x0EBD,0x0EBD,0x0EC0,0x0EC4,0x0F40,0x0F47,0x0F49,
3038         0x0F69,0x10A0,0x10C5,0x10D0,0x10F6,0x1100,0x1100,0x1102,0x1103,0x1105,
3039         0x1107,0x1109,0x1109,0x110B,0x110C,0x110E,0x1112,0x113C,0x113C,0x113E,
3040         0x113E,0x1140,0x1140,0x114C,0x114C,0x114E,0x114E,0x1150,0x1150,0x1154,
3041         0x1155,0x1159,0x1159,0x115F,0x1161,0x1163,0x1163,0x1165,0x1165,0x1167,
3042         0x1167,0x1169,0x1169,0x116D,0x116E,0x1172,0x1173,0x1175,0x1175,0x119E,
3043         0x119E,0x11A8,0x11A8,0x11AB,0x11AB,0x11AE,0x11AF,0x11B7,0x11B8,0x11BA,
3044         0x11BA,0x11BC,0x11C2,0x11EB,0x11EB,0x11F0,0x11F0,0x11F9,0x11F9,0x1E00,
3045         0x1E9B,0x1EA0,0x1EF9,0x1F00,0x1F15,0x1F18,0x1F1D,0x1F20,0x1F45,0x1F48,
3046         0x1F4D,0x1F50,0x1F57,0x1F59,0x1F59,0x1F5B,0x1F5B,0x1F5D,0x1F5D,0x1F5F,
3047         0x1F7D,0x1F80,0x1FB4,0x1FB6,0x1FBC,0x1FBE,0x1FBE,0x1FC2,0x1FC4,0x1FC6,
3048         0x1FCC,0x1FD0,0x1FD3,0x1FD6,0x1FDB,0x1FE0,0x1FEC,0x1FF2,0x1FF4,0x1FF6,
3049         0x1FFC,0x2126,0x2126,0x212A,0x212B,0x212E,0x212E,0x2180,0x2182,0x3041,
3050         0x3094,0x30A1,0x30FA,0x3105,0x312C,0xAC00,0xD7A3];
3051     immutable IdeographicTable=[0x3007,0x3007,0x3021,0x3029,0x4E00,0x9FA5];
3052     immutable CombiningCharTable=[0x0300,0x0345,0x0360,0x0361,0x0483,0x0486,
3053         0x0591,0x05A1,0x05A3,0x05B9,0x05BB,0x05BD,0x05BF,0x05BF,0x05C1,0x05C2,
3054         0x05C4,0x05C4,0x064B,0x0652,0x0670,0x0670,0x06D6,0x06DC,0x06DD,0x06DF,
3055         0x06E0,0x06E4,0x06E7,0x06E8,0x06EA,0x06ED,0x0901,0x0903,0x093C,0x093C,
3056         0x093E,0x094C,0x094D,0x094D,0x0951,0x0954,0x0962,0x0963,0x0981,0x0983,
3057         0x09BC,0x09BC,0x09BE,0x09BE,0x09BF,0x09BF,0x09C0,0x09C4,0x09C7,0x09C8,
3058         0x09CB,0x09CD,0x09D7,0x09D7,0x09E2,0x09E3,0x0A02,0x0A02,0x0A3C,0x0A3C,
3059         0x0A3E,0x0A3E,0x0A3F,0x0A3F,0x0A40,0x0A42,0x0A47,0x0A48,0x0A4B,0x0A4D,
3060         0x0A70,0x0A71,0x0A81,0x0A83,0x0ABC,0x0ABC,0x0ABE,0x0AC5,0x0AC7,0x0AC9,
3061         0x0ACB,0x0ACD,0x0B01,0x0B03,0x0B3C,0x0B3C,0x0B3E,0x0B43,0x0B47,0x0B48,
3062         0x0B4B,0x0B4D,0x0B56,0x0B57,0x0B82,0x0B83,0x0BBE,0x0BC2,0x0BC6,0x0BC8,
3063         0x0BCA,0x0BCD,0x0BD7,0x0BD7,0x0C01,0x0C03,0x0C3E,0x0C44,0x0C46,0x0C48,
3064         0x0C4A,0x0C4D,0x0C55,0x0C56,0x0C82,0x0C83,0x0CBE,0x0CC4,0x0CC6,0x0CC8,
3065         0x0CCA,0x0CCD,0x0CD5,0x0CD6,0x0D02,0x0D03,0x0D3E,0x0D43,0x0D46,0x0D48,
3066         0x0D4A,0x0D4D,0x0D57,0x0D57,0x0E31,0x0E31,0x0E34,0x0E3A,0x0E47,0x0E4E,
3067         0x0EB1,0x0EB1,0x0EB4,0x0EB9,0x0EBB,0x0EBC,0x0EC8,0x0ECD,0x0F18,0x0F19,
3068         0x0F35,0x0F35,0x0F37,0x0F37,0x0F39,0x0F39,0x0F3E,0x0F3E,0x0F3F,0x0F3F,
3069         0x0F71,0x0F84,0x0F86,0x0F8B,0x0F90,0x0F95,0x0F97,0x0F97,0x0F99,0x0FAD,
3070         0x0FB1,0x0FB7,0x0FB9,0x0FB9,0x20D0,0x20DC,0x20E1,0x20E1,0x302A,0x302F,
3071         0x3099,0x3099,0x309A,0x309A];
3072     immutable DigitTable=[0x0030,0x0039,0x0660,0x0669,0x06F0,0x06F9,0x0966,
3073         0x096F,0x09E6,0x09EF,0x0A66,0x0A6F,0x0AE6,0x0AEF,0x0B66,0x0B6F,0x0BE7,
3074         0x0BEF,0x0C66,0x0C6F,0x0CE6,0x0CEF,0x0D66,0x0D6F,0x0E50,0x0E59,0x0ED0,
3075         0x0ED9,0x0F20,0x0F29];
3076     immutable ExtenderTable=[0x00B7,0x00B7,0x02D0,0x02D0,0x02D1,0x02D1,0x0387,
3077         0x0387,0x0640,0x0640,0x0E46,0x0E46,0x0EC6,0x0EC6,0x3005,0x3005,0x3031,
3078         0x3035,0x309D,0x309E,0x30FC,0x30FE];
3079 
lookup(const (int)[]table,int c)3080     bool lookup(const(int)[] table, int c) @safe @nogc nothrow pure
3081     {
3082         while (table.length != 0)
3083         {
3084             auto m = (table.length >> 1) & ~1;
3085             if (c < table[m])
3086             {
3087                 table = table[0 .. m];
3088             }
3089             else if (c > table[m+1])
3090             {
3091                 table = table[m+2..$];
3092             }
3093             else return true;
3094         }
3095         return false;
3096     }
3097 
startOf(string s)3098     string startOf(string s) @safe nothrow pure
3099     {
3100         string r;
3101         foreach (char c;s)
3102         {
3103             r ~= (c < 0x20 || c > 0x7F) ? '.' : c;
3104             if (r.length >= 40) { r ~= "___"; break; }
3105         }
3106         return r;
3107     }
3108 
3109     void exit(string s=null)
3110     {
3111         throw new XMLException(s);
3112     }
3113 }
3114