1 // Written in the D programming language.
2
3 /**
4 $(RED Warning: This module is considered out-dated and not up to Phobos'
5 current standards. It will remain until we have a suitable replacement,
6 but be aware that it will not remain long term.)
7
8 Classes and functions for creating and parsing XML
9
10 The basic architecture of this module is that there are standalone functions,
11 classes for constructing an XML document from scratch (Tag, Element and
12 Document), and also classes for parsing a pre-existing XML file (ElementParser
13 and DocumentParser). The parsing classes <i>may</i> be used to build a
14 Document, but that is not their primary purpose. The handling capabilities of
15 DocumentParser and ElementParser are sufficiently customizable that you can
16 make them do pretty much whatever you want.
17
18 Example: This example creates a DOM (Document Object Model) tree
19 from an XML file.
20 ------------------------------------------------------------------------------
21 import std.xml;
22 import std.stdio;
23 import std.string;
24 import std.file;
25
26 // books.xml is used in various samples throughout the Microsoft XML Core
27 // Services (MSXML) SDK.
28 //
29 // See http://msdn2.microsoft.com/en-us/library/ms762271(VS.85).aspx
30
31 void main()
32 {
33 string s = cast(string) std.file.read("books.xml");
34
35 // Check for well-formedness
36 check(s);
37
38 // Make a DOM tree
39 auto doc = new Document(s);
40
41 // Plain-print it
42 writeln(doc);
43 }
44 ------------------------------------------------------------------------------
45
46 Example: This example does much the same thing, except that the file is
47 deconstructed and reconstructed by hand. This is more work, but the
48 techniques involved offer vastly more power.
49 ------------------------------------------------------------------------------
50 import std.xml;
51 import std.stdio;
import std.string;
import std.file;
53
54 struct Book
55 {
56 string id;
57 string author;
58 string title;
59 string genre;
60 string price;
61 string pubDate;
62 string description;
63 }
64
65 void main()
66 {
67 string s = cast(string) std.file.read("books.xml");
68
69 // Check for well-formedness
70 check(s);
71
72 // Take it apart
73 Book[] books;
74
75 auto xml = new DocumentParser(s);
76 xml.onStartTag["book"] = (ElementParser xml)
77 {
78 Book book;
79 book.id = xml.tag.attr["id"];
80
81 xml.onEndTag["author"] = (in Element e) { book.author = e.text(); };
82 xml.onEndTag["title"] = (in Element e) { book.title = e.text(); };
83 xml.onEndTag["genre"] = (in Element e) { book.genre = e.text(); };
84 xml.onEndTag["price"] = (in Element e) { book.price = e.text(); };
85 xml.onEndTag["publish-date"] = (in Element e) { book.pubDate = e.text(); };
86 xml.onEndTag["description"] = (in Element e) { book.description = e.text(); };
87
88 xml.parse();
89
90 books ~= book;
91 };
92 xml.parse();
93
94 // Put it back together again;
95 auto doc = new Document(new Tag("catalog"));
96 foreach (book;books)
97 {
98 auto element = new Element("book");
99 element.tag.attr["id"] = book.id;
100
101 element ~= new Element("author", book.author);
102 element ~= new Element("title", book.title);
103 element ~= new Element("genre", book.genre);
104 element ~= new Element("price", book.price);
105 element ~= new Element("publish-date",book.pubDate);
106 element ~= new Element("description", book.description);
107
108 doc ~= element;
109 }
110
111 // Pretty-print it
112 writefln(join(doc.pretty(3),"\n"));
113 }
114 -------------------------------------------------------------------------------
115 Copyright: Copyright Janice Caron 2008 - 2009.
116 License: $(HTTP www.boost.org/LICENSE_1_0.txt, Boost License 1.0).
117 Authors: Janice Caron
118 Source: $(PHOBOSSRC std/_xml.d)
119 */
120 /*
121 Copyright Janice Caron 2008 - 2009.
122 Distributed under the Boost Software License, Version 1.0.
123 (See accompanying file LICENSE_1_0.txt or copy at
124 http://www.boost.org/LICENSE_1_0.txt)
125 */
126 module std.xml;
127
// Literal text that opens a CDATA section.
enum string cdata = "<![CDATA[";
129
130 /**
131 * Returns true if the character is a character according to the XML standard
132 *
133 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
134 *
135 * Params:
136 * c = the character to be tested
137 */
bool isChar(dchar c) @safe @nogc pure nothrow // rule 2
{
    // Below U+0020 only tab, line feed and carriage return are accepted.
    if (c < 0x20)
        return c == 0x9 || c == 0xA || c == 0xD;

    // U+0020 .. U+D7FF is accepted in full.
    if (c <= 0xD7FF)
        return true;

    // U+D800 .. U+DFFF and anything above U+10FFFF is rejected.
    if (c < 0xE000 || c > 0x10FFFF)
        return false;

    // In the remaining range only U+FFFE and U+FFFF are rejected.
    return c != 0xFFFE && c != 0xFFFF;
}
161
@safe @nogc nothrow pure unittest
{
    // Values isChar must accept; they sit on the edges of the accepted ranges.
    static immutable uint[] accepted = [
        0x9, 0xA, 0xD, 0x20, 0x4A /* 'J' */, 0xD7FF,
        0xE000, 0xFFFD, 0x10000, 0x10FFFF
    ];
    // Values isChar must reject; each one is adjacent to an accepted range.
    static immutable uint[] rejected = [
        0x8, 0xB, 0xC, 0xE, 0x1F, 0xD800, 0xDFFF,
        0xFFFE, 0xFFFF, 0x110000
    ];

    foreach (code; accepted)
        assert( isChar(cast(dchar) code));
    foreach (code; rejected)
        assert(!isChar(cast(dchar) code));

    // Exhaustive cross-check against the table, only in builds that enable
    // the stdxml_TestHardcodedChecks debug identifier.
    debug (stdxml_TestHardcodedChecks)
    {
        foreach (c; 0 .. dchar.max + 1)
            assert(isChar(c) == lookup(CharTable, c));
    }
}
191
192 /**
193 * Returns true if the character is whitespace according to the XML standard
194 *
195 * Only the following characters are considered whitespace in XML - space, tab,
196 * carriage return and linefeed
197 *
198 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
199 *
200 * Params:
201 * c = the character to be tested
202 */
bool isSpace(dchar c) @safe @nogc pure nothrow
{
    switch (c)
    {
        case '\u0020': // space
        case '\u0009': // tab
        case '\u000A': // line feed
        case '\u000D': // carriage return
            return true;
        default:
            return false;
    }
}
207
208 /**
209 * Returns true if the character is a digit according to the XML standard
210 *
211 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
212 *
213 * Params:
214 * c = the character to be tested
215 */
bool isDigit(dchar c) @safe @nogc pure nothrow
{
    // U+0030 .. U+0039 is answered directly; every other value is decided
    // by DigitTable. The || short-circuits, so the table is only consulted
    // when the range test fails.
    immutable inAsciiRange = c >= 0x0030 && c <= 0x0039;
    return inAsciiRange || lookup(DigitTable, c);
}
223
@safe @nogc nothrow pure unittest
{
    // Compiled only when the stdxml_TestHardcodedChecks debug identifier is
    // enabled: checks that isDigit agrees with a plain DigitTable lookup for
    // every value in 0 .. dchar.max + 1.
    debug (stdxml_TestHardcodedChecks)
    {
        foreach (c; 0 .. dchar.max + 1)
            assert(isDigit(c) == lookup(DigitTable, c));
    }
}
232
233 /**
234 * Returns true if the character is a letter according to the XML standard
235 *
236 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
237 *
238 * Params:
239 * c = the character to be tested
240 */
bool isLetter(dchar c) @safe @nogc nothrow pure // rule 84
{
    // A letter is either ideographic or a base character; the ideographic
    // test runs first and the base-character test only if it fails.
    if (isIdeographic(c))
        return true;
    return isBaseChar(c);
}
245
246 /**
247 * Returns true if the character is an ideographic character according to the
248 * XML standard
249 *
250 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
251 *
252 * Params:
253 * c = the character to be tested
254 */
bool isIdeographic(dchar c) @safe @nogc nothrow pure
{
    // Accepted values: U+3007, U+3021 .. U+3029 and U+4E00 .. U+9FA5.
    immutable inLowRange  = c >= 0x3021 && c <= 0x3029;
    immutable inHighRange = c >= 0x4E00 && c <= 0x9FA5;
    return c == 0x3007 || inLowRange || inHighRange;
}
265
@safe @nogc nothrow pure unittest
{
    // The single accepted code point plus both ends of each accepted range.
    static immutable dchar[] samples = [
        '\u4E00', '\u9FA5', '\u3007', '\u3021', '\u3029'
    ];
    foreach (sample; samples)
        assert(isIdeographic(sample));

    // Exhaustive cross-check against the table, only in builds that enable
    // the stdxml_TestHardcodedChecks debug identifier.
    debug (stdxml_TestHardcodedChecks)
    {
        foreach (c; 0 .. dchar.max + 1)
            assert(isIdeographic(c) == lookup(IdeographicTable, c));
    }
}
280
281 /**
282 * Returns true if the character is a base character according to the XML
283 * standard
284 *
285 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
286 *
287 * Params:
288 * c = the character to be tested
289 */
bool isBaseChar(dchar c) @safe @nogc nothrow pure
{
    // Membership is decided entirely by the BaseCharTable lookup.
    immutable found = lookup(BaseCharTable, c);
    return found;
}
294
295 /**
296 * Returns true if the character is a combining character according to the
297 * XML standard
298 *
299 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
300 *
301 * Params:
302 * c = the character to be tested
303 */
bool isCombiningChar(dchar c) @safe @nogc nothrow pure
{
    // Membership is decided entirely by the CombiningCharTable lookup.
    immutable found = lookup(CombiningCharTable, c);
    return found;
}
308
309 /**
310 * Returns true if the character is an extender according to the XML standard
311 *
312 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
313 *
314 * Params:
315 * c = the character to be tested
316 */
bool isExtender(dchar c) @safe @nogc nothrow pure
{
    // Membership is decided entirely by the ExtenderTable lookup.
    immutable found = lookup(ExtenderTable, c);
    return found;
}
321
322 /**
323 * Encodes a string by replacing all characters which need to be escaped with
324 * appropriate predefined XML entities.
325 *
326 * encode() escapes certain characters (ampersand, quote, apostrophe, less-than
327 * and greater-than), and similarly, decode() unescapes them. These functions
328 * are provided for convenience only. You do not need to use them when using
329 * the std.xml classes, because then all the encoding and decoding will be done
330 * for you automatically.
331 *
332 * If the string is not modified, the original will be returned.
333 *
334 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
335 *
336 * Params:
337 * s = The string to be encoded
338 *
339 * Returns: The encoded string
340 *
341 * Example:
342 * --------------
 * writefln(encode("a > b")); // writes "a &gt; b"
344 * --------------
345 */
S encode(S)(S s)
{
    import std.array : appender;

    string r;
    size_t lastI;       // index just past the last character already copied
    auto result = appender!S();

    foreach (i, c; s)
    {
        // Pick the predefined entity for the five characters that must be
        // escaped; every other character is copied later as part of a slice.
        switch (c)
        {
        case '&':  r = "&amp;";  break;
        case '"':  r = "&quot;"; break;
        case '\'': r = "&apos;"; break;
        case '<':  r = "&lt;";   break;
        case '>':  r = "&gt;";   break;
        default: continue;
        }
        // Flush the untouched run before this character, then the entity.
        result.put(s[lastI .. i]);
        result.put(r);
        lastI = i + 1;
    }

    // Nothing was written, so nothing needed escaping: hand back the original
    // string itself. Every replacement writes at least one character, so an
    // empty buffer is a reliable "unmodified" signal.
    if (result.data.length == 0) return s;
    result.put(s[lastI .. $]);
    return result.data;
}

@safe pure unittest
{
    auto s = "hello";
    assert(encode(s) is s);
    assert(encode("a > b") == "a &gt; b", encode("a > b"));
    assert(encode("a < b") == "a &lt; b");
    assert(encode("don't") == "don&apos;t");
    assert(encode("\"hi\"") == "&quot;hi&quot;", encode("\"hi\""));
    assert(encode("cat & dog") == "cat &amp; dog");
}
386
387 /**
388 * Mode to use for decoding.
389 *
390 * $(DDOC_ENUM_MEMBERS NONE) Do not decode
391 * $(DDOC_ENUM_MEMBERS LOOSE) Decode, but ignore errors
392 * $(DDOC_ENUM_MEMBERS STRICT) Decode, and throw exception on error
393 */
enum DecodeMode
{
    NONE,   // do not decode
    LOOSE,  // decode, but ignore errors
    STRICT, // decode, and throw an exception on error
}
398
399 /**
400 * Decodes a string by unescaping all predefined XML entities.
401 *
402 * encode() escapes certain characters (ampersand, quote, apostrophe, less-than
403 * and greater-than), and similarly, decode() unescapes them. These functions
404 * are provided for convenience only. You do not need to use them when using
405 * the std.xml classes, because then all the encoding and decoding will be done
406 * for you automatically.
407 *
408 * This function decodes the entities &amp;, &quot;, &apos;,
 * &lt; and &gt;,
410 * as well as decimal and hexadecimal entities such as &#x20AC;
411 *
412 * If the string does not contain an ampersand, the original will be returned.
413 *
414 * Note that the "mode" parameter can be one of DecodeMode.NONE (do not
415 * decode), DecodeMode.LOOSE (decode, but ignore errors), or DecodeMode.STRICT
416 * (decode, and throw a DecodeException in the event of an error).
417 *
418 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
419 *
420 * Params:
421 * s = The string to be decoded
422 * mode = (optional) Mode to use for decoding. (Defaults to LOOSE).
423 *
424 * Throws: DecodeException if mode == DecodeMode.STRICT and decode fails
425 *
426 * Returns: The decoded string
427 *
428 * Example:
429 * --------------
 * writefln(decode("a &gt; b")); // writes "a > b"
431 * --------------
432 */
string decode(string s, DecodeMode mode=DecodeMode.LOOSE) @safe pure
{
    import std.algorithm.searching : startsWith;

    if (mode == DecodeMode.NONE) return s;

    // buffer stays empty until the first '&' is seen, so input without any
    // ampersand is returned as the original string rather than as a copy.
    string buffer;
    foreach (ref i; 0 .. s.length)
    {
        char c = s[i];
        if (c != '&')
        {
            if (buffer.length != 0) buffer ~= c;
        }
        else
        {
            if (buffer.length == 0)
            {
                // First ampersand: start the copy with everything before it.
                buffer = s[0 .. i].dup;
            }
            if (startsWith(s[i..$],"&#"))
            {
                try
                {
                    dchar d;
                    string t = s[i..$];
                    // checkCharRef advances t past the reference and stores
                    // the referenced character in d.
                    checkCharRef(t, d);
                    char[4] temp;
                    import std.utf : encode;
                    buffer ~= temp[0 .. encode(temp, d)];
                    // Leave i on the last character consumed; the loop's
                    // own increment then moves past it.
                    i = s.length - t.length - 1;
                }
                catch (Err e)
                {
                    if (mode == DecodeMode.STRICT)
                        throw new DecodeException("Unescaped &");
                    buffer ~= '&';
                }
            }
            // Each increment is the entity length minus one, because the
            // loop itself advances i by one more.
            else if (startsWith(s[i..$],"&amp;" )) { buffer ~= '&';  i += 4; }
            else if (startsWith(s[i..$],"&quot;")) { buffer ~= '"';  i += 5; }
            else if (startsWith(s[i..$],"&apos;")) { buffer ~= '\''; i += 5; }
            else if (startsWith(s[i..$],"&lt;"  )) { buffer ~= '<';  i += 3; }
            else if (startsWith(s[i..$],"&gt;"  )) { buffer ~= '>';  i += 3; }
            else
            {
                // A bare ampersand: an error in STRICT mode, kept verbatim
                // in LOOSE mode.
                if (mode == DecodeMode.STRICT)
                    throw new DecodeException("Unescaped &");
                buffer ~= '&';
            }
        }
    }
    return (buffer.length == 0) ? s : buffer;
}

@safe pure unittest
{
    // Passes only if STRICT decoding of s throws a DecodeException.
    void assertNot(string s) pure
    {
        bool b = false;
        try { decode(s,DecodeMode.STRICT); }
        catch (DecodeException e) { b = true; }
        assert(b,s);
    }

    // Assert that things that should work, do
    auto s = "hello";
    assert(decode(s,                DecodeMode.STRICT) is s);
    assert(decode("a &gt; b",       DecodeMode.STRICT) == "a > b");
    assert(decode("a &lt; b",       DecodeMode.STRICT) == "a < b");
    assert(decode("don&apos;t",     DecodeMode.STRICT) == "don't");
    assert(decode("&quot;hi&quot;", DecodeMode.STRICT) == "\"hi\"");
    assert(decode("cat &amp; dog",  DecodeMode.STRICT) == "cat & dog");
    assert(decode("&#42;",          DecodeMode.STRICT) == "*");
    assert(decode("&#x2A;",         DecodeMode.STRICT) == "*");
    assert(decode("cat & dog",      DecodeMode.LOOSE) == "cat & dog");
    assert(decode("a &gt b",        DecodeMode.LOOSE) == "a &gt b");
    assert(decode("&#;",            DecodeMode.LOOSE) == "&#;");
    assert(decode("&#x;",           DecodeMode.LOOSE) == "&#x;");
    assert(decode("&#2G;",          DecodeMode.LOOSE) == "&#2G;");
    assert(decode("&#x2G;",         DecodeMode.LOOSE) == "&#x2G;");

    // Assert that things that shouldn't work, don't
    assertNot("cat & dog");
    assertNot("a &gt b");
    assertNot("&#;");
    assertNot("&#x;");
    assertNot("&#2G;");
    assertNot("&#x2G;");
}
523
524 /**
525 * Class representing an XML document.
526 *
527 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
528 *
529 */
class Document : Element
{
    /**
     * Contains all text which occurs before the root element.
     * Defaults to &lt;?xml version="1.0"?&gt;
     */
    string prolog = "<?xml version=\"1.0\"?>";

    /**
     * Contains all text which occurs after the root element.
     * Defaults to the empty string
     */
    string epilog;

    /**
     * Constructs a Document by parsing XML text.
     *
     * This function creates a complete DOM (Document Object Model) tree.
     *
     * The input to this function MUST be valid XML.
     * This is enforced by DocumentParser's in contract.
     *
     * Params:
     *      s = the complete XML text.
     */
    this(string s)
    in
    {
        assert(s.length != 0);
    }
    do
    {
        auto parser = new DocumentParser(s);
        string rootTagText = parser.tag.tagString;

        this(parser.tag);

        // The prolog is whatever precedes the root tag. This relies on the
        // root tag's text being a slice of s, so that the pointer difference
        // is its offset within s.
        immutable prologLength = rootTagText.ptr - s.ptr;
        prolog = s[0 .. prologLength];

        parse(parser);

        // Whatever the parser has left over once the root element is done.
        epilog = *parser.s;
    }

    /**
     * Constructs a Document from a Tag.
     *
     * Params:
     *      tag = the start tag of the document.
     */
    this(const(Tag) tag)
    {
        super(tag);
    }

    /**
     * Compares two Documents for equality
     *
     * Example:
     * --------------
     * Document d1,d2;
     * if (d1 == d2) { }
     * --------------
     */
    override bool opEquals(scope const Object o) const
    {
        const other = toType!(const Document)(o);

        // Checked in the order prolog, element content, epilog; the first
        // mismatch settles the answer.
        if (prolog != other.prolog)
            return false;
        if (!(cast(const) this).Element.opEquals(cast(const) other))
            return false;
        return epilog == other.epilog;
    }

    /**
     * Compares two Documents
     *
     * You should rarely need to call this function. It exists so that
     * Documents can be used as associative array keys.
     *
     * Example:
     * --------------
     * Document d1,d2;
     * if (d1 < d2) { }
     * --------------
     */
    override int opCmp(scope const Object o) scope const
    {
        const other = toType!(const Document)(o);

        if (prolog != other.prolog)
        {
            if (prolog < other.prolog)
                return -1;
            return 1;
        }

        immutable elementOrder = this.Element.opCmp(other);
        if (elementOrder != 0)
            return elementOrder;

        if (epilog != other.epilog)
        {
            if (epilog < other.epilog)
                return -1;
            return 1;
        }
        return 0;
    }

    /**
     * Returns the hash of a Document
     *
     * You should rarely need to call this function. It exists so that
     * Documents can be used as associative array keys.
     */
    override size_t toHash() scope const @trusted
    {
        // Start from the Element hash, then fold in the epilog and finally
        // the prolog.
        immutable elementHash = (cast() this).Element.toHash();
        return hash(prolog, hash(epilog, elementHash));
    }

    /**
     * Returns the string representation of a Document. (That is, the
     * complete XML of a document).
     */
    override string toString() scope const @safe
    {
        string text = prolog;
        text ~= super.toString();
        text ~= epilog;
        return text;
    }
}
645
@system unittest
{
    // https://issues.dlang.org/show_bug.cgi?id=14966
    auto source = `<?xml version="1.0" encoding="UTF-8"?><foo></foo>`;

    // Two documents parsed from the same text compare equal, are not ordered
    // before one another, and address the same associative array slot.
    auto first = new Document(source);
    auto second = new Document(source);
    assert(first == second);
    assert(!(first < second));

    int[Document] table;
    table[first] = 1;
    assert(table[second] == 1);

    // Giving one of them an extra child makes it compare greater.
    second ~= new Element("b");
    assert(first < second);
    assert(second > first);
}
663
664 /**
665 * Class representing an XML element.
666 *
667 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
668 */
class Element : Item
{
    Tag tag; /// The start tag of the element
    Item[] items; /// The element's items
    Text[] texts; /// The element's text items
    CData[] cdatas; /// The element's CData items
    Comment[] comments; /// The element's comments
    ProcessingInstruction[] pis; /// The element's processing instructions
    Element[] elements; /// The element's child elements

    /**
     * Constructs an Element given a name and a string to be used as a Text
     * interior.
     *
     * Params:
     *      name = the name of the element.
     *      interior = (optional) the string interior.
     *
     * Example:
     * -------------------------------------------------------
     * auto element = new Element("title","Serenity")
     *     // constructs the element <title>Serenity</title>
     * -------------------------------------------------------
     */
    this(string name, string interior=null) @safe pure
    {
        this(new Tag(name));
        // An empty interior adds no Text item, so the element stays empty.
        if (interior.length != 0) opCatAssign(new Text(interior));
    }

    /**
     * Constructs an Element from a Tag.
     *
     * The tag is copied (name, attributes and original tag text), and the
     * copy starts out as an empty tag until a non-empty item is appended.
     *
     * Params:
     *      tag_ = the start or empty tag of the element.
     */
    this(const(Tag) tag_) @safe pure
    {
        this.tag = new Tag(tag_.name);
        tag.type = TagType.EMPTY;
        foreach (k,v;tag_.attr) tag.attr[k] = v;
        tag.tagString = tag_.tagString;
    }

    /**
     * Append a text item to the interior of this element
     *
     * Params:
     *      item = the item you wish to append.
     *
     * Example:
     * --------------
     * Element element;
     * element ~= new Text("hello");
     * --------------
     */
    void opCatAssign(Text item) @safe pure
    {
        texts ~= item;
        appendItem(item);
    }

    /**
     * Append a CData item to the interior of this element
     *
     * Params:
     *      item = the item you wish to append.
     *
     * Example:
     * --------------
     * Element element;
     * element ~= new CData("hello");
     * --------------
     */
    void opCatAssign(CData item) @safe pure
    {
        cdatas ~= item;
        appendItem(item);
    }

    /**
     * Append a comment to the interior of this element
     *
     * Params:
     *      item = the item you wish to append.
     *
     * Example:
     * --------------
     * Element element;
     * element ~= new Comment("hello");
     * --------------
     */
    void opCatAssign(Comment item) @safe pure
    {
        comments ~= item;
        appendItem(item);
    }

    /**
     * Append a processing instruction to the interior of this element
     *
     * Params:
     *      item = the item you wish to append.
     *
     * Example:
     * --------------
     * Element element;
     * element ~= new ProcessingInstruction("hello");
     * --------------
     */
    void opCatAssign(ProcessingInstruction item) @safe pure
    {
        pis ~= item;
        appendItem(item);
    }

    /**
     * Append a complete element to the interior of this element
     *
     * Params:
     *      item = the item you wish to append.
     *
     * Example:
     * --------------
     * Element element;
     * Element other = new Element("br");
     * element ~= other;
     *    // appends element representing <br />
     * --------------
     */
    void opCatAssign(Element item) @safe pure
    {
        elements ~= item;
        appendItem(item);
    }

    // Records item in the combined item list and, on the first item that is
    // not empty XML, turns an EMPTY tag into a START tag.
    private void appendItem(Item item) @safe pure
    {
        items ~= item;
        if (tag.type == TagType.EMPTY && !item.isEmptyXML)
            tag.type = TagType.START;
    }

    // Installs handlers on xml that append every text, CDATA, comment,
    // processing instruction and child element to this element, then runs
    // the parser. Child elements are built by calling parse on themselves.
    private void parse(ElementParser xml)
    {
        xml.onText = (string s) { opCatAssign(new Text(s)); };
        xml.onCData = (string s) { opCatAssign(new CData(s)); };
        xml.onComment = (string s) { opCatAssign(new Comment(s)); };
        xml.onPI = (string s) { opCatAssign(new ProcessingInstruction(s)); };

        xml.onStartTag[null] = (ElementParser xml)
        {
            auto e = new Element(xml.tag);
            e.parse(xml);
            opCatAssign(e);
        };

        xml.parse();
    }

    /**
     * Compares two Elements for equality
     *
     * Example:
     * --------------
     * Element e1,e2;
     * if (e1 == e2) { }
     * --------------
     */
    override bool opEquals(scope const Object o) const
    {
        // NOTE(review): only the items are compared here; the tag (name and
        // attributes) is not, although toHash() mixes in tag.toHash().
        // Confirm whether elements that differ only in their tag are meant
        // to compare equal.
        const element = toType!(const Element)(o);
        immutable len = items.length;
        if (len != element.items.length) return false;
        foreach (i; 0 .. len)
        {
            if (!items[i].opEquals(element.items[i])) return false;
        }
        return true;
    }

    /**
     * Compares two Elements
     *
     * You should rarely need to call this function. It exists so that Elements
     * can be used as associative array keys.
     *
     * Example:
     * --------------
     * Element e1,e2;
     * if (e1 < e2) { }
     * --------------
     */
    override int opCmp(scope const Object o) @safe const
    {
        const element = toType!(const Element)(o);
        // size_t index: items.length is a size_t, so a narrower counter
        // could wrap before reaching it.
        for (size_t i = 0; ; ++i)
        {
            // Ran out of items on both sides together: equal. Otherwise the
            // side that runs out first orders lower.
            if (i == items.length && i == element.items.length) return 0;
            if (i == items.length) return -1;
            if (i == element.items.length) return 1;
            // The first pair of unequal items decides the result.
            if (!items[i].opEquals(element.items[i]))
                return items[i].opCmp(element.items[i]);
        }
    }

    /**
     * Returns the hash of an Element
     *
     * You should rarely need to call this function. It exists so that Elements
     * can be used as associative array keys.
     */
    override size_t toHash() scope const @safe
    {
        size_t hash = tag.toHash();
        foreach (item;items) hash += item.toHash();
        return hash;
    }

    const
    {
        /**
         * Returns the decoded interior of an element.
         *
         * The element is assumed to contain text <i>only</i>. So, for
         * example, given XML such as "&lt;title&gt;Good &amp;amp;
         * Bad&lt;/title&gt;", will return "Good &amp; Bad".
         *
         * Params:
         *      mode = (optional) Mode to use for decoding. (Defaults to LOOSE).
         *
         * Throws: DecodeException if decode fails, or if the element holds
         * any item that is not a Text
         */
        string text(DecodeMode mode=DecodeMode.LOOSE)
        {
            string buffer;
            foreach (item;items)
            {
                // Keep the item const; only toString() is needed from it.
                const t = cast(const(Text)) item;
                if (t is null) throw new DecodeException(item.toString());
                buffer ~= decode(t.toString(),mode);
            }
            return buffer;
        }

        /**
         * Returns an indented string representation of this item
         *
         * Params:
         *      indent = (optional) number of spaces by which to indent this
         *          element. Defaults to 2.
         */
        override string[] pretty(uint indent=2) scope
        {
            import std.algorithm.searching : count;
            import std.string : rightJustify;

            if (isEmptyXML) return [ tag.toEmptyString() ];

            // An element holding exactly one Text item is kept on one line.
            if (items.length == 1)
            {
                auto t = cast(const(Text))(items[0]);
                if (t !is null)
                {
                    return [tag.toStartString() ~ t.toString() ~ tag.toEndString()];
                }
            }

            // Otherwise: start tag, each child's lines shifted right by
            // indent, end tag.
            string[] a = [ tag.toStartString() ];
            foreach (item;items)
            {
                string[] b = item.pretty(indent);
                foreach (s;b)
                {
                    a ~= rightJustify(s,count(s) + indent);
                }
            }
            a ~= tag.toEndString();
            return a;
        }

        /**
         * Returns the string representation of an Element
         *
         * Example:
         * --------------
         * auto element = new Element("br");
         * writefln(element.toString()); // writes "<br />"
         * --------------
         */
        override string toString() scope @safe
        {
            if (isEmptyXML) return tag.toEmptyString();

            string buffer = tag.toStartString();
            foreach (item;items) { buffer ~= item.toString(); }
            buffer ~= tag.toEndString();
            return buffer;
        }

        // An element is empty XML exactly when it has no items.
        override @property @safe pure @nogc nothrow bool isEmptyXML() const scope { return items.length == 0; }
    }
}
972
973 /**
974 * Tag types.
975 *
976 * $(DDOC_ENUM_MEMBERS START) Used for start tags
977 * $(DDOC_ENUM_MEMBERS END) Used for end tags
978 * $(DDOC_ENUM_MEMBERS EMPTY) Used for empty tags
979 *
980 */
enum TagType
{
    START, // used for start tags
    END,   // used for end tags
    EMPTY, // used for empty tags
}
982
983 /**
984 * Class representing an XML tag.
985 *
986 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
987 *
988 * The class invariant guarantees
989 * <ul>
990 * <li> that $(B type) is a valid enum TagType value</li>
991 * <li> that $(B name) consists of valid characters</li>
992 * <li> that each attribute name consists of valid characters</li>
993 * </ul>
994 */
class Tag
{
    TagType type = TagType.START;   /// Type of tag
    string name;                    /// Tag name
    string[string] attr;            /// Associative array of attributes
    private string tagString;       // text the tag was parsed from, if any

    invariant()
    {
        string s;
        string t;

        assert(type == TagType.START
            || type == TagType.END
            || type == TagType.EMPTY);

        s = name;
        try { checkName(s,t); }
        catch (Err e) { assert(false,"Invalid tag name:" ~ e.toString()); }

        foreach (k,v;attr)
        {
            s = k;
            try { checkName(s,t); }
            catch (Err e)
                { assert(false,"Invalid attribute name:" ~ e.toString()); }
        }
    }

    /**
     * Constructs an instance of Tag with a specified name and type
     *
     * The constructor does not initialize the attributes. To initialize the
     * attributes, you access the $(B attr) member variable.
     *
     * Params:
     *      name = the Tag's name
     *      type = (optional) the Tag's type. If omitted, defaults to
     *          TagType.START.
     *
     * Example:
     * --------------
     * auto tag = new Tag("img",TagType.EMPTY);
     * tag.attr["src"] = "http://example.com/example.jpg";
     * --------------
     */
    this(string name, TagType type=TagType.START) @safe pure
    {
        this.name = name;
        this.type = type;
    }

    /* Private constructor (so don't ddoc this!)
     *
     * Constructs a Tag by parsing the string representation, e.g. "<html>".
     *
     * The string is passed by reference, and is advanced over all characters
     * consumed.
     *
     * The second parameter is a dummy parameter only, required solely to
     * distinguish this constructor from the public one.
     *
     * Any parse failure, including input that ends before the tag is
     * complete, is reported as a TagException carrying the text consumed so
     * far.
     */
    private this(ref string s, bool dummy) @safe pure
    {
        import std.algorithm.searching : countUntil;
        import std.utf : byCodeUnit;

        tagString = s;
        try
        {
            reqc(s,'<');
            if (optc(s,'/')) type = TagType.END;

            // The name runs up to the first delimiter. countUntil yields -1
            // when there is none, which must not be used as a slice bound.
            ptrdiff_t i = s.byCodeUnit.countUntil(">", "/>", " ", "\t", "\v", "\r", "\n", "\f");
            if (i < 0) throw new TagException("");
            name = s[0 .. i];
            s = s[i .. $];

            skipTagWhitespace(s);

            // One attribute per iteration: key, '=', quoted value.
            while (s.length > 0 && s[0] != '>' && s[0] != '/')
            {
                i = s.byCodeUnit.countUntil("=", " ", "\t", "\v", "\r", "\n", "\f");
                if (i < 0) throw new TagException("");
                string key = s[0 .. i];
                s = s[i .. $];

                skipTagWhitespace(s);
                reqc(s,'=');
                skipTagWhitespace(s);

                // The value ends at the same quote character that opened it.
                immutable char quote = requireOneOf(s,"'\"");
                i = s.byCodeUnit.countUntil(quote);
                if (i < 0) throw new TagException("");
                string val = decode(s[0 .. i], DecodeMode.LOOSE);
                s = s[i .. $];
                reqc(s,quote);

                skipTagWhitespace(s);
                attr[key] = val;
            }
            if (optc(s,'/'))
            {
                // "</name/>" is not a valid tag.
                if (type == TagType.END) throw new TagException("");
                type = TagType.EMPTY;
            }
            reqc(s,'>');
            // Trim tagString to exactly the characters consumed.
            tagString.length = tagString.length - s.length;
        }
        catch (XMLException e)
        {
            tagString.length = tagString.length - s.length;
            throw new TagException(tagString);
        }
    }

    // Advances s past leading whitespace. Input that is exhausted here means
    // the tag was cut short, so it is reported instead of slicing with -1.
    private static void skipTagWhitespace(ref string s) @safe pure
    {
        import std.algorithm.searching : countUntil;
        import std.ascii : isWhite;
        import std.utf : byCodeUnit;

        immutable skip = s.byCodeUnit.countUntil!(a => !isWhite(a));
        if (skip < 0) throw new TagException("");
        s = s[skip .. $];
    }

    // Orders two attribute tables by content: the (key, value) pairs are
    // compared in ascending key order, and when one table is a prefix of the
    // other the shorter one orders first. Returns -1, 0 or 1.
    private static int compareAttributes(const string[string] lhs,
                                         const string[string] rhs)
    {
        import std.algorithm.sorting : sort;

        static string[] sortedKeys(const string[string] table)
        {
            string[] names;
            foreach (key, value; table) names ~= key;
            sort(names);
            return names;
        }

        const lhsKeys = sortedKeys(lhs);
        const rhsKeys = sortedKeys(rhs);
        immutable common = lhsKeys.length < rhsKeys.length
            ? lhsKeys.length : rhsKeys.length;

        foreach (i; 0 .. common)
        {
            if (lhsKeys[i] != rhsKeys[i])
                return lhsKeys[i] < rhsKeys[i] ? -1 : 1;

            const lhsValue = lhs[lhsKeys[i]];
            const rhsValue = rhs[rhsKeys[i]];
            if (lhsValue != rhsValue)
                return lhsValue < rhsValue ? -1 : 1;
        }

        if (lhsKeys.length != rhsKeys.length)
            return lhsKeys.length < rhsKeys.length ? -1 : 1;
        return 0;
    }

    const
    {
        /**
         * Compares two Tags for equality
         *
         * You should rarely need to call this function. It exists so that Tags
         * can be used as associative array keys.
         *
         * Example:
         * --------------
         * Tag tag1,tag2;
         * if (tag1 == tag2) { }
         * --------------
         */
        override bool opEquals(scope Object o)
        {
            const tag = toType!(const Tag)(o);
            return name == tag.name
                && attr == tag.attr
                && type == tag.type;
        }

        /**
         * Compares two Tags
         *
         * Tags are ordered by name, then by attributes (key/value pairs in
         * ascending key order), then by type.
         *
         * Example:
         * --------------
         * Tag tag1,tag2;
         * if (tag1 < tag2) { }
         * --------------
         */
        override int opCmp(Object o)
        {
            const tag = toType!(const Tag)(o);

            if (name != tag.name)
                return name < tag.name ? -1 : 1;
            // Unequal tables always differ in some key or value, so the
            // content comparison cannot return 0 here.
            if (attr != tag.attr)
                return compareAttributes(attr, tag.attr);
            if (type != tag.type)
                return type < tag.type ? -1 : 1;
            return 0;
        }

        /**
         * Returns the hash of a Tag
         *
         * You should rarely need to call this function. It exists so that Tags
         * can be used as associative array keys.
         */
        override size_t toHash()
        {
            // Only the name contributes to the hash.
            return typeid(name).getHash(&name);
        }

        /**
         * Returns the string representation of a Tag
         *
         * Example:
         * --------------
         * auto tag = new Tag("book",TagType.START);
         * writefln(tag.toString()); // writes "<book>"
         * --------------
         */
        override string toString() @safe
        {
            if (isEmpty) return toEmptyString();
            return (isEnd) ? toEndString() : toStartString();
        }

        private
        {
            // "<name" followed by every attribute as key="encoded value";
            // the callers below add the closing characters.
            string toNonEndString() @safe
            {
                import std.format : format;

                string s = "<" ~ name;
                foreach (key,val;attr)
                    s ~= format(" %s=\"%s\"",key,encode(val));
                return s;
            }

            string toStartString() @safe { return toNonEndString() ~ ">"; }

            string toEndString() @safe { return "</" ~ name ~ ">"; }

            string toEmptyString() @safe { return toNonEndString() ~ " />"; }
        }

        /**
         * Returns true if the Tag is a start tag
         *
         * Example:
         * --------------
         * if (tag.isStart) { }
         * --------------
         */
        @property bool isStart() @safe @nogc pure nothrow { return type == TagType.START; }

        /**
         * Returns true if the Tag is an end tag
         *
         * Example:
         * --------------
         * if (tag.isEnd) { }
         * --------------
         */
        @property bool isEnd() @safe @nogc pure nothrow { return type == TagType.END; }

        /**
         * Returns true if the Tag is an empty tag
         *
         * Example:
         * --------------
         * if (tag.isEmpty) { }
         * --------------
         */
        @property bool isEmpty() @safe @nogc pure nothrow { return type == TagType.EMPTY; }
    }
}
1232
1233 /**
1234 * Class representing a comment
1235 */
class Comment : Item
{
    private string content;

    /**
     * Construct a comment
     *
     * Params:
     *      content = the body of the comment
     *
     * Throws: CommentException if the comment body is illegal (contains "--"
     * or exactly equals "-")
     *
     * Example:
     * --------------
     * auto item = new Comment("This is a comment");
     *    // constructs <!--This is a comment-->
     * --------------
     */
    this(string content) @safe pure
    {
        import std.string : indexOf;

        if (content == "-" || content.indexOf("--") != -1)
            throw new CommentException(content);
        this.content = content;
    }

    /**
     * Compares two comments for equality
     *
     * Returns: true only if o is a Comment with the same body
     *
     * Example:
     * --------------
     * Comment item1,item2;
     * if (item1 == item2) { }
     * --------------
     */
    override bool opEquals(scope const Object o) const
    {
        const item = toType!(const Item)(o);
        const t = cast(const Comment) item;
        return t !is null && content == t.content;
    }

    /**
     * Compares two comments
     *
     * You should rarely need to call this function. It exists so that Comments
     * can be used as associative array keys.
     *
     * Returns: a negative value, zero, or a positive value when this comment's
     * body orders before, equal to, or after the other's. An Item that is not
     * a Comment compares as 0.
     *
     * Example:
     * --------------
     * Comment item1,item2;
     * if (item1 < item2) { }
     * --------------
     */
    override int opCmp(scope const Object o) scope const
    {
        const item = toType!(const Item)(o);
        const t = cast(const Comment) item;

        // The previous single expression `t !is null && (...)` collapsed the
        // three-way result to a bool, so -1 could never be returned. The
        // branches are spelled out to keep the sign.
        if (t is null) return 0;
        if (content == t.content) return 0;
        return content < t.content ? -1 : 1;
    }

    /**
     * Returns the hash of a Comment
     *
     * You should rarely need to call this function. It exists so that Comments
     * can be used as associative array keys.
     */
    override size_t toHash() scope const nothrow { return hash(content); }

    /**
     * Returns a string representation of this comment
     */
    override string toString() scope const @safe pure nothrow { return "<!--" ~ content ~ "-->"; }

    override @property @safe @nogc pure nothrow scope bool isEmptyXML() const { return false; } /// Returns false always
}
1315
@safe unittest // issue 16241
{
    import std.exception : assertThrown;

    // A body of "==" is legal and stored unchanged ...
    auto legal = new Comment("==");
    assert(legal.content == "==");

    // ... whereas a body containing "--" must be rejected.
    assertThrown!CommentException(new Comment("--"));
}
1323
1324 /**
1325 * Class representing a Character Data section
1326 */
class CData : Item
{
    private string content;

    /**
     * Construct a character data section
     *
     * Params:
     *      content = the body of the character data segment
     *
     * Throws: CDataException if the segment body is illegal (contains "]]>")
     *
     * Example:
     * --------------
     * auto item = new CData("<b>hello</b>");
     *    // constructs <![CDATA[<b>hello</b>]]>
     * --------------
     */
    this(string content) @safe pure
    {
        import std.string : indexOf;
        if (content.indexOf("]]>") != -1) throw new CDataException(content);
        this.content = content;
    }

    /**
     * Compares two CDatas for equality
     *
     * Returns: true only if o is a CData with the same body
     *
     * Example:
     * --------------
     * CData item1,item2;
     * if (item1 == item2) { }
     * --------------
     */
    override bool opEquals(scope const Object o) const
    {
        const item = toType!(const Item)(o);
        const t = cast(const CData) item;
        return t !is null && content == t.content;
    }

    /**
     * Compares two CDatas
     *
     * You should rarely need to call this function. It exists so that CDatas
     * can be used as associative array keys.
     *
     * Returns: a negative value, zero, or a positive value when this section's
     * body orders before, equal to, or after the other's. An Item that is not
     * a CData compares as 0.
     *
     * Example:
     * --------------
     * CData item1,item2;
     * if (item1 < item2) { }
     * --------------
     */
    override int opCmp(scope const Object o) scope const
    {
        const item = toType!(const Item)(o);
        const t = cast(const CData) item;

        // The previous single expression `t !is null && (...)` collapsed the
        // three-way result to a bool, so -1 could never be returned. The
        // branches are spelled out to keep the sign.
        if (t is null) return 0;
        if (content == t.content) return 0;
        return content < t.content ? -1 : 1;
    }

    /**
     * Returns the hash of a CData
     *
     * You should rarely need to call this function. It exists so that CDatas
     * can be used as associative array keys.
     */
    override size_t toHash() scope const nothrow { return hash(content); }

    /**
     * Returns a string representation of this CData section
     */
    override string toString() scope const @safe pure nothrow { return cdata ~ content ~ "]]>"; }

    override @property @safe @nogc pure nothrow scope bool isEmptyXML() const { return false; } /// Returns false always
}
1403
1404 /**
1405 * Class representing a text (aka Parsed Character Data) section
1406 */
class Text : Item
{
    private string content;

    /**
     * Construct a text (aka PCData) section
     *
     * Params:
     *      content = the text. This function encodes the text before
     *      insertion, so it is safe to insert any text
     *
     * Example:
     * --------------
     * auto item = new Text("a < b");
     *    // constructs a &lt; b
     * --------------
     */
    this(string content) @safe pure
    {
        this.content = encode(content);
    }

    /**
     * Compares two text sections for equality
     *
     * Returns: true only if o is a Text with the same (encoded) content
     *
     * Example:
     * --------------
     * Text item1,item2;
     * if (item1 == item2) { }
     * --------------
     */
    override bool opEquals(scope const Object o) const
    {
        const item = toType!(const Item)(o);
        const t = cast(const Text) item;
        return t !is null && content == t.content;
    }

    /**
     * Compares two text sections
     *
     * You should rarely need to call this function. It exists so that Texts
     * can be used as associative array keys.
     *
     * Returns: a negative value, zero, or a positive value when this section's
     * content orders before, equal to, or after the other's. An Item that is
     * not a Text compares as 0.
     *
     * Example:
     * --------------
     * Text item1,item2;
     * if (item1 < item2) { }
     * --------------
     */
    override int opCmp(scope const Object o) scope const
    {
        const item = toType!(const Item)(o);
        const t = cast(const Text) item;

        // The previous single expression `t !is null && (...)` collapsed the
        // three-way result to a bool, so -1 could never be returned. The
        // branches are spelled out to keep the sign.
        if (t is null) return 0;
        if (content == t.content) return 0;
        return content < t.content ? -1 : 1;
    }

    /**
     * Returns the hash of a text section
     *
     * You should rarely need to call this function. It exists so that Texts
     * can be used as associative array keys.
     */
    override size_t toHash() scope const nothrow { return hash(content); }

    /**
     * Returns a string representation of this Text section
     */
    override string toString() scope const @safe @nogc pure nothrow { return content; }

    /**
     * Returns true if the content is the empty string
     */
    override @property @safe @nogc pure nothrow scope bool isEmptyXML() const { return content.length == 0; }
}
1483
1484 /**
1485 * Class representing an XML Instruction section
1486 */
class XMLInstruction : Item
{
    private string content;

    /**
     * Construct an XML Instruction section
     *
     * Params:
     *      content = the body of the instruction segment
     *
     * Throws: XIException if the segment body is illegal (contains ">")
     *
     * Example:
     * --------------
     * auto item = new XMLInstruction("ATTLIST");
     *    // constructs <!ATTLIST>
     * --------------
     */
    this(string content) @safe pure
    {
        import std.string : indexOf;
        if (content.indexOf(">") != -1) throw new XIException(content);
        this.content = content;
    }

    /**
     * Compares two XML instructions for equality
     *
     * Returns: true only if o is an XMLInstruction with the same body
     *
     * Example:
     * --------------
     * XMLInstruction item1,item2;
     * if (item1 == item2) { }
     * --------------
     */
    override bool opEquals(scope const Object o) const
    {
        const item = toType!(const Item)(o);
        const t = cast(const XMLInstruction) item;
        return t !is null && content == t.content;
    }

    /**
     * Compares two XML instructions
     *
     * You should rarely need to call this function. It exists so that
     * XmlInstructions can be used as associative array keys.
     *
     * Returns: a negative value, zero, or a positive value when this
     * instruction's body orders before, equal to, or after the other's. An
     * Item that is not an XMLInstruction compares as 0.
     *
     * Example:
     * --------------
     * XMLInstruction item1,item2;
     * if (item1 < item2) { }
     * --------------
     */
    override int opCmp(scope const Object o) scope const
    {
        const item = toType!(const Item)(o);
        const t = cast(const XMLInstruction) item;

        // The previous single expression `t !is null && (...)` collapsed the
        // three-way result to a bool, so -1 could never be returned. The
        // branches are spelled out to keep the sign.
        if (t is null) return 0;
        if (content == t.content) return 0;
        return content < t.content ? -1 : 1;
    }

    /**
     * Returns the hash of an XMLInstruction
     *
     * You should rarely need to call this function. It exists so that
     * XmlInstructions can be used as associative array keys.
     */
    override size_t toHash() scope const nothrow { return hash(content); }

    /**
     * Returns a string representation of this XmlInstruction
     */
    override string toString() scope const @safe pure nothrow { return "<!" ~ content ~ ">"; }

    override @property @safe @nogc pure nothrow scope bool isEmptyXML() const { return false; } /// Returns false always
}
1563
1564 /**
1565 * Class representing a Processing Instruction section
1566 */
class ProcessingInstruction : Item
{
    private string content;

    /**
     * Construct a Processing Instruction section
     *
     * Params:
     *      content = the body of the instruction segment
     *
     * Throws: PIException if the segment body is illegal (contains "?>")
     *
     * Example:
     * --------------
     * auto item = new ProcessingInstruction("php");
     *    // constructs <?php?>
     * --------------
     */
    this(string content) @safe pure
    {
        import std.string : indexOf;
        if (content.indexOf("?>") != -1) throw new PIException(content);
        this.content = content;
    }

    /**
     * Compares two processing instructions for equality
     *
     * Returns: true only if o is a ProcessingInstruction with the same body
     *
     * Example:
     * --------------
     * ProcessingInstruction item1,item2;
     * if (item1 == item2) { }
     * --------------
     */
    override bool opEquals(scope const Object o) const
    {
        const item = toType!(const Item)(o);
        const t = cast(const ProcessingInstruction) item;
        return t !is null && content == t.content;
    }

    /**
     * Compares two processing instructions
     *
     * You should rarely need to call this function. It exists so that
     * ProcessingInstructions can be used as associative array keys.
     *
     * Returns: a negative value, zero, or a positive value when this
     * instruction's body orders before, equal to, or after the other's. An
     * Item that is not a ProcessingInstruction compares as 0.
     *
     * Example:
     * --------------
     * ProcessingInstruction item1,item2;
     * if (item1 < item2) { }
     * --------------
     */
    override int opCmp(scope const Object o) scope const
    {
        const item = toType!(const Item)(o);
        const t = cast(const ProcessingInstruction) item;

        // The previous single expression `t !is null && (...)` collapsed the
        // three-way result to a bool, so -1 could never be returned. The
        // branches are spelled out to keep the sign.
        if (t is null) return 0;
        if (content == t.content) return 0;
        return content < t.content ? -1 : 1;
    }

    /**
     * Returns the hash of a ProcessingInstruction
     *
     * You should rarely need to call this function. It exists so that
     * ProcessingInstructions can be used as associative array keys.
     */
    override size_t toHash() scope const nothrow { return hash(content); }

    /**
     * Returns a string representation of this ProcessingInstruction
     */
    override string toString() scope const @safe pure nothrow { return "<?" ~ content ~ "?>"; }

    override @property @safe @nogc pure nothrow bool isEmptyXML() scope const { return false; } /// Returns false always
}
1643
1644 /**
1645 * Abstract base class for XML items
1646 */
abstract class Item
{
    /// Tests for equality with another Item of the same type
    abstract override bool opEquals(scope const Object o) @safe const;

    /// Orders this item relative to another Item of the same type
    abstract override int opCmp(scope const Object o) @safe const;

    /// Returns the hash of this item
    abstract override size_t toHash() @safe scope const;

    /// Returns a string representation of this item
    abstract override string toString() @safe scope const;

    /**
     * Returns an indented string representation of this item
     *
     * This base implementation strips the result of toString() and returns it
     * as a single line, or no lines at all when nothing is left. The indent
     * argument is not used here.
     *
     * Params:
     *      indent = number of spaces by which to indent child elements
     */
    string[] pretty(uint indent) @safe scope const
    {
        import std.string : strip;

        string stripped = strip(toString());
        if (stripped.length == 0)
            return [];
        return [ stripped ];
    }

    /// Returns true if the item represents empty XML text
    abstract @property @safe @nogc pure nothrow bool isEmptyXML() scope const;
}
1677
1678 /**
1679 * Class for parsing an XML Document.
1680 *
1681 * This is a subclass of ElementParser. Most of the useful functions are
1682 * documented there.
1683 *
1684 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
1685 *
1686 * Bugs:
1687 * Currently only supports UTF documents.
1688 *
1689 * If there is an encoding attribute in the prolog, it is ignored.
1690 *
1691 */
class DocumentParser : ElementParser
{
    string xmlText;

    /**
     * Constructs a DocumentParser.
     *
     * The input to this function MUST be valid XML.
     * This is enforced by the function's in contract.
     *
     * Params:
     *      xmlText_ = the entire XML document as text
     *
     */
    this(string xmlText_)
    in
    {
        assert(xmlText_.length != 0);

        // Run the well-formedness check; on failure, surface the checker's
        // own explanation through the assertion message.
        try
        {
            check(xmlText_);
        }
        catch (CheckException ex)
        {
            assert(false, "\n" ~ ex.toString());
        }
    }
    body
    {
        // s must point at the document text before super() runs, because the
        // base constructor reads through it.
        xmlText = xmlText_;
        s = &xmlText;
        super();

        // Parse through the root tag (but not beyond)
        parse();
    }
}
1729
@system unittest
{
    // The root has exactly one direct child element, named "child".
    auto tree = new Document("<root><child><grandchild/></child></root>");
    assert(tree.elements.length == 1);
    assert(tree.elements[0].tag.name == "child");

    // The item list and the element list compare equal for this document.
    assert(tree.items == tree.elements);
}
1737
1738 /**
1739 * Class for parsing an XML element.
1740 *
1741 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
1742 *
1743 * Note that you cannot construct instances of this class directly. You can
1744 * construct a DocumentParser (which is a subclass of ElementParser), but
1745 * otherwise, Instances of ElementParser will be created for you by the
1746 * library, and passed your way via onStartTag handlers.
1747 *
1748 */
class ElementParser
{
    alias Handler = void delegate(string);
    alias ElementHandler = void delegate(in Element element);
    alias ParserHandler = void delegate(ElementParser parser);

    private
    {
        Tag tag_;
        string elementStart;
        string* s;

        Handler commentHandler = null;
        Handler cdataHandler = null;
        Handler xiHandler = null;
        Handler piHandler = null;
        Handler rawTextHandler = null;
        Handler textHandler = null;

        // Private constructor for start tags
        this(ElementParser parent) @safe @nogc pure nothrow
        {
            // s must be set before this() runs, because this() reads *s.
            s = parent.s;
            this();
            tag_ = parent.tag_;
        }

        // Private constructor for empty tags
        this(Tag tag, string* t) @safe @nogc pure nothrow
        {
            // s must be set before this() runs, because this() reads *s.
            s = t;
            this();
            tag_ = tag;
        }

        // Calls the onStartTag handler registered under `name`, or failing
        // that the one registered under null. Does nothing if neither exists.
        void dispatchStartTag(string name, ElementParser parser)
        {
            auto handler = name in onStartTag;
            if (handler is null) handler = null in onStartTag;
            if (handler !is null) (*handler)(parser);
        }

        // Calls the onEndTag handler registered under `name`, or failing
        // that the one registered under null. Does nothing if neither exists.
        void dispatchEndTag(string name, in Element element)
        {
            auto handler = name in onEndTag;
            if (handler is null) handler = null in onEndTag;
            if (handler !is null) (*handler)(element);
        }
    }

    /**
     * The Tag at the start of the element being parsed. You can read this to
     * determine the tag's name and attributes.
     */
    @property @safe @nogc pure nothrow const(Tag) tag() const { return tag_; }

    /**
     * Register a handler which will be called whenever a start tag is
     * encountered which matches the specified name. You can also pass null as
     * the name, in which case the handler will be called for any unmatched
     * start tag.
     *
     * Example:
     * --------------
     * // Call this function whenever a <podcast> start tag is encountered
     * onStartTag["podcast"] = (ElementParser xml)
     * {
     *     // Your code here
     *     //
     *     // This is a closure, so code here may reference
     *     // variables which are outside of this scope
     * };
     *
     * // call myEpisodeStartHandler (defined elsewhere) whenever an <episode>
     * // start tag is encountered
     * onStartTag["episode"] = &myEpisodeStartHandler;
     *
     * // call delegate dg for all other start tags
     * onStartTag[null] = dg;
     * --------------
     *
     * This library will supply your function with a new instance of
     * ElementParser, which may be used to parse inside the element whose
     * start tag was just found, or to identify the tag attributes of the
     * element, etc.
     *
     * Note that your function will be called for both start tags and empty
     * tags. That is, we make no distinction between <br></br>
     * and <br/>.
     */
    ParserHandler[string] onStartTag;

    /**
     * Register a handler which will be called whenever an end tag is
     * encountered which matches the specified name. You can also pass null as
     * the name, in which case the handler will be called for any unmatched
     * end tag.
     *
     * Example:
     * --------------
     * // Call this function whenever a </podcast> end tag is encountered
     * onEndTag["podcast"] = (in Element e)
     * {
     *     // Your code here
     *     //
     *     // This is a closure, so code here may reference
     *     // variables which are outside of this scope
     * };
     *
     * // call myEpisodeEndHandler (defined elsewhere) whenever an </episode>
     * // end tag is encountered
     * onEndTag["episode"] = &myEpisodeEndHandler;
     *
     * // call delegate dg for all other end tags
     * onEndTag[null] = dg;
     * --------------
     *
     * Note that your function will be called for both end tags and empty
     * tags. That is, we make no distinction between <br></br>
     * and <br/>.
     */
    ElementHandler[string] onEndTag;

    protected this() @safe @nogc pure nothrow
    {
        // Remember the text remaining at construction time; toString()
        // measures progress against it.
        elementStart = *s;
    }

    /**
     * Register a handler which will be called whenever text is encountered.
     *
     * Example:
     * --------------
     * // Call this function whenever text is encountered
     * onText = (string s)
     * {
     *     // Your code here
     *
     *     // The passed parameter s will have been decoded by the time you see
     *     // it, and so may contain any character.
     *     //
     *     // This is a closure, so code here may reference
     *     // variables which are outside of this scope
     * };
     * --------------
     */
    @property @safe @nogc pure nothrow void onText(Handler handler) { textHandler = handler; }

    /**
     * Register an alternative handler which will be called whenever text
     * is encountered. This differs from onText in that onText will decode
     * the text, whereas onTextRaw will not. This allows you to make design
     * choices, since onText will be more accurate, but slower, while
     * onTextRaw will be faster, but less accurate. Of course, you can
     * still call decode() within your handler, if you want, but you'd
     * probably want to use onTextRaw only in circumstances where you
     * know that decoding is unnecessary.
     *
     * If both an onTextRaw and an onText handler are registered, only the
     * onTextRaw handler is called.
     *
     * Example:
     * --------------
     * // Call this function whenever text is encountered
     * onTextRaw = (string s)
     * {
     *     // Your code here
     *
     *     // The passed parameter s will NOT have been decoded.
     *     //
     *     // This is a closure, so code here may reference
     *     // variables which are outside of this scope
     * };
     * --------------
     */
    @safe @nogc pure nothrow void onTextRaw(Handler handler) { rawTextHandler = handler; }

    /**
     * Register a handler which will be called whenever a character data
     * segment is encountered.
     *
     * Example:
     * --------------
     * // Call this function whenever a CData section is encountered
     * onCData = (string s)
     * {
     *     // Your code here
     *
     *     // The passed parameter s does not include the opening <![CDATA[
     *     // nor closing ]]>
     *     //
     *     // This is a closure, so code here may reference
     *     // variables which are outside of this scope
     * };
     * --------------
     */
    @property @safe @nogc pure nothrow void onCData(Handler handler) { cdataHandler = handler; }

    /**
     * Register a handler which will be called whenever a comment is
     * encountered.
     *
     * Example:
     * --------------
     * // Call this function whenever a comment is encountered
     * onComment = (string s)
     * {
     *     // Your code here
     *
     *     // The passed parameter s does not include the opening <!-- nor
     *     // closing -->
     *     //
     *     // This is a closure, so code here may reference
     *     // variables which are outside of this scope
     * };
     * --------------
     */
    @property @safe @nogc pure nothrow void onComment(Handler handler) { commentHandler = handler; }

    /**
     * Register a handler which will be called whenever a processing
     * instruction is encountered.
     *
     * Example:
     * --------------
     * // Call this function whenever a processing instruction is encountered
     * onPI = (string s)
     * {
     *     // Your code here
     *
     *     // The passed parameter s does not include the opening <? nor
     *     // closing ?>
     *     //
     *     // This is a closure, so code here may reference
     *     // variables which are outside of this scope
     * };
     * --------------
     */
    @property @safe @nogc pure nothrow void onPI(Handler handler) { piHandler = handler; }

    /**
     * Register a handler which will be called whenever an XML instruction is
     * encountered.
     *
     * Example:
     * --------------
     * // Call this function whenever an XML instruction is encountered
     * // (Note: XML instructions may only occur preceding the root tag of a
     * // document).
     * onXI = (string s)
     * {
     *     // Your code here
     *
     *     // The passed parameter s does not include the opening <! nor
     *     // closing >
     *     //
     *     // This is a closure, so code here may reference
     *     // variables which are outside of this scope
     * };
     * --------------
     */
    @property @safe @nogc pure nothrow void onXI(Handler handler) { xiHandler = handler; }

    /**
     * Parse an XML element.
     *
     * Parsing will continue until the end of the current element. Any items
     * encountered for which a handler has been registered will invoke that
     * handler.
     *
     * Throws: various kinds of XMLException
     */
    void parse()
    {
        import std.algorithm.searching : startsWith;
        import std.string : indexOf;

        string t;
        const Tag root = tag_;
        // Start tags seen so far, keyed by name, so that an end tag can find
        // the start tag it closes.
        Tag[string] startTags;
        if (tag_ !is null) startTags[tag_.name] = tag_;

        while (s.length != 0)
        {
            if (startsWith(*s,"<!--"))
            {
                // Comment: hand over the body without the delimiters.
                chop(*s,4);
                t = chop(*s,indexOf(*s,"-->"));
                if (commentHandler.funcptr !is null) commentHandler(t);
                chop(*s,3);
            }
            else if (startsWith(*s,"<![CDATA["))
            {
                // CDATA section: hand over the body without the delimiters.
                chop(*s,9);
                t = chop(*s,indexOf(*s,"]]>"));
                if (cdataHandler.funcptr !is null) cdataHandler(t);
                chop(*s,3);
            }
            else if (startsWith(*s,"<!"))
            {
                // XML instruction. This test must come after the two more
                // specific "<!" forms above.
                chop(*s,2);
                t = chop(*s,indexOf(*s,">"));
                if (xiHandler.funcptr !is null) xiHandler(t);
                chop(*s,1);
            }
            else if (startsWith(*s,"<?"))
            {
                // Processing instruction.
                chop(*s,2);
                t = chop(*s,indexOf(*s,"?>"));
                if (piHandler.funcptr !is null) piHandler(t);
                chop(*s,2);
            }
            else if (startsWith(*s,"<"))
            {
                tag_ = new Tag(*s,true);
                if (root is null)
                    return; // Return to constructor of derived class

                if (tag_.isStart)
                {
                    startTags[tag_.name] = tag_;

                    auto parser = new ElementParser(this);
                    dispatchStartTag(tag_.name, parser);
                }
                else if (tag_.isEnd)
                {
                    const startTag = startTags[tag_.name];
                    string text;

                    if (startTag.tagString.length == 0)
                        assert(0);

                    // p is one past the start tag's text and q is the first
                    // character of the end tag; everything between them is
                    // decoded as the element's text. This presumably relies
                    // on both tagStrings being slices of the same buffer.
                    immutable(char)* p = startTag.tagString.ptr
                        + startTag.tagString.length;
                    immutable(char)* q = &tag_.tagString[0];
                    text = decode(p[0..(q-p)], DecodeMode.LOOSE);

                    auto element = new Element(startTag);
                    if (text.length != 0) element ~= new Text(text);

                    dispatchEndTag(tag_.name, element);

                    // The end tag of the element this parser was created for
                    // finishes the parse.
                    if (tag_.name == root.name) return;
                }
                else if (tag_.isEmpty)
                {
                    Tag startTag = new Tag(tag_.name);

                    // FIX by hed010gy, for bug 2979
                    // http://d.puremagic.com/issues/show_bug.cgi?id=2979
                    if (tag_.attr.length > 0)
                          foreach (tn,tv; tag_.attr) startTag.attr[tn]=tv;
                    // END FIX

                    // Handle the pretend start tag. The nested parser is
                    // given an empty string to parse.
                    string s2;
                    auto parser = new ElementParser(startTag,&s2);
                    dispatchStartTag(startTag.name, parser);

                    // Handle the pretend end tag
                    auto element = new Element(startTag);
                    dispatchEndTag(tag_.name, element);
                }
            }
            else
            {
                // Plain text up to the next '<'. A raw handler takes
                // precedence; otherwise the decoded text goes to onText.
                t = chop(*s,indexOf(*s,"<"));
                if (rawTextHandler.funcptr !is null)
                    rawTextHandler(t);
                else if (textHandler.funcptr !is null)
                    textHandler(decode(t,DecodeMode.LOOSE));
            }
        }
    }

    /**
     * Returns that part of the element which has already been parsed
     */
    override string toString() const @nogc @safe pure nothrow
    {
        // The consumed part is what elementStart had beyond what is still
        // left in *s.
        assert(elementStart.length >= s.length);
        return elementStart[0 .. elementStart.length - s.length];
    }

}
2143
2144 private
2145 {
Check(string msg)2146 template Check(string msg)
2147 {
2148 string old = s;
2149
2150 void fail() @safe pure
2151 {
2152 s = old;
2153 throw new Err(s,msg);
2154 }
2155
2156 void fail(Err e) @safe pure
2157 {
2158 s = old;
2159 throw new Err(s,msg,e);
2160 }
2161
2162 void fail(string msg2) @safe pure
2163 {
2164 fail(new Err(s,msg2));
2165 }
2166 }
2167
checkMisc(ref string s)2168 void checkMisc(ref string s) @safe pure // rule 27
2169 {
2170 import std.algorithm.searching : startsWith;
2171
2172 mixin Check!("Misc");
2173
2174 try
2175 {
2176 if (s.startsWith("<!--")) { checkComment(s); }
2177 else if (s.startsWith("<?")) { checkPI(s); }
2178 else { checkSpace(s); }
2179 }
2180 catch (Err e) { fail(e); }
2181 }
2182
checkDocument(ref string s)2183 void checkDocument(ref string s) @safe pure // rule 1
2184 {
2185 mixin Check!("Document");
2186 try
2187 {
2188 checkProlog(s);
2189 checkElement(s);
2190 star!(checkMisc)(s);
2191 }
2192 catch (Err e) { fail(e); }
2193 }
2194
checkChars(ref string s)2195 void checkChars(ref string s) @safe pure // rule 2
2196 {
2197 // TO DO - Fix std.utf stride and decode functions, then use those
2198 // instead
2199 import std.format : format;
2200
2201 mixin Check!("Chars");
2202
2203 dchar c;
2204 ptrdiff_t n = -1;
2205 // 'i' must not be smaller than size_t because size_t is used internally in
2206 // aApply.d and it will be cast e.g to (int *) which fails on BigEndian targets.
2207 foreach (size_t i, dchar d; s)
2208 {
2209 if (!isChar(d))
2210 {
2211 c = d;
2212 n = i;
2213 break;
2214 }
2215 }
2216 if (n != -1)
2217 {
2218 s = s[n..$];
2219 fail(format("invalid character: U+%04X",c));
2220 }
2221 }
2222
checkSpace(ref string s)2223 void checkSpace(ref string s) @safe pure // rule 3
2224 {
2225 import std.algorithm.searching : countUntil;
2226 import std.ascii : isWhite;
2227 import std.utf : byCodeUnit;
2228
2229 mixin Check!("Whitespace");
2230 ptrdiff_t i = s.byCodeUnit.countUntil!(a => !isWhite(a));
2231 if (i == -1 && s.length > 0 && isWhite(s[0]))
2232 s = s[$ .. $];
2233 else if (i > -1)
2234 s = s[i .. $];
2235 if (s is old) fail();
2236 }
2237
checkName(ref string s,out string name)2238 void checkName(ref string s, out string name) @safe pure // rule 5
2239 {
2240 mixin Check!("Name");
2241
2242 if (s.length == 0) fail();
2243 ptrdiff_t n;
2244 // 'i' must not be smaller than size_t because size_t is used internally in
2245 // aApply.d and it will be cast e.g to (int *) which fails on BigEndian targets.
2246 foreach (size_t i, dchar c; s)
2247 {
2248 if (c == '_' || c == ':' || isLetter(c)) continue;
2249 if (i == 0) fail();
2250 if (c == '-' || c == '.' || isDigit(c)
2251 || isCombiningChar(c) || isExtender(c)) continue;
2252 n = i;
2253 break;
2254 }
2255 name = s[0 .. n];
2256 s = s[n..$];
2257 }
2258
// Rule 10 (AttValue): a quoted attribute value.
//
// The value must open with '"' or '\'' and close with the same character.
// Between the quotes a literal '<' is rejected and every '&' must begin a
// valid reference. On success `s` is advanced past the closing quote.
//
// The scan stops at the closing quote, at '<' or at '&' -- whichever comes
// first -- and stops cleanly at end of input. Scanning only for the closing
// quote made the '<' and reference checks unreachable, and an unterminated
// value produced an out-of-range slice instead of a check failure.
void checkAttValue(ref string s) @safe pure // rule 10
{
    mixin Check!("AttValue");

    if (s.length == 0) fail();
    immutable char c = s[0];
    if (c != '\u0022' && c != '\u0027')
        fail("attribute value requires quotes");
    s = s[1..$];
    for (;;)
    {
        // Skip ordinary characters; all three stop characters are ASCII, so
        // scanning code units is safe for UTF-8 input.
        size_t i = 0;
        while (i < s.length && s[i] != c && s[i] != '<' && s[i] != '&')
            ++i;
        s = s[i .. $];

        if (s.length == 0) fail("unterminated attribute value");
        if (s[0] == '<') fail("< found in attribute value");
        if (s[0] == c) break;
        // Only '&' is left: it must start a well-formed reference.
        try { checkReference(s); } catch (Err e) { fail(e); }
    }
    s = s[1..$];
}
2281
// Rule 14 (CharData): advance `s` over character data, stopping (without
// consuming) at the first '&' or '<', or at end of input. The sequence "]]>"
// is not allowed inside character data and is reported through `fail`.
void checkCharData(ref string s) @safe pure // rule 14
{
    import std.algorithm.searching : startsWith;

    mixin Check!("CharData");

    for (; s.length != 0; s = s[1 .. $])
    {
        immutable char lead = s[0];
        if (lead == '&' || lead == '<') break;
        if (lead == ']' && s.startsWith("]]>"))
            fail("]]> found within char data");
    }
}
2296
// Rule 15 (Comment): "<!--" ... "-->".
// After the opener, the first "--" found must be the start of the closing
// "-->"; a comment with no "--" at all is reported as unterminated.
void checkComment(ref string s) @safe pure // rule 15
{
    import std.string : indexOf;

    mixin Check!("Comment");

    try
    {
        checkLiteral("<!--", s);
    }
    catch (Err cause) { fail(cause); }

    immutable ptrdiff_t dashes = s.indexOf("--");
    if (dashes < 0) fail("unterminated comment");
    s = s[dashes .. $];

    try
    {
        checkLiteral("-->", s);
    }
    catch (Err cause) { fail(cause); }
}
2309
// Rule 16 (PI): "<?" followed by anything up to and including the next "?>".
// Any failure of the two steps is passed to the `fail(Err)` overload supplied
// by `mixin Check` (declared elsewhere in this file).
void checkPI(ref string s) @safe pure // rule 16
{
    mixin Check!("PI");

    try
    {
        checkLiteral("<?", s);
        checkEnd("?>", s);
    }
    catch (Err cause)
    {
        fail(cause);
    }
}
2321
// Rule 18 (CDSect): the CDATA opener (the file-level `cdata` constant)
// followed by anything up to and including the next "]]>".
void checkCDSect(ref string s) @safe pure // rule 18
{
    mixin Check!("CDSect");

    try
    {
        checkLiteral(cdata, s);
        checkEnd("]]>", s);
    }
    catch (Err cause)
    {
        fail(cause);
    }
}
2333
// Rule 22 (prolog): optional XML declaration, any number of Misc items, then
// optionally a DOCTYPE declaration followed by more Misc items.
void checkProlog(ref string s) @safe pure // rule 22
{
    mixin Check!("Prolog");

    // DOCTYPE followed by trailing Misc items, attempted as a single unit.
    alias doctypeThenMisc = seq!(checkDocTypeDecl, star!checkMisc);

    try
    {
        /* The XML declaration is optional
         * http://www.w3.org/TR/2008/REC-xml-20081126/#NT-prolog
         */
        opt!checkXMLDecl(s);

        star!checkMisc(s);
        opt!doctypeThenMisc(s);
    }
    catch (Err cause)
    {
        fail(cause);
    }
}
2350
// Rule 23 (XMLDecl): "<?xml", mandatory version info, optional encoding
// declaration, optional standalone declaration, optional whitespace, "?>".
void checkXMLDecl(ref string s) @safe pure // rule 23
{
    mixin Check!("XMLDecl");

    try
    {
        checkLiteral("<?xml", s);
        checkVersionInfo(s);
        opt!checkEncodingDecl(s);
        opt!checkSDDecl(s);
        opt!checkSpace(s);
        checkLiteral("?>", s);
    }
    catch (Err cause)
    {
        fail(cause);
    }
}
2366
// Rule 24 (VersionInfo): whitespace, the word "version", an Eq, then a
// quoted version number.
void checkVersionInfo(ref string s) @safe pure // rule 24
{
    mixin Check!("VersionInfo");

    try
    {
        checkSpace(s);
        checkLiteral("version", s);
        checkEq(s);
        quoted!checkVersionNum(s);
    }
    catch (Err cause)
    {
        fail(cause);
    }
}
2380
// Rule 25 (Eq): an '=' sign with optional whitespace on either side.
void checkEq(ref string s) @safe pure // rule 25
{
    mixin Check!("Eq");

    try
    {
        opt!checkSpace(s);
        checkLiteral("=", s);
        opt!checkSpace(s);
    }
    catch (Err cause)
    {
        fail(cause);
    }
}
2393
// Rule 26 (VersionNum): consume one or more characters from the set
// [a-zA-Z0-9_.:-] and fail if there is none.
//
// The caller (`quoted!checkVersionNum`) handles the surrounding quotes, which
// may be single or double, so this function must stop at the first character
// that cannot belong to a version number rather than hunt for a '"'.
// Hunting for '"' mis-handled single-quoted versions and sliced with -1 when
// no double quote followed.
void checkVersionNum(ref string s) @safe pure // rule 26
{
    mixin Check!("VersionNum");

    size_t n = 0;
    while (n < s.length)
    {
        immutable char ch = s[n];
        immutable bool allowed =
               (ch >= 'a' && ch <= 'z')
            || (ch >= 'A' && ch <= 'Z')
            || (ch >= '0' && ch <= '9')
            || ch == '_' || ch == '.' || ch == ':' || ch == '-';
        if (!allowed) break;
        ++n;
    }
    s = s[n .. $];

    // Unchanged slice means not a single version character was present.
    if (s is old) fail();
}
2404
// Rule 28 (doctypedecl): "<!DOCTYPE" followed by anything up to and including
// the next ">". The declaration's interior is not examined.
void checkDocTypeDecl(ref string s) @safe pure // rule 28
{
    mixin Check!("DocTypeDecl");

    try
    {
        checkLiteral("<!DOCTYPE", s);
        //
        // TO DO -- ensure DOCTYPE is well formed
        // (But not yet. That's one of our "future directions")
        //
        checkEnd(">", s);
    }
    catch (Err cause)
    {
        fail(cause);
    }
}
2420
// Rule 32 (SDDecl): whitespace, the word "standalone", an Eq, then one of
// 'yes', "yes", 'no' or "no" (quotes included).
void checkSDDecl(ref string s) @safe pure // rule 32
{
    import std.algorithm.searching : startsWith;

    mixin Check!("SDDecl");

    try
    {
        checkSpace(s);
        checkLiteral("standalone", s);
        checkEq(s);
    }
    catch (Err cause)
    {
        fail(cause);
    }

    // Length of the quoted value to consume, quotes included.
    size_t quotedLen = 0;
    if (s.startsWith("'yes'", "\"yes\""))
        quotedLen = 5;
    else if (s.startsWith("'no'", "\"no\""))
        quotedLen = 4;
    else
        fail("standalone attribute value must be 'yes', \"yes\","~
            " 'no' or \"no\"");
    s = s[quotedLen .. $];
}
2442
// Rule 39 (element): either an empty-element tag, or a start tag, content and
// an end tag whose name matches the start tag.
void checkElement(ref string s) @safe pure // rule 39
{
    mixin Check!("Element");

    string sname, ename, kind;
    try
    {
        checkTag(s, kind, sname);
    }
    catch (Err cause) { fail(cause); }

    // checkTag reports anything other than "STag" for a tag closed with "/>",
    // in which case the element is already complete.
    if (kind != "STag") return;

    string atEndTag;
    try
    {
        checkContent(s);
        atEndTag = s;               // remember where the end tag begins
        checkETag(s, ename);
    }
    catch (Err cause) { fail(cause); }

    if (sname == ename) return;

    // Rewind to the end tag before raising the mismatch error.
    s = atEndTag;
    fail("end tag name \"" ~ ename
        ~ "\" differs from start tag name \""~sname~"\"");
}
2468
2469 // rules 40 and 44
// Start tag or empty-element tag: '<', a Name, zero or more
// whitespace-separated attributes, optional whitespace, an optional '/',
// then '>'.
//
// Params:
//   s    = input, advanced past the tag on success
//   type = set to "STag" for a start tag, "ETag" when a '/' precedes the '>'
//   name = receives the tag name
void checkTag(ref string s, out string type, out string name) @safe pure
{
    mixin Check!("Tag");

    // One attribute together with the whitespace that must precede it.
    alias spacedAttribute = seq!(checkSpace, checkAttribute);

    try
    {
        type = "STag";
        checkLiteral("<", s);
        checkName(s, name);
        star!spacedAttribute(s);
        opt!checkSpace(s);

        immutable bool selfClosing = s.length != 0 && s[0] == '/';
        if (selfClosing)
        {
            s = s[1 .. $];
            type = "ETag";
        }
        checkLiteral(">", s);
    }
    catch (Err cause)
    {
        fail(cause);
    }
}
2490
// Rule 41 (Attribute): a Name, an Eq, then a quoted attribute value.
// The attribute's name is parsed but not used further here.
void checkAttribute(ref string s) @safe pure // rule 41
{
    mixin Check!("Attribute");

    try
    {
        string ignoredName;
        checkName(s, ignoredName);
        checkEq(s);
        checkAttValue(s);
    }
    catch (Err cause)
    {
        fail(cause);
    }
}
2504
// Rule 42 (ETag): "</", a Name, optional whitespace, ">".
//
// Params:
//   s    = input, advanced past the end tag on success
//   name = receives the name found in the end tag
void checkETag(ref string s, out string name) @safe pure // rule 42
{
    mixin Check!("ETag");

    try
    {
        checkLiteral("</", s);
        checkName(s, name);
        opt!checkSpace(s);
        checkLiteral(">", s);
    }
    catch (Err cause)
    {
        fail(cause);
    }
}
2518
// Rule 43 (content): a sequence of references, comments, processing
// instructions, CDATA sections, child elements and character data. Stops
// (without consuming) at the first "</" or at end of input.
void checkContent(ref string s) @safe pure // rule 43
{
    import std.algorithm.searching : startsWith;

    mixin Check!("Content");

    try
    {
        while (s.length != 0)
        {
            // Move the mixin's snapshot forward to the start of this item.
            old = s;

            if (s.startsWith("</")) break;          // enclosing element's end tag

            if (s[0] == '&')
                checkReference(s);
            else if (s[0] != '<')
                checkCharData(s);
            else if (s.startsWith("<!--"))
                checkComment(s);
            else if (s.startsWith("<?"))
                checkPI(s);
            else if (s.startsWith(cdata))
                checkCDSect(s);
            else
                checkElement(s);
        }
    }
    catch (Err cause)
    {
        fail(cause);
    }
}
2541
// Rule 66 (CharRef): "&#" decimal-digits ";" or "&#x" hex-digits ";".
//
// Params:
//   s = input, advanced past the ';' on success
//   c = receives the referenced character
//
// Fails when there are no digits, when the value is not accepted by isChar,
// or when the terminating ';' is missing.
//
// The value is accumulated in a saturating counter: once it has exceeded
// U+10FFFF further digits are consumed but no longer added. Accumulating
// without a bound let a long digit run overflow 32 bits and wrap around to a
// legal character.
void checkCharRef(ref string s, out dchar c) @safe pure // rule 66
{
    import std.format : format;

    mixin Check!("CharRef");

    enum uint maxCodePoint = 0x10FFFF;

    c = 0;
    try { checkLiteral("&#",s); } catch (Err e) { fail(e); }
    int radix = 10;
    if (s.length != 0 && s[0] == 'x')
    {
        s = s[1..$];
        radix = 16;
    }
    if (s.length == 0) fail("unterminated character reference");
    if (s[0] == ';')
        fail("character reference must have at least one digit");

    uint value = 0;
    while (s.length != 0)
    {
        immutable char d = s[0];

        // Numeric value of the digit; anything else ends the digit run.
        int n;
        if (d >= '0' && d <= '9') n = d - '0';
        else if (d >= 'a' && d <= 'f') n = d - 'a' + 10;
        else if (d >= 'A' && d <= 'F') n = d - 'A' + 10;
        else break;

        // A hex letter inside a decimal reference also ends the run.
        if (n >= radix) break;

        // Saturate: at most 0x10FFFF * 16 + 15, which still fits in a uint.
        if (value <= maxCodePoint) value = value * radix + n;
        s = s[1..$];
    }
    if (value > maxCodePoint || !isChar(cast(dchar) value))
        fail(format("U+%04X is not a legal character", value));
    c = cast(dchar) value;
    if (s.length == 0 || s[0] != ';') fail("expected ;");
    else s = s[1..$];
}
2592
// Rule 67 (Reference): a character reference when the input starts with "&#",
// otherwise an entity reference. The decoded character is discarded.
void checkReference(ref string s) @safe pure // rule 67
{
    import std.algorithm.searching : startsWith;

    mixin Check!("Reference");

    try
    {
        if (!s.startsWith("&#"))
        {
            checkEntityRef(s);
        }
        else
        {
            dchar ignored;
            checkCharRef(s, ignored);
        }
    }
    catch (Err cause)
    {
        fail(cause);
    }
}
2607
// Rule 68 (EntityRef): '&', a Name, ';'. The entity name is parsed but not
// looked up or returned.
void checkEntityRef(ref string s) @safe pure // rule 68
{
    mixin Check!("EntityRef");

    try
    {
        string entityName;
        checkLiteral("&", s);
        checkName(s, entityName);
        checkLiteral(";", s);
    }
    catch (Err cause)
    {
        fail(cause);
    }
}
2621
// Rule 81 (EncName): an ASCII letter followed by any number of ASCII letters,
// digits, '.', '_' or '-'.
//
// On success `s` is advanced past the encoding name; the surrounding quotes
// are handled by the caller (`quoted!checkEncName`). Fails when the input is
// empty or does not start with a letter.
//
// The scan is bounded by the input length and stops at the first character
// that is not part of an encoding name, so it can neither slice with a
// "not found" index nor skip over unrelated text while hunting for a quote.
void checkEncName(ref string s) @safe pure // rule 81
{
    import std.ascii : isAlpha, isAlphaNum;

    mixin Check!("EncName");

    if (s.length == 0 || !isAlpha(s[0])) fail();

    size_t n = 1;
    while (n < s.length
        && (isAlphaNum(s[n]) || s[n] == '.' || s[n] == '_' || s[n] == '-'))
    {
        ++n;
    }
    s = s[n .. $];
}
2634
// Rule 80 (EncodingDecl): whitespace, the word "encoding", an Eq, then a
// quoted encoding name.
void checkEncodingDecl(ref string s) @safe pure // rule 80
{
    mixin Check!("EncodingDecl");

    try
    {
        checkSpace(s);
        checkLiteral("encoding", s);
        checkEq(s);
        quoted!checkEncName(s);
    }
    catch (Err cause)
    {
        fail(cause);
    }
}
2648
2649 // Helper functions
2650
// Require `s` to begin with `literal` and consume it; otherwise report
// `Expected literal "<literal>"` through the mixin's `fail`.
void checkLiteral(string literal,ref string s) @safe pure
{
    import std.string : startsWith;

    mixin Check!("Literal");

    if (s.startsWith(literal))
    {
        s = s[literal.length .. $];
        return;
    }
    fail("Expected literal \""~literal~"\"");
}
2660
// Skip forward to the first occurrence of `end` in `s` and consume it.
// Throws an Err positioned at the current `s` when `end` does not occur.
void checkEnd(string end,ref string s) @safe pure
{
    import std.string : indexOf;
    // Deliberately no mixin Check here.

    immutable terminatorAt = s.indexOf(end);
    if (terminatorAt == -1)
        throw new Err(s,"Unable to find terminating \""~end~"\"");

    s = s[terminatorAt .. $];
    checkLiteral(end, s);
}
2671
2672 // Metafunctions -- none of these use mixin Check
2673
// Optional item: run `f` on `s` and swallow an Err if it throws one.
void opt(alias f)(ref string s)
{
    try
    {
        f(s);
    }
    catch (Err ignored)
    {
        // An absent optional item is not an error.
    }
}
2678
// One or more: `f` must succeed once (its Err propagates otherwise), after
// which further repetitions are handled by `star`.
void plus(alias f)(ref string s)
{
    f(s);
    star!f(s);
}
2684
// Zero or more: keep applying `f` to `s` until the input is exhausted or `f`
// throws an Err, which ends the repetition silently.
void star(alias f)(ref string s)
{
    try
    {
        while (s.length != 0)
            f(s);
    }
    catch (Err ignored)
    {
        // First failure simply terminates the run.
    }
}
2693
// Apply `f` between a pair of matching quotes. A leading apostrophe selects
// single quotes; anything else is required to be a double quote.
void quoted(alias f)(ref string s)
{
    import std.string : startsWith;

    immutable string quote = s.startsWith("'") ? "'" : "\"";

    checkLiteral(quote, s);
    f(s);
    checkLiteral(quote, s);
}
2711
// Sequence: apply `f` to `s`, then `g` to whatever `f` left behind.
// Anything thrown by either step propagates to the caller.
void seq(alias f,alias g)(ref string s)
{
    f(s);   // first item
    g(s);   // second item, reached only when the first returned normally
}
2717 }
2718
/**
 * Check an entire XML document for well-formedness
 *
 * Params:
 *      s = the document to be checked, passed as a string
 *
 * Throws: CheckException if the document is not well formed
 *
 * CheckException's toString() method will yield the complete hierarchy of
 * parse failure (the XML equivalent of a stack trace), giving the line and
 * column number of every failure at every level.
 */
void check(string s) @safe pure
{
    // `s` is consumed as checking proceeds, so keep the whole document aside:
    // line and column numbers must be computed against the full text.
    // Completing the error against the remaining tail made the
    // "Junk found after document" error always report line 1, column 1.
    immutable string entire = s;

    try
    {
        checkChars(s);
        checkDocument(s);
        if (s.length != 0) throw new Err(s,"Junk found after document");
    }
    catch (Err e)
    {
        e.complete(entire);
        throw e;
    }
}
2745
// check() must reject a document whose end tag name differs from its start
// tag name (<genre> ... </genres> in the second book) and the resulting
// CheckException's text must contain the mismatch message.
@system pure unittest
{
    import std.string : indexOf;

    try
    {
        check(q"[<?xml version="1.0"?>
<catalog>
<book id="bk101">
<author>Gambardella, Matthew</author>
<title>XML Developer's Guide</title>
<genre>Computer</genre>
<price>44.95</price>
<publish_date>2000-10-01</publish_date>
<description>An in-depth look at creating applications
with XML.</description>
</book>
<book id="bk102">
<author>Ralls, Kim</author>
<title>Midnight Rain</title>
<genre>Fantasy</genres>
<price>5.95</price>
<publish_date>2000-12-16</publish_date>
<description>A former architect battles corporate zombies,
an evil sorceress, and her own childhood to become queen
of the world.</description>
</book>
<book id="bk103">
<author>Corets, Eva</author>
<title>Maeve Ascendant</title>
<genre>Fantasy</genre>
<price>5.95</price>
<publish_date>2000-11-17</publish_date>
<description>After the collapse of a nanotechnology
society in England, the young survivors lay the
foundation for a new society.</description>
</book>
</catalog>
]");
        // Reaching this line means check() accepted the malformed document.
        assert(false);
    }
    catch (CheckException e)
    {
        // The message may sit anywhere in the rendered failure hierarchy.
        auto n = e.toString().indexOf("end tag name \"genres\" differs"~
                                      " from start tag name \"genre\"");
        assert(n != -1);
    }
}
2794
// A well-formed document containing a comment between elements must pass
// check(); if it does not, the failure hierarchy becomes the assert message.
@system unittest
{
    string wellFormed = q"EOS
<?xml version="1.0"?>
<set>
<one>A</one>
<!-- comment -->
<two>B</two>
</set>
EOS";
    try
    {
        check(wellFormed);
    }
    catch (CheckException failure)
    {
        assert(0, failure.toString());
    }
}
2814
// Attribute values may contain the other kind of quote: apostrophes inside a
// double-quoted value and double quotes inside a single-quoted value.
@system unittest
{
    string test_xml = `<?xml version="1.0" encoding='UTF-8'?><r><stream:stream
xmlns:stream="http://etherx.'jabber'.org/streams"
xmlns="jabber:'client'" from='jid.pl' id="587a5767"
xml:lang="en" version="1.0" attr='a"b"c'>
</stream:stream></r>`;

    auto streamParser = new DocumentParser(test_xml);
    bool handlerRan = false;

    streamParser.onStartTag["stream:stream"] = (ElementParser p) {
        assert(p.tag.attr["xmlns"] == "jabber:'client'");
        assert(p.tag.attr["from"] == "jid.pl");
        assert(p.tag.attr["attr"] == "a\"b\"c");
        handlerRan = true;
    };

    streamParser.parse();

    // Guard against the handler silently never being invoked.
    assert(handlerRan);
}
2834
// Entity references in attribute values and in element text must be decoded:
// "&amp;" in the source document is seen as "&" by the handlers.
//
// The fixture has to spell the ampersand as "&amp;". A bare '&' followed by a
// space is not a valid reference, so such a document is not well formed and
// cannot produce the decoded text the asserts below expect.
@system unittest
{
    string s = q"EOS
<?xml version="1.0" encoding="utf-8"?> <Tests>
<Test thing="What &amp; Up">What &amp; Up Second</Test>
</Tests>
EOS";
    auto xml = new DocumentParser(s);

    xml.onStartTag["Test"] = (ElementParser p) {
        assert(p.tag.attr["thing"] == "What & Up");
    };

    xml.onEndTag["Test"] = (in Element e) {
        assert(e.text() == "What & Up Second");
    };
    xml.parse();
}
2853
// Round trip: an attribute value containing an escaped quote and an escaped
// '>' must be written back by Document.toString() exactly as it was read.
//
// The special characters have to appear as entity references in the fixture;
// a raw '"' inside a double-quoted value would end the value early and leave
// a malformed tag.
@system unittest
{
    string s = `<tag attr="&quot;value&gt;" />`;
    auto doc = new Document(s);
    assert(doc.toString() == s);
}
2860
/** The base class for exceptions thrown by this module */
class XMLException : Exception
{
    /// Creates the exception with `msg` as its message text.
    this(string msg) @safe pure
    {
        super(msg);
    }
}
2863
2864 // Other exceptions
2865
/// Thrown during Comment constructor
class CommentException : XMLException
{
    // Private constructor: instances can only be created inside this module.
    private this(string msg) @safe pure
    {
        super(msg);
    }
}
2869
/// Thrown during CData constructor
class CDataException : XMLException
{
    // Private constructor: instances can only be created inside this module.
    private this(string msg) @safe pure
    {
        super(msg);
    }
}
2873
/// Thrown during XMLInstruction constructor
class XIException : XMLException
{
    // Private constructor: instances can only be created inside this module.
    private this(string msg) @safe pure
    {
        super(msg);
    }
}
2877
/// Thrown during ProcessingInstruction constructor
class PIException : XMLException
{
    // Private constructor: instances can only be created inside this module.
    private this(string msg) @safe pure
    {
        super(msg);
    }
}
2881
/// Thrown during Text constructor
class TextException : XMLException
{
    // Private constructor: instances can only be created inside this module.
    private this(string msg) @safe pure
    {
        super(msg);
    }
}
2885
/// Thrown during decode()
class DecodeException : XMLException
{
    // Private constructor: instances can only be created inside this module.
    private this(string msg) @safe pure
    {
        super(msg);
    }
}
2889
/// Thrown if comparing with wrong type
class InvalidTypeException : XMLException
{
    // Private constructor: instances can only be created inside this module.
    private this(string msg) @safe pure
    {
        super(msg);
    }
}
2893
/// Thrown when parsing for Tags
class TagException : XMLException
{
    // Private constructor: instances can only be created inside this module.
    private this(string msg) @safe pure
    {
        super(msg);
    }
}
2897
/**
 * Thrown during check()
 */
class CheckException : XMLException
{
    CheckException err; /// Parent in hierarchy
    // Unparsed remainder of the input at the point of failure; complete()
    // turns its length into a line/column position.
    private string tail;
    /**
     * Name of production rule which failed to parse,
     * or specific error message
     */
    string msg;
    size_t line = 0; /// Line number at which parse failure occurred
    size_t column = 0; /// Column number at which parse failure occurred

    // Records where the failure happened (`tail`), what failed (`msg`) and,
    // optionally, the error that caused it (`err`). The position stays at
    // 0/0 until complete() is called.
    private this(string tail,string msg,Err err=null) @safe pure
    {
        super(null);
        this.err = err;
        this.msg = msg;
        this.tail = tail;
    }

    // Computes `line` and `column` (both 1-based; the column is counted in
    // code points) from the full document text, then does the same for the
    // linked `err`, if any.
    private void complete(string entire) @safe pure
    {
        import std.string : count, lastIndexOf;
        import std.utf : toUTF32;

        // Everything that precedes the failure point.
        immutable string consumed = entire[0 .. $ - tail.length];

        // Start of the line holding the failure: 0 when no newline precedes it.
        immutable ptrdiff_t lineStart = consumed.lastIndexOf('\n') + 1;

        line = consumed.count("\n") + 1;
        column = toUTF32(consumed[lineStart .. $]).length + 1;

        if (err !is null)
            err.complete(entire);
    }

    // Renders this failure as one line, prefixed by "Line L, column C: " once
    // the position has been computed, and preceded by the text of `err`.
    override string toString() const @safe pure
    {
        import std.format : format;

        string position;
        if (line != 0)
            position = format("Line %d, column %d: ",line,column);

        string own = position ~ msg ~ '\n';
        return (err is null) ? own : err.toString() ~ own;
    }
}
2946
// Module-private shorthand for CheckException, used by the check functions.
private alias Err = CheckException;
2948
2949 // Private helper functions
2950
2951 private
2952 {
// Downcast `o` to `T`, throwing InvalidTypeException when the dynamic cast
// yields null.
inout(T) toType(T)(inout Object o)
{
    T converted = cast(T)(o);
    if (converted !is null)
        return converted;

    throw new InvalidTypeException("Attempt to compare a "
        ~ T.stringof ~ " with an instance of another type");
}
2963
// Remove the first `n` code units from `s` and return them.
// Passing -1 (i.e. size_t.max, the "not found" value of a search) takes the
// whole string.
string chop(ref string s, size_t n) @safe pure nothrow
{
    immutable size_t cut = (n == size_t.max) ? s.length : n;
    string front = s[0 .. cut];
    s = s[cut .. $];
    return front;
}
2971
// Optional character: when `s` starts with `c`, consume it and return true;
// otherwise leave `s` untouched and return false.
bool optc(ref string s, char c) @safe pure nothrow
{
    if (s.length == 0 || s[0] != c)
        return false;

    s = s[1 .. $];
    return true;
}
2978
// Required character: consume `c` from the front of `s`, or throw a
// TagException (with an empty message) when `s` does not start with it.
void reqc(ref string s, char c) @safe pure
{
    immutable bool present = s.length != 0 && s[0] == c;
    if (!present)
        throw new TagException("");

    s = s[1 .. $];
}
2984
// Consume and return the first character of `s`, which must be one of the
// characters in `chars`; otherwise throw a TagException (empty message).
char requireOneOf(ref string s, string chars) @safe pure
{
    import std.string : indexOf;

    if (s.length != 0)
    {
        immutable char head = s[0];
        if (indexOf(chars, head) != -1)
        {
            s = s[1 .. $];
            return head;
        }
    }
    throw new TagException("");
}
2995
// Hash of `s` as computed by its TypeInfo, added to the seed `h` so that
// several values can be folded into one hash.
size_t hash(string s,size_t h=0) @trusted nothrow
{
    immutable size_t own = typeid(s).getHash(&s);
    return own + h;
}
3000
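// Character-class definitions from the XML specification.
// Each table is a flat, ascending list of inclusive (first, last) code-point
// pairs -- the layout that lookup() below searches.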
3002 immutable CharTable=[0x9,0x9,0xA,0xA,0xD,0xD,0x20,0xD7FF,0xE000,0xFFFD,
3003 0x10000,0x10FFFF];
3004 immutable BaseCharTable=[0x0041,0x005A,0x0061,0x007A,0x00C0,0x00D6,0x00D8,
3005 0x00F6,0x00F8,0x00FF,0x0100,0x0131,0x0134,0x013E,0x0141,0x0148,0x014A,
3006 0x017E,0x0180,0x01C3,0x01CD,0x01F0,0x01F4,0x01F5,0x01FA,0x0217,0x0250,
3007 0x02A8,0x02BB,0x02C1,0x0386,0x0386,0x0388,0x038A,0x038C,0x038C,0x038E,
3008 0x03A1,0x03A3,0x03CE,0x03D0,0x03D6,0x03DA,0x03DA,0x03DC,0x03DC,0x03DE,
3009 0x03DE,0x03E0,0x03E0,0x03E2,0x03F3,0x0401,0x040C,0x040E,0x044F,0x0451,
3010 0x045C,0x045E,0x0481,0x0490,0x04C4,0x04C7,0x04C8,0x04CB,0x04CC,0x04D0,
3011 0x04EB,0x04EE,0x04F5,0x04F8,0x04F9,0x0531,0x0556,0x0559,0x0559,0x0561,
3012 0x0586,0x05D0,0x05EA,0x05F0,0x05F2,0x0621,0x063A,0x0641,0x064A,0x0671,
3013 0x06B7,0x06BA,0x06BE,0x06C0,0x06CE,0x06D0,0x06D3,0x06D5,0x06D5,0x06E5,
3014 0x06E6,0x0905,0x0939,0x093D,0x093D,0x0958,0x0961,0x0985,0x098C,0x098F,
3015 0x0990,0x0993,0x09A8,0x09AA,0x09B0,0x09B2,0x09B2,0x09B6,0x09B9,0x09DC,
3016 0x09DD,0x09DF,0x09E1,0x09F0,0x09F1,0x0A05,0x0A0A,0x0A0F,0x0A10,0x0A13,
3017 0x0A28,0x0A2A,0x0A30,0x0A32,0x0A33,0x0A35,0x0A36,0x0A38,0x0A39,0x0A59,
3018 0x0A5C,0x0A5E,0x0A5E,0x0A72,0x0A74,0x0A85,0x0A8B,0x0A8D,0x0A8D,0x0A8F,
3019 0x0A91,0x0A93,0x0AA8,0x0AAA,0x0AB0,0x0AB2,0x0AB3,0x0AB5,0x0AB9,0x0ABD,
3020 0x0ABD,0x0AE0,0x0AE0,0x0B05,0x0B0C,0x0B0F,0x0B10,0x0B13,0x0B28,0x0B2A,
3021 0x0B30,0x0B32,0x0B33,0x0B36,0x0B39,0x0B3D,0x0B3D,0x0B5C,0x0B5D,0x0B5F,
3022 0x0B61,0x0B85,0x0B8A,0x0B8E,0x0B90,0x0B92,0x0B95,0x0B99,0x0B9A,0x0B9C,
3023 0x0B9C,0x0B9E,0x0B9F,0x0BA3,0x0BA4,0x0BA8,0x0BAA,0x0BAE,0x0BB5,0x0BB7,
3024 0x0BB9,0x0C05,0x0C0C,0x0C0E,0x0C10,0x0C12,0x0C28,0x0C2A,0x0C33,0x0C35,
3025 0x0C39,0x0C60,0x0C61,0x0C85,0x0C8C,0x0C8E,0x0C90,0x0C92,0x0CA8,0x0CAA,
3026 0x0CB3,0x0CB5,0x0CB9,0x0CDE,0x0CDE,0x0CE0,0x0CE1,0x0D05,0x0D0C,0x0D0E,
3027 0x0D10,0x0D12,0x0D28,0x0D2A,0x0D39,0x0D60,0x0D61,0x0E01,0x0E2E,0x0E30,
3028 0x0E30,0x0E32,0x0E33,0x0E40,0x0E45,0x0E81,0x0E82,0x0E84,0x0E84,0x0E87,
3029 0x0E88,0x0E8A,0x0E8A,0x0E8D,0x0E8D,0x0E94,0x0E97,0x0E99,0x0E9F,0x0EA1,
3030 0x0EA3,0x0EA5,0x0EA5,0x0EA7,0x0EA7,0x0EAA,0x0EAB,0x0EAD,0x0EAE,0x0EB0,
3031 0x0EB0,0x0EB2,0x0EB3,0x0EBD,0x0EBD,0x0EC0,0x0EC4,0x0F40,0x0F47,0x0F49,
3032 0x0F69,0x10A0,0x10C5,0x10D0,0x10F6,0x1100,0x1100,0x1102,0x1103,0x1105,
3033 0x1107,0x1109,0x1109,0x110B,0x110C,0x110E,0x1112,0x113C,0x113C,0x113E,
3034 0x113E,0x1140,0x1140,0x114C,0x114C,0x114E,0x114E,0x1150,0x1150,0x1154,
3035 0x1155,0x1159,0x1159,0x115F,0x1161,0x1163,0x1163,0x1165,0x1165,0x1167,
3036 0x1167,0x1169,0x1169,0x116D,0x116E,0x1172,0x1173,0x1175,0x1175,0x119E,
3037 0x119E,0x11A8,0x11A8,0x11AB,0x11AB,0x11AE,0x11AF,0x11B7,0x11B8,0x11BA,
3038 0x11BA,0x11BC,0x11C2,0x11EB,0x11EB,0x11F0,0x11F0,0x11F9,0x11F9,0x1E00,
3039 0x1E9B,0x1EA0,0x1EF9,0x1F00,0x1F15,0x1F18,0x1F1D,0x1F20,0x1F45,0x1F48,
3040 0x1F4D,0x1F50,0x1F57,0x1F59,0x1F59,0x1F5B,0x1F5B,0x1F5D,0x1F5D,0x1F5F,
3041 0x1F7D,0x1F80,0x1FB4,0x1FB6,0x1FBC,0x1FBE,0x1FBE,0x1FC2,0x1FC4,0x1FC6,
3042 0x1FCC,0x1FD0,0x1FD3,0x1FD6,0x1FDB,0x1FE0,0x1FEC,0x1FF2,0x1FF4,0x1FF6,
3043 0x1FFC,0x2126,0x2126,0x212A,0x212B,0x212E,0x212E,0x2180,0x2182,0x3041,
3044 0x3094,0x30A1,0x30FA,0x3105,0x312C,0xAC00,0xD7A3];
3045 immutable IdeographicTable=[0x3007,0x3007,0x3021,0x3029,0x4E00,0x9FA5];
3046 immutable CombiningCharTable=[0x0300,0x0345,0x0360,0x0361,0x0483,0x0486,
3047 0x0591,0x05A1,0x05A3,0x05B9,0x05BB,0x05BD,0x05BF,0x05BF,0x05C1,0x05C2,
3048 0x05C4,0x05C4,0x064B,0x0652,0x0670,0x0670,0x06D6,0x06DC,0x06DD,0x06DF,
3049 0x06E0,0x06E4,0x06E7,0x06E8,0x06EA,0x06ED,0x0901,0x0903,0x093C,0x093C,
3050 0x093E,0x094C,0x094D,0x094D,0x0951,0x0954,0x0962,0x0963,0x0981,0x0983,
3051 0x09BC,0x09BC,0x09BE,0x09BE,0x09BF,0x09BF,0x09C0,0x09C4,0x09C7,0x09C8,
3052 0x09CB,0x09CD,0x09D7,0x09D7,0x09E2,0x09E3,0x0A02,0x0A02,0x0A3C,0x0A3C,
3053 0x0A3E,0x0A3E,0x0A3F,0x0A3F,0x0A40,0x0A42,0x0A47,0x0A48,0x0A4B,0x0A4D,
3054 0x0A70,0x0A71,0x0A81,0x0A83,0x0ABC,0x0ABC,0x0ABE,0x0AC5,0x0AC7,0x0AC9,
3055 0x0ACB,0x0ACD,0x0B01,0x0B03,0x0B3C,0x0B3C,0x0B3E,0x0B43,0x0B47,0x0B48,
3056 0x0B4B,0x0B4D,0x0B56,0x0B57,0x0B82,0x0B83,0x0BBE,0x0BC2,0x0BC6,0x0BC8,
3057 0x0BCA,0x0BCD,0x0BD7,0x0BD7,0x0C01,0x0C03,0x0C3E,0x0C44,0x0C46,0x0C48,
3058 0x0C4A,0x0C4D,0x0C55,0x0C56,0x0C82,0x0C83,0x0CBE,0x0CC4,0x0CC6,0x0CC8,
3059 0x0CCA,0x0CCD,0x0CD5,0x0CD6,0x0D02,0x0D03,0x0D3E,0x0D43,0x0D46,0x0D48,
3060 0x0D4A,0x0D4D,0x0D57,0x0D57,0x0E31,0x0E31,0x0E34,0x0E3A,0x0E47,0x0E4E,
3061 0x0EB1,0x0EB1,0x0EB4,0x0EB9,0x0EBB,0x0EBC,0x0EC8,0x0ECD,0x0F18,0x0F19,
3062 0x0F35,0x0F35,0x0F37,0x0F37,0x0F39,0x0F39,0x0F3E,0x0F3E,0x0F3F,0x0F3F,
3063 0x0F71,0x0F84,0x0F86,0x0F8B,0x0F90,0x0F95,0x0F97,0x0F97,0x0F99,0x0FAD,
3064 0x0FB1,0x0FB7,0x0FB9,0x0FB9,0x20D0,0x20DC,0x20E1,0x20E1,0x302A,0x302F,
3065 0x3099,0x3099,0x309A,0x309A];
3066 immutable DigitTable=[0x0030,0x0039,0x0660,0x0669,0x06F0,0x06F9,0x0966,
3067 0x096F,0x09E6,0x09EF,0x0A66,0x0A6F,0x0AE6,0x0AEF,0x0B66,0x0B6F,0x0BE7,
3068 0x0BEF,0x0C66,0x0C6F,0x0CE6,0x0CEF,0x0D66,0x0D6F,0x0E50,0x0E59,0x0ED0,
3069 0x0ED9,0x0F20,0x0F29];
3070 immutable ExtenderTable=[0x00B7,0x00B7,0x02D0,0x02D0,0x02D1,0x02D1,0x0387,
3071 0x0387,0x0640,0x0640,0x0E46,0x0E46,0x0EC6,0x0EC6,0x3005,0x3005,0x3031,
3072 0x3035,0x309D,0x309E,0x30FC,0x30FE];
3073
// Binary search over a table of inclusive (first, last) pairs stored flat:
// even indices hold range starts, odd indices the matching range ends.
// Returns true when `c` lies inside one of the ranges.
bool lookup(const(int)[] table, int c) @safe @nogc nothrow pure
{
    const(int)[] span = table;
    while (span.length != 0)
    {
        // Index of the middle pair's first element (always even).
        immutable pivot = (span.length >> 1) & ~1;

        if (c >= span[pivot])
        {
            if (c <= span[pivot + 1])
                return true;                    // inside the middle range
            span = span[pivot + 2 .. $];        // continue in the upper half
        }
        else
        {
            span = span[0 .. pivot];            // continue in the lower half
        }
    }
    return false;
}
3091
// Short printable preview of `s`: code units below 0x20 or above 0x7F are
// shown as '.', and once 40 characters have been produced "___" is appended
// and the rest of the input is ignored.
string startOf(string s) @safe nothrow pure
{
    string preview;
    foreach (char unit; s)
    {
        if (unit >= 0x20 && unit <= 0x7F)
            preview ~= unit;
        else
            preview ~= '.';

        if (preview.length >= 40)
        {
            preview ~= "___";
            break;
        }
    }
    return preview;
}
3102
// Abort the current operation by throwing an XMLException whose message is
// `s` (null by default).
void exit(string s=null)
{
    auto failure = new XMLException(s);
    throw failure;
}
3107 }
3108