1 // Written in the D programming language.
2
3 /**
4 $(RED Warning: This module is considered out-dated and not up to Phobos'
5 current standards. It will be removed from Phobos in 2.101.0.
6 If you still need it, go to $(LINK https://github.com/DigitalMars/undeaD))
7 */
8
9 /*
10 Classes and functions for creating and parsing XML
11
12 The basic architecture of this module is that there are standalone functions,
13 classes for constructing an XML document from scratch (Tag, Element and
14 Document), and also classes for parsing a pre-existing XML file (ElementParser
15 and DocumentParser). The parsing classes <i>may</i> be used to build a
16 Document, but that is not their primary purpose. The handling capabilities of
17 DocumentParser and ElementParser are sufficiently customizable that you can
18 make them do pretty much whatever you want.
19
20 Example: This example creates a DOM (Document Object Model) tree
21 from an XML file.
22 ------------------------------------------------------------------------------
23 import std.xml;
24 import std.stdio;
25 import std.string;
26 import std.file;
27
28 // books.xml is used in various samples throughout the Microsoft XML Core
29 // Services (MSXML) SDK.
30 //
31 // See http://msdn2.microsoft.com/en-us/library/ms762271(VS.85).aspx
32
33 void main()
34 {
35 string s = cast(string) std.file.read("books.xml");
36
37 // Check for well-formedness
38 check(s);
39
40 // Make a DOM tree
41 auto doc = new Document(s);
42
43 // Plain-print it
44 writeln(doc);
45 }
46 ------------------------------------------------------------------------------
47
48 Example: This example does much the same thing, except that the file is
49 deconstructed and reconstructed by hand. This is more work, but the
50 techniques involved offer vastly more power.
51 ------------------------------------------------------------------------------
import std.xml;
import std.stdio;
import std.string;
import std.file;
55
56 struct Book
57 {
58 string id;
59 string author;
60 string title;
61 string genre;
62 string price;
63 string pubDate;
64 string description;
65 }
66
67 void main()
68 {
69 string s = cast(string) std.file.read("books.xml");
70
71 // Check for well-formedness
72 check(s);
73
74 // Take it apart
75 Book[] books;
76
77 auto xml = new DocumentParser(s);
78 xml.onStartTag["book"] = (ElementParser xml)
79 {
80 Book book;
81 book.id = xml.tag.attr["id"];
82
83 xml.onEndTag["author"] = (in Element e) { book.author = e.text(); };
84 xml.onEndTag["title"] = (in Element e) { book.title = e.text(); };
85 xml.onEndTag["genre"] = (in Element e) { book.genre = e.text(); };
86 xml.onEndTag["price"] = (in Element e) { book.price = e.text(); };
87 xml.onEndTag["publish-date"] = (in Element e) { book.pubDate = e.text(); };
88 xml.onEndTag["description"] = (in Element e) { book.description = e.text(); };
89
90 xml.parse();
91
92 books ~= book;
93 };
94 xml.parse();
95
96 // Put it back together again;
97 auto doc = new Document(new Tag("catalog"));
98 foreach (book;books)
99 {
100 auto element = new Element("book");
101 element.tag.attr["id"] = book.id;
102
103 element ~= new Element("author", book.author);
104 element ~= new Element("title", book.title);
105 element ~= new Element("genre", book.genre);
106 element ~= new Element("price", book.price);
107 element ~= new Element("publish-date",book.pubDate);
108 element ~= new Element("description", book.description);
109
110 doc ~= element;
111 }
112
113 // Pretty-print it
114 writefln(join(doc.pretty(3),"\n"));
115 }
116 -------------------------------------------------------------------------------
117 Copyright: Copyright Janice Caron 2008 - 2009.
118 License: $(HTTP www.boost.org/LICENSE_1_0.txt, Boost License 1.0).
119 Authors: Janice Caron
120 Source: $(PHOBOSSRC std/xml.d)
121 */
122 /*
123 Copyright Janice Caron 2008 - 2009.
124 Distributed under the Boost Software License, Version 1.0.
125 (See accompanying file LICENSE_1_0.txt or copy at
126 http://www.boost.org/LICENSE_1_0.txt)
127 */
128 deprecated("Will be removed from Phobos in 2.101.0. If you still need it, go to https://github.com/DigitalMars/undeaD")
129 module std.xml;
130
// Opening delimiter of an XML CDATA section.
enum string cdata = "<![CDATA[";
132
/*
 * Tells whether a code point is a legal XML character.
 *
 * Accepted are tab, line feed, carriage return, U+0020 .. U+D7FF,
 * U+E000 .. U+FFFD and U+10000 .. U+10FFFF.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: true if c is a character according to the XML standard
 */
bool isChar(dchar c) @safe @nogc pure nothrow // rule 2
{
    // Below U+0020 only tab, line feed and carriage return are allowed.
    if (c < 0x20)
        return c == 0x9 || c == 0xA || c == 0xD;

    // Everything from U+0020 up to the last code point before the
    // surrogate block is allowed.
    if (c <= 0xD7FF)
        return true;

    // Surrogates (U+D800 .. U+DFFF) and values beyond U+10FFFF are rejected.
    if (c < 0xE000 || c > 0x10FFFF)
        return false;

    // Within the remaining range only U+FFFE and U+FFFF are excluded; the
    // mask folds both of them onto 0xFFFE.
    return (c & 0x1FFFFE) != 0xFFFE;
}

@safe @nogc nothrow pure unittest
{
    // Boundary values on either side of every accepted range.
    static immutable uint[] accepted =
        [0x9, 0xA, 0xD, 0x20, 'J', 0xD7FF, 0xE000, 0xFFFD, 0x10000, 0x10FFFF];
    static immutable uint[] rejected =
        [0x8, 0xB, 0xC, 0xE, 0x1F, 0xD800, 0xDFFF, 0xFFFE, 0xFFFF, 0x110000];

    foreach (code; accepted)
        assert( isChar(cast(dchar) code));
    foreach (code; rejected)
        assert(!isChar(cast(dchar) code));

    // Optional exhaustive comparison against the table-driven lookup.
    debug (stdxml_TestHardcodedChecks)
    {
        foreach (c; 0 .. dchar.max + 1)
            assert(isChar(c) == lookup(CharTable, c));
    }
}
194
/*
 * Tells whether a character is XML whitespace.
 *
 * XML recognises exactly four whitespace characters: space, tab, linefeed
 * and carriage return.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: true if c is one of the four XML whitespace characters
 */
bool isSpace(dchar c) @safe @nogc pure nothrow
{
    switch (c)
    {
        case '\u0020': // space
        case '\u0009': // tab
        case '\u000A': // linefeed
        case '\u000D': // carriage return
            return true;
        default:
            return false;
    }
}
210
/*
 * Tells whether a character is a digit according to the XML standard.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: true if c is an XML digit
 */
bool isDigit(dchar c) @safe @nogc pure nothrow
{
    // ASCII '0' .. '9' is answered directly; anything else is looked up in
    // DigitTable.
    immutable bool asciiDigit = 0x0030 <= c && c <= 0x0039;
    return asciiDigit || lookup(DigitTable, c);
}

@safe @nogc nothrow pure unittest
{
    // Optional exhaustive check that the fast path agrees with the table.
    debug (stdxml_TestHardcodedChecks)
    {
        foreach (c; 0 .. dchar.max + 1)
            assert(isDigit(c) == lookup(DigitTable, c));
    }
}
235
/*
 * Tells whether a character is a letter according to the XML standard.
 *
 * A letter is either an ideographic character or a base character.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: true if c is an XML letter
 */
bool isLetter(dchar c) @safe @nogc nothrow pure // rule 84
{
    // The ideographic test is tried first; the base-character test runs
    // only when it fails.
    if (isIdeographic(c))
        return true;
    return isBaseChar(c);
}
248
/*
 * Tells whether a character is an ideographic character according to the
 * XML standard.
 *
 * Accepted are U+3007, U+3021 .. U+3029 and U+4E00 .. U+9FA5.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: true if c is an XML ideographic character
 */
bool isIdeographic(dchar c) @safe @nogc nothrow pure
{
    return c == 0x3007
        || (0x3021 <= c && c <= 0x3029)
        || (0x4E00 <= c && c <= 0x9FA5);
}

@safe @nogc nothrow pure unittest
{
    // The single code point plus both ends of each accepted range.
    static immutable dchar[] samples =
        ['\u4E00', '\u9FA5', '\u3007', '\u3021', '\u3029'];
    foreach (ch; samples)
        assert(isIdeographic(ch));

    // Optional exhaustive comparison against the table-driven lookup.
    debug (stdxml_TestHardcodedChecks)
    {
        foreach (c; 0 .. dchar.max + 1)
            assert(isIdeographic(c) == lookup(IdeographicTable, c));
    }
}
283
/*
 * Tells whether a character is a base character according to the XML
 * standard.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: the result of looking c up in BaseCharTable
 */
bool isBaseChar(dchar c) @safe @nogc nothrow pure
{
    immutable bool listed = lookup(BaseCharTable, c);
    return listed;
}
297
/*
 * Tells whether a character is a combining character according to the XML
 * standard.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: the result of looking c up in CombiningCharTable
 */
bool isCombiningChar(dchar c) @safe @nogc nothrow pure
{
    immutable bool listed = lookup(CombiningCharTable, c);
    return listed;
}
311
/*
 * Tells whether a character is an extender according to the XML standard.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: the result of looking c up in ExtenderTable
 */
bool isExtender(dchar c) @safe @nogc nothrow pure
{
    immutable bool listed = lookup(ExtenderTable, c);
    return listed;
}
324
/*
 * Encodes a string by replacing all characters which need to be escaped with
 * appropriate predefined XML entities.
 *
 * encode() escapes certain characters (ampersand, quote, apostrophe, less-than
 * and greater-than), and similarly, decode() unescapes them. These functions
 * are provided for convenience only. You do not need to use them when using
 * the std.xml classes, because then all the encoding and decoding will be done
 * for you automatically.
 *
 * If the string is not modified, the original will be returned.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *      s = The string to be encoded
 *
 * Returns: The encoded string
 *
 * Example:
 * --------------
 * writefln(encode("a > b")); // writes "a &gt; b"
 * --------------
 */
S encode(S)(S s)
{
    import std.array : appender;

    string r;       // entity that replaces the current character
    size_t lastI;   // index just past the previously replaced character
    auto result = appender!S();

    foreach (i, c; s)
    {
        switch (c)
        {
        case '&':  r = "&amp;";  break;
        case '"':  r = "&quot;"; break;
        case '\'': r = "&apos;"; break;
        case '<':  r = "&lt;";   break;
        case '>':  r = "&gt;";   break;
        default: continue; // ordinary character: copied later as part of a run
        }
        // Copy the untouched run since the last replacement, then the entity.
        result.put(s[lastI .. i]);
        result.put(r);
        lastI = i + 1;
    }

    // Nothing was ever appended, so nothing needed escaping: hand back the
    // caller's own string rather than a copy.
    if (!result.data.ptr) return s;
    result.put(s[lastI .. $]);
    return result.data;
}

@safe pure unittest
{
    auto s = "hello";
    assert(encode(s) is s);
    assert(encode("a > b") == "a &gt; b", encode("a > b"));
    assert(encode("a < b") == "a &lt; b");
    assert(encode("don't") == "don&apos;t");
    assert(encode("\"hi\"") == "&quot;hi&quot;", encode("\"hi\""));
    assert(encode("cat & dog") == "cat &amp; dog");
}
389
/*
 * Mode to use for decoding.
 *
 * $(DDOC_ENUM_MEMBERS NONE) Do not decode
 * $(DDOC_ENUM_MEMBERS LOOSE) Decode, but ignore errors
 * $(DDOC_ENUM_MEMBERS STRICT) Decode, and throw exception on error
 */
enum DecodeMode
{
    NONE,   // leave the input untouched
    LOOSE,  // decode what can be decoded, keep the rest verbatim
    STRICT  // decode, and raise an exception on the first error
}
401
/*
 * Decodes a string by unescaping all predefined XML entities.
 *
 * encode() escapes certain characters (ampersand, quote, apostrophe, less-than
 * and greater-than), and similarly, decode() unescapes them. These functions
 * are provided for convenience only. You do not need to use them when using
 * the std.xml classes, because then all the encoding and decoding will be done
 * for you automatically.
 *
 * This function decodes the entities &amp;amp;, &amp;quot;, &amp;apos;,
 * &amp;lt; and &amp;gt;,
 * as well as decimal and hexadecimal entities such as &amp;#x20AC;
 *
 * If the string does not contain an ampersand, the original will be returned.
 *
 * Note that the "mode" parameter can be one of DecodeMode.NONE (do not
 * decode), DecodeMode.LOOSE (decode, but ignore errors), or DecodeMode.STRICT
 * (decode, and throw a DecodeException in the event of an error).
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *      s = The string to be decoded
 *      mode = (optional) Mode to use for decoding. (Defaults to LOOSE).
 *
 * Throws: DecodeException if mode == DecodeMode.STRICT and decode fails
 *
 * Returns: The decoded string
 *
 * Example:
 * --------------
 * writefln(decode("a &gt; b")); // writes "a > b"
 * --------------
 */
string decode(string s, DecodeMode mode=DecodeMode.LOOSE) @safe pure
{
    import std.algorithm.searching : startsWith;

    if (mode == DecodeMode.NONE) return s;

    // buffer stays empty until the first ampersand is met, so that input
    // without any ampersand can be returned without copying.
    string buffer;
    foreach (ref i; 0 .. s.length)
    {
        char c = s[i];
        if (c != '&')
        {
            if (buffer.length != 0) buffer ~= c;
        }
        else
        {
            if (buffer.length == 0)
            {
                // First ampersand: start the output with everything before it.
                buffer = s[0 .. i].dup;
            }
            if (startsWith(s[i..$],"&#"))
            {
                try
                {
                    dchar d;
                    string t = s[i..$];
                    checkCharRef(t, d);
                    char[4] temp;
                    import std.utf : encode;
                    buffer ~= temp[0 .. encode(temp, d)];
                    // t is what remains after the reference; step i onto the
                    // reference's last character (the loop then advances it).
                    i = s.length - t.length - 1;
                }
                catch (Err e)
                {
                    if (mode == DecodeMode.STRICT)
                        throw new DecodeException("Unescaped &");
                    buffer ~= '&';
                }
            }
            // Each increment is the entity length minus one, because the
            // loop itself advances i by one more.
            else if (startsWith(s[i..$],"&amp;" )) { buffer ~= '&';  i += 4; }
            else if (startsWith(s[i..$],"&quot;")) { buffer ~= '"';  i += 5; }
            else if (startsWith(s[i..$],"&apos;")) { buffer ~= '\''; i += 5; }
            else if (startsWith(s[i..$],"&lt;"  )) { buffer ~= '<';  i += 3; }
            else if (startsWith(s[i..$],"&gt;"  )) { buffer ~= '>';  i += 3; }
            else
            {
                if (mode == DecodeMode.STRICT)
                    throw new DecodeException("Unescaped &");
                buffer ~= '&';
            }
        }
    }
    return (buffer.length == 0) ? s : buffer;
}

@safe pure unittest
{
    // Asserts that strict decoding of s raises a DecodeException.
    void assertNot(string s) pure
    {
        bool b = false;
        try { decode(s,DecodeMode.STRICT); }
        catch (DecodeException e) { b = true; }
        assert(b,s);
    }

    // Assert that things that should work, do
    auto s = "hello";
    assert(decode(s,                DecodeMode.STRICT) is s);
    assert(decode("a &gt; b",       DecodeMode.STRICT) == "a > b");
    assert(decode("a &lt; b",       DecodeMode.STRICT) == "a < b");
    assert(decode("don&apos;t",     DecodeMode.STRICT) == "don't");
    assert(decode("&quot;hi&quot;", DecodeMode.STRICT) == "\"hi\"");
    assert(decode("cat &amp; dog",  DecodeMode.STRICT) == "cat & dog");
    assert(decode("&#42;",          DecodeMode.STRICT) == "*");
    assert(decode("&#x2A;",         DecodeMode.STRICT) == "*");
    assert(decode("cat & dog",      DecodeMode.LOOSE) == "cat & dog");
    assert(decode("a &gt b",        DecodeMode.LOOSE) == "a &gt b");
    assert(decode("&#;",            DecodeMode.LOOSE) == "&#;");
    assert(decode("&#x;",           DecodeMode.LOOSE) == "&#x;");
    assert(decode("&#2G;",          DecodeMode.LOOSE) == "&#2G;");
    assert(decode("&#x2G;",         DecodeMode.LOOSE) == "&#x2G;");

    // Assert that things that shouldn't work, don't
    assertNot("cat & dog");
    assertNot("a &gt b");
    assertNot("&#;");
    assertNot("&#x;");
    assertNot("&#2G;");
    assertNot("&#x2G;");
}
526
/*
 * Class representing an XML document.
 *
 * A Document is an Element (the root of the tree) plus the raw text found
 * before the root element (the prolog) and after it (the epilog).
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 */
class Document : Element
{
    /*
     * Contains all text which occurs before the root element.
     * Defaults to <?xml version="1.0"?>
     */
    string prolog = "<?xml version=\"1.0\"?>";

    /*
     * Contains all text which occurs after the root element.
     * Defaults to the empty string
     */
    string epilog;

    /*
     * Constructs a Document by parsing XML text.
     *
     * This function creates a complete DOM (Document Object Model) tree.
     *
     * The input to this function MUST be valid XML.
     * This is enforced by DocumentParser's in contract.
     *
     * Params:
     *      s = the complete XML text.
     */
    this(string s)
    in
    {
        assert(s.length != 0);
    }
    do
    {
        auto parser = new DocumentParser(s);
        // Offset of the root tag inside s. This assumes the tag's text is a
        // slice of s, which the pointer subtraction relies on.
        immutable prologLength = parser.tag.tagString.ptr - s.ptr;

        this(parser.tag);
        prolog = s[0 .. prologLength];
        parse(parser);
        // Whatever the parser has not consumed becomes the epilog.
        epilog = *parser.s;
    }

    /*
     * Constructs a Document from a Tag.
     *
     * Params:
     *      tag = the start tag of the document.
     */
    this(const(Tag) tag)
    {
        super(tag);
    }

    const
    {
        /*
         * Compares two Documents for equality
         *
         * Two documents are equal when their prologs, their element
         * contents and their epilogs are all equal.
         *
         * Example:
         * --------------
         * Document d1,d2;
         * if (d1 == d2) { }
         * --------------
         */
        override bool opEquals(scope const Object o) const
        {
            const other = toType!(const Document)(o);
            if (prolog != other.prolog)
                return false;
            if (!(cast(const) this).Element.opEquals(cast(const) other))
                return false;
            return epilog == other.epilog;
        }

        /*
         * Compares two Documents
         *
         * The prolog is compared first, then the element contents, then the
         * epilog.
         *
         * You should rarely need to call this function. It exists so that
         * Documents can be used as associative array keys.
         *
         * Example:
         * --------------
         * Document d1,d2;
         * if (d1 < d2) { }
         * --------------
         */
        override int opCmp(scope const Object o) scope const
        {
            const other = toType!(const Document)(o);

            if (prolog != other.prolog)
                return prolog < other.prolog ? -1 : 1;

            immutable int byElement = this.Element.opCmp(other);
            if (byElement != 0)
                return byElement;

            if (epilog != other.epilog)
                return epilog < other.epilog ? -1 : 1;

            return 0;
        }

        /*
         * Returns the hash of a Document
         *
         * The element hash is combined with the epilog and then the prolog.
         *
         * You should rarely need to call this function. It exists so that
         * Documents can be used as associative array keys.
         */
        override size_t toHash() scope const @trusted
        {
            const elementHash = (cast() this).Element.toHash();
            const withEpilog = hash(epilog, elementHash);
            return hash(prolog, withEpilog);
        }

        /*
         * Returns the string representation of a Document. (That is, the
         * complete XML of a document).
         */
        override string toString() scope const @safe
        {
            const rootXml = super.toString();
            return prolog ~ rootXml ~ epilog;
        }
    }
}

@system unittest
{
    // https://issues.dlang.org/show_bug.cgi?id=14966
    string source = `<?xml version="1.0" encoding="UTF-8"?><foo></foo>`;

    // Two documents parsed from the same text are equal, are not ordered
    // before one another, and address the same associative array slot.
    auto first = new Document(source);
    auto second = new Document(source);
    assert(first == second);
    assert(!(first < second));

    int[Document] table;
    table[first] = 1;
    assert(table[second] == 1);

    // Appending a child to one of them makes it compare greater.
    second ~= new Element("b");
    assert(first < second);
    assert(second > first);
}
666
/*
 * Class representing an XML element.
 *
 * An element owns its start tag and an ordered list of child items. Each
 * child is additionally recorded in a per-kind list (texts, cdatas,
 * comments, pis or elements).
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 */
class Element : Item
{
    Tag tag; // The start tag of the element
    Item[] items; // The element's items
    Text[] texts; // The element's text items
    CData[] cdatas; // The element's CData items
    Comment[] comments; // The element's comments
    ProcessingInstruction[] pis; // The element's processing instructions
    Element[] elements; // The element's child elements

    /*
     * Constructs an Element given a name and a string to be used as a Text
     * interior.
     *
     * Params:
     *      name = the name of the element.
     *      interior = (optional) the string interior.
     *
     * Example:
     * -------------------------------------------------------
     * auto element = new Element("title","Serenity")
     *     // constructs the element <title>Serenity</title>
     * -------------------------------------------------------
     */
    this(string name, string interior=null) @safe pure
    {
        this(new Tag(name));
        // An empty interior adds no Text child.
        if (interior.length > 0)
            opOpAssign!"~"(new Text(interior));
    }

    /*
     * Constructs an Element from a Tag.
     *
     * The tag is copied (name, attributes and source text); the copy starts
     * out with type EMPTY.
     *
     * Params:
     *      tag_ = the start or empty tag of the element.
     */
    this(const(Tag) tag_) @safe pure
    {
        auto copy = new Tag(tag_.name);
        copy.type = TagType.EMPTY;
        foreach (key, value; tag_.attr)
            copy.attr[key] = value;
        copy.tagString = tag_.tagString;
        this.tag = copy;
    }

    /*
     * Append a text item to the interior of this element
     *
     * Params:
     *      item = the item you wish to append.
     *
     * Example:
     * --------------
     * Element element;
     * element ~= new Text("hello");
     * --------------
     */
    void opOpAssign(string op)(Text item) @safe pure
    if (op == "~")
    {
        // Record in the text list, then in the ordered list of all items.
        texts ~= item;
        appendItem(item);
    }

    /*
     * Append a CData item to the interior of this element
     *
     * Params:
     *      item = the item you wish to append.
     *
     * Example:
     * --------------
     * Element element;
     * element ~= new CData("hello");
     * --------------
     */
    void opOpAssign(string op)(CData item) @safe pure
    if (op == "~")
    {
        // Record in the CData list, then in the ordered list of all items.
        cdatas ~= item;
        appendItem(item);
    }

    /*
     * Append a comment to the interior of this element
     *
     * Params:
     *      item = the item you wish to append.
     *
     * Example:
     * --------------
     * Element element;
     * element ~= new Comment("hello");
     * --------------
     */
    void opOpAssign(string op)(Comment item) @safe pure
    if (op == "~")
    {
        // Record in the comment list, then in the ordered list of all items.
        comments ~= item;
        appendItem(item);
    }

    /*
     * Append a processing instruction to the interior of this element
     *
     * Params:
     *      item = the item you wish to append.
     *
     * Example:
     * --------------
     * Element element;
     * element ~= new ProcessingInstruction("hello");
     * --------------
     */
    void opOpAssign(string op)(ProcessingInstruction item) @safe pure
    if (op == "~")
    {
        // Record in the PI list, then in the ordered list of all items.
        pis ~= item;
        appendItem(item);
    }

    /*
     * Append a complete element to the interior of this element
     *
     * Params:
     *      item = the item you wish to append.
     *
     * Example:
     * --------------
     * Element element;
     * Element other = new Element("br");
     * element ~= other;
     *    // appends element representing <br />
     * --------------
     */
    void opOpAssign(string op)(Element item) @safe pure
    if (op == "~")
    {
        // Record in the child-element list, then in the ordered list of all
        // items.
        elements ~= item;
        appendItem(item);
    }

    // Adds item to the ordered item list and promotes the tag from EMPTY to
    // START as soon as a child that is not empty XML is appended.
    private void appendItem(Item item) @safe pure
    {
        items ~= item;
        immutable bool stillEmpty = tag.type == TagType.EMPTY;
        if (stillEmpty && !item.isEmptyXML)
            tag.type = TagType.START;
    }

    // Fills this element from a parser: installs handlers that append every
    // text, CDATA, comment, processing instruction and (recursively parsed)
    // child element, then runs the parser.
    private void parse(ElementParser xml)
    {
        xml.onText = (string content) { opOpAssign!"~"(new Text(content)); };
        xml.onCData = (string content) { opOpAssign!"~"(new CData(content)); };
        xml.onComment = (string content) { opOpAssign!"~"(new Comment(content)); };
        xml.onPI = (string content) { opOpAssign!"~"(new ProcessingInstruction(content)); };

        // The null key is the handler used for this element's child start
        // tags here; each child is built and parsed before being appended.
        xml.onStartTag[null] = (ElementParser childParser)
        {
            auto child = new Element(childParser.tag);
            child.parse(childParser);
            opOpAssign!"~"(child);
        };

        xml.parse();
    }

    /*
     * Compares two Elements for equality
     *
     * Two elements are equal when they hold the same number of items and
     * the items are pairwise equal.
     * NOTE(review): the tag is not compared here although toHash includes
     * it - confirm whether that is intended.
     *
     * Example:
     * --------------
     * Element e1,e2;
     * if (e1 == e2) { }
     * --------------
     */
    override bool opEquals(scope const Object o) const
    {
        const other = toType!(const Element)(o);
        if (items.length != other.items.length)
            return false;
        foreach (idx, item; items)
        {
            if (!item.opEquals(other.items[idx]))
                return false;
        }
        return true;
    }

    /*
     * Compares two Elements
     *
     * Items are compared pairwise; the first unequal pair decides. If one
     * item list is a prefix of the other, the shorter one orders first.
     *
     * You should rarely need to call this function. It exists so that Elements
     * can be used as associative array keys.
     *
     * Example:
     * --------------
     * Element e1,e2;
     * if (e1 < e2) { }
     * --------------
     */
    override int opCmp(scope const Object o) @safe const
    {
        const other = toType!(const Element)(o);
        immutable size_t mine = items.length;
        immutable size_t theirs = other.items.length;

        size_t idx = 0;
        while (idx < mine && idx < theirs)
        {
            if (!items[idx].opEquals(other.items[idx]))
                return items[idx].opCmp(other.items[idx]);
            ++idx;
        }

        // All shared positions are equal: the lengths decide.
        if (mine == theirs)
            return 0;
        return idx == mine ? -1 : 1;
    }

    /*
     * Returns the hash of an Element
     *
     * The hash is the tag's hash plus the sum of the hashes of all items.
     *
     * You should rarely need to call this function. It exists so that Elements
     * can be used as associative array keys.
     */
    override size_t toHash() scope const @safe
    {
        size_t total = tag.toHash();
        foreach (item; items)
            total += item.toHash();
        return total;
    }

    const
    {
        /*
         * Returns the decoded interior of an element.
         *
         * The element is assumed to contain text <i>only</i>. So, for
         * example, given XML such as "<title>Good &amp;
         * Bad</title>", will return "Good & Bad".
         *
         * Params:
         *      mode = (optional) Mode to use for decoding. (Defaults to LOOSE).
         *
         * Throws: DecodeException if decode fails, or if an item that is not
         * a Text is encountered
         */
        string text(DecodeMode mode=DecodeMode.LOOSE)
        {
            string decoded;
            foreach (item; items)
            {
                auto textItem = cast(Text) item;
                if (textItem is null)
                    throw new DecodeException(item.toString());
                decoded ~= decode(textItem.toString(), mode);
            }
            return decoded;
        }

        /*
         * Returns an indented string representation of this item
         *
         * Params:
         *      indent = (optional) number of spaces by which to indent this
         *          element. Defaults to 2.
         */
        override string[] pretty(uint indent=2) scope
        {
            import std.algorithm.searching : count;
            import std.string : rightJustify;

            if (isEmptyXML)
                return [ tag.toEmptyString() ];

            // An element holding a single Text item is kept on one line.
            if (items.length == 1)
            {
                if (auto soleText = cast(const(Text)) items[0])
                    return [tag.toStartString() ~ soleText.toString() ~ tag.toEndString()];
            }

            string[] lines = [ tag.toStartString() ];
            foreach (item; items)
            {
                // Right-justifying to "length + indent" shifts each child
                // line to the right by indent columns.
                foreach (line; item.pretty(indent))
                    lines ~= rightJustify(line, count(line) + indent);
            }
            lines ~= tag.toEndString();
            return lines;
        }

        /*
         * Returns the string representation of an Element
         *
         * Example:
         * --------------
         * auto element = new Element("br");
         * writefln(element.toString()); // writes "<br />"
         * --------------
         */
        override string toString() scope @safe
        {
            if (isEmptyXML)
                return tag.toEmptyString();

            auto xml = tag.toStartString();
            foreach (item; items)
                xml ~= item.toString();
            return xml ~ tag.toEndString();
        }

        // An element is empty XML exactly when it has no items.
        override @property @safe pure @nogc nothrow bool isEmptyXML() const scope
        {
            return items.length == 0;
        }
    }
}
980
/*
 * Tag types.
 *
 * $(DDOC_ENUM_MEMBERS START) Used for start tags
 * $(DDOC_ENUM_MEMBERS END) Used for end tags
 * $(DDOC_ENUM_MEMBERS EMPTY) Used for empty tags
 *
 */
enum TagType
{
    START,  // a start tag
    END,    // an end tag
    EMPTY   // an empty tag
}
990
991 /*
992 * Class representing an XML tag.
993 *
994 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
995 *
996 * The class invariant guarantees
997 * <ul>
998 * <li> that $(B type) is a valid enum TagType value</li>
999 * <li> that $(B name) consists of valid characters</li>
1000 * <li> that each attribute name consists of valid characters</li>
1001 * </ul>
1002 */
1003 class Tag
1004 {
1005 TagType type = TagType.START; // Type of tag
1006 string name; // Tag name
1007 string[string] attr; // Associative array of attributes
1008 private string tagString;
1009
invariant()1010 invariant()
1011 {
1012 string s;
1013 string t;
1014
1015 assert(type == TagType.START
1016 || type == TagType.END
1017 || type == TagType.EMPTY);
1018
1019 s = name;
1020 try { checkName(s,t); }
1021 catch (Err e) { assert(false,"Invalid tag name:" ~ e.toString()); }
1022
1023 foreach (k,v;attr)
1024 {
1025 s = k;
1026 try { checkName(s,t); }
1027 catch (Err e)
1028 { assert(false,"Invalid attribute name:" ~ e.toString()); }
1029 }
1030 }
1031
1032 /*
1033 * Constructs an instance of Tag with a specified name and type
1034 *
1035 * The constructor does not initialize the attributes. To initialize the
1036 * attributes, you access the $(B attr) member variable.
1037 *
1038 * Params:
1039 * name = the Tag's name
1040 * type = (optional) the Tag's type. If omitted, defaults to
1041 * TagType.START.
1042 *
1043 * Example:
1044 * --------------
1045 * auto tag = new Tag("img",Tag.EMPTY);
1046 * tag.attr["src"] = "http://example.com/example.jpg";
1047 * --------------
1048 */
1049 this(string name, TagType type=TagType.START) @safe pure
1050 {
1051 this.name = name;
1052 this.type = type;
1053 }
1054
1055 /* Private constructor (so don't ddoc this!)
1056 *
1057 * Constructs a Tag by parsing the string representation, e.g. "<html>".
1058 *
1059 * The string is passed by reference, and is advanced over all characters
1060 * consumed.
1061 *
1062 * The second parameter is a dummy parameter only, required solely to
1063 * distinguish this constructor from the public one.
1064 */
this(ref string s,bool dummy)1065 private this(ref string s, bool dummy) @safe pure
1066 {
1067 import std.algorithm.searching : countUntil;
1068 import std.ascii : isWhite;
1069 import std.utf : byCodeUnit;
1070
1071 tagString = s;
1072 try
1073 {
1074 reqc(s,'<');
1075 if (optc(s,'/')) type = TagType.END;
1076 ptrdiff_t i = s.byCodeUnit.countUntil(">", "/>", " ", "\t", "\v", "\r", "\n", "\f");
1077 name = s[0 .. i];
1078 s = s[i .. $];
1079
1080 i = s.byCodeUnit.countUntil!(a => !isWhite(a));
1081 s = s[i .. $];
1082
1083 while (s.length > 0 && s[0] != '>' && s[0] != '/')
1084 {
1085 i = s.byCodeUnit.countUntil("=", " ", "\t", "\v", "\r", "\n", "\f");
1086 string key = s[0 .. i];
1087 s = s[i .. $];
1088
1089 i = s.byCodeUnit.countUntil!(a => !isWhite(a));
1090 s = s[i .. $];
1091 reqc(s,'=');
1092 i = s.byCodeUnit.countUntil!(a => !isWhite(a));
1093 s = s[i .. $];
1094
1095 immutable char quote = requireOneOf(s,"'\"");
1096 i = s.byCodeUnit.countUntil(quote);
1097 string val = decode(s[0 .. i], DecodeMode.LOOSE);
1098 s = s[i .. $];
1099 reqc(s,quote);
1100
1101 i = s.byCodeUnit.countUntil!(a => !isWhite(a));
1102 s = s[i .. $];
1103 attr[key] = val;
1104 }
1105 if (optc(s,'/'))
1106 {
1107 if (type == TagType.END) throw new TagException("");
1108 type = TagType.EMPTY;
1109 }
1110 reqc(s,'>');
1111 tagString.length = tagString.length - s.length;
1112 }
1113 catch (XMLException e)
1114 {
1115 tagString.length = tagString.length - s.length;
1116 throw new TagException(tagString);
1117 }
1118 }
1119
1120 const
1121 {
1122 /*
1123 * Compares two Tags for equality
1124 *
1125 * You should rarely need to call this function. It exists so that Tags
1126 * can be used as associative array keys.
1127 *
1128 * Example:
1129 * --------------
1130 * Tag tag1,tag2
1131 * if (tag1 == tag2) { }
1132 * --------------
1133 */
opEquals(scope Object o)1134 override bool opEquals(scope Object o)
1135 {
1136 const tag = toType!(const Tag)(o);
1137 return
1138 (name != tag.name) ? false : (
1139 (attr != tag.attr) ? false : (
1140 (type != tag.type) ? false : (
1141 true )));
1142 }
1143
1144 /*
1145 * Compares two Tags
1146 *
1147 * Example:
1148 * --------------
1149 * Tag tag1,tag2
1150 * if (tag1 < tag2) { }
1151 * --------------
1152 */
opCmp(Object o)1153 override int opCmp(Object o)
1154 {
1155 const tag = toType!(const Tag)(o);
1156 // Note that attr is an AA, so the comparison is nonsensical (bug 10381)
1157 return
1158 ((name != tag.name) ? ( name < tag.name ? -1 : 1 ) :
1159 ((attr != tag.attr) ? ( cast(void *) attr < cast(void*) tag.attr ? -1 : 1 ) :
1160 ((type != tag.type) ? ( type < tag.type ? -1 : 1 ) :
1161 0 )));
1162 }
1163
1164 /*
1165 * Returns the hash of a Tag
1166 *
1167 * You should rarely need to call this function. It exists so that Tags
1168 * can be used as associative array keys.
1169 */
override size_t toHash()
{
    // Only the tag name feeds the hash; attr and type do not take part.
    const size_t nameHash = .hashOf(name);
    return nameHash;
}
1174
1175 /*
1176 * Returns the string representation of a Tag
1177 *
1178 * Example:
1179 * --------------
1180 * auto tag = new Tag("book",TagType.START);
1181 * writefln(tag.toString()); // writes "<book>"
1182 * --------------
1183 */
override string toString() @safe
{
    // Precedence: EMPTY first, then END; any other type renders as a start tag.
    if (type == TagType.EMPTY)
        return toEmptyString();
    if (type == TagType.END)
        return toEndString();
    return toStartString();
}
1189
1190 private
1191 {
string toNonEndString() @safe
{
    import std.format : format;

    // Builds `<name key="value" ...` without the closing bracket, which the
    // caller appends. Attributes appear in the associative array's iteration
    // order and each value is passed through encode().
    string rendered = "<" ~ name;
    foreach (attrName, attrValue; attr)
    {
        rendered ~= format(" %s=\"%s\"", attrName, encode(attrValue));
    }
    return rendered;
}
1201
string toStartString() @safe
{
    // Start tag: the shared "<name attrs" prefix closed with ">".
    string rendered = toNonEndString();
    rendered ~= ">";
    return rendered;
}
1203
string toEndString() @safe
{
    // End tag: only the name is written, attributes are not.
    string rendered = "</";
    rendered ~= name;
    rendered ~= ">";
    return rendered;
}
1205
string toEmptyString() @safe
{
    // Empty tag: the shared "<name attrs" prefix closed with " />".
    string rendered = toNonEndString();
    rendered ~= " />";
    return rendered;
}
1207 }
1208
1209 /*
1210 * Returns true if the Tag is a start tag
1211 *
1212 * Example:
1213 * --------------
1214 * if (tag.isStart) { }
1215 * --------------
1216 */
@property bool isStart() @safe @nogc pure nothrow
{
    // True exactly when this tag's type is TagType.START.
    return TagType.START == type;
}
1218
1219 /*
1220 * Returns true if the Tag is an end tag
1221 *
1222 * Example:
1223 * --------------
1224 * if (tag.isEnd) { }
1225 * --------------
1226 */
@property bool isEnd() @safe @nogc pure nothrow
{
    // True exactly when this tag's type is TagType.END.
    return TagType.END == type;
}
1228
1229 /*
1230 * Returns true if the Tag is an empty tag
1231 *
1232 * Example:
1233 * --------------
1234 * if (tag.isEmpty) { }
1235 * --------------
1236 */
@property bool isEmpty() @safe @nogc pure nothrow
{
    // True exactly when this tag's type is TagType.EMPTY.
    return TagType.EMPTY == type;
}
1238 }
1239 }
1240
1241 /*
1242 * Class representing a comment
1243 */
class Comment : Item
{
    private string content;

    /*
     * Construct a comment
     *
     * Params:
     *      content = the body of the comment
     *
     * Throws: CommentException if the comment body is illegal (contains "--"
     * or exactly equals "-")
     *
     * Example:
     * --------------
     * auto item = new Comment("This is a comment");
     *    // constructs <!--This is a comment-->
     * --------------
     */
    this(string content) @safe pure
    {
        import std.string : indexOf;

        if (content == "-" || content.indexOf("--") != -1)
            throw new CommentException(content);
        this.content = content;
    }

    /*
     * Compares two comments for equality
     *
     * Returns: true only if o is a Comment with identical content
     *
     * Example:
     * --------------
     * Comment item1,item2;
     * if (item1 == item2) { }
     * --------------
     */
    override bool opEquals(scope const Object o) const
    {
        const item = toType!(const Item)(o);
        const t = cast(const Comment) item;
        return t !is null && content == t.content;
    }

    /*
     * Compares two comments
     *
     * You should rarely need to call this function. It exists so that Comments
     * can be used as associative array keys.
     *
     * Returns: -1, 0 or 1 according to the ordering of the two contents.
     * If o is an Item that is not a Comment the result is 0.
     *
     * Example:
     * --------------
     * Comment item1,item2;
     * if (item1 < item2) { }
     * --------------
     */
    override int opCmp(scope const Object o) scope const
    {
        const item = toType!(const Item)(o);
        const t = cast(const Comment) item;

        // The comparison used to be written as `t !is null && (...)`, which
        // collapses the -1/1 result to a bool and so could never yield -1.
        // NOTE(review): an Item of another kind still compares as 0 here.
        if (t is null || content == t.content) return 0;
        return content < t.content ? -1 : 1;
    }

    /*
     * Returns the hash of a Comment
     *
     * You should rarely need to call this function. It exists so that Comments
     * can be used as associative array keys.
     */
    override size_t toHash() scope const nothrow { return hash(content); }

    /*
     * Returns a string representation of this comment: the content wrapped
     * in "<!--" and "-->"
     */
    override string toString() scope const @safe pure nothrow { return "<!--" ~ content ~ "-->"; }

    // A comment never counts as empty XML text: always returns false
    override @property @safe @nogc pure nothrow scope bool isEmptyXML() const { return false; }
}
1323
1324 // https://issues.dlang.org/show_bug.cgi?id=16241
@safe unittest
{
    import std.exception : assertThrown;

    // "==" contains no "--", so construction succeeds and the body is kept as given.
    auto accepted = new Comment("==");
    assert(accepted.content == "==");

    // A body containing "--" must be rejected with a CommentException.
    assertThrown!CommentException(new Comment("--"));
}
1332
1333 /*
1334 * Class representing a Character Data section
1335 */
class CData : Item
{
    private string content;

    /*
     * Construct a character data section
     *
     * Params:
     *      content = the body of the character data segment
     *
     * Throws: CDataException if the segment body is illegal (contains "]]>")
     *
     * Example:
     * --------------
     * auto item = new CData("<b>hello</b>");
     *    // constructs <![CDATA[<b>hello</b>]]>
     * --------------
     */
    this(string content) @safe pure
    {
        import std.string : indexOf;
        if (content.indexOf("]]>") != -1) throw new CDataException(content);
        this.content = content;
    }

    /*
     * Compares two CDatas for equality
     *
     * Returns: true only if o is a CData with identical content
     *
     * Example:
     * --------------
     * CData item1,item2;
     * if (item1 == item2) { }
     * --------------
     */
    override bool opEquals(scope const Object o) const
    {
        const item = toType!(const Item)(o);
        const t = cast(const CData) item;
        return t !is null && content == t.content;
    }

    /*
     * Compares two CDatas
     *
     * You should rarely need to call this function. It exists so that CDatas
     * can be used as associative array keys.
     *
     * Returns: -1, 0 or 1 according to the ordering of the two contents.
     * If o is an Item that is not a CData the result is 0.
     *
     * Example:
     * --------------
     * CData item1,item2;
     * if (item1 < item2) { }
     * --------------
     */
    override int opCmp(scope const Object o) scope const
    {
        const item = toType!(const Item)(o);
        const t = cast(const CData) item;

        // The comparison used to be written as `t !is null && (...)`, which
        // collapses the -1/1 result to a bool and so could never yield -1.
        // NOTE(review): an Item of another kind still compares as 0 here.
        if (t is null || content == t.content) return 0;
        return content < t.content ? -1 : 1;
    }

    /*
     * Returns the hash of a CData
     *
     * You should rarely need to call this function. It exists so that CDatas
     * can be used as associative array keys.
     */
    override size_t toHash() scope const nothrow { return hash(content); }

    /*
     * Returns a string representation of this CData section: the module-level
     * cdata prefix, the content, then "]]>"
     */
    override string toString() scope const @safe pure nothrow { return cdata ~ content ~ "]]>"; }

    // A CData section never counts as empty XML text: always returns false
    override @property @safe @nogc pure nothrow scope bool isEmptyXML() const { return false; }
}
1412
1413 /*
1414 * Class representing a text (aka Parsed Character Data) section
1415 */
class Text : Item
{
    private string content;

    /*
     * Construct a text (aka PCData) section
     *
     * Params:
     *      content = the text. This function encodes the text before
     *      insertion, so it is safe to insert any text
     *
     * Example:
     * --------------
     * auto item = new Text("a < b");
     *    // stores the result of encode("a < b")
     * --------------
     */
    this(string content) @safe pure
    {
        this.content = encode(content);
    }

    /*
     * Compares two text sections for equality
     *
     * Returns: true only if o is a Text with identical (encoded) content
     *
     * Example:
     * --------------
     * Text item1,item2;
     * if (item1 == item2) { }
     * --------------
     */
    override bool opEquals(scope const Object o) const
    {
        const item = toType!(const Item)(o);
        const t = cast(const Text) item;
        return t !is null && content == t.content;
    }

    /*
     * Compares two text sections
     *
     * You should rarely need to call this function. It exists so that Texts
     * can be used as associative array keys.
     *
     * Returns: -1, 0 or 1 according to the ordering of the two contents.
     * If o is an Item that is not a Text the result is 0.
     *
     * Example:
     * --------------
     * Text item1,item2;
     * if (item1 < item2) { }
     * --------------
     */
    override int opCmp(scope const Object o) scope const
    {
        const item = toType!(const Item)(o);
        const t = cast(const Text) item;

        // The comparison used to be written as `t !is null && (...)`, which
        // collapses the -1/1 result to a bool and so could never yield -1.
        // NOTE(review): an Item of another kind still compares as 0 here.
        if (t is null || content == t.content) return 0;
        return content < t.content ? -1 : 1;
    }

    /*
     * Returns the hash of a text section
     *
     * You should rarely need to call this function. It exists so that Texts
     * can be used as associative array keys.
     */
    override size_t toHash() scope const nothrow { return hash(content); }

    /*
     * Returns a string representation of this Text section, i.e. the stored
     * (already encoded) content
     */
    override string toString() scope const @safe @nogc pure nothrow { return content; }

    /*
     * Returns true if the content is the empty string
     */
    override @property @safe @nogc pure nothrow scope bool isEmptyXML() const { return content.length == 0; }
}
1492
1493 /*
1494 * Class representing an XML Instruction section
1495 */
class XMLInstruction : Item
{
    private string content;

    /*
     * Construct an XML Instruction section
     *
     * Params:
     *      content = the body of the instruction segment
     *
     * Throws: XIException if the segment body is illegal (contains ">")
     *
     * Example:
     * --------------
     * auto item = new XMLInstruction("ATTLIST");
     *    // constructs <!ATTLIST>
     * --------------
     */
    this(string content) @safe pure
    {
        import std.string : indexOf;
        if (content.indexOf(">") != -1) throw new XIException(content);
        this.content = content;
    }

    /*
     * Compares two XML instructions for equality
     *
     * Returns: true only if o is an XMLInstruction with identical content
     *
     * Example:
     * --------------
     * XMLInstruction item1,item2;
     * if (item1 == item2) { }
     * --------------
     */
    override bool opEquals(scope const Object o) const
    {
        const item = toType!(const Item)(o);
        const t = cast(const XMLInstruction) item;
        return t !is null && content == t.content;
    }

    /*
     * Compares two XML instructions
     *
     * You should rarely need to call this function. It exists so that
     * XmlInstructions can be used as associative array keys.
     *
     * Returns: -1, 0 or 1 according to the ordering of the two contents.
     * If o is an Item that is not an XMLInstruction the result is 0.
     *
     * Example:
     * --------------
     * XMLInstruction item1,item2;
     * if (item1 < item2) { }
     * --------------
     */
    override int opCmp(scope const Object o) scope const
    {
        const item = toType!(const Item)(o);
        const t = cast(const XMLInstruction) item;

        // The comparison used to be written as `t !is null && (...)`, which
        // collapses the -1/1 result to a bool and so could never yield -1.
        // NOTE(review): an Item of another kind still compares as 0 here.
        if (t is null || content == t.content) return 0;
        return content < t.content ? -1 : 1;
    }

    /*
     * Returns the hash of an XMLInstruction
     *
     * You should rarely need to call this function. It exists so that
     * XmlInstructions can be used as associative array keys.
     */
    override size_t toHash() scope const nothrow { return hash(content); }

    /*
     * Returns a string representation of this XmlInstruction: the content
     * wrapped in "<!" and ">"
     */
    override string toString() scope const @safe pure nothrow { return "<!" ~ content ~ ">"; }

    // An XML instruction never counts as empty XML text: always returns false
    override @property @safe @nogc pure nothrow scope bool isEmptyXML() const { return false; }
}
1572
1573 /*
1574 * Class representing a Processing Instruction section
1575 */
class ProcessingInstruction : Item
{
    private string content;

    /*
     * Construct a Processing Instruction section
     *
     * Params:
     *      content = the body of the instruction segment
     *
     * Throws: PIException if the segment body is illegal (contains "?>")
     *
     * Example:
     * --------------
     * auto item = new ProcessingInstruction("php");
     *    // constructs <?php?>
     * --------------
     */
    this(string content) @safe pure
    {
        import std.string : indexOf;
        if (content.indexOf("?>") != -1) throw new PIException(content);
        this.content = content;
    }

    /*
     * Compares two processing instructions for equality
     *
     * Returns: true only if o is a ProcessingInstruction with identical content
     *
     * Example:
     * --------------
     * ProcessingInstruction item1,item2;
     * if (item1 == item2) { }
     * --------------
     */
    override bool opEquals(scope const Object o) const
    {
        const item = toType!(const Item)(o);
        const t = cast(const ProcessingInstruction) item;
        return t !is null && content == t.content;
    }

    /*
     * Compares two processing instructions
     *
     * You should rarely need to call this function. It exists so that
     * ProcessingInstructions can be used as associative array keys.
     *
     * Returns: -1, 0 or 1 according to the ordering of the two contents.
     * If o is an Item that is not a ProcessingInstruction the result is 0.
     *
     * Example:
     * --------------
     * ProcessingInstruction item1,item2;
     * if (item1 < item2) { }
     * --------------
     */
    override int opCmp(scope const Object o) scope const
    {
        const item = toType!(const Item)(o);
        const t = cast(const ProcessingInstruction) item;

        // The comparison used to be written as `t !is null && (...)`, which
        // collapses the -1/1 result to a bool and so could never yield -1.
        // NOTE(review): an Item of another kind still compares as 0 here.
        if (t is null || content == t.content) return 0;
        return content < t.content ? -1 : 1;
    }

    /*
     * Returns the hash of a ProcessingInstruction
     *
     * You should rarely need to call this function. It exists so that
     * ProcessingInstructions can be used as associative array keys.
     */
    override size_t toHash() scope const nothrow { return hash(content); }

    /*
     * Returns a string representation of this ProcessingInstruction: the
     * content wrapped in "<?" and "?>"
     */
    override string toString() scope const @safe pure nothrow { return "<?" ~ content ~ "?>"; }

    // A processing instruction never counts as empty XML text: always returns false
    override @property @safe @nogc pure nothrow bool isEmptyXML() scope const { return false; }
}
1652
1653 /*
1654 * Abstract base class for XML items
1655 */
abstract class Item
{
    // Equality against another Item of the same type
    abstract override bool opEquals(scope const Object o) @safe const;

    // Ordering against another Item of the same type
    abstract override int opCmp(scope const Object o) @safe const;

    // Hash of this item
    abstract override size_t toHash() @safe scope const;

    // String representation of this item
    abstract override string toString() @safe scope const;

    /*
     * Returns an indented string representation of this item
     *
     * The default implementation strips the toString() result and returns it
     * as a single line, or no lines at all when nothing is left; the indent
     * argument is not used here.
     *
     * Params:
     *      indent = number of spaces by which to indent child elements
     */
    string[] pretty(uint indent) @safe scope const
    {
        import std.string : strip;

        string trimmed = strip(toString());
        if (trimmed.length == 0)
            return [];
        return [ trimmed ];
    }

    // True if the item represents empty XML text
    abstract @property @safe @nogc pure nothrow bool isEmptyXML() scope const;
}
1686
1687 /*
1688 * Class for parsing an XML Document.
1689 *
1690 * This is a subclass of ElementParser. Most of the useful functions are
1691 * documented there.
1692 *
1693 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
1694 *
1695 * Bugs:
1696 * Currently only supports UTF documents.
1697 *
1698 * If there is an encoding attribute in the prolog, it is ignored.
1699 *
1700 */
class DocumentParser : ElementParser
{
    // Copy of the document text; the inherited cursor `s` points at this field.
    string xmlText;

    /*
     * Constructs a DocumentParser.
     *
     * The input to this function MUST be valid XML.
     * This is enforced by the function's in contract.
     *
     * Params:
     *      xmlText_ = the entire XML document as text
     *
     */
    this(string xmlText_)
    in
    {
        assert(xmlText_.length != 0);

        // Run the well-formedness check; on failure surface the checker's
        // own explanation through the assertion message.
        try
        {
            check(xmlText_);
        }
        catch (CheckException failure)
        {
            assert(false, "\n" ~ failure.toString());
        }
    }
    do
    {
        // Order matters: the base constructor dereferences s, so s must be
        // pointed at the stored text before super() runs.
        this.xmlText = xmlText_;
        this.s = &this.xmlText;
        super();    // Initialize everything
        parse();    // Parse through the root tag (but not beyond)
    }
}
1738
@system unittest
{
    // The root has exactly one child element, <child>, and nothing but elements.
    auto document = new Document("<root><child><grandchild/></child></root>");

    assert(document.elements.length == 1);
    assert(document.elements[0].tag.name == "child");
    assert(document.items == document.elements);
}
1746
1747 /*
1748 * Class for parsing an XML element.
1749 *
1750 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
1751 *
1752 * Note that you cannot construct instances of this class directly. You can
1753 * construct a DocumentParser (which is a subclass of ElementParser), but
1754 * otherwise, Instances of ElementParser will be created for you by the
1755 * library, and passed your way via onStartTag handlers.
1756 *
1757 */
class ElementParser
{
    alias Handler = void delegate(string);
    alias ElementHandler = void delegate(in Element element);
    alias ParserHandler = void delegate(ElementParser parser);

    private
    {
        Tag tag_;             // tag at the start of the element being parsed
        string elementStart;  // snapshot of *s taken when the parser was created
        string* s;            // input that has not been consumed yet

        Handler commentHandler = null;
        Handler cdataHandler = null;
        Handler xiHandler = null;
        Handler piHandler = null;
        Handler rawTextHandler = null;
        Handler textHandler = null;

        // Private constructor for start tags: shares the parent's input
        // cursor and adopts the tag the parent has just read.
        this(ElementParser parent) @safe @nogc pure nothrow
        {
            s = parent.s;
            this();
            tag_ = parent.tag_;
        }

        // Private constructor for empty tags: parses from the given string.
        this(Tag tag, string* t) @safe @nogc pure nothrow
        {
            s = t;
            this();
            tag_ = tag;
        }

        // Calls the handler registered under name, or failing that the one
        // registered under the null key; does nothing if neither exists.
        static void dispatch(H, A)(H[string] handlers, string name, A arg)
        {
            auto handler = name in handlers;
            if (handler is null) handler = null in handlers;
            if (handler !is null) (*handler)(arg);
        }
    }

    /*
     * The Tag at the start of the element being parsed. You can read this to
     * determine the tag's name and attributes.
     */
    @property @safe @nogc pure nothrow const(Tag) tag() const { return tag_; }

    /*
     * Register a handler which will be called whenever a start tag is
     * encountered which matches the specified name. You can also pass null as
     * the name, in which case the handler will be called for any unmatched
     * start tag.
     *
     * Example:
     * --------------
     * // Call this function whenever a <podcast> start tag is encountered
     * onStartTag["podcast"] = (ElementParser xml)
     * {
     *     // Your code here
     *     //
     *     // This is a closure, so code here may reference
     *     // variables which are outside of this scope
     * };
     *
     * // call myEpisodeStartHandler (defined elsewhere) whenever an <episode>
     * // start tag is encountered
     * onStartTag["episode"] = &myEpisodeStartHandler;
     *
     * // call delegate dg for all other start tags
     * onStartTag[null] = dg;
     * --------------
     *
     * This library will supply your function with a new instance of
     * ElementParser, which may be used to parse inside the element whose
     * start tag was just found, or to identify the tag attributes of the
     * element, etc.
     *
     * Note that your function will be called for both start tags and empty
     * tags. That is, we make no distinction between <br></br>
     * and <br/>.
     */
    ParserHandler[string] onStartTag;

    /*
     * Register a handler which will be called whenever an end tag is
     * encountered which matches the specified name. You can also pass null as
     * the name, in which case the handler will be called for any unmatched
     * end tag.
     *
     * Example:
     * --------------
     * // Call this function whenever a </podcast> end tag is encountered
     * onEndTag["podcast"] = (in Element e)
     * {
     *     // Your code here
     *     //
     *     // This is a closure, so code here may reference
     *     // variables which are outside of this scope
     * };
     *
     * // call myEpisodeEndHandler (defined elsewhere) whenever an </episode>
     * // end tag is encountered
     * onEndTag["episode"] = &myEpisodeEndHandler;
     *
     * // call delegate dg for all other end tags
     * onEndTag[null] = dg;
     * --------------
     *
     * Note that your function will be called for both end tags and empty
     * tags. That is, we make no distinction between <br></br>
     * and <br/>.
     */
    ElementHandler[string] onEndTag;

    // Records the input as it stands at construction; s must already be set.
    protected this() @safe @nogc pure nothrow
    {
        elementStart = *s;
    }

    /*
     * Register a handler which will be called whenever text is encountered.
     *
     * Example:
     * --------------
     * // Call this function whenever text is encountered
     * onText = (string s)
     * {
     *     // Your code here
     *
     *     // The passed parameter s will have been decoded by the time you see
     *     // it, and so may contain any character.
     *     //
     *     // This is a closure, so code here may reference
     *     // variables which are outside of this scope
     * };
     * --------------
     */
    @property @safe @nogc pure nothrow void onText(Handler handler) { textHandler = handler; }

    /*
     * Register an alternative handler which will be called whenever text
     * is encountered. This differs from onText in that onText will decode
     * the text, whereas onTextRaw will not. This allows you to make design
     * choices, since onText will be more accurate, but slower, while
     * onTextRaw will be faster, but less accurate. Of course, you can
     * still call decode() within your handler, if you want, but you'd
     * probably want to use onTextRaw only in circumstances where you
     * know that decoding is unnecessary.
     *
     * If both handlers are registered, only the raw handler is called.
     *
     * Example:
     * --------------
     * // Call this function whenever text is encountered
     * onTextRaw = (string s)
     * {
     *     // Your code here
     *
     *     // The passed parameter s will NOT have been decoded.
     *     //
     *     // This is a closure, so code here may reference
     *     // variables which are outside of this scope
     * };
     * --------------
     */
    @property @safe @nogc pure nothrow void onTextRaw(Handler handler) { rawTextHandler = handler; }

    /*
     * Register a handler which will be called whenever a character data
     * segment is encountered.
     *
     * Example:
     * --------------
     * // Call this function whenever a CData section is encountered
     * onCData = (string s)
     * {
     *     // Your code here
     *
     *     // The passed parameter s does not include the opening <![CDATA[
     *     // nor closing ]]>
     *     //
     *     // This is a closure, so code here may reference
     *     // variables which are outside of this scope
     * };
     * --------------
     */
    @property @safe @nogc pure nothrow void onCData(Handler handler) { cdataHandler = handler; }

    /*
     * Register a handler which will be called whenever a comment is
     * encountered.
     *
     * Example:
     * --------------
     * // Call this function whenever a comment is encountered
     * onComment = (string s)
     * {
     *     // Your code here
     *
     *     // The passed parameter s does not include the opening <!-- nor
     *     // closing -->
     *     //
     *     // This is a closure, so code here may reference
     *     // variables which are outside of this scope
     * };
     * --------------
     */
    @property @safe @nogc pure nothrow void onComment(Handler handler) { commentHandler = handler; }

    /*
     * Register a handler which will be called whenever a processing
     * instruction is encountered.
     *
     * Example:
     * --------------
     * // Call this function whenever a processing instruction is encountered
     * onPI = (string s)
     * {
     *     // Your code here
     *
     *     // The passed parameter s does not include the opening <? nor
     *     // closing ?>
     *     //
     *     // This is a closure, so code here may reference
     *     // variables which are outside of this scope
     * };
     * --------------
     */
    @property @safe @nogc pure nothrow void onPI(Handler handler) { piHandler = handler; }

    /*
     * Register a handler which will be called whenever an XML instruction is
     * encountered.
     *
     * Example:
     * --------------
     * // Call this function whenever an XML instruction is encountered
     * // (Note: XML instructions may only occur preceding the root tag of a
     * // document).
     * onXI = (string s)
     * {
     *     // Your code here
     *
     *     // The passed parameter s does not include the opening <! nor
     *     // closing >
     *     //
     *     // This is a closure, so code here may reference
     *     // variables which are outside of this scope
     * };
     * --------------
     */
    @property @safe @nogc pure nothrow void onXI(Handler handler) { xiHandler = handler; }

    /*
     * Parse an XML element.
     *
     * Parsing will continue until the end of the current element. Any items
     * encountered for which a handler has been registered will invoke that
     * handler.
     *
     * Throws: various kinds of XMLException
     */
    void parse()
    {
        import std.algorithm.searching : startsWith;
        import std.string : indexOf;

        string t;
        const Tag root = tag_;

        // Start tags seen so far, by name, so that an end tag can be paired
        // with the tag that opened it.
        Tag[string] startTags;
        if (tag_ !is null) startTags[tag_.name] = tag_;

        // Each branch consumes one item from the front of *s through the
        // module's chop helper. The order of the "<!--", "<![CDATA[" and
        // "<!" tests matters because they share a prefix.
        while (s.length != 0)
        {
            if (startsWith(*s,"<!--"))
            {
                chop(*s,4);
                t = chop(*s,indexOf(*s,"-->"));
                if (commentHandler.funcptr !is null) commentHandler(t);
                chop(*s,3);
            }
            else if (startsWith(*s,"<![CDATA["))
            {
                chop(*s,9);
                t = chop(*s,indexOf(*s,"]]>"));
                if (cdataHandler.funcptr !is null) cdataHandler(t);
                chop(*s,3);
            }
            else if (startsWith(*s,"<!"))
            {
                chop(*s,2);
                t = chop(*s,indexOf(*s,">"));
                if (xiHandler.funcptr !is null) xiHandler(t);
                chop(*s,1);
            }
            else if (startsWith(*s,"<?"))
            {
                chop(*s,2);
                t = chop(*s,indexOf(*s,"?>"));
                if (piHandler.funcptr !is null) piHandler(t);
                chop(*s,2);
            }
            else if (startsWith(*s,"<"))
            {
                tag_ = new Tag(*s,true);
                if (root is null)
                    return; // Return to constructor of derived class

                if (tag_.isStart)
                {
                    startTags[tag_.name] = tag_;

                    // The child parser shares this parser's input cursor.
                    auto parser = new ElementParser(this);
                    dispatch(onStartTag, tag_.name, parser);
                }
                else if (tag_.isEnd)
                {
                    const startTag = startTags[tag_.name];
                    string text;

                    if (startTag.tagString.length == 0)
                        assert(0);

                    // The element's text is whatever lies between the end of
                    // the start tag and the beginning of this end tag.
                    immutable(char)* p = startTag.tagString.ptr
                        + startTag.tagString.length;
                    immutable(char)* q = &tag_.tagString[0];
                    text = decode(p[0..(q-p)], DecodeMode.LOOSE);

                    auto element = new Element(startTag);
                    if (text.length != 0) element ~= new Text(text);

                    dispatch(onEndTag, tag_.name, element);

                    // The end tag of the element we were asked to parse
                    // finishes the job.
                    if (tag_.name == root.name) return;
                }
                else if (tag_.isEmpty)
                {
                    Tag startTag = new Tag(tag_.name);

                    // FIX by hed010gy
                    // https://issues.dlang.org/show_bug.cgi?id=2979
                    // Carry the attributes over to the pretend start tag.
                    foreach (tn,tv; tag_.attr) startTag.attr[tn]=tv;
                    // END FIX

                    // Handle the pretend start tag; its parser reads from an
                    // empty string because the element has no content.
                    string s2;
                    auto parser = new ElementParser(startTag,&s2);
                    dispatch(onStartTag, startTag.name, parser);

                    // Handle the pretend end tag
                    auto element = new Element(startTag);
                    dispatch(onEndTag, tag_.name, element);
                }
            }
            else
            {
                // Plain text up to the next '<'. The raw handler, when
                // present, takes precedence over the decoding handler.
                t = chop(*s,indexOf(*s,"<"));
                if (rawTextHandler.funcptr !is null)
                    rawTextHandler(t);
                else if (textHandler.funcptr !is null)
                    textHandler(decode(t,DecodeMode.LOOSE));
            }
        }
    }

    /*
     * Returns that part of the element which has already been parsed
     */
    override string toString() const @nogc @safe pure nothrow
    {
        assert(elementStart.length >= s.length);
        return elementStart[0 .. elementStart.length - s.length];
    }

}
2152
2153 private
2154 {
template Check(string msg)
{
    // Input as it was when the mixin was instantiated. Every fail() overload
    // rewinds s to this value before throwing.
    string old = s;

    // Fail with just this rule's message.
    void fail() @safe pure
    {
        s = old;
        throw new Err(s,msg);
    }

    // Fail with this rule's message, chaining the error that caused it.
    void fail(Err e) @safe pure
    {
        s = old;
        throw new Err(s,msg,e);
    }

    // Fail with a detail message: the detail error is created against the
    // current (not yet rewound) input and then chained under this rule's error.
    void fail(string msg2) @safe pure
    {
        auto detail = new Err(s,msg2);
        s = old;
        throw new Err(s,msg,detail);
    }
}
2176
void checkMisc(ref string s) @safe pure // rule 27
{
    import std.algorithm.searching : startsWith;

    mixin Check!("Misc");

    // Misc is a comment, a processing instruction or whitespace; the prefix
    // of s decides which checker runs.
    try
    {
        if (s.startsWith("<!--"))
        {
            checkComment(s);
            return;
        }
        if (s.startsWith("<?"))
        {
            checkPI(s);
            return;
        }
        checkSpace(s);
    }
    catch (Err cause)
    {
        fail(cause);
    }
}
2191
void checkDocument(ref string s) @safe pure // rule 1
{
    mixin Check!("Document");

    // A document is a prolog, one element, then any number of Misc items.
    // Any failure is re-thrown wrapped in a "Document" error.
    try
    {
        checkProlog(s);
        checkElement(s);
        star!(checkMisc)(s);
    }
    catch (Err cause)
    {
        fail(cause);
    }
}
2203
void checkChars(ref string s) @safe pure // rule 2
{
    // TO DO - Fix std.utf stride and decode functions, then use those
    // instead
    import std.format : format;

    mixin Check!("Chars");

    bool found = false;
    dchar offender;
    size_t offset;

    // The index must not be narrower than size_t: size_t is used internally
    // in aApply.d and gets cast e.g. to (int *), which fails on BigEndian
    // targets.
    foreach (size_t i, dchar d; s)
    {
        if (isChar(d)) continue;

        // Remember the first character that isChar rejects and stop.
        offender = d;
        offset = i;
        found = true;
        break;
    }

    if (found)
    {
        // Point s at the offending character so the detail error refers to it.
        s = s[offset .. $];
        fail(format("invalid character: U+%04X",offender));
    }
}
2231
void checkSpace(ref string s) @safe pure // rule 3
{
    import std.ascii : isWhite;

    mixin Check!("Whitespace");

    // Count the leading whitespace code units and drop them.
    size_t blanks = 0;
    while (blanks < s.length && isWhite(s[blanks]))
        ++blanks;
    s = s[blanks .. $];

    // Nothing was consumed: at least one whitespace character is required.
    if (s is old) fail();
}
2246
void checkName(ref string s, out string name) @safe pure // rule 5
{
    mixin Check!("Name");

    if (s.length == 0) fail();

    // Length of the name in code units. It starts out as the whole input so
    // that a name running to the very end of s is consumed in full; with a
    // default of 0 such a name came back empty and s was left untouched.
    size_t n = s.length;

    // 'i' must not be smaller than size_t because size_t is used internally in
    // aApply.d and it will be cast e.g to (int *) which fails on BigEndian targets.
    foreach (size_t i, dchar c; s)
    {
        // Characters allowed anywhere in a name, including the first position.
        if (c == '_' || c == ':' || isLetter(c)) continue;
        // Anything else in first position means this is not a name.
        if (i == 0) fail();
        // Characters allowed only after the first position.
        if (c == '-' || c == '.' || isDigit(c)
            || isCombiningChar(c) || isExtender(c)) continue;
        // First character that cannot be part of the name.
        n = i;
        break;
    }
    name = s[0 .. n];
    s = s[n..$];
}
2267
void checkAttValue(ref string s) @safe pure // rule 10
{
    mixin Check!("AttValue");

    if (s.length == 0) fail();

    // The opening quote decides which character closes the value.
    char c = s[0];
    if (c != '\u0022' && c != '\u0027')
        fail("attribute value requires quotes");
    s = s[1..$];

    for (;;)
    {
        // Skip ordinary characters up to the next '<', '&' or closing quote.
        // Jumping straight to the closing quote, as was done previously, made
        // the '<' and reference checks below unreachable, and sliced with an
        // index of -1 when the quote was missing.
        size_t stop = 0;
        while (stop < s.length && s[stop] != c && s[stop] != '<' && s[stop] != '&')
            ++stop;
        s = s[stop .. $];

        if (s.length == 0) fail("unterminated attribute value");
        if (s[0] == '<') fail("< found in attribute value");
        if (s[0] == c) break;

        // Only '&' is left: it must start a well-formed reference.
        try { checkReference(s); } catch (Err e) { fail(e); }
    }

    // Consume the closing quote.
    s = s[1..$];
}
2290
/*
 * Rule 14 (CharData): advance s over character data.
 *
 * Scanning stops (successfully) at the first '&' or '<', or at the end of
 * the input. Meeting the sequence "]]>" before that is reported via fail().
 */
void checkCharData(ref string s) @safe pure
{
    import std.algorithm.searching : startsWith;

    mixin Check!("CharData");

    // One code unit is dropped per iteration.
    for (; s.length != 0; s = s[1 .. $])
    {
        if (s.startsWith("&") || s.startsWith("<"))
            break;
        if (s.startsWith("]]>"))
            fail("]]> found within char data");
    }
}
2305
/*
 * Rule 15 (Comment): consume "<!--", the comment text, and "-->".
 *
 * The first "--" after the opener must be the start of the "-->" terminator,
 * so a "--" inside the comment body makes the rule fail.
 */
void checkComment(ref string s) @safe pure
{
    import std.string : indexOf;

    mixin Check!("Comment");

    try { checkLiteral("<!--", s); }
    catch (Err cause) { fail(cause); }

    // Locate the first double hyphen following the opener.
    immutable ptrdiff_t dashes = s.indexOf("--");
    if (dashes == -1)
        fail("unterminated comment");
    s = s[dashes .. $];

    try { checkLiteral("-->", s); }
    catch (Err cause) { fail(cause); }
}
2318
/*
 * Rule 16 (PI): consume a processing instruction, i.e. "<?" followed by
 * everything up to and including the next "?>".
 */
void checkPI(ref string s) @safe pure
{
    mixin Check!("PI");

    try
    {
        checkLiteral("<?", s);  // opening delimiter
        checkEnd("?>", s);      // body plus closing delimiter
    }
    catch (Err cause)
    {
        // Re-report through the Check mixin with the underlying error attached.
        fail(cause);
    }
}
2330
/*
 * Rule 18 (CDSect): consume a CDATA section. The opener is the module-level
 * `cdata` literal (defined outside this block); the section runs up to and
 * including the next "]]>".
 */
void checkCDSect(ref string s) @safe pure
{
    mixin Check!("CDSect");

    try
    {
        checkLiteral(cdata, s); // opening marker
        checkEnd("]]>", s);     // contents plus closing marker
    }
    catch (Err cause)
    {
        fail(cause);
    }
}
2342
/*
 * Rule 22 (prolog): an optional XML declaration, any number of Misc items,
 * and optionally a DOCTYPE declaration followed by further Misc items.
 */
void checkProlog(ref string s) @safe pure
{
    mixin Check!("Prolog");

    // Named building blocks for the grammar fragments used below.
    alias miscItems = star!(checkMisc);
    alias docTypeThenMisc = seq!(checkDocTypeDecl, miscItems);

    try
    {
        /* The XML declaration is optional
         * http://www.w3.org/TR/2008/REC-xml-20081126/#NT-prolog
         */
        opt!(checkXMLDecl)(s);

        miscItems(s);
        opt!(docTypeThenMisc)(s);
    }
    catch (Err cause)
    {
        fail(cause);
    }
}
2359
/*
 * Rule 23 (XMLDecl): "<?xml", mandatory version info, optional encoding
 * declaration, optional standalone declaration, optional whitespace, "?>".
 */
void checkXMLDecl(ref string s) @safe pure
{
    mixin Check!("XMLDecl");

    try
    {
        checkLiteral("<?xml", s);
        checkVersionInfo(s);            // required
        opt!(checkEncodingDecl)(s);     // optional parts, in grammar order
        opt!(checkSDDecl)(s);
        opt!(checkSpace)(s);
        checkLiteral("?>", s);
    }
    catch (Err cause)
    {
        fail(cause);
    }
}
2375
/*
 * Rule 24 (VersionInfo): whitespace, the word "version", an equals sign
 * (rule 25) and a quoted version number (rule 26).
 */
void checkVersionInfo(ref string s) @safe pure
{
    mixin Check!("VersionInfo");

    try
    {
        checkSpace(s);                  // leading whitespace is mandatory
        checkLiteral("version", s);
        checkEq(s);
        quoted!(checkVersionNum)(s);
    }
    catch (Err cause)
    {
        fail(cause);
    }
}
2389
/*
 * Rule 25 (Eq): an '=' sign that may be surrounded by whitespace.
 */
void checkEq(ref string s) @safe pure
{
    mixin Check!("Eq");

    alias skipOptionalSpace = opt!(checkSpace);

    try
    {
        skipOptionalSpace(s);
        checkLiteral("=", s);
        skipOptionalSpace(s);
    }
    catch (Err cause)
    {
        fail(cause);
    }
}
2402
/*
 * Rule 26 (VersionNum): consume the version number that sits between the
 * quotes of a version declaration, leaving s positioned on the closing quote.
 *
 * The value must be non-empty and must be followed by a quote character.
 * The scan stops at either '"' or '\''; previously only '"' was searched for,
 * so a single-quoted version (version='1.0') or a missing closing quote led
 * to an out-of-range slice instead of a parse failure.
 */
void checkVersionNum(ref string s) @safe pure
{
    mixin Check!("VersionNum");

    // Length of the run of non-quote code units at the front of s.
    size_t n = 0;
    while (n < s.length && s[n] != '"' && s[n] != '\'')
        ++n;

    // n == 0: empty version number; n == s.length: no terminating quote.
    if (n == 0 || n == s.length) fail();
    s = s[n .. $];
}
2413
/*
 * Rule 28 (doctypedecl): consume "<!DOCTYPE" and everything up to and
 * including the next '>'. The declaration's contents are not validated.
 */
void checkDocTypeDecl(ref string s) @safe pure
{
    mixin Check!("DocTypeDecl");

    try
    {
        checkLiteral("<!DOCTYPE", s);
        //
        // TO DO -- ensure DOCTYPE is well formed
        // (But not yet. That's one of our "future directions")
        //
        checkEnd(">", s);
    }
    catch (Err cause)
    {
        fail(cause);
    }
}
2429
/*
 * Rule 32 (SDDecl): whitespace, "standalone", '=', then a quoted "yes" or
 * "no" using either quote character.
 */
void checkSDDecl(ref string s) @safe pure
{
    import std.algorithm.searching : startsWith;

    mixin Check!("SDDecl");

    try
    {
        checkSpace(s);
        checkLiteral("standalone", s);
        checkEq(s);
    }
    catch (Err cause)
    {
        fail(cause);
    }

    // Number of code units taken up by the quoted value.
    int valueLength = 0;
    if (s.startsWith("'yes'", "\"yes\""))
        valueLength = 5;
    else if (s.startsWith("'no'", "\"no\""))
        valueLength = 4;
    else
        fail("standalone attribute value must be 'yes', \"yes\","~
            " 'no' or \"no\"");
    s = s[valueLength .. $];
}
2451
/*
 * Rule 39 (element): a tag, and - when checkTag reports the kind "STag" -
 * content followed by an end tag whose name equals the start tag's name.
 * Any other kind reported by checkTag needs no content or end tag.
 */
void checkElement(ref string s) @safe pure
{
    mixin Check!("Element");

    string startName, endName, kind;
    try { checkTag(s, kind, startName); }
    catch (Err cause) { fail(cause); }

    if (kind != "STag")
        return;

    // Position of the end tag, remembered so a name mismatch is reported there.
    string atEndTag;
    try
    {
        checkContent(s);
        atEndTag = s;
        checkETag(s, endName);
    }
    catch (Err cause) { fail(cause); }

    if (startName == endName)
        return;

    s = atEndTag;
    fail("end tag name \"" ~ endName
        ~ "\" differs from start tag name \""~startName~"\"");
}
2477
/*
 * Rules 40 and 44: consume a start tag or an empty-element tag.
 *
 * Params:
 *  s    = input; advanced past the closing '>' on success
 *  type = set to "STag" for "<name ...>" and to "ETag" when a '/' precedes
 *         the closing '>'
 *  name = receives the tag name
 */
void checkTag(ref string s, out string type, out string name) @safe pure
{
    mixin Check!("Tag");

    try
    {
        type = "STag";
        checkLiteral("<", s);
        checkName(s, name);
        // Zero or more "whitespace attribute" pairs, then optional whitespace.
        star!(seq!(checkSpace, checkAttribute))(s);
        opt!(checkSpace)(s);

        immutable bool slashBeforeClose = s.length != 0 && s[0] == '/';
        if (slashBeforeClose)
        {
            s = s[1 .. $];
            type = "ETag";
        }
        checkLiteral(">", s);
    }
    catch (Err cause)
    {
        fail(cause);
    }
}
2499
/*
 * Rule 41 (Attribute): a name, an equals sign and a quoted attribute value.
 * The attribute name is parsed but not returned to the caller.
 */
void checkAttribute(ref string s) @safe pure
{
    mixin Check!("Attribute");

    try
    {
        string ignoredName;
        checkName(s, ignoredName);
        checkEq(s);
        checkAttValue(s);
    }
    catch (Err cause)
    {
        fail(cause);
    }
}
2513
/*
 * Rule 42 (ETag): "</", a name, optional whitespace and '>'.
 *
 * Params:
 *  s    = input; advanced past the end tag on success
 *  name = receives the name found in the end tag
 */
void checkETag(ref string s, out string name) @safe pure
{
    mixin Check!("ETag");

    try
    {
        checkLiteral("</", s);
        checkName(s, name);
        opt!(checkSpace)(s);
        checkLiteral(">", s);
    }
    catch (Err cause)
    {
        fail(cause);
    }
}
2527
/*
 * Rule 43 (content): consume a sequence of references, comments, processing
 * instructions, CDATA sections, child elements and character data. Parsing
 * stops at the end of the input or when "</" is seen.
 */
void checkContent(ref string s) @safe pure
{
    import std.algorithm.searching : startsWith;

    mixin Check!("Content");

    try
    {
        while (s.length != 0)
        {
            // Move the mixin's snapshot to the start of the current item.
            old = s;

            // The order of the tests matters: more specific prefixes first.
            if (s.startsWith("&"))
                checkReference(s);
            else if (s.startsWith("<!--"))
                checkComment(s);
            else if (s.startsWith("<?"))
                checkPI(s);
            else if (s.startsWith(cdata))
                checkCDSect(s);
            else if (s.startsWith("</"))
                break;                  // left for the caller
            else if (s.startsWith("<"))
                checkElement(s);
            else
                checkCharData(s);
        }
    }
    catch (Err cause)
    {
        fail(cause);
    }
}
2550
/*
 * Rule 66 (CharRef): consume a numeric character reference, "&#" followed
 * by decimal digits or "&#x" followed by hexadecimal digits, and a ';'.
 *
 * Params:
 *  s = input; advanced past the ';' on success
 *  c = receives the referenced character
 *
 * Fails when there are no digits, when the value is rejected by isChar, or
 * when the terminating ';' is missing.
 *
 * The value is accumulated in a uint and saturated just above U+10FFFF.
 * Accumulating directly in the dchar let long digit strings wrap around and
 * possibly land on a value that isChar accepts.
 */
void checkCharRef(ref string s, out dchar c) @safe pure
{
    import std.format : format;

    mixin Check!("CharRef");

    c = 0;
    try { checkLiteral("&#",s); } catch (Err e) { fail(e); }
    int radix = 10;
    if (s.length != 0 && s[0] == 'x')
    {
        s = s[1..$];
        radix = 16;
    }
    if (s.length == 0) fail("unterminated character reference");
    if (s[0] == ';')
        fail("character reference must have at least one digit");

    // First value beyond the Unicode range; used as the saturation point.
    enum uint beyondUnicode = 0x110000;
    uint value = 0;
    while (s.length != 0)
    {
        immutable char d = s[0];
        int digit;
        if (d >= '0' && d <= '9') digit = d - '0';
        else if (d >= 'a' && d <= 'f') digit = d - 'a' + 10;
        else if (d >= 'A' && d <= 'F') digit = d - 'A' + 10;
        else break;                     // not a digit in any radix
        if (digit >= radix) break;      // hex letter in a decimal reference

        // value <= 0x110000 here, so this cannot overflow a uint.
        value = value * radix + digit;
        if (value > beyondUnicode) value = beyondUnicode;
        s = s[1..$];
    }
    c = cast(dchar) value;
    if (!isChar(c)) fail(format("U+%04X is not a legal character",c));
    if (s.length == 0 || s[0] != ';') fail("expected ;");
    else s = s[1..$];
}
2601
/*
 * Rule 67 (Reference): a character reference when the input starts with
 * "&#", otherwise an entity reference.
 */
void checkReference(ref string s) @safe pure
{
    import std.algorithm.searching : startsWith;

    mixin Check!("Reference");

    try
    {
        immutable bool numeric = s.startsWith("&#");
        if (!numeric)
        {
            checkEntityRef(s);
        }
        else
        {
            dchar unused;   // the decoded character is not needed here
            checkCharRef(s, unused);
        }
    }
    catch (Err cause)
    {
        fail(cause);
    }
}
2616
/*
 * Rule 68 (EntityRef): '&', a name and ';'. The entity name is parsed but
 * not returned.
 */
void checkEntityRef(ref string s) @safe pure
{
    mixin Check!("EntityRef");

    try
    {
        string entityName;
        checkLiteral("&", s);
        checkName(s, entityName);
        checkLiteral(";", s);
    }
    catch (Err cause)
    {
        fail(cause);
    }
}
2630
/*
 * Rule 81 (EncName): consume an encoding name, leaving s positioned on the
 * quote character that follows it.
 *
 * The name must begin with at least one ASCII letter; everything after the
 * leading letters up to the next '"' or '\'' is accepted as part of the name.
 *
 * Both scans used to slice with an unchecked search result, so an empty
 * input, an input made only of letters, or an input without any quote
 * produced an out-of-range slice rather than a parse failure.
 */
void checkEncName(ref string s) @safe pure
{
    import std.ascii : isAlpha;

    mixin Check!("EncName");

    // Leading run of ASCII letters; at least one is required.
    size_t n = 0;
    while (n < s.length && isAlpha(s[n]))
        ++n;
    if (n == 0) fail();

    // Remainder of the name, up to (not including) the next quote character.
    while (n < s.length && s[n] != '"' && s[n] != '\'')
        ++n;
    if (n == s.length) fail();  // no quote follows the name

    s = s[n .. $];
}
2643
/*
 * Rule 80 (EncodingDecl): whitespace, the word "encoding", an equals sign
 * and a quoted encoding name (rule 81).
 */
void checkEncodingDecl(ref string s) @safe pure
{
    mixin Check!("EncodingDecl");

    try
    {
        checkSpace(s);                  // leading whitespace is mandatory
        checkLiteral("encoding", s);
        checkEq(s);
        quoted!(checkEncName)(s);
    }
    catch (Err cause)
    {
        fail(cause);
    }
}
2657
2658 // Helper functions
2659
/*
 * Require s to begin with `literal` and consume it.
 *
 * Params:
 *  literal = the exact text expected at the front of s
 *  s       = input; advanced past the literal on success
 */
void checkLiteral(string literal, ref string s) @safe pure
{
    import std.string : startsWith;

    mixin Check!("Literal");

    immutable bool present = s.startsWith(literal);
    if (!present)
        fail("Expected literal \""~literal~"\"");
    s = s[literal.length .. $];
}
2669
/*
 * Skip forward to the first occurrence of `end` and consume it.
 *
 * Params:
 *  end = terminator to search for
 *  s   = input; advanced past the terminator on success
 *
 * Throws: Err directly (not through a Check mixin) when `end` does not occur.
 */
void checkEnd(string end, ref string s) @safe pure
{
    import std.string : indexOf;
    // Deliberately no mixin Check here.

    immutable where = s.indexOf(end);
    if (where == -1)
        throw new Err(s,"Unable to find terminating \""~end~"\"");

    s = s[where .. $];
    checkLiteral(end, s);
}
2680
2681 // Metafunctions -- none of these use mixin Check
2682
/*
 * Grammar operator "f?": try f once and ignore an Err it raises.
 */
void opt(alias f)(ref string s)
{
    try
    {
        f(s);
    }
    catch (Err ignored)
    {
        // The production is optional, so its absence is not reported.
    }
}
2687
/*
 * Grammar operator "f+": one mandatory application of f, then as many more
 * as star allows. An Err from the first application propagates.
 */
void plus(alias f)(ref string s)
{
    f(s);           // required occurrence
    star!f(s);      // optional repetitions
}
2693
/*
 * Grammar operator "f*": apply f repeatedly until the input is exhausted or
 * f raises an Err, which is swallowed.
 */
void star(alias f)(ref string s)
{
    for (;;)
    {
        if (s.length == 0)
            return;

        try
        {
            f(s);
        }
        catch (Err stop)
        {
            // f no longer matches: the repetition is over.
            return;
        }
    }
}
2702
/*
 * Apply f to text enclosed in matching quotes. A leading '\'' selects
 * single quotes; anything else requires double quotes.
 */
void quoted(alias f)(ref string s)
{
    import std.string : startsWith;

    // Pick the delimiter once and use it for both ends.
    immutable string delimiter = s.startsWith("'") ? "'" : "\"";

    checkLiteral(delimiter, s);
    f(s);
    checkLiteral(delimiter, s);
}
2720
/*
 * Grammar operator "f g": apply f, then g, to the same input. Whatever
 * either of them throws propagates to the caller.
 */
void seq(alias f, alias g)(ref string s)
{
    f(s);   // first production
    g(s);   // second production, starting where the first stopped
}
2726 }
2727
/*
 * Check an entire XML document for well-formedness
 *
 * Params:
 *      s = the document to be checked, passed as a string
 *
 * Throws: CheckException if the document is not well formed
 *
 * CheckException's toString() method will yield the complete hierarchy of
 * parse failure (the XML equivalent of a stack trace), giving the line and
 * column number of every failure at every level.
 */
void check(string s) @safe pure
{
    // The checkers below advance s, so keep the full text for computing
    // positions. Passing the advanced s to complete() made "Junk found after
    // document" always report line 1, column 1.
    string entire = s;
    try
    {
        checkChars(s);
        checkDocument(s);
        if (s.length != 0) throw new Err(s,"Junk found after document");
    }
    catch (Err e)
    {
        // Fill in line/column information relative to the whole document.
        e.complete(entire);
        throw e;
    }
}
2754
// check() must reject a document whose end tag name differs from its start
// tag name (<genre> ... </genres>) and say so in the error report.
@system pure unittest
{
    import std.string : indexOf;

    bool rejected = false;
    try
    {
        check(q"[<?xml version="1.0"?>
<catalog>
<book id="bk101">
<author>Gambardella, Matthew</author>
<title>XML Developer's Guide</title>
<genre>Computer</genre>
<price>44.95</price>
<publish_date>2000-10-01</publish_date>
<description>An in-depth look at creating applications
with XML.</description>
</book>
<book id="bk102">
<author>Ralls, Kim</author>
<title>Midnight Rain</title>
<genre>Fantasy</genres>
<price>5.95</price>
<publish_date>2000-12-16</publish_date>
<description>A former architect battles corporate zombies,
an evil sorceress, and her own childhood to become queen
of the world.</description>
</book>
<book id="bk103">
<author>Corets, Eva</author>
<title>Maeve Ascendant</title>
<genre>Fantasy</genre>
<price>5.95</price>
<publish_date>2000-11-17</publish_date>
<description>After the collapse of a nanotechnology
society in England, the young survivors lay the
foundation for a new society.</description>
</book>
</catalog>
]");
    }
    catch (CheckException e)
    {
        rejected = true;
        immutable found = e.toString().indexOf("end tag name \"genres\" differs"~
            " from start tag name \"genre\"");
        assert(found != -1);
    }
    // Reaching this point without an exception means the bad tag was missed.
    assert(rejected);
}
2803
// A small well-formed document containing a comment must pass check().
@system unittest
{
    string document = q"EOS
<?xml version="1.0"?>
<set>
<one>A</one>
<!-- comment -->
<two>B</two>
</set>
EOS";
    try
    {
        check(document);
    }
    catch (CheckException failure)
    {
        // Surface the full parse-failure report in the assertion message.
        assert(0, failure.toString());
    }
}
2823
// Attribute values delimited by one kind of quote may contain the other kind.
@system unittest
{
    string source = `<?xml version="1.0" encoding='UTF-8'?><r><stream:stream
xmlns:stream="http://etherx.'jabber'.org/streams"
xmlns="jabber:'client'" from='jid.pl' id="587a5767"
xml:lang="en" version="1.0" attr='a"b"c'>
</stream:stream></r>`;

    auto parser = new DocumentParser(source);
    bool handlerRan = false;
    parser.onStartTag["stream:stream"] = (ElementParser p) {
        assert(p.tag.attr["xmlns"] == "jabber:'client'");
        assert(p.tag.attr["from"] == "jid.pl");
        assert(p.tag.attr["attr"] == "a\"b\"c");
        handlerRan = true;
    };
    parser.parse();
    // Make sure the assertions inside the handler were actually executed.
    assert(handlerRan);
}
2843
// "&amp;" in attribute values and in element text must be decoded to '&'.
// The document has to spell the ampersand as the entity "&amp;": a bare '&'
// is not well-formed XML, and the assertions below expect the decoded form.
@system unittest
{
    string s = q"EOS
<?xml version="1.0" encoding="utf-8"?> <Tests>
<Test thing="What &amp; Up">What &amp; Up Second</Test>
</Tests>
EOS";
    auto xml = new DocumentParser(s);

    xml.onStartTag["Test"] = (ElementParser xml) {
        assert(xml.tag.attr["thing"] == "What & Up");
    };

    xml.onEndTag["Test"] = (in Element e) {
        assert(e.text() == "What & Up Second");
    };
    xml.parse();
}
2862
// Parsing and re-serialising a tag must keep escaped attribute characters.
// The value is written with entities (&quot; and &gt;): raw '"' and '>'
// inside a double-quoted value would end the value early and make the tag
// malformed, so the round trip could never succeed.
@system unittest
{
    string s = `<tag attr="&quot;value&gt;" />`;
    auto doc = new Document(s);
    assert(doc.toString() == s);
}
2869
/* The base class for exceptions thrown by this module */
class XMLException : Exception
{
    /* Params: msg = text describing the failure, forwarded to Exception */
    this(string msg) @safe pure
    {
        super(msg);
    }
}
2872
2873 // Other exceptions
2874
// Thrown during Comment constructor
class CommentException : XMLException
{
    // Private constructor: msg is passed straight to XMLException.
    private this(string msg) @safe pure
    {
        super(msg);
    }
}
2878
// Thrown during CData constructor
class CDataException : XMLException
{
    // Private constructor: msg is passed straight to XMLException.
    private this(string msg) @safe pure
    {
        super(msg);
    }
}
2882
// Thrown during XMLInstruction constructor
class XIException : XMLException
{
    // Private constructor: msg is passed straight to XMLException.
    private this(string msg) @safe pure
    {
        super(msg);
    }
}
2886
// Thrown during ProcessingInstruction constructor
class PIException : XMLException
{
    // Private constructor: msg is passed straight to XMLException.
    private this(string msg) @safe pure
    {
        super(msg);
    }
}
2890
// Thrown during Text constructor
class TextException : XMLException
{
    // Private constructor: msg is passed straight to XMLException.
    private this(string msg) @safe pure
    {
        super(msg);
    }
}
2894
// Thrown during decode()
class DecodeException : XMLException
{
    // Private constructor: msg is passed straight to XMLException.
    private this(string msg) @safe pure
    {
        super(msg);
    }
}
2898
// Thrown if comparing with wrong type
class InvalidTypeException : XMLException
{
    // Private constructor: msg is passed straight to XMLException.
    private this(string msg) @safe pure
    {
        super(msg);
    }
}
2902
// Thrown when parsing for Tags
class TagException : XMLException
{
    // Private constructor: msg is passed straight to XMLException.
    private this(string msg) @safe pure
    {
        super(msg);
    }
}
2906
/*
 * Thrown during check()
 *
 * Instances form a chain through `err`; toString() prints the linked
 * exception's report first and then this one's own line.
 */
class CheckException : XMLException
{
    CheckException err; // Parent in hierarchy
    private string tail; // Input that remained when the failure was recorded
    /*
     * Name of production rule which failed to parse,
     * or specific error message
     */
    string msg;
    size_t line = 0; // Line number at which parse failure occurred
    size_t column = 0; // Column number at which parse failure occurred

    /*
     * Params:
     *  tail = the unparsed remainder of the input at the point of failure
     *  msg  = rule name or specific error message
     *  err  = linked exception, or null
     */
    private this(string tail, string msg, Err err = null) @safe pure
    {
        super(null);
        this.tail = tail;
        this.msg = msg;
        this.err = err;
    }

    /*
     * Compute `line` and `column` (both 1-based) for this exception and for
     * every exception linked through `err`.
     *
     * Params: entire = the complete text, of which `tail` is the final part
     */
    private void complete(string entire) @safe pure
    {
        import std.string : count, lastIndexOf;
        import std.utf : toUTF32;

        // Everything that precedes the failure position.
        string consumed = entire[0 .. $ - tail.length];
        // Start of the line containing the failure (0 when no '\n' precedes it).
        immutable ptrdiff_t lineStart = consumed.lastIndexOf('\n') + 1;

        line = consumed.count("\n") + 1;
        // Columns are counted in code points, hence the UTF-32 conversion.
        column = toUTF32(consumed[lineStart .. $]).length + 1;

        if (err !is null)
            err.complete(entire);
    }

    /*
     * Returns: the linked exception's report (if any) followed by
     * "Line L, column C: msg\n"; the position prefix is omitted while
     * `line` is still 0.
     */
    override string toString() const @safe pure
    {
        import std.format : format;

        string report;
        if (line != 0)
            report = format("Line %d, column %d: ",line,column);
        report ~= msg;
        report ~= '\n';
        return (err is null) ? report : err.toString() ~ report;
    }
}
2955
2956 private alias Err = CheckException;
2957
2958 // Private helper functions
2959
2960 private
2961 {
/*
 * Downcast o to T.
 *
 * Returns: o viewed as a T
 * Throws: InvalidTypeException when the cast yields null
 */
inout(T) toType(T)(inout return scope Object o)
{
    T converted = cast(T)(o);
    if (converted !is null)
        return converted;

    throw new InvalidTypeException("Attempt to compare a "
        ~ T.stringof ~ " with an instance of another type");
}
2972
/*
 * Remove the first n code units from s and return them.
 *
 * Params:
 *  s = input; shortened by the returned prefix
 *  n = number of code units to take; -1 (i.e. size_t.max) takes everything
 */
string chop(ref string s, size_t n) @safe pure nothrow
{
    // A "not found" index of -1 arrives here as size_t.max.
    immutable size_t cut = (n == -1) ? s.length : n;
    string prefix = s[0 .. cut];
    s = s[cut .. $];
    return prefix;
}
2980
/*
 * Consume c from the front of s if it is there.
 *
 * Returns: true when a character was consumed, false otherwise
 * (including when s is empty).
 */
bool optc(ref string s, char c) @safe pure nothrow
{
    if (s.length == 0 || s[0] != c)
        return false;

    s = s[1 .. $];
    return true;
}
2987
/*
 * Consume c from the front of s.
 *
 * Throws: TagException (with an empty message) when s is empty or starts
 * with a different character.
 */
void reqc(ref string s, char c) @safe pure
{
    immutable bool matches = s.length != 0 && s[0] == c;
    if (!matches)
        throw new TagException("");
    s = s[1 .. $];
}
2993
/*
 * Consume the first character of s, which must be one of `chars`.
 *
 * Returns: the character that was consumed
 * Throws: TagException (with an empty message) when s is empty or its first
 * character does not occur in `chars`.
 */
char requireOneOf(ref string s, string chars) @safe pure
{
    import std.string : indexOf;

    immutable bool acceptable = s.length != 0 && indexOf(chars, s[0]) != -1;
    if (!acceptable)
        throw new TagException("");

    immutable char first = s[0];
    s = s[1 .. $];
    return first;
}
3004
3005 alias hash = .hashOf;
3006
// Definitions from the XML specification
//
// Every table is a flat, ascending list of inclusive code point ranges stored
// as consecutive (first, last) pairs - the layout that lookup() searches.

// Code point ranges listed for "Char".
immutable int[] CharTable = [0x9,0x9,0xA,0xA,0xD,0xD,0x20,0xD7FF,0xE000,0xFFFD,
    0x10000,0x10FFFF];

// Code point ranges listed for "BaseChar".
immutable int[] BaseCharTable = [0x0041,0x005A,0x0061,0x007A,0x00C0,0x00D6,0x00D8,
    0x00F6,0x00F8,0x00FF,0x0100,0x0131,0x0134,0x013E,0x0141,0x0148,0x014A,
    0x017E,0x0180,0x01C3,0x01CD,0x01F0,0x01F4,0x01F5,0x01FA,0x0217,0x0250,
    0x02A8,0x02BB,0x02C1,0x0386,0x0386,0x0388,0x038A,0x038C,0x038C,0x038E,
    0x03A1,0x03A3,0x03CE,0x03D0,0x03D6,0x03DA,0x03DA,0x03DC,0x03DC,0x03DE,
    0x03DE,0x03E0,0x03E0,0x03E2,0x03F3,0x0401,0x040C,0x040E,0x044F,0x0451,
    0x045C,0x045E,0x0481,0x0490,0x04C4,0x04C7,0x04C8,0x04CB,0x04CC,0x04D0,
    0x04EB,0x04EE,0x04F5,0x04F8,0x04F9,0x0531,0x0556,0x0559,0x0559,0x0561,
    0x0586,0x05D0,0x05EA,0x05F0,0x05F2,0x0621,0x063A,0x0641,0x064A,0x0671,
    0x06B7,0x06BA,0x06BE,0x06C0,0x06CE,0x06D0,0x06D3,0x06D5,0x06D5,0x06E5,
    0x06E6,0x0905,0x0939,0x093D,0x093D,0x0958,0x0961,0x0985,0x098C,0x098F,
    0x0990,0x0993,0x09A8,0x09AA,0x09B0,0x09B2,0x09B2,0x09B6,0x09B9,0x09DC,
    0x09DD,0x09DF,0x09E1,0x09F0,0x09F1,0x0A05,0x0A0A,0x0A0F,0x0A10,0x0A13,
    0x0A28,0x0A2A,0x0A30,0x0A32,0x0A33,0x0A35,0x0A36,0x0A38,0x0A39,0x0A59,
    0x0A5C,0x0A5E,0x0A5E,0x0A72,0x0A74,0x0A85,0x0A8B,0x0A8D,0x0A8D,0x0A8F,
    0x0A91,0x0A93,0x0AA8,0x0AAA,0x0AB0,0x0AB2,0x0AB3,0x0AB5,0x0AB9,0x0ABD,
    0x0ABD,0x0AE0,0x0AE0,0x0B05,0x0B0C,0x0B0F,0x0B10,0x0B13,0x0B28,0x0B2A,
    0x0B30,0x0B32,0x0B33,0x0B36,0x0B39,0x0B3D,0x0B3D,0x0B5C,0x0B5D,0x0B5F,
    0x0B61,0x0B85,0x0B8A,0x0B8E,0x0B90,0x0B92,0x0B95,0x0B99,0x0B9A,0x0B9C,
    0x0B9C,0x0B9E,0x0B9F,0x0BA3,0x0BA4,0x0BA8,0x0BAA,0x0BAE,0x0BB5,0x0BB7,
    0x0BB9,0x0C05,0x0C0C,0x0C0E,0x0C10,0x0C12,0x0C28,0x0C2A,0x0C33,0x0C35,
    0x0C39,0x0C60,0x0C61,0x0C85,0x0C8C,0x0C8E,0x0C90,0x0C92,0x0CA8,0x0CAA,
    0x0CB3,0x0CB5,0x0CB9,0x0CDE,0x0CDE,0x0CE0,0x0CE1,0x0D05,0x0D0C,0x0D0E,
    0x0D10,0x0D12,0x0D28,0x0D2A,0x0D39,0x0D60,0x0D61,0x0E01,0x0E2E,0x0E30,
    0x0E30,0x0E32,0x0E33,0x0E40,0x0E45,0x0E81,0x0E82,0x0E84,0x0E84,0x0E87,
    0x0E88,0x0E8A,0x0E8A,0x0E8D,0x0E8D,0x0E94,0x0E97,0x0E99,0x0E9F,0x0EA1,
    0x0EA3,0x0EA5,0x0EA5,0x0EA7,0x0EA7,0x0EAA,0x0EAB,0x0EAD,0x0EAE,0x0EB0,
    0x0EB0,0x0EB2,0x0EB3,0x0EBD,0x0EBD,0x0EC0,0x0EC4,0x0F40,0x0F47,0x0F49,
    0x0F69,0x10A0,0x10C5,0x10D0,0x10F6,0x1100,0x1100,0x1102,0x1103,0x1105,
    0x1107,0x1109,0x1109,0x110B,0x110C,0x110E,0x1112,0x113C,0x113C,0x113E,
    0x113E,0x1140,0x1140,0x114C,0x114C,0x114E,0x114E,0x1150,0x1150,0x1154,
    0x1155,0x1159,0x1159,0x115F,0x1161,0x1163,0x1163,0x1165,0x1165,0x1167,
    0x1167,0x1169,0x1169,0x116D,0x116E,0x1172,0x1173,0x1175,0x1175,0x119E,
    0x119E,0x11A8,0x11A8,0x11AB,0x11AB,0x11AE,0x11AF,0x11B7,0x11B8,0x11BA,
    0x11BA,0x11BC,0x11C2,0x11EB,0x11EB,0x11F0,0x11F0,0x11F9,0x11F9,0x1E00,
    0x1E9B,0x1EA0,0x1EF9,0x1F00,0x1F15,0x1F18,0x1F1D,0x1F20,0x1F45,0x1F48,
    0x1F4D,0x1F50,0x1F57,0x1F59,0x1F59,0x1F5B,0x1F5B,0x1F5D,0x1F5D,0x1F5F,
    0x1F7D,0x1F80,0x1FB4,0x1FB6,0x1FBC,0x1FBE,0x1FBE,0x1FC2,0x1FC4,0x1FC6,
    0x1FCC,0x1FD0,0x1FD3,0x1FD6,0x1FDB,0x1FE0,0x1FEC,0x1FF2,0x1FF4,0x1FF6,
    0x1FFC,0x2126,0x2126,0x212A,0x212B,0x212E,0x212E,0x2180,0x2182,0x3041,
    0x3094,0x30A1,0x30FA,0x3105,0x312C,0xAC00,0xD7A3];

// Code point ranges listed for "Ideographic".
immutable int[] IdeographicTable = [0x3007,0x3007,0x3021,0x3029,0x4E00,0x9FA5];

// Code point ranges listed for "CombiningChar".
immutable int[] CombiningCharTable = [0x0300,0x0345,0x0360,0x0361,0x0483,0x0486,
    0x0591,0x05A1,0x05A3,0x05B9,0x05BB,0x05BD,0x05BF,0x05BF,0x05C1,0x05C2,
    0x05C4,0x05C4,0x064B,0x0652,0x0670,0x0670,0x06D6,0x06DC,0x06DD,0x06DF,
    0x06E0,0x06E4,0x06E7,0x06E8,0x06EA,0x06ED,0x0901,0x0903,0x093C,0x093C,
    0x093E,0x094C,0x094D,0x094D,0x0951,0x0954,0x0962,0x0963,0x0981,0x0983,
    0x09BC,0x09BC,0x09BE,0x09BE,0x09BF,0x09BF,0x09C0,0x09C4,0x09C7,0x09C8,
    0x09CB,0x09CD,0x09D7,0x09D7,0x09E2,0x09E3,0x0A02,0x0A02,0x0A3C,0x0A3C,
    0x0A3E,0x0A3E,0x0A3F,0x0A3F,0x0A40,0x0A42,0x0A47,0x0A48,0x0A4B,0x0A4D,
    0x0A70,0x0A71,0x0A81,0x0A83,0x0ABC,0x0ABC,0x0ABE,0x0AC5,0x0AC7,0x0AC9,
    0x0ACB,0x0ACD,0x0B01,0x0B03,0x0B3C,0x0B3C,0x0B3E,0x0B43,0x0B47,0x0B48,
    0x0B4B,0x0B4D,0x0B56,0x0B57,0x0B82,0x0B83,0x0BBE,0x0BC2,0x0BC6,0x0BC8,
    0x0BCA,0x0BCD,0x0BD7,0x0BD7,0x0C01,0x0C03,0x0C3E,0x0C44,0x0C46,0x0C48,
    0x0C4A,0x0C4D,0x0C55,0x0C56,0x0C82,0x0C83,0x0CBE,0x0CC4,0x0CC6,0x0CC8,
    0x0CCA,0x0CCD,0x0CD5,0x0CD6,0x0D02,0x0D03,0x0D3E,0x0D43,0x0D46,0x0D48,
    0x0D4A,0x0D4D,0x0D57,0x0D57,0x0E31,0x0E31,0x0E34,0x0E3A,0x0E47,0x0E4E,
    0x0EB1,0x0EB1,0x0EB4,0x0EB9,0x0EBB,0x0EBC,0x0EC8,0x0ECD,0x0F18,0x0F19,
    0x0F35,0x0F35,0x0F37,0x0F37,0x0F39,0x0F39,0x0F3E,0x0F3E,0x0F3F,0x0F3F,
    0x0F71,0x0F84,0x0F86,0x0F8B,0x0F90,0x0F95,0x0F97,0x0F97,0x0F99,0x0FAD,
    0x0FB1,0x0FB7,0x0FB9,0x0FB9,0x20D0,0x20DC,0x20E1,0x20E1,0x302A,0x302F,
    0x3099,0x3099,0x309A,0x309A];

// Code point ranges listed for "Digit".
immutable int[] DigitTable = [0x0030,0x0039,0x0660,0x0669,0x06F0,0x06F9,0x0966,
    0x096F,0x09E6,0x09EF,0x0A66,0x0A6F,0x0AE6,0x0AEF,0x0B66,0x0B6F,0x0BE7,
    0x0BEF,0x0C66,0x0C6F,0x0CE6,0x0CEF,0x0D66,0x0D6F,0x0E50,0x0E59,0x0ED0,
    0x0ED9,0x0F20,0x0F29];

// Code point ranges listed for "Extender".
immutable int[] ExtenderTable = [0x00B7,0x00B7,0x02D0,0x02D0,0x02D1,0x02D1,0x0387,
    0x0387,0x0640,0x0640,0x0E46,0x0E46,0x0EC6,0x0EC6,0x3005,0x3005,0x3031,
    0x3035,0x309D,0x309E,0x30FC,0x30FE];
3079
/*
 * Binary search for c in a table of inclusive ranges.
 *
 * Params:
 *  table = consecutive (first, last) pairs in ascending order
 *  c     = value to look for
 *
 * Returns: true when c lies inside one of the ranges
 */
bool lookup(const(int)[] table, int c) @safe @nogc nothrow pure
{
    for (auto remaining = table; remaining.length != 0; )
    {
        // Index of the middle pair's first element; clearing the low bit
        // keeps the index even so it always addresses a range start.
        immutable pairStart = (remaining.length >> 1) & ~1;

        if (c < remaining[pairStart])
            remaining = remaining[0 .. pairStart];          // search lower half
        else if (c > remaining[pairStart + 1])
            remaining = remaining[pairStart + 2 .. $];      // search upper half
        else
            return true;                                    // inside this range
    }
    return false;
}
3097
/*
 * Build a short preview of s: code units below 0x20 or above 0x7F are shown
 * as '.', and once 40 code units have been produced "___" is appended and
 * the rest of s is dropped.
 */
string startOf(string s) @safe nothrow pure
{
    string preview;
    foreach (idx; 0 .. s.length)
    {
        immutable char unit = s[idx];
        immutable bool keep = unit >= 0x20 && unit <= 0x7F;
        preview ~= keep ? unit : '.';

        if (preview.length >= 40)
        {
            preview ~= "___";   // marks the cut-off
            break;
        }
    }
    return preview;
}
3108
/*
 * Abort by throwing an XMLException.
 *
 * Params: s = message for the exception (null by default)
 */
void exit(string s = null)
{
    auto failure = new XMLException(s);
    throw failure;
}
3113 }
3114