1// Copyright 2009 The Go Authors.  All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package xml
6
7import (
8	"bytes"
9	"fmt"
10	"io"
11	"reflect"
12	"strings"
13	"testing"
14	"unicode/utf8"
15)
16
// testInput is the XML document decoded by TestRawToken and TestToken.
// It contains a processing instruction, a DOCTYPE directive, namespace
// declarations (including a redeclaration of the "tag" prefix on <outer>),
// predefined and numeric character references, two custom entities, a
// self-closing element, a CDATA section, and a trailing comment that is not
// followed by a newline. The "\r\n\t" spliced into the <body> start tag puts
// a carriage return, newline, and tab inside the tag itself.
const testInput = `
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
  "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<body xmlns:foo="ns1" xmlns="ns2" xmlns:tag="ns3" ` +
	"\r\n\t" + `  >
  <hello lang="en">World &lt;&gt;&apos;&quot; &#x767d;&#40300;翔</hello>
  <query>&何; &is-it;</query>
  <goodbye />
  <outer foo:attr="value" xmlns:tag="ns4">
    <inner/>
  </outer>
  <tag:name>
    <![CDATA[Some text here.]]>
  </tag:name>
</body><!-- missing final newline -->`

// testEntity supplies replacement text for the two custom entities that
// appear in testInput's <query> element; the tests assign it to
// Decoder.Entity.
var testEntity = map[string]string{"何": "What", "is-it": "is it?"}
35
// rawTokens is the sequence TestRawToken expects RawToken to produce for
// testInput. Element and attribute names carry the prefix exactly as written
// in the document ("xmlns", "foo", "tag") in the first Name field; compare
// cookedTokens, where those positions hold "ns1"/"ns2"/"ns3" instead.
// Elements without attributes are listed with an empty, non-nil []Attr{}
// because the comparison uses reflect.DeepEqual, which distinguishes an
// empty slice from a nil one.
var rawTokens = []Token{
	CharData("\n"),
	ProcInst{"xml", []byte(`version="1.0" encoding="UTF-8"`)},
	CharData("\n"),
	Directive(`DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
  "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"`),
	CharData("\n"),
	StartElement{Name{"", "body"}, []Attr{{Name{"xmlns", "foo"}, "ns1"}, {Name{"", "xmlns"}, "ns2"}, {Name{"xmlns", "tag"}, "ns3"}}},
	CharData("\n  "),
	StartElement{Name{"", "hello"}, []Attr{{Name{"", "lang"}, "en"}}},
	CharData("World <>'\" 白鵬翔"),
	EndElement{Name{"", "hello"}},
	CharData("\n  "),
	StartElement{Name{"", "query"}, []Attr{}},
	CharData("What is it?"),
	EndElement{Name{"", "query"}},
	CharData("\n  "),
	StartElement{Name{"", "goodbye"}, []Attr{}},
	EndElement{Name{"", "goodbye"}},
	CharData("\n  "),
	StartElement{Name{"", "outer"}, []Attr{{Name{"foo", "attr"}, "value"}, {Name{"xmlns", "tag"}, "ns4"}}},
	CharData("\n    "),
	StartElement{Name{"", "inner"}, []Attr{}},
	EndElement{Name{"", "inner"}},
	CharData("\n  "),
	EndElement{Name{"", "outer"}},
	CharData("\n  "),
	StartElement{Name{"tag", "name"}, []Attr{}},
	CharData("\n    "),
	CharData("Some text here."),
	CharData("\n  "),
	EndElement{Name{"tag", "name"}},
	CharData("\n"),
	EndElement{Name{"", "body"}},
	Comment(" missing final newline "),
}
72
// cookedTokens is the sequence TestToken expects Token to produce for
// testInput. It mirrors rawTokens except that the first Name field of
// elements and prefixed attributes holds the declared namespace value
// ("ns1", "ns2", "ns3") rather than the literal prefix. The xmlns
// declaration attributes themselves are listed unchanged.
var cookedTokens = []Token{
	CharData("\n"),
	ProcInst{"xml", []byte(`version="1.0" encoding="UTF-8"`)},
	CharData("\n"),
	Directive(`DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
  "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"`),
	CharData("\n"),
	StartElement{Name{"ns2", "body"}, []Attr{{Name{"xmlns", "foo"}, "ns1"}, {Name{"", "xmlns"}, "ns2"}, {Name{"xmlns", "tag"}, "ns3"}}},
	CharData("\n  "),
	StartElement{Name{"ns2", "hello"}, []Attr{{Name{"", "lang"}, "en"}}},
	CharData("World <>'\" 白鵬翔"),
	EndElement{Name{"ns2", "hello"}},
	CharData("\n  "),
	StartElement{Name{"ns2", "query"}, []Attr{}},
	CharData("What is it?"),
	EndElement{Name{"ns2", "query"}},
	CharData("\n  "),
	StartElement{Name{"ns2", "goodbye"}, []Attr{}},
	EndElement{Name{"ns2", "goodbye"}},
	CharData("\n  "),
	StartElement{Name{"ns2", "outer"}, []Attr{{Name{"ns1", "attr"}, "value"}, {Name{"xmlns", "tag"}, "ns4"}}},
	CharData("\n    "),
	StartElement{Name{"ns2", "inner"}, []Attr{}},
	EndElement{Name{"ns2", "inner"}},
	CharData("\n  "),
	EndElement{Name{"ns2", "outer"}},
	CharData("\n  "),
	// Expected as "ns3": this element sits outside <outer>, where the
	// "tag" prefix is redeclared as "ns4".
	StartElement{Name{"ns3", "name"}, []Attr{}},
	CharData("\n    "),
	CharData("Some text here."),
	CharData("\n  "),
	EndElement{Name{"ns3", "name"}},
	CharData("\n"),
	EndElement{Name{"ns2", "body"}},
	Comment(" missing final newline "),
}
109
// testInputAltEncoding is a small document whose XML declaration names the
// made-up encoding "x-testing-uppercase". It is used by the
// TestRawTokenAltEncoding* tests.
const testInputAltEncoding = `
<?xml version="1.0" encoding="x-testing-uppercase"?>
<TAG>VALUE</TAG>`

// rawTokensAltEncoding is the sequence TestRawTokenAltEncoding expects for
// testInputAltEncoding. Everything after the declaration is expected in
// lower case because that test installs a CharsetReader that down-cases
// ASCII letters.
var rawTokensAltEncoding = []Token{
	CharData("\n"),
	ProcInst{"xml", []byte(`version="1.0" encoding="x-testing-uppercase"`)},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("value"),
	EndElement{Name{"", "tag"}},
}
122
// xmlInput lists malformed documents. TestSyntax reads tokens from each one
// until an error occurs and requires that error to be a *SyntaxError.
var xmlInput = []string{
	// Inputs that end in the middle of a construct (unexpected EOF).
	"<",
	"<t",
	"<t ",
	"<t/",
	"<!",
	"<!-",
	"<!--",
	"<!--c-",
	"<!--c--",
	"<!d",
	"<t></",
	"<t></t",
	"<?",
	"<?p",
	"<t a",
	"<t a=",
	"<t a='",
	"<t a=''",
	"<t/><![",
	"<t/><![C",
	"<t/><![CDATA[d",
	"<t/><![CDATA[d]",
	"<t/><![CDATA[d]]",

	// Other syntax errors.
	"<>",
	"<t/a",
	"<0 />",
	"<?0 >",
	//	"<!0 >",	// let the Token() caller handle
	"</0>",
	"<t 0=''>",
	"<t a='&'>",
	"<t a='<'>",
	"<t>&nbspc;</t>",
	"<t a>",
	"<t a=>",
	"<t a=v>",
	//	"<![CDATA[d]]>",	// let the Token() caller handle
	"<t></e>",
	"<t></>",
	"<t></t!",
	"<t>cdata]]></t>",
}
169
170func TestRawToken(t *testing.T) {
171	d := NewDecoder(strings.NewReader(testInput))
172	d.Entity = testEntity
173	testRawToken(t, d, rawTokens)
174}
175
// nonStrictInput holds one <tag> element per malformed or unknown entity
// reference. TestNonStrictRawToken decodes it with Strict disabled and
// expects every reference to come back as literal text (see
// nonStrictTokens).
const nonStrictInput = `
<tag>non&entity</tag>
<tag>&unknown;entity</tag>
<tag>&#123</tag>
<tag>&#zzz;</tag>
<tag>&なまえ3;</tag>
<tag>&lt-gt;</tag>
<tag>&;</tag>
<tag>&0a;</tag>
`

// nonStringEntity maps the names "" and "0a" to replacement text.
// NOTE(review): nothing in the visible part of this file references it;
// presumably it was meant to be installed as Decoder.Entity in
// TestNonStrictRawToken to show that "&;" and "&0a;" are not substituted —
// confirm before wiring it in or removing it.
var nonStringEntity = map[string]string{"": "oops!", "0a": "oops!"}
188
// nonStrictTokens is the sequence TestNonStrictRawToken expects for
// nonStrictInput: each <tag> element's character data is the original
// text, ampersand included, with no entity substitution applied.
var nonStrictTokens = []Token{
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("non&entity"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("&unknown;entity"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("&#123"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("&#zzz;"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("&なまえ3;"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("&lt-gt;"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("&;"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("&0a;"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
}
224
225func TestNonStrictRawToken(t *testing.T) {
226	d := NewDecoder(strings.NewReader(nonStrictInput))
227	d.Strict = false
228	testRawToken(t, d, nonStrictTokens)
229}
230
// downCaser wraps an io.ByteReader and lower-cases ASCII capital letters as
// they are read. It stands in for a charset converter in
// TestRawTokenAltEncoding.
type downCaser struct {
	t *testing.T    // test to fail if Read is ever called
	r io.ByteReader // underlying byte source
}

// ReadByte returns the next byte from the wrapped reader, mapping 'A'..'Z'
// to 'a'..'z'. Any error from the wrapped reader is returned unchanged
// alongside the (possibly mapped) byte.
func (d *downCaser) ReadByte() (c byte, err error) {
	c, err = d.r.ReadByte()
	if 'A' <= c && c <= 'Z' {
		c = c - 'A' + 'a'
	}
	return c, err
}

// Read fails the test: the decoder is expected to consume this reader only
// through ReadByte.
func (d *downCaser) Read(p []byte) (int, error) {
	d.t.Fatalf("unexpected Read call on downCaser reader")
	// Fatalf does not return; the panic only satisfies the compiler's
	// requirement for a terminating statement.
	panic("unreachable")
}
248
249func TestRawTokenAltEncoding(t *testing.T) {
250	d := NewDecoder(strings.NewReader(testInputAltEncoding))
251	d.CharsetReader = func(charset string, input io.Reader) (io.Reader, error) {
252		if charset != "x-testing-uppercase" {
253			t.Fatalf("unexpected charset %q", charset)
254		}
255		return &downCaser{t, input.(io.ByteReader)}, nil
256	}
257	testRawToken(t, d, rawTokensAltEncoding)
258}
259
260func TestRawTokenAltEncodingNoConverter(t *testing.T) {
261	d := NewDecoder(strings.NewReader(testInputAltEncoding))
262	token, err := d.RawToken()
263	if token == nil {
264		t.Fatalf("expected a token on first RawToken call")
265	}
266	if err != nil {
267		t.Fatal(err)
268	}
269	token, err = d.RawToken()
270	if token != nil {
271		t.Errorf("expected a nil token; got %#v", token)
272	}
273	if err == nil {
274		t.Fatalf("expected an error on second RawToken call")
275	}
276	const encoding = "x-testing-uppercase"
277	if !strings.Contains(err.Error(), encoding) {
278		t.Errorf("expected error to contain %q; got error: %v",
279			encoding, err)
280	}
281}
282
283func testRawToken(t *testing.T, d *Decoder, rawTokens []Token) {
284	for i, want := range rawTokens {
285		have, err := d.RawToken()
286		if err != nil {
287			t.Fatalf("token %d: unexpected error: %s", i, err)
288		}
289		if !reflect.DeepEqual(have, want) {
290			var shave, swant string
291			if _, ok := have.(CharData); ok {
292				shave = fmt.Sprintf("CharData(%q)", have)
293			} else {
294				shave = fmt.Sprintf("%#v", have)
295			}
296			if _, ok := want.(CharData); ok {
297				swant = fmt.Sprintf("CharData(%q)", want)
298			} else {
299				swant = fmt.Sprintf("%#v", want)
300			}
301			t.Errorf("token %d = %s, want %s", i, shave, swant)
302		}
303	}
304}
305
// Ensure that directives (specifically !DOCTYPE) include the complete
// text of any nested directives, noting that < and > do not change
// nesting depth if they are in single or double quotes.

// nestedDirectivesInput holds one DOCTYPE per line, each with a nested
// ENTITY declaration whose quoted value contains a bare '<' or '>' (or a
// quote character of the other kind).
var nestedDirectivesInput = `
<!DOCTYPE [<!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]>
<!DOCTYPE [<!ENTITY xlt ">">]>
<!DOCTYPE [<!ENTITY xlt "<">]>
<!DOCTYPE [<!ENTITY xlt '>'>]>
<!DOCTYPE [<!ENTITY xlt '<'>]>
<!DOCTYPE [<!ENTITY xlt '">'>]>
<!DOCTYPE [<!ENTITY xlt "'<">]>
`
319
// nestedDirectivesTokens is the sequence TestNestedDirectives expects for
// nestedDirectivesInput: each line yields one Directive holding the full
// text between "<!" and the final ">", followed by the newline as CharData.
var nestedDirectivesTokens = []Token{
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]`),
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY xlt ">">]`),
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY xlt "<">]`),
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY xlt '>'>]`),
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY xlt '<'>]`),
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY xlt '">'>]`),
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY xlt "'<">]`),
	CharData("\n"),
}
337
338func TestNestedDirectives(t *testing.T) {
339	d := NewDecoder(strings.NewReader(nestedDirectivesInput))
340
341	for i, want := range nestedDirectivesTokens {
342		have, err := d.Token()
343		if err != nil {
344			t.Fatalf("token %d: unexpected error: %s", i, err)
345		}
346		if !reflect.DeepEqual(have, want) {
347			t.Errorf("token %d = %#v want %#v", i, have, want)
348		}
349	}
350}
351
352func TestToken(t *testing.T) {
353	d := NewDecoder(strings.NewReader(testInput))
354	d.Entity = testEntity
355
356	for i, want := range cookedTokens {
357		have, err := d.Token()
358		if err != nil {
359			t.Fatalf("token %d: unexpected error: %s", i, err)
360		}
361		if !reflect.DeepEqual(have, want) {
362			t.Errorf("token %d = %#v want %#v", i, have, want)
363		}
364	}
365}
366
367func TestSyntax(t *testing.T) {
368	for i := range xmlInput {
369		d := NewDecoder(strings.NewReader(xmlInput[i]))
370		var err error
371		for _, err = d.Token(); err == nil; _, err = d.Token() {
372		}
373		if _, ok := err.(*SyntaxError); !ok {
374			t.Fatalf(`xmlInput "%s": expected SyntaxError not received`, xmlInput[i])
375		}
376	}
377}
378
// allScalars has one field for each scalar kind that TestAllScalars
// unmarshals from testScalarsInput: booleans (spelled both as words and as
// 0/1 in the input), signed and unsigned integers of every width, floats, a
// string, and a pointer to string.
type allScalars struct {
	True1  bool
	True2  bool
	False1 bool
	False2 bool
	Int    int
	Int8   int8
	Int16  int16
	Int32  int32
	Int64  int64
	// Uint is a plain uint so that decoding into the platform-sized
	// unsigned type is covered, matching the field's name and the pattern
	// of its neighbors.
	Uint      uint
	Uint8     uint8
	Uint16    uint16
	Uint32    uint32
	Uint64    uint64
	Uintptr   uintptr
	Float32   float32
	Float64   float64
	String    string
	PtrString *string
}
400
// all is the value TestAllScalars expects Unmarshal to produce from
// testScalarsInput.
var all = allScalars{
	True1:     true,
	True2:     true,
	False1:    false,
	False2:    false,
	Int:       1,
	Int8:      -2,
	Int16:     3,
	Int32:     -4,
	Int64:     5,
	Uint:      6,
	Uint8:     7,
	Uint16:    8,
	Uint32:    9,
	Uint64:    10,
	Uintptr:   11,
	Float32:   13.0,
	Float64:   14.0,
	String:    "15",
	PtrString: &sixteen,
}

// sixteen backs all.PtrString; it is a package-level variable so that its
// address can be taken in the composite literal above.
var sixteen = "16"
424
// testScalarsInput is the document TestAllScalars unmarshals into an
// allScalars value. The booleans are written both as true/false and as
// 1/0. The <Float> element has no field of that name in allScalars, and
// the expected value in all carries nothing for it.
const testScalarsInput = `<allscalars>
	<True1>true</True1>
	<True2>1</True2>
	<False1>false</False1>
	<False2>0</False2>
	<Int>1</Int>
	<Int8>-2</Int8>
	<Int16>3</Int16>
	<Int32>-4</Int32>
	<Int64>5</Int64>
	<Uint>6</Uint>
	<Uint8>7</Uint8>
	<Uint16>8</Uint16>
	<Uint32>9</Uint32>
	<Uint64>10</Uint64>
	<Uintptr>11</Uintptr>
	<Float>12.0</Float>
	<Float32>13.0</Float32>
	<Float64>14.0</Float64>
	<String>15</String>
	<PtrString>16</PtrString>
</allscalars>`
447
448func TestAllScalars(t *testing.T) {
449	var a allScalars
450	err := Unmarshal([]byte(testScalarsInput), &a)
451
452	if err != nil {
453		t.Fatal(err)
454	}
455	if !reflect.DeepEqual(a, all) {
456		t.Errorf("have %+v want %+v", a, all)
457	}
458}
459
460type item struct {
461	Field_a string
462}
463
464func TestIssue569(t *testing.T) {
465	data := `<item><Field_a>abcd</Field_a></item>`
466	var i item
467	err := Unmarshal([]byte(data), &i)
468
469	if err != nil || i.Field_a != "abcd" {
470		t.Fatal("Expecting abcd")
471	}
472}
473
474func TestUnquotedAttrs(t *testing.T) {
475	data := "<tag attr=azAZ09:-_\t>"
476	d := NewDecoder(strings.NewReader(data))
477	d.Strict = false
478	token, err := d.Token()
479	if _, ok := err.(*SyntaxError); ok {
480		t.Errorf("Unexpected error: %v", err)
481	}
482	if token.(StartElement).Name.Local != "tag" {
483		t.Errorf("Unexpected tag name: %v", token.(StartElement).Name.Local)
484	}
485	attr := token.(StartElement).Attr[0]
486	if attr.Value != "azAZ09:-_" {
487		t.Errorf("Unexpected attribute value: %v", attr.Value)
488	}
489	if attr.Name.Local != "attr" {
490		t.Errorf("Unexpected attribute name: %v", attr.Name.Local)
491	}
492}
493
494func TestValuelessAttrs(t *testing.T) {
495	tests := [][3]string{
496		{"<p nowrap>", "p", "nowrap"},
497		{"<p nowrap >", "p", "nowrap"},
498		{"<input checked/>", "input", "checked"},
499		{"<input checked />", "input", "checked"},
500	}
501	for _, test := range tests {
502		d := NewDecoder(strings.NewReader(test[0]))
503		d.Strict = false
504		token, err := d.Token()
505		if _, ok := err.(*SyntaxError); ok {
506			t.Errorf("Unexpected error: %v", err)
507		}
508		if token.(StartElement).Name.Local != test[1] {
509			t.Errorf("Unexpected tag name: %v", token.(StartElement).Name.Local)
510		}
511		attr := token.(StartElement).Attr[0]
512		if attr.Value != test[2] {
513			t.Errorf("Unexpected attribute value: %v", attr.Value)
514		}
515		if attr.Name.Local != test[2] {
516			t.Errorf("Unexpected attribute name: %v", attr.Name.Local)
517		}
518	}
519}
520
521func TestCopyTokenCharData(t *testing.T) {
522	data := []byte("same data")
523	var tok1 Token = CharData(data)
524	tok2 := CopyToken(tok1)
525	if !reflect.DeepEqual(tok1, tok2) {
526		t.Error("CopyToken(CharData) != CharData")
527	}
528	data[1] = 'o'
529	if reflect.DeepEqual(tok1, tok2) {
530		t.Error("CopyToken(CharData) uses same buffer.")
531	}
532}
533
534func TestCopyTokenStartElement(t *testing.T) {
535	elt := StartElement{Name{"", "hello"}, []Attr{{Name{"", "lang"}, "en"}}}
536	var tok1 Token = elt
537	tok2 := CopyToken(tok1)
538	if tok1.(StartElement).Attr[0].Value != "en" {
539		t.Error("CopyToken overwrote Attr[0]")
540	}
541	if !reflect.DeepEqual(tok1, tok2) {
542		t.Error("CopyToken(StartElement) != StartElement")
543	}
544	tok1.(StartElement).Attr[0] = Attr{Name{"", "lang"}, "de"}
545	if reflect.DeepEqual(tok1, tok2) {
546		t.Error("CopyToken(CharData) uses same buffer.")
547	}
548}
549
550func TestSyntaxErrorLineNum(t *testing.T) {
551	testInput := "<P>Foo<P>\n\n<P>Bar</>\n"
552	d := NewDecoder(strings.NewReader(testInput))
553	var err error
554	for _, err = d.Token(); err == nil; _, err = d.Token() {
555	}
556	synerr, ok := err.(*SyntaxError)
557	if !ok {
558		t.Error("Expected SyntaxError.")
559	}
560	if synerr.Line != 3 {
561		t.Error("SyntaxError didn't have correct line number.")
562	}
563}
564
565func TestTrailingRawToken(t *testing.T) {
566	input := `<FOO></FOO>  `
567	d := NewDecoder(strings.NewReader(input))
568	var err error
569	for _, err = d.RawToken(); err == nil; _, err = d.RawToken() {
570	}
571	if err != io.EOF {
572		t.Fatalf("d.RawToken() = _, %v, want _, io.EOF", err)
573	}
574}
575
576func TestTrailingToken(t *testing.T) {
577	input := `<FOO></FOO>  `
578	d := NewDecoder(strings.NewReader(input))
579	var err error
580	for _, err = d.Token(); err == nil; _, err = d.Token() {
581	}
582	if err != io.EOF {
583		t.Fatalf("d.Token() = _, %v, want _, io.EOF", err)
584	}
585}
586
587func TestEntityInsideCDATA(t *testing.T) {
588	input := `<test><![CDATA[ &val=foo ]]></test>`
589	d := NewDecoder(strings.NewReader(input))
590	var err error
591	for _, err = d.Token(); err == nil; _, err = d.Token() {
592	}
593	if err != io.EOF {
594		t.Fatalf("d.Token() = _, %v, want _, io.EOF", err)
595	}
596}
597
// characterTests pairs documents containing disallowed characters or bad
// entity references with the exact SyntaxError message that
// TestDisallowedCharacters expects for each.
var characterTests = []struct {
	in  string // document to decode
	err string // expected SyntaxError.Msg
}{
	{
		in:  "\x12<doc/>",
		err: "illegal character code U+0012",
	},
	{
		in:  "<?xml version=\"1.0\"?>\x0b<doc/>",
		err: "illegal character code U+000B",
	},
	{
		in:  "\xef\xbf\xbe<doc/>",
		err: "illegal character code U+FFFE",
	},
	{
		in:  "<?xml version=\"1.0\"?><doc>\r\n<hiya/>\x07<toots/></doc>",
		err: "illegal character code U+0007",
	},
	{
		in:  "<?xml version=\"1.0\"?><doc \x12='value'>what's up</doc>",
		err: "expected attribute name in element",
	},
	{
		in:  "<doc>&abc\x01;</doc>",
		err: "invalid character entity &abc (no semicolon)",
	},
	{
		in:  "<doc>&\x01;</doc>",
		err: "invalid character entity & (no semicolon)",
	},
	{
		in:  "<doc>&\xef\xbf\xbe;</doc>",
		err: "invalid character entity &\uFFFE;",
	},
	{
		in:  "<doc>&hello;</doc>",
		err: "invalid character entity &hello;",
	},
}
612
613func TestDisallowedCharacters(t *testing.T) {
614
615	for i, tt := range characterTests {
616		d := NewDecoder(strings.NewReader(tt.in))
617		var err error
618
619		for err == nil {
620			_, err = d.Token()
621		}
622		synerr, ok := err.(*SyntaxError)
623		if !ok {
624			t.Fatalf("input %d d.Token() = _, %v, want _, *SyntaxError", i, err)
625		}
626		if synerr.Msg != tt.err {
627			t.Fatalf("input %d synerr.Msg wrong: want %q, got %q", i, tt.err, synerr.Msg)
628		}
629	}
630}
631
// procInstEncodingTest pairs an expected string with an obtained one.
// NOTE(review): no code in the visible part of this file uses this type;
// procInstTests below declares its own anonymous struct. Confirm whether
// anything else in the package still needs it.
type procInstEncodingTest struct {
	expect string
	got    string
}
635
// procInstTests lists processing-instruction bodies together with the
// encoding name TestProcInstEncoding expects procInstEncoding to extract
// from each. An unquoted encoding value is expected to yield "".
var procInstTests = []struct {
	input  string // text of the processing instruction
	expect string // expected return value of procInstEncoding
}{
	{input: `version="1.0" encoding="utf-8"`, expect: "utf-8"},
	{input: `version="1.0" encoding='utf-8'`, expect: "utf-8"},
	{input: `version="1.0" encoding='utf-8' `, expect: "utf-8"},
	{input: `version="1.0" encoding=utf-8`, expect: ""},
	{input: `encoding="FOO" `, expect: "FOO"},
}
645
646func TestProcInstEncoding(t *testing.T) {
647	for _, test := range procInstTests {
648		got := procInstEncoding(test.input)
649		if got != test.expect {
650			t.Errorf("procInstEncoding(%q) = %q; want %q", test.input, got, test.expect)
651		}
652	}
653}
654
// Ensure that directives containing comments still include the complete
// text of any nested directives, with the comments themselves left out.

// directivesWithCommentsInput holds DOCTYPE directives with comments placed
// before, after, and between nested declarations. The last line also
// contains "<!->" and "<!>", which are not comments, next to comments whose
// bodies are empty or contain '>'.
var directivesWithCommentsInput = `
<!DOCTYPE [<!-- a comment --><!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]>
<!DOCTYPE [<!ENTITY go "Golang"><!-- a comment-->]>
<!DOCTYPE <!-> <!> <!----> <!-->--> <!--->--> [<!ENTITY go "Golang"><!-- a comment-->]>
`
663
// directivesWithCommentsTokens is the sequence TestDirectivesWithComments
// expects for directivesWithCommentsInput. Every comment is absent from
// the expected directive text while the surrounding characters remain; in
// the last entry the spaces that separated the three removed comments are
// left as a run of four spaces before '['.
var directivesWithCommentsTokens = []Token{
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]`),
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY go "Golang">]`),
	CharData("\n"),
	Directive(`DOCTYPE <!-> <!>    [<!ENTITY go "Golang">]`),
	CharData("\n"),
}
673
674func TestDirectivesWithComments(t *testing.T) {
675	d := NewDecoder(strings.NewReader(directivesWithCommentsInput))
676
677	for i, want := range directivesWithCommentsTokens {
678		have, err := d.Token()
679		if err != nil {
680			t.Fatalf("token %d: unexpected error: %s", i, err)
681		}
682		if !reflect.DeepEqual(have, want) {
683			t.Errorf("token %d = %#v want %#v", i, have, want)
684		}
685	}
686}
687
// errWriter is a writer that can never be written to: every Write call
// fails.
type errWriter struct{}

// Write ignores p, reports zero bytes written, and returns an error whose
// text is "unwritable".
func (errWriter) Write(p []byte) (n int, err error) {
	err = fmt.Errorf("unwritable")
	return 0, err
}
692
693func TestEscapeTextIOErrors(t *testing.T) {
694	expectErr := "unwritable"
695	err := EscapeText(errWriter{}, []byte{'A'})
696
697	if err == nil || err.Error() != expectErr {
698		t.Errorf("have %v, want %v", err, expectErr)
699	}
700}
701
702func TestEscapeTextInvalidChar(t *testing.T) {
703	input := []byte("A \x00 terminated string.")
704	expected := "A \uFFFD terminated string."
705
706	buff := new(bytes.Buffer)
707	if err := EscapeText(buff, input); err != nil {
708		t.Fatalf("have %v, want nil", err)
709	}
710	text := buff.String()
711
712	if text != expected {
713		t.Errorf("have %v, want %v", text, expected)
714	}
715}
716
717func TestIssue5880(t *testing.T) {
718	type T []byte
719	data, err := Marshal(T{192, 168, 0, 1})
720	if err != nil {
721		t.Errorf("Marshal error: %v", err)
722	}
723	if !utf8.Valid(data) {
724		t.Errorf("Marshal generated invalid UTF-8: %x", data)
725	}
726}
727