1// Copyright 2009 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package xml
6
7import (
8	"bytes"
9	"fmt"
10	"io"
11	"reflect"
12	"strings"
13	"testing"
14	"unicode/utf8"
15)
16
// testInput is the XML document decoded by TestRawToken and TestToken.
// It contains an XML declaration, a DOCTYPE directive, namespace
// declarations (default and prefixed), predefined, numeric and custom
// entity references, a self-closing element, a CDATA section and a
// trailing comment that is not followed by a newline.
// The spliced "\r\n\t" places a CR LF and a tab inside the <body> start tag.
const testInput = `
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
  "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<body xmlns:foo="ns1" xmlns="ns2" xmlns:tag="ns3" ` +
	"\r\n\t" + `  >
  <hello lang="en">World &lt;&gt;&apos;&quot; &#x767d;&#40300;翔</hello>
  <query>&何; &is-it;</query>
  <goodbye />
  <outer foo:attr="value" xmlns:tag="ns4">
    <inner/>
  </outer>
  <tag:name>
    <![CDATA[Some text here.]]>
  </tag:name>
</body><!-- missing final newline -->`
33
// testEntity maps the two custom entity names referenced inside the
// <query> element of testInput to their replacement text. TestRawToken
// and TestToken install it as the decoder's Entity map.
var testEntity = map[string]string{"何": "What", "is-it": "is it?"}
35
// rawTokens is the token sequence TestRawToken expects RawToken to
// produce for testInput. Names are expected untranslated: the prefix as
// written in the document (for example "tag", "foo" or "xmlns") appears
// in Name.Space, and unprefixed names have an empty Space.
var rawTokens = []Token{
	CharData("\n"),
	ProcInst{"xml", []byte(`version="1.0" encoding="UTF-8"`)},
	CharData("\n"),
	Directive(`DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
  "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"`),
	CharData("\n"),
	StartElement{Name{"", "body"}, []Attr{{Name{"xmlns", "foo"}, "ns1"}, {Name{"", "xmlns"}, "ns2"}, {Name{"xmlns", "tag"}, "ns3"}}},
	CharData("\n  "),
	StartElement{Name{"", "hello"}, []Attr{{Name{"", "lang"}, "en"}}},
	CharData("World <>'\" 白鵬翔"),
	EndElement{Name{"", "hello"}},
	CharData("\n  "),
	StartElement{Name{"", "query"}, []Attr{}},
	CharData("What is it?"),
	EndElement{Name{"", "query"}},
	CharData("\n  "),
	StartElement{Name{"", "goodbye"}, []Attr{}},
	EndElement{Name{"", "goodbye"}},
	CharData("\n  "),
	StartElement{Name{"", "outer"}, []Attr{{Name{"foo", "attr"}, "value"}, {Name{"xmlns", "tag"}, "ns4"}}},
	CharData("\n    "),
	StartElement{Name{"", "inner"}, []Attr{}},
	EndElement{Name{"", "inner"}},
	CharData("\n  "),
	EndElement{Name{"", "outer"}},
	CharData("\n  "),
	StartElement{Name{"tag", "name"}, []Attr{}},
	CharData("\n    "),
	CharData("Some text here."),
	CharData("\n  "),
	EndElement{Name{"tag", "name"}},
	CharData("\n"),
	EndElement{Name{"", "body"}},
	Comment(" missing final newline "),
}
72
// cookedTokens is the token sequence TestToken expects Token to produce
// for testInput. In contrast to rawTokens, element names carry a
// namespace value in Name.Space: unprefixed elements are expected in
// "ns2" (the default xmlns on <body>), foo:attr in "ns1", and tag:name in
// "ns3" (the xmlns:tag="ns4" declaration sits on <outer>, which is
// already closed when <tag:name> appears). The namespace-declaring
// attributes themselves are expected in the same form as in rawTokens.
var cookedTokens = []Token{
	CharData("\n"),
	ProcInst{"xml", []byte(`version="1.0" encoding="UTF-8"`)},
	CharData("\n"),
	Directive(`DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
  "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"`),
	CharData("\n"),
	StartElement{Name{"ns2", "body"}, []Attr{{Name{"xmlns", "foo"}, "ns1"}, {Name{"", "xmlns"}, "ns2"}, {Name{"xmlns", "tag"}, "ns3"}}},
	CharData("\n  "),
	StartElement{Name{"ns2", "hello"}, []Attr{{Name{"", "lang"}, "en"}}},
	CharData("World <>'\" 白鵬翔"),
	EndElement{Name{"ns2", "hello"}},
	CharData("\n  "),
	StartElement{Name{"ns2", "query"}, []Attr{}},
	CharData("What is it?"),
	EndElement{Name{"ns2", "query"}},
	CharData("\n  "),
	StartElement{Name{"ns2", "goodbye"}, []Attr{}},
	EndElement{Name{"ns2", "goodbye"}},
	CharData("\n  "),
	StartElement{Name{"ns2", "outer"}, []Attr{{Name{"ns1", "attr"}, "value"}, {Name{"xmlns", "tag"}, "ns4"}}},
	CharData("\n    "),
	StartElement{Name{"ns2", "inner"}, []Attr{}},
	EndElement{Name{"ns2", "inner"}},
	CharData("\n  "),
	EndElement{Name{"ns2", "outer"}},
	CharData("\n  "),
	StartElement{Name{"ns3", "name"}, []Attr{}},
	CharData("\n    "),
	CharData("Some text here."),
	CharData("\n  "),
	EndElement{Name{"ns3", "name"}},
	CharData("\n"),
	EndElement{Name{"ns2", "body"}},
	Comment(" missing final newline "),
}
109
// testInputAltEncoding is a small document whose XML declaration names
// the made-up encoding "x-testing-uppercase". It is the input of
// TestRawTokenAltEncoding and TestRawTokenAltEncodingNoConverter.
const testInputAltEncoding = `
<?xml version="1.0" encoding="x-testing-uppercase"?>
<TAG>VALUE</TAG>`
113
// rawTokensAltEncoding is the RawToken sequence TestRawTokenAltEncoding
// expects for testInputAltEncoding. That test installs a CharsetReader
// that lower-cases ASCII letters, so <TAG>VALUE</TAG> is expected to be
// reported as tag/value.
var rawTokensAltEncoding = []Token{
	CharData("\n"),
	ProcInst{"xml", []byte(`version="1.0" encoding="x-testing-uppercase"`)},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("value"),
	EndElement{Name{"", "tag"}},
}
122
// xmlInput lists malformed documents. TestSyntax reads tokens from each
// one until an error occurs and requires that error to be a *SyntaxError.
// The first group are inputs that end prematurely; the second group are
// other syntax errors. Entries that are commented out are deliberately
// excluded (see the inline notes).
var xmlInput = []string{
	// unexpected EOF cases
	"<",
	"<t",
	"<t ",
	"<t/",
	"<!",
	"<!-",
	"<!--",
	"<!--c-",
	"<!--c--",
	"<!d",
	"<t></",
	"<t></t",
	"<?",
	"<?p",
	"<t a",
	"<t a=",
	"<t a='",
	"<t a=''",
	"<t/><![",
	"<t/><![C",
	"<t/><![CDATA[d",
	"<t/><![CDATA[d]",
	"<t/><![CDATA[d]]",

	// other Syntax errors
	"<>",
	"<t/a",
	"<0 />",
	"<?0 >",
	//	"<!0 >",	// let the Token() caller handle
	"</0>",
	"<t 0=''>",
	"<t a='&'>",
	"<t a='<'>",
	"<t>&nbspc;</t>",
	"<t a>",
	"<t a=>",
	"<t a=v>",
	//	"<![CDATA[d]]>",	// let the Token() caller handle
	"<t></e>",
	"<t></>",
	"<t></t!",
	"<t>cdata]]></t>",
}
169
170func TestRawToken(t *testing.T) {
171	d := NewDecoder(strings.NewReader(testInput))
172	d.Entity = testEntity
173	testRawToken(t, d, testInput, rawTokens)
174}
175
// nonStrictInput is the document decoded by TestNonStrictRawToken with
// Strict disabled. Each <tag> element contains an ampersand sequence that
// is not a well-formed reference to a known entity: a missing semicolon,
// an unknown name, a bad numeric reference, an empty name, or a name
// starting with a digit.
const nonStrictInput = `
<tag>non&entity</tag>
<tag>&unknown;entity</tag>
<tag>&#123</tag>
<tag>&#zzz;</tag>
<tag>&なまえ3;</tag>
<tag>&lt-gt;</tag>
<tag>&;</tag>
<tag>&0a;</tag>
`
186
// nonStringEntity maps the empty name and "0a" to replacement text.
// NOTE(review): nothing in the code visible here references this
// variable. Presumably it was meant to be installed as the decoder's
// Entity map in TestNonStrictRawToken, so that the "&;" and "&0a;" cases
// show such names are never substituted — confirm before wiring it up or
// removing it.
var nonStringEntity = map[string]string{"": "oops!", "0a": "oops!"}
188
// nonStrictTokens is the RawToken sequence TestNonStrictRawToken expects
// for nonStrictInput: every questionable ampersand sequence is expected
// to come back verbatim as CharData rather than being rejected or
// substituted.
var nonStrictTokens = []Token{
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("non&entity"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("&unknown;entity"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("&#123"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("&#zzz;"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("&なまえ3;"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("&lt-gt;"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("&;"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("&0a;"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
}
224
225func TestNonStrictRawToken(t *testing.T) {
226	d := NewDecoder(strings.NewReader(nonStrictInput))
227	d.Strict = false
228	testRawToken(t, d, nonStrictInput, nonStrictTokens)
229}
230
// downCaser wraps an io.ByteReader and lower-cases the ASCII letters it
// yields. It also has a Read method so it can be handed out as an
// io.Reader, but that method fails the test if it is ever called.
type downCaser struct {
	t *testing.T    // test to fail on an unexpected Read call
	r io.ByteReader // underlying byte source
}

// ReadByte returns the next byte from the wrapped reader, mapping 'A'-'Z'
// to 'a'-'z'. All other bytes and the reader's error pass through as is.
func (d *downCaser) ReadByte() (byte, error) {
	b, err := d.r.ReadByte()
	if 'A' <= b && b <= 'Z' {
		b += 'a' - 'A'
	}
	return b, err
}

// Read is never expected to be called; it aborts the test when it is.
func (d *downCaser) Read(p []byte) (int, error) {
	d.t.Fatalf("unexpected Read call on downCaser reader")
	panic("unreachable")
}
248
249func TestRawTokenAltEncoding(t *testing.T) {
250	d := NewDecoder(strings.NewReader(testInputAltEncoding))
251	d.CharsetReader = func(charset string, input io.Reader) (io.Reader, error) {
252		if charset != "x-testing-uppercase" {
253			t.Fatalf("unexpected charset %q", charset)
254		}
255		return &downCaser{t, input.(io.ByteReader)}, nil
256	}
257	testRawToken(t, d, testInputAltEncoding, rawTokensAltEncoding)
258}
259
260func TestRawTokenAltEncodingNoConverter(t *testing.T) {
261	d := NewDecoder(strings.NewReader(testInputAltEncoding))
262	token, err := d.RawToken()
263	if token == nil {
264		t.Fatalf("expected a token on first RawToken call")
265	}
266	if err != nil {
267		t.Fatal(err)
268	}
269	token, err = d.RawToken()
270	if token != nil {
271		t.Errorf("expected a nil token; got %#v", token)
272	}
273	if err == nil {
274		t.Fatalf("expected an error on second RawToken call")
275	}
276	const encoding = "x-testing-uppercase"
277	if !strings.Contains(err.Error(), encoding) {
278		t.Errorf("expected error to contain %q; got error: %v",
279			encoding, err)
280	}
281}
282
283func testRawToken(t *testing.T, d *Decoder, raw string, rawTokens []Token) {
284	lastEnd := int64(0)
285	for i, want := range rawTokens {
286		start := d.InputOffset()
287		have, err := d.RawToken()
288		end := d.InputOffset()
289		if err != nil {
290			t.Fatalf("token %d: unexpected error: %s", i, err)
291		}
292		if !reflect.DeepEqual(have, want) {
293			var shave, swant string
294			if _, ok := have.(CharData); ok {
295				shave = fmt.Sprintf("CharData(%q)", have)
296			} else {
297				shave = fmt.Sprintf("%#v", have)
298			}
299			if _, ok := want.(CharData); ok {
300				swant = fmt.Sprintf("CharData(%q)", want)
301			} else {
302				swant = fmt.Sprintf("%#v", want)
303			}
304			t.Errorf("token %d = %s, want %s", i, shave, swant)
305		}
306
307		// Check that InputOffset returned actual token.
308		switch {
309		case start < lastEnd:
310			t.Errorf("token %d: position [%d,%d) for %T is before previous token", i, start, end, have)
311		case start >= end:
312			// Special case: EndElement can be synthesized.
313			if start == end && end == lastEnd {
314				break
315			}
316			t.Errorf("token %d: position [%d,%d) for %T is empty", i, start, end, have)
317		case end > int64(len(raw)):
318			t.Errorf("token %d: position [%d,%d) for %T extends beyond input", i, start, end, have)
319		default:
320			text := raw[start:end]
321			if strings.ContainsAny(text, "<>") && (!strings.HasPrefix(text, "<") || !strings.HasSuffix(text, ">")) {
322				t.Errorf("token %d: misaligned raw token %#q for %T", i, text, have)
323			}
324		}
325		lastEnd = end
326	}
327}
328
// nestedDirectivesInput is the input of TestNestedDirectives. It ensures
// that directives (specifically !DOCTYPE) include the complete text of
// any nested directives, noting that < and > do not change nesting depth
// if they are in single or double quotes. Each line is one DOCTYPE with a
// nested ENTITY declaration whose quoted value contains '<', '>' or the
// other kind of quote.
var nestedDirectivesInput = `
<!DOCTYPE [<!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]>
<!DOCTYPE [<!ENTITY xlt ">">]>
<!DOCTYPE [<!ENTITY xlt "<">]>
<!DOCTYPE [<!ENTITY xlt '>'>]>
<!DOCTYPE [<!ENTITY xlt '<'>]>
<!DOCTYPE [<!ENTITY xlt '">'>]>
<!DOCTYPE [<!ENTITY xlt "'<">]>
`
342
// nestedDirectivesTokens is the token sequence TestNestedDirectives
// expects for nestedDirectivesInput: each line yields one Directive
// holding the full text between "<!" and the final ">", nested
// declaration included, followed by the newline as CharData.
var nestedDirectivesTokens = []Token{
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]`),
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY xlt ">">]`),
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY xlt "<">]`),
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY xlt '>'>]`),
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY xlt '<'>]`),
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY xlt '">'>]`),
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY xlt "'<">]`),
	CharData("\n"),
}
360
361func TestNestedDirectives(t *testing.T) {
362	d := NewDecoder(strings.NewReader(nestedDirectivesInput))
363
364	for i, want := range nestedDirectivesTokens {
365		have, err := d.Token()
366		if err != nil {
367			t.Fatalf("token %d: unexpected error: %s", i, err)
368		}
369		if !reflect.DeepEqual(have, want) {
370			t.Errorf("token %d = %#v want %#v", i, have, want)
371		}
372	}
373}
374
375func TestToken(t *testing.T) {
376	d := NewDecoder(strings.NewReader(testInput))
377	d.Entity = testEntity
378
379	for i, want := range cookedTokens {
380		have, err := d.Token()
381		if err != nil {
382			t.Fatalf("token %d: unexpected error: %s", i, err)
383		}
384		if !reflect.DeepEqual(have, want) {
385			t.Errorf("token %d = %#v want %#v", i, have, want)
386		}
387	}
388}
389
390func TestSyntax(t *testing.T) {
391	for i := range xmlInput {
392		d := NewDecoder(strings.NewReader(xmlInput[i]))
393		var err error
394		for _, err = d.Token(); err == nil; _, err = d.Token() {
395		}
396		if _, ok := err.(*SyntaxError); !ok {
397			t.Fatalf(`xmlInput "%s": expected SyntaxError not received`, xmlInput[i])
398		}
399	}
400}
401
// allScalars has one field per scalar kind that TestAllScalars decodes
// into. Field names match the element names in testScalarsInput.
type allScalars struct {
	True1     bool
	True2     bool
	False1    bool
	False2    bool
	Int       int
	Int8      int8
	Int16     int16
	Int32     int32
	Int64     int64
	Uint      uint // was declared int, which left the plain uint kind untested
	Uint8     uint8
	Uint16    uint16
	Uint32    uint32
	Uint64    uint64
	Uintptr   uintptr
	Float32   float32
	Float64   float64
	String    string
	PtrString *string
}
423
// all is the value TestAllScalars expects after decoding
// testScalarsInput into an allScalars.
var all = allScalars{
	True1:     true,
	True2:     true,
	False1:    false,
	False2:    false,
	Int:       1,
	Int8:      -2,
	Int16:     3,
	Int32:     -4,
	Int64:     5,
	Uint:      6,
	Uint8:     7,
	Uint16:    8,
	Uint32:    9,
	Uint64:    10,
	Uintptr:   11,
	Float32:   13.0,
	Float64:   14.0,
	String:    "15",
	PtrString: &sixteen,
}

// sixteen is the string that all.PtrString points to.
var sixteen = "16"
447
// testScalarsInput is the document TestAllScalars decodes into an
// allScalars value. It has one child element per struct field, plus a
// <Float> element for which allScalars declares no field; the test still
// expects decoding to succeed.
const testScalarsInput = `<allscalars>
	<True1>true</True1>
	<True2>1</True2>
	<False1>false</False1>
	<False2>0</False2>
	<Int>1</Int>
	<Int8>-2</Int8>
	<Int16>3</Int16>
	<Int32>-4</Int32>
	<Int64>5</Int64>
	<Uint>6</Uint>
	<Uint8>7</Uint8>
	<Uint16>8</Uint16>
	<Uint32>9</Uint32>
	<Uint64>10</Uint64>
	<Uintptr>11</Uintptr>
	<Float>12.0</Float>
	<Float32>13.0</Float32>
	<Float64>14.0</Float64>
	<String>15</String>
	<PtrString>16</PtrString>
</allscalars>`
470
471func TestAllScalars(t *testing.T) {
472	var a allScalars
473	err := Unmarshal([]byte(testScalarsInput), &a)
474
475	if err != nil {
476		t.Fatal(err)
477	}
478	if !reflect.DeepEqual(a, all) {
479		t.Errorf("have %+v want %+v", a, all)
480	}
481}
482
// item is the Unmarshal target used by TestIssue569. Its only field has
// an underscore in its name, matching the <Field_a> element in that
// test's input, so the name must not be changed to MixedCaps.
type item struct {
	Field_a string
}
486
487func TestIssue569(t *testing.T) {
488	data := `<item><Field_a>abcd</Field_a></item>`
489	var i item
490	err := Unmarshal([]byte(data), &i)
491
492	if err != nil || i.Field_a != "abcd" {
493		t.Fatal("Expecting abcd")
494	}
495}
496
497func TestUnquotedAttrs(t *testing.T) {
498	data := "<tag attr=azAZ09:-_\t>"
499	d := NewDecoder(strings.NewReader(data))
500	d.Strict = false
501	token, err := d.Token()
502	if _, ok := err.(*SyntaxError); ok {
503		t.Errorf("Unexpected error: %v", err)
504	}
505	if token.(StartElement).Name.Local != "tag" {
506		t.Errorf("Unexpected tag name: %v", token.(StartElement).Name.Local)
507	}
508	attr := token.(StartElement).Attr[0]
509	if attr.Value != "azAZ09:-_" {
510		t.Errorf("Unexpected attribute value: %v", attr.Value)
511	}
512	if attr.Name.Local != "attr" {
513		t.Errorf("Unexpected attribute name: %v", attr.Name.Local)
514	}
515}
516
517func TestValuelessAttrs(t *testing.T) {
518	tests := [][3]string{
519		{"<p nowrap>", "p", "nowrap"},
520		{"<p nowrap >", "p", "nowrap"},
521		{"<input checked/>", "input", "checked"},
522		{"<input checked />", "input", "checked"},
523	}
524	for _, test := range tests {
525		d := NewDecoder(strings.NewReader(test[0]))
526		d.Strict = false
527		token, err := d.Token()
528		if _, ok := err.(*SyntaxError); ok {
529			t.Errorf("Unexpected error: %v", err)
530		}
531		if token.(StartElement).Name.Local != test[1] {
532			t.Errorf("Unexpected tag name: %v", token.(StartElement).Name.Local)
533		}
534		attr := token.(StartElement).Attr[0]
535		if attr.Value != test[2] {
536			t.Errorf("Unexpected attribute value: %v", attr.Value)
537		}
538		if attr.Name.Local != test[2] {
539			t.Errorf("Unexpected attribute name: %v", attr.Name.Local)
540		}
541	}
542}
543
544func TestCopyTokenCharData(t *testing.T) {
545	data := []byte("same data")
546	var tok1 Token = CharData(data)
547	tok2 := CopyToken(tok1)
548	if !reflect.DeepEqual(tok1, tok2) {
549		t.Error("CopyToken(CharData) != CharData")
550	}
551	data[1] = 'o'
552	if reflect.DeepEqual(tok1, tok2) {
553		t.Error("CopyToken(CharData) uses same buffer.")
554	}
555}
556
557func TestCopyTokenStartElement(t *testing.T) {
558	elt := StartElement{Name{"", "hello"}, []Attr{{Name{"", "lang"}, "en"}}}
559	var tok1 Token = elt
560	tok2 := CopyToken(tok1)
561	if tok1.(StartElement).Attr[0].Value != "en" {
562		t.Error("CopyToken overwrote Attr[0]")
563	}
564	if !reflect.DeepEqual(tok1, tok2) {
565		t.Error("CopyToken(StartElement) != StartElement")
566	}
567	tok1.(StartElement).Attr[0] = Attr{Name{"", "lang"}, "de"}
568	if reflect.DeepEqual(tok1, tok2) {
569		t.Error("CopyToken(CharData) uses same buffer.")
570	}
571}
572
573func TestSyntaxErrorLineNum(t *testing.T) {
574	testInput := "<P>Foo<P>\n\n<P>Bar</>\n"
575	d := NewDecoder(strings.NewReader(testInput))
576	var err error
577	for _, err = d.Token(); err == nil; _, err = d.Token() {
578	}
579	synerr, ok := err.(*SyntaxError)
580	if !ok {
581		t.Error("Expected SyntaxError.")
582	}
583	if synerr.Line != 3 {
584		t.Error("SyntaxError didn't have correct line number.")
585	}
586}
587
588func TestTrailingRawToken(t *testing.T) {
589	input := `<FOO></FOO>  `
590	d := NewDecoder(strings.NewReader(input))
591	var err error
592	for _, err = d.RawToken(); err == nil; _, err = d.RawToken() {
593	}
594	if err != io.EOF {
595		t.Fatalf("d.RawToken() = _, %v, want _, io.EOF", err)
596	}
597}
598
599func TestTrailingToken(t *testing.T) {
600	input := `<FOO></FOO>  `
601	d := NewDecoder(strings.NewReader(input))
602	var err error
603	for _, err = d.Token(); err == nil; _, err = d.Token() {
604	}
605	if err != io.EOF {
606		t.Fatalf("d.Token() = _, %v, want _, io.EOF", err)
607	}
608}
609
// TestEntityInsideCDATA reads tokens from a document whose CDATA section
// contains a bare '&' that is not an entity reference. The token sequence
// is required to end with io.EOF, i.e. the ampersand must not cause a
// decoding error.
func TestEntityInsideCDATA(t *testing.T) {
	input := `<test><![CDATA[ &val=foo ]]></test>`
	d := NewDecoder(strings.NewReader(input))
	var err error
	// Drain the decoder; only the terminating error matters.
	for _, err = d.Token(); err == nil; _, err = d.Token() {
	}
	if err != io.EOF {
		t.Fatalf("d.Token() = _, %v, want _, io.EOF", err)
	}
}
620
// characterTests pairs an input that contains a disallowed character or
// an invalid entity reference (in) with the exact SyntaxError message
// TestDisallowedCharacters expects for it (err).
var characterTests = []struct {
	in  string
	err string
}{
	{"\x12<doc/>", "illegal character code U+0012"},
	{"<?xml version=\"1.0\"?>\x0b<doc/>", "illegal character code U+000B"},
	{"\xef\xbf\xbe<doc/>", "illegal character code U+FFFE"},
	{"<?xml version=\"1.0\"?><doc>\r\n<hiya/>\x07<toots/></doc>", "illegal character code U+0007"},
	{"<?xml version=\"1.0\"?><doc \x12='value'>what's up</doc>", "expected attribute name in element"},
	{"<doc>&abc\x01;</doc>", "invalid character entity &abc (no semicolon)"},
	{"<doc>&\x01;</doc>", "invalid character entity & (no semicolon)"},
	{"<doc>&\xef\xbf\xbe;</doc>", "invalid character entity &\uFFFE;"},
	{"<doc>&hello;</doc>", "invalid character entity &hello;"},
}
635
636func TestDisallowedCharacters(t *testing.T) {
637
638	for i, tt := range characterTests {
639		d := NewDecoder(strings.NewReader(tt.in))
640		var err error
641
642		for err == nil {
643			_, err = d.Token()
644		}
645		synerr, ok := err.(*SyntaxError)
646		if !ok {
647			t.Fatalf("input %d d.Token() = _, %v, want _, *SyntaxError", i, err)
648		}
649		if synerr.Msg != tt.err {
650			t.Fatalf("input %d synerr.Msg wrong: want %q, got %q", i, tt.err, synerr.Msg)
651		}
652	}
653}
654
// procInstEncodingTest holds an expected and an actual string.
// NOTE(review): no code visible in this file uses this type;
// procInstTests below is declared with an anonymous struct instead.
// Possibly a leftover — confirm there are no other users in the package
// before removing it.
type procInstEncodingTest struct {
	expect, got string
}
658
// procInstTests is the table for TestProcInstEncoding. input is the text
// passed to procInst; expect[0] is the value expected for "version" and
// expect[1] the value expected for "encoding". An empty string is
// expected when the parameter is absent or, as in the fourth row, when
// its value is not quoted.
var procInstTests = []struct {
	input  string
	expect [2]string
}{
	{`version="1.0" encoding="utf-8"`, [2]string{"1.0", "utf-8"}},
	{`version="1.0" encoding='utf-8'`, [2]string{"1.0", "utf-8"}},
	{`version="1.0" encoding='utf-8' `, [2]string{"1.0", "utf-8"}},
	{`version="1.0" encoding=utf-8`, [2]string{"1.0", ""}},
	{`encoding="FOO" `, [2]string{"", "FOO"}},
}
669
670func TestProcInstEncoding(t *testing.T) {
671	for _, test := range procInstTests {
672		if got := procInst("version", test.input); got != test.expect[0] {
673			t.Errorf("procInst(version, %q) = %q; want %q", test.input, got, test.expect[0])
674		}
675		if got := procInst("encoding", test.input); got != test.expect[1] {
676			t.Errorf("procInst(encoding, %q) = %q; want %q", test.input, got, test.expect[1])
677		}
678	}
679}
680
// directivesWithCommentsInput is the input of TestDirectivesWithComments.
// It ensures that directives with comments include the complete text of
// any nested directives. Each line is one DOCTYPE containing XML comments
// before, after, or between other content; the last line also contains
// the sequences "<!->" and "<!>", which look similar to comments.
var directivesWithCommentsInput = `
<!DOCTYPE [<!-- a comment --><!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]>
<!DOCTYPE [<!ENTITY go "Golang"><!-- a comment-->]>
<!DOCTYPE <!-> <!> <!----> <!-->--> <!--->--> [<!ENTITY go "Golang"><!-- a comment-->]>
`
689
// directivesWithCommentsTokens is the token sequence
// TestDirectivesWithComments expects for directivesWithCommentsInput.
// The comments are expected to be absent from the Directive text, while
// everything around them — the nested ENTITY declarations, the
// separating spaces, and "<!->" and "<!>" — is expected to be kept.
var directivesWithCommentsTokens = []Token{
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]`),
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY go "Golang">]`),
	CharData("\n"),
	Directive(`DOCTYPE <!-> <!>    [<!ENTITY go "Golang">]`),
	CharData("\n"),
}
699
700func TestDirectivesWithComments(t *testing.T) {
701	d := NewDecoder(strings.NewReader(directivesWithCommentsInput))
702
703	for i, want := range directivesWithCommentsTokens {
704		have, err := d.Token()
705		if err != nil {
706			t.Fatalf("token %d: unexpected error: %s", i, err)
707		}
708		if !reflect.DeepEqual(have, want) {
709			t.Errorf("token %d = %#v want %#v", i, have, want)
710		}
711	}
712}
713
// errWriter is a writer whose Write method always fails.
type errWriter struct{}

// Write discards p, reports zero bytes written, and returns an error
// with the text "unwritable".
func (errWriter) Write(p []byte) (int, error) {
	failure := fmt.Errorf("unwritable")
	return 0, failure
}
718
719func TestEscapeTextIOErrors(t *testing.T) {
720	expectErr := "unwritable"
721	err := EscapeText(errWriter{}, []byte{'A'})
722
723	if err == nil || err.Error() != expectErr {
724		t.Errorf("have %v, want %v", err, expectErr)
725	}
726}
727
728func TestEscapeTextInvalidChar(t *testing.T) {
729	input := []byte("A \x00 terminated string.")
730	expected := "A \uFFFD terminated string."
731
732	buff := new(bytes.Buffer)
733	if err := EscapeText(buff, input); err != nil {
734		t.Fatalf("have %v, want nil", err)
735	}
736	text := buff.String()
737
738	if text != expected {
739		t.Errorf("have %v, want %v", text, expected)
740	}
741}
742
743func TestIssue5880(t *testing.T) {
744	type T []byte
745	data, err := Marshal(T{192, 168, 0, 1})
746	if err != nil {
747		t.Errorf("Marshal error: %v", err)
748	}
749	if !utf8.Valid(data) {
750		t.Errorf("Marshal generated invalid UTF-8: %x", data)
751	}
752}
753