1// Copyright 2015-2019 Brett Vickers.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package etree
6
7import (
8	"bufio"
9	"io"
10	"strings"
11	"unicode/utf8"
12)
13
// stack is a last-in-first-out container of arbitrary values, backed by a
// slice. The zero value is an empty stack that is ready to use.
type stack struct {
	data []interface{}
}

// empty reports whether the stack currently holds no values.
func (s *stack) empty() bool {
	return len(s.data) < 1
}

// push places value on top of the stack.
func (s *stack) push(value interface{}) {
	s.data = append(s.data, value)
}

// pop removes the most recently pushed value and returns it. It panics
// with an index-out-of-range error when the stack is empty, so callers
// should check empty() first.
func (s *stack) pop() interface{} {
	top := len(s.data) - 1
	value := s.data[top]
	// Clear the vacated slot so the backing array no longer references
	// the popped value.
	s.data[top] = nil
	s.data = s.data[:top]
	return value
}

// peek returns the most recently pushed value without removing it. Like
// pop, it panics when the stack is empty.
func (s *stack) peek() interface{} {
	top := len(s.data) - 1
	return s.data[top]
}
37
// fifo is a first-in-first-out queue of arbitrary values stored in a
// circular buffer. head indexes the oldest value and tail indexes the slot
// the next value will be written to; head == tail means the queue is empty,
// which is why add always keeps at least one slot unused. The zero value
// is an empty queue that is ready to use.
type fifo struct {
	data       []interface{}
	head, tail int
}

// add appends value to the back of the queue, enlarging the buffer first
// when storing it would leave no unused slot.
func (f *fifo) add(value interface{}) {
	if len(f.data) <= f.len()+1 {
		f.grow()
	}
	f.data[f.tail] = value
	f.tail = (f.tail + 1) % len(f.data)
}

// remove takes the oldest value off the front of the queue and returns it.
// It does not check for emptiness: callers must make sure len() > 0 first.
func (f *fifo) remove() interface{} {
	value := f.data[f.head]
	// Drop the buffer's reference to the removed value.
	f.data[f.head] = nil
	f.head = (f.head + 1) % len(f.data)
	return value
}

// len returns the number of values currently queued.
func (f *fifo) len() int {
	n := f.tail - f.head
	if n < 0 {
		// The occupied region wraps around the end of the buffer.
		n += len(f.data)
	}
	return n
}

// grow doubles the buffer (or allocates 4 slots when there is none yet)
// and moves the queued values to the start of the new buffer in order.
func (f *fifo) grow() {
	size := 4
	if len(f.data) > 0 {
		size = 2 * len(f.data)
	}
	count := f.len()
	buf := make([]interface{}, size)
	if f.head <= f.tail {
		// Contiguous region: a single copy suffices.
		copy(buf, f.data[f.head:f.tail])
	} else {
		// Wrapped region: copy the part up to the end of the old buffer,
		// then the part that wrapped around to its start.
		n := copy(buf, f.data[f.head:])
		copy(buf[n:], f.data[:f.tail])
	}
	f.data, f.head, f.tail = buf, 0, count
}
85
// countReader wraps an io.Reader and keeps a running total, in bytes, of
// everything read through it.
type countReader struct {
	r     io.Reader
	bytes int64
}

// newCountReader returns a countReader that reads from r, with its byte
// total starting at zero.
func newCountReader(r io.Reader) *countReader {
	cr := new(countReader)
	cr.r = r
	return cr
}

// Read forwards to the wrapped reader, adds the number of bytes it reports
// to the running total, and returns the wrapped reader's results unchanged.
func (cr *countReader) Read(p []byte) (n int, err error) {
	n, err = cr.r.Read(p)
	cr.bytes += int64(n)
	return n, err
}
102
// countWriter wraps an io.Writer and keeps a running total, in bytes, of
// everything the wrapped writer reports as written.
type countWriter struct {
	w     io.Writer
	bytes int64
}

// newCountWriter returns a countWriter that writes to w, with its byte
// total starting at zero.
func newCountWriter(w io.Writer) *countWriter {
	cw := new(countWriter)
	cw.w = w
	return cw
}

// Write forwards to the wrapped writer, adds the number of bytes it
// reports to the running total (even on a short write), and returns the
// wrapped writer's results unchanged.
func (cw *countWriter) Write(p []byte) (n int, err error) {
	n, err = cw.w.Write(p)
	cw.bytes += int64(n)
	return n, err
}
119
// isWhitespace reports whether every byte of the string s is a space, tab,
// line feed or carriage return. The empty string counts as whitespace.
func isWhitespace(s string) bool {
	for i := 0; i < len(s); i++ {
		switch s[i] {
		case ' ', '\t', '\n', '\r':
			// Whitespace byte; keep scanning.
		default:
			return false
		}
	}
	return true
}
130
// spaceMatch reports whether namespace a matches namespace b. An empty a
// acts as a wildcard and matches any b; otherwise the two must be equal.
func spaceMatch(a, b string) bool {
	return a == "" || a == b
}
141
// spaceDecompose splits a "namespace:tag" identifier at its first ':' and
// returns the text before it as space and the text after it as key. When
// str contains no ':', space is empty and key is the whole string.
func spaceDecompose(str string) (space, key string) {
	if i := strings.IndexByte(str, ':'); i >= 0 {
		return str[:i], str[i+1:]
	}
	return "", str
}
151
// indentSpaces is a CRLF newline followed by a run of spaces. It is one of
// the strings used by indentCRLF and indentLF, which slice it to build an
// indented line break.
const indentSpaces = "\r\n                                                                "

// indentTabs is a CRLF newline followed by a run of tabs. It is the other
// string used by indentCRLF and indentLF.
const indentTabs = "\r\n\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t"
157
// indentCRLF returns the first two bytes of source (expected to be a CRLF
// newline) followed by n copies of the indent character, which is taken
// from the third byte of source. A negative n yields the newline alone.
// While source is long enough the result is just a prefix of it; beyond
// that, the indent character is repeated to make up the difference.
func indentCRLF(n int, source string) string {
	if n < 0 {
		return source[:2]
	}
	// Number of indent characters needed beyond what source provides.
	extra := n - (len(source) - 2)
	if extra <= 0 {
		return source[:n+2]
	}
	return source + strings.Repeat(source[2:3], extra)
}
170
// indentLF returns the second byte of source (expected to be the LF of a
// leading CRLF newline) followed by n copies of the indent character,
// which is taken from the third byte of source. A negative n yields the
// newline alone. While source is long enough the result is just a slice of
// it; beyond that, the indent character is repeated to make up the
// difference.
func indentLF(n int, source string) string {
	if n < 0 {
		return source[1:2]
	}
	// Number of indent characters needed beyond what source provides.
	extra := n - (len(source) - 2)
	if extra <= 0 {
		return source[1 : n+2]
	}
	return source[1:] + strings.Repeat(source[2:3], extra)
}
183
// nextIndex searches s for sep, starting at byte position offset, and
// returns the index of the first match measured from the start of s. It
// returns -1 when sep does not occur at or after offset. offset must not
// exceed len(s).
func nextIndex(s, sep string, offset int) int {
	rel := strings.Index(s[offset:], sep)
	if rel < 0 {
		return -1
	}
	return rel + offset
}
194
// isInteger reports whether s is a decimal integer: an optional leading
// '-' followed by one or more ASCII digits. The empty string and a lone
// "-" are not integers. A leading '+' is not accepted, and no range check
// is performed, so arbitrarily long digit strings are reported as integers.
func isInteger(s string) bool {
	digits := s
	if len(digits) > 0 && digits[0] == '-' {
		digits = digits[1:]
	}
	// A sign with nothing after it, or nothing at all, is not a number.
	if len(digits) == 0 {
		return false
	}
	for i := 0; i < len(digits); i++ {
		if digits[i] < '0' || digits[i] > '9' {
			return false
		}
	}
	return true
}
204
// escapeMode selects which characters escapeString replaces with entity or
// character references. All modes escape '&' and '<'.
type escapeMode byte

const (
	// escapeNormal additionally escapes '>', '\'' and '"', and leaves
	// tab, line feed and carriage return untouched.
	escapeNormal escapeMode = 0

	// escapeCanonicalText additionally escapes '>' and carriage return,
	// and leaves quotes, tab and line feed untouched. Presumably this
	// follows the Canonical XML rules for text nodes.
	escapeCanonicalText escapeMode = 1

	// escapeCanonicalAttr additionally escapes '"', tab, line feed and
	// carriage return, and leaves '>' and '\'' untouched. Presumably this
	// follows the Canonical XML rules for attribute values.
	escapeCanonicalAttr escapeMode = 2
)
212
213// escapeString writes an escaped version of a string to the writer.
214func escapeString(w *bufio.Writer, s string, m escapeMode) {
215	var esc []byte
216	last := 0
217	for i := 0; i < len(s); {
218		r, width := utf8.DecodeRuneInString(s[i:])
219		i += width
220		switch r {
221		case '&':
222			esc = []byte("&amp;")
223		case '<':
224			esc = []byte("&lt;")
225		case '>':
226			if m == escapeCanonicalAttr {
227				continue
228			}
229			esc = []byte("&gt;")
230		case '\'':
231			if m != escapeNormal {
232				continue
233			}
234			esc = []byte("&apos;")
235		case '"':
236			if m == escapeCanonicalText {
237				continue
238			}
239			esc = []byte("&quot;")
240		case '\t':
241			if m != escapeCanonicalAttr {
242				continue
243			}
244			esc = []byte("&#x9;")
245		case '\n':
246			if m != escapeCanonicalAttr {
247				continue
248			}
249			esc = []byte("&#xA;")
250		case '\r':
251			if m == escapeNormal {
252				continue
253			}
254			esc = []byte("&#xD;")
255		default:
256			if !isInCharacterRange(r) || (r == 0xFFFD && width == 1) {
257				esc = []byte("\uFFFD")
258				break
259			}
260			continue
261		}
262		w.WriteString(s[last : i-width])
263		w.Write(esc)
264		last = i
265	}
266	w.WriteString(s[last:])
267}
268
// isInCharacterRange reports whether r falls in one of the accepted code
// point ranges: tab, line feed, carriage return, U+0020-U+D7FF,
// U+E000-U+FFFD or U+10000-U+10FFFF. These ranges match the Char
// production of the XML 1.0 specification.
func isInCharacterRange(r rune) bool {
	switch {
	case r == 0x09, r == 0x0A, r == 0x0D:
		return true
	case 0x20 <= r && r <= 0xD7FF:
		return true
	case 0xE000 <= r && r <= 0xFFFD:
		return true
	case 0x10000 <= r && r <= 0x10FFFF:
		return true
	}
	return false
}
277