// Copyright 2015-2019 Brett Vickers.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// NOTE(review): the package clause ("package etree") and the import block
// ("bufio", "io", "strings", "unicode/utf8") were fused into this same
// collapsed line by the extraction that flattened the file. Restore them
// here, above the declarations that follow.

// stack is a simple last-in-first-out container of arbitrary values. Its
// zero value is an empty stack that is ready to use.
type stack struct {
	data []interface{}
}

// empty reports whether the stack holds no values.
func (s *stack) empty() bool {
	return len(s.data) == 0
}

// push places value on top of the stack.
func (s *stack) push(value interface{}) {
	s.data = append(s.data, value)
}

// pop removes and returns the value on top of the stack. Calling pop on an
// empty stack panics with an index-out-of-range error, so callers must check
// empty first.
func (s *stack) pop() interface{} {
	top := len(s.data) - 1
	value := s.data[top]
	// Clear the vacated slot so the backing array no longer references the
	// popped value.
	s.data[top] = nil
	s.data = s.data[:top]
	return value
}

// peek returns the value on top of the stack without removing it. Like pop,
// it panics when the stack is empty.
func (s *stack) peek() interface{} {
	return s.data[len(s.data)-1]
}

// A fifo is a simple first-in-first-out queue stored in a circular buffer.
// head is the index of the oldest value and tail is the index at which the
// next value will be written. The zero value is an empty queue.
type fifo struct {
	data       []interface{}
	head, tail int
}

// add appends value to the back of the queue, growing the buffer if needed.
func (f *fifo) add(value interface{}) {
	// One slot is always kept free: if the buffer were allowed to fill
	// completely, head == tail would be ambiguous between "empty" and "full".
	if f.len()+1 >= len(f.data) {
		f.grow()
	}
	f.data[f.tail] = value
	if f.tail++; f.tail == len(f.data) {
		f.tail = 0
	}
}

// remove takes the oldest value off the front of the queue and returns it.
// It returns nil when the queue is empty.
func (f *fifo) remove() interface{} {
	// Without this guard an empty (but allocated) queue would advance head
	// past tail and report a bogus non-zero length afterwards.
	if f.len() == 0 {
		return nil
	}
	value := f.data[f.head]
	// Clear the vacated slot so the buffer no longer references the value.
	f.data[f.head] = nil
	if f.head++; f.head == len(f.data) {
		f.head = 0
	}
	return value
}

// len returns the number of values currently held in the queue.
func (f *fifo) len() int {
	if f.tail >= f.head {
		return f.tail - f.head
	}
	// The live region wraps around the end of the buffer.
	return len(f.data) - f.head + f.tail
}

// grow doubles the capacity of the buffer (starting at 4) and moves the live
// values to the front of the new buffer in queue order.
func (f *fifo) grow() {
	c := len(f.data) * 2
	if c == 0 {
		c = 4
	}
	buf, count := make([]interface{}, c), f.len()
	if f.tail >= f.head {
		copy(buf[0:count], f.data[f.head:f.tail])
	} else {
		// Wrapped: copy the segment from head to the end of the old buffer,
		// then the segment from the start of the old buffer up to tail.
		hindex := len(f.data) - f.head
		copy(buf[0:hindex], f.data[f.head:])
		copy(buf[hindex:count], f.data[:f.tail])
	}
	f.data, f.head, f.tail = buf, 0, count
}

// countReader implements a proxy reader that counts the number of
// bytes read from its encapsulated reader.
type countReader struct {
	r     io.Reader // wrapped reader that supplies the data
	bytes int64     // running total of bytes reported by r.Read
}

// newCountReader returns a countReader that wraps r, with its byte count
// starting at zero.
func newCountReader(r io.Reader) *countReader {
	return &countReader{r: r}
}

// Read reads from the wrapped reader into p, adds the number of bytes read
// to cr.bytes, and returns the wrapped reader's results unchanged.
func (cr *countReader) Read(p []byte) (n int, err error) {
	n, err = cr.r.Read(p)
	cr.bytes += int64(n)
	return n, err
}

// countWriter implements a proxy writer that counts the number of
// bytes written by its encapsulated writer.
type countWriter struct {
	w     io.Writer // wrapped writer that receives the data
	bytes int64     // running total of bytes reported by w.Write
}

// newCountWriter returns a countWriter that wraps w, with its byte count
// starting at zero.
func newCountWriter(w io.Writer) *countWriter {
	return &countWriter{w: w}
}

// Write writes p to the wrapped writer, adds the number of bytes written to
// cw.bytes, and returns the wrapped writer's results unchanged.
func (cw *countWriter) Write(p []byte) (n int, err error) {
	n, err = cw.w.Write(p)
	cw.bytes += int64(n)
	return n, err
}

// isWhitespace returns true if the string contains only whitespace
// characters (space, tab, line feed, carriage return). The empty string
// is considered whitespace.
func isWhitespace(s string) bool {
	for i := 0; i < len(s); i++ {
		switch s[i] {
		case ' ', '\t', '\n', '\r':
			// Still whitespace; keep scanning.
		default:
			return false
		}
	}
	return true
}

// spaceMatch returns true if namespace a is the empty string
// or if namespace a equals namespace b.
func spaceMatch(a, b string) bool {
	return a == "" || a == b
}

// spaceDecompose breaks a namespace:tag identifier at the first ':'
// and returns the two parts. If str contains no ':', space is empty
// and key is the whole string.
func spaceDecompose(str string) (space, key string) {
	colon := strings.IndexByte(str, ':')
	if colon == -1 {
		return "", str
	}
	return str[:colon], str[colon+1:]
}

// Strings used by indentCRLF and indentLF. Each is a CRLF newline followed
// by a run of a single indent character; the indent helpers slice into the
// run and only allocate when more indentation is requested than it holds.
const (
	indentSpaces = "\r\n                "
	indentTabs   = "\r\n\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t"
)

// indentCRLF returns a CRLF newline followed by n copies of the first
// non-CRLF character in the source string. A negative n yields just the
// CRLF newline.
func indentCRLF(n int, source string) string {
	// source is expected to be "\r\n" followed by at least one indent
	// character (source[2]); shorter strings make the slicing below panic.
	switch {
	case n < 0:
		// Negative indent: newline only.
		return source[:2]
	case n < len(source)-1:
		// The request fits inside source, so return a prefix of it without
		// allocating.
		return source[:n+2]
	default:
		// More indentation than source holds: extend it with extra copies of
		// the indent character.
		return source + strings.Repeat(source[2:3], n-len(source)+2)
	}
}

// indentLF returns a LF newline followed by n copies of the first non-LF
// character in the source string. source has the same layout indentCRLF
// expects ("\r\n" plus indent characters); the leading '\r' is skipped.
// A negative n yields just the LF newline.
func indentLF(n int, source string) string {
	switch {
	case n < 0:
		return source[1:2]
	case n < len(source)-1:
		return source[1 : n+2]
	default:
		return source[1:] + strings.Repeat(source[2:3], n-len(source)+2)
	}
}

// nextIndex returns the index of the next occurrence of sep in s,
// starting from offset. It returns -1 if the sep string is not found
// or if offset lies outside s.
func nextIndex(s, sep string, offset int) int {
	if offset < 0 || offset > len(s) {
		// Slicing s[offset:] would panic; nothing can be found there.
		return -1
	}
	i := strings.Index(s[offset:], sep)
	if i == -1 {
		return -1
	}
	return offset + i
}

// isInteger returns true if the string s contains a decimal integer: an
// optional leading '-' followed by one or more ASCII digits. The empty
// string and a lone "-" are not integers.
func isInteger(s string) bool {
	digits := s
	if strings.HasPrefix(digits, "-") {
		digits = digits[1:]
	}
	if digits == "" {
		return false
	}
	for i := 0; i < len(digits); i++ {
		if digits[i] < '0' || digits[i] > '9' {
			return false
		}
	}
	return true
}

// escapeMode selects which characters escapeString replaces.
type escapeMode byte

const (
	// escapeNormal escapes & < > ' and ".
	escapeNormal escapeMode = iota
	// escapeCanonicalText escapes & < > and carriage return.
	escapeCanonicalText
	// escapeCanonicalAttr escapes & < " tab, line feed and carriage return.
	escapeCanonicalAttr
)

// escapeString writes an escaped version of a string to the writer.
//
// The characters that are replaced by XML entities depend on the mode m (see
// the escapeMode constants). In every mode, runes outside the range accepted
// by isInCharacterRange and bytes that are not valid UTF-8 are replaced by
// U+FFFD. Write errors are not returned here; bufio.Writer records the first
// error and reports it from Flush.
func escapeString(w *bufio.Writer, s string, m escapeMode) {
	var esc string
	last := 0 // start of the pending run of bytes that need no escaping
	for i := 0; i < len(s); {
		r, width := utf8.DecodeRuneInString(s[i:])
		i += width
		switch r {
		case '&':
			esc = "&amp;"
		case '<':
			esc = "&lt;"
		case '>':
			if m == escapeCanonicalAttr {
				continue
			}
			esc = "&gt;"
		case '\'':
			if m != escapeNormal {
				continue
			}
			esc = "&apos;"
		case '"':
			if m == escapeCanonicalText {
				continue
			}
			esc = "&quot;"
		case '\t':
			if m != escapeCanonicalAttr {
				continue
			}
			esc = "&#x9;"
		case '\n':
			if m != escapeCanonicalAttr {
				continue
			}
			esc = "&#xA;"
		case '\r':
			if m == escapeNormal {
				continue
			}
			esc = "&#xD;"
		default:
			// width == 1 with U+FFFD means the decoder hit an invalid byte;
			// a genuine U+FFFD in the input is three bytes wide and is
			// passed through untouched.
			if !isInCharacterRange(r) || (r == 0xFFFD && width == 1) {
				esc = "\uFFFD"
				break
			}
			continue
		}
		// Flush the unescaped run that precedes this rune, then its
		// replacement.
		w.WriteString(s[last : i-width])
		w.WriteString(esc)
		last = i
	}
	w.WriteString(s[last:])
}

// isInCharacterRange reports whether r is one of the code points this file
// writes unmodified: tab, line feed, carriage return, U+0020-U+D7FF,
// U+E000-U+FFFD, or U+10000-U+10FFFF.
func isInCharacterRange(r rune) bool {
	return r == 0x09 ||
		r == 0x0A ||
		r == 0x0D ||
		r >= 0x20 && r <= 0xD7FF ||
		r >= 0xE000 && r <= 0xFFFD ||
		r >= 0x10000 && r <= 0x10FFFF
}