1// Copyright 2015 Unknwon 2// 3// Licensed under the Apache License, Version 2.0 (the "License"): you may 4// not use this file except in compliance with the License. You may obtain 5// a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 11// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 12// License for the specific language governing permissions and limitations 13// under the License. 14 15package ini 16 17import ( 18 "bufio" 19 "bytes" 20 "fmt" 21 "io" 22 "regexp" 23 "strconv" 24 "strings" 25 "unicode" 26) 27 28var pythonMultiline = regexp.MustCompile("^(\\s+)([^\n]+)") 29 30type tokenType int 31 32const ( 33 _TOKEN_INVALID tokenType = iota 34 _TOKEN_COMMENT 35 _TOKEN_SECTION 36 _TOKEN_KEY 37) 38 39type parser struct { 40 buf *bufio.Reader 41 isEOF bool 42 count int 43 comment *bytes.Buffer 44} 45 46func newParser(r io.Reader) *parser { 47 return &parser{ 48 buf: bufio.NewReader(r), 49 count: 1, 50 comment: &bytes.Buffer{}, 51 } 52} 53 54// BOM handles header of UTF-8, UTF-16 LE and UTF-16 BE's BOM format. 55// http://en.wikipedia.org/wiki/Byte_order_mark#Representations_of_byte_order_marks_by_encoding 56func (p *parser) BOM() error { 57 mask, err := p.buf.Peek(2) 58 if err != nil && err != io.EOF { 59 return err 60 } else if len(mask) < 2 { 61 return nil 62 } 63 64 switch { 65 case mask[0] == 254 && mask[1] == 255: 66 fallthrough 67 case mask[0] == 255 && mask[1] == 254: 68 p.buf.Read(mask) 69 case mask[0] == 239 && mask[1] == 187: 70 mask, err := p.buf.Peek(3) 71 if err != nil && err != io.EOF { 72 return err 73 } else if len(mask) < 3 { 74 return nil 75 } 76 if mask[2] == 191 { 77 p.buf.Read(mask) 78 } 79 } 80 return nil 81} 82 83func (p *parser) readUntil(delim byte) ([]byte, error) { 84 data, err := p.buf.ReadBytes(delim) 85 if err != nil { 86 if err == io.EOF { 87 p.isEOF = true 88 } else { 89 return nil, err 90 } 91 } 92 return data, nil 93} 94 95func cleanComment(in []byte) ([]byte, bool) { 96 i := bytes.IndexAny(in, "#;") 97 if i == -1 { 98 return nil, false 99 } 100 return in[i:], true 101} 102 103func readKeyName(delimiters string, in []byte) (string, int, error) { 104 line := string(in) 105 106 // Check if key name surrounded by quotes. 107 var keyQuote string 108 if line[0] == '"' { 109 if len(line) > 6 && string(line[0:3]) == `"""` { 110 keyQuote = `"""` 111 } else { 112 keyQuote = `"` 113 } 114 } else if line[0] == '`' { 115 keyQuote = "`" 116 } 117 118 // Get out key name 119 endIdx := -1 120 if len(keyQuote) > 0 { 121 startIdx := len(keyQuote) 122 // FIXME: fail case -> """"""name"""=value 123 pos := strings.Index(line[startIdx:], keyQuote) 124 if pos == -1 { 125 return "", -1, fmt.Errorf("missing closing key quote: %s", line) 126 } 127 pos += startIdx 128 129 // Find key-value delimiter 130 i := strings.IndexAny(line[pos+startIdx:], delimiters) 131 if i < 0 { 132 return "", -1, ErrDelimiterNotFound{line} 133 } 134 endIdx = pos + i 135 return strings.TrimSpace(line[startIdx:pos]), endIdx + startIdx + 1, nil 136 } 137 138 endIdx = strings.IndexAny(line, delimiters) 139 if endIdx < 0 { 140 return "", -1, ErrDelimiterNotFound{line} 141 } 142 return strings.TrimSpace(line[0:endIdx]), endIdx + 1, nil 143} 144 145func (p *parser) readMultilines(line, val, valQuote string) (string, error) { 146 for { 147 data, err := p.readUntil('\n') 148 if err != nil { 149 return "", err 150 } 151 next := string(data) 152 153 pos := strings.LastIndex(next, valQuote) 154 if pos > -1 { 155 val += next[:pos] 156 157 comment, has := cleanComment([]byte(next[pos:])) 158 if has { 159 p.comment.Write(bytes.TrimSpace(comment)) 160 } 161 break 162 } 163 val += next 164 if p.isEOF { 165 return "", fmt.Errorf("missing closing key quote from '%s' to '%s'", line, next) 166 } 167 } 168 return val, nil 169} 170 171func (p *parser) readContinuationLines(val string) (string, error) { 172 for { 173 data, err := p.readUntil('\n') 174 if err != nil { 175 return "", err 176 } 177 next := strings.TrimSpace(string(data)) 178 179 if len(next) == 0 { 180 break 181 } 182 val += next 183 if val[len(val)-1] != '\\' { 184 break 185 } 186 val = val[:len(val)-1] 187 } 188 return val, nil 189} 190 191// hasSurroundedQuote check if and only if the first and last characters 192// are quotes \" or \'. 193// It returns false if any other parts also contain same kind of quotes. 194func hasSurroundedQuote(in string, quote byte) bool { 195 return len(in) >= 2 && in[0] == quote && in[len(in)-1] == quote && 196 strings.IndexByte(in[1:], quote) == len(in)-2 197} 198 199func (p *parser) readValue(in []byte, 200 parserBufferSize int, 201 ignoreContinuation, ignoreInlineComment, unescapeValueDoubleQuotes, unescapeValueCommentSymbols, allowPythonMultilines, spaceBeforeInlineComment, preserveSurroundedQuote bool) (string, error) { 202 203 line := strings.TrimLeftFunc(string(in), unicode.IsSpace) 204 if len(line) == 0 { 205 return "", nil 206 } 207 208 var valQuote string 209 if len(line) > 3 && string(line[0:3]) == `"""` { 210 valQuote = `"""` 211 } else if line[0] == '`' { 212 valQuote = "`" 213 } else if unescapeValueDoubleQuotes && line[0] == '"' { 214 valQuote = `"` 215 } 216 217 if len(valQuote) > 0 { 218 startIdx := len(valQuote) 219 pos := strings.LastIndex(line[startIdx:], valQuote) 220 // Check for multi-line value 221 if pos == -1 { 222 return p.readMultilines(line, line[startIdx:], valQuote) 223 } 224 225 if unescapeValueDoubleQuotes && valQuote == `"` { 226 return strings.Replace(line[startIdx:pos+startIdx], `\"`, `"`, -1), nil 227 } 228 return line[startIdx : pos+startIdx], nil 229 } 230 231 lastChar := line[len(line)-1] 232 // Won't be able to reach here if value only contains whitespace 233 line = strings.TrimSpace(line) 234 trimmedLastChar := line[len(line)-1] 235 236 // Check continuation lines when desired 237 if !ignoreContinuation && trimmedLastChar == '\\' { 238 return p.readContinuationLines(line[:len(line)-1]) 239 } 240 241 // Check if ignore inline comment 242 if !ignoreInlineComment { 243 var i int 244 if spaceBeforeInlineComment { 245 i = strings.Index(line, " #") 246 if i == -1 { 247 i = strings.Index(line, " ;") 248 } 249 250 } else { 251 i = strings.IndexAny(line, "#;") 252 } 253 254 if i > -1 { 255 p.comment.WriteString(line[i:]) 256 line = strings.TrimSpace(line[:i]) 257 } 258 259 } 260 261 // Trim single and double quotes 262 if (hasSurroundedQuote(line, '\'') || 263 hasSurroundedQuote(line, '"')) && !preserveSurroundedQuote { 264 line = line[1 : len(line)-1] 265 } else if len(valQuote) == 0 && unescapeValueCommentSymbols { 266 if strings.Contains(line, `\;`) { 267 line = strings.Replace(line, `\;`, ";", -1) 268 } 269 if strings.Contains(line, `\#`) { 270 line = strings.Replace(line, `\#`, "#", -1) 271 } 272 } else if allowPythonMultilines && lastChar == '\n' { 273 parserBufferPeekResult, _ := p.buf.Peek(parserBufferSize) 274 peekBuffer := bytes.NewBuffer(parserBufferPeekResult) 275 276 val := line 277 278 for { 279 peekData, peekErr := peekBuffer.ReadBytes('\n') 280 if peekErr != nil { 281 if peekErr == io.EOF { 282 return val, nil 283 } 284 return "", peekErr 285 } 286 287 peekMatches := pythonMultiline.FindStringSubmatch(string(peekData)) 288 if len(peekMatches) != 3 { 289 return val, nil 290 } 291 292 // NOTE: Return if not a python-ini multi-line value. 293 currentIdentSize := len(peekMatches[1]) 294 if currentIdentSize <= 0 { 295 return val, nil 296 } 297 298 // NOTE: Just advance the parser reader (buffer) in-sync with the peek buffer. 299 _, err := p.readUntil('\n') 300 if err != nil { 301 return "", err 302 } 303 304 val += fmt.Sprintf("\n%s", peekMatches[2]) 305 } 306 } 307 308 return line, nil 309} 310 311// parse parses data through an io.Reader. 312func (f *File) parse(reader io.Reader) (err error) { 313 p := newParser(reader) 314 if err = p.BOM(); err != nil { 315 return fmt.Errorf("BOM: %v", err) 316 } 317 318 // Ignore error because default section name is never empty string. 319 name := DEFAULT_SECTION 320 if f.options.Insensitive { 321 name = strings.ToLower(DEFAULT_SECTION) 322 } 323 section, _ := f.NewSection(name) 324 325 // This "last" is not strictly equivalent to "previous one" if current key is not the first nested key 326 var isLastValueEmpty bool 327 var lastRegularKey *Key 328 329 var line []byte 330 var inUnparseableSection bool 331 332 // NOTE: Iterate and increase `currentPeekSize` until 333 // the size of the parser buffer is found. 334 // TODO(unknwon): When Golang 1.10 is the lowest version supported, replace with `parserBufferSize := p.buf.Size()`. 335 parserBufferSize := 0 336 // NOTE: Peek 1kb at a time. 337 currentPeekSize := 1024 338 339 if f.options.AllowPythonMultilineValues { 340 for { 341 peekBytes, _ := p.buf.Peek(currentPeekSize) 342 peekBytesLength := len(peekBytes) 343 344 if parserBufferSize >= peekBytesLength { 345 break 346 } 347 348 currentPeekSize *= 2 349 parserBufferSize = peekBytesLength 350 } 351 } 352 353 for !p.isEOF { 354 line, err = p.readUntil('\n') 355 if err != nil { 356 return err 357 } 358 359 if f.options.AllowNestedValues && 360 isLastValueEmpty && len(line) > 0 { 361 if line[0] == ' ' || line[0] == '\t' { 362 lastRegularKey.addNestedValue(string(bytes.TrimSpace(line))) 363 continue 364 } 365 } 366 367 line = bytes.TrimLeftFunc(line, unicode.IsSpace) 368 if len(line) == 0 { 369 continue 370 } 371 372 // Comments 373 if line[0] == '#' || line[0] == ';' { 374 // Note: we do not care ending line break, 375 // it is needed for adding second line, 376 // so just clean it once at the end when set to value. 377 p.comment.Write(line) 378 continue 379 } 380 381 // Section 382 if line[0] == '[' { 383 // Read to the next ']' (TODO: support quoted strings) 384 closeIdx := bytes.LastIndexByte(line, ']') 385 if closeIdx == -1 { 386 return fmt.Errorf("unclosed section: %s", line) 387 } 388 389 name := string(line[1:closeIdx]) 390 section, err = f.NewSection(name) 391 if err != nil { 392 return err 393 } 394 395 comment, has := cleanComment(line[closeIdx+1:]) 396 if has { 397 p.comment.Write(comment) 398 } 399 400 section.Comment = strings.TrimSpace(p.comment.String()) 401 402 // Reset aotu-counter and comments 403 p.comment.Reset() 404 p.count = 1 405 406 inUnparseableSection = false 407 for i := range f.options.UnparseableSections { 408 if f.options.UnparseableSections[i] == name || 409 (f.options.Insensitive && strings.ToLower(f.options.UnparseableSections[i]) == strings.ToLower(name)) { 410 inUnparseableSection = true 411 continue 412 } 413 } 414 continue 415 } 416 417 if inUnparseableSection { 418 section.isRawSection = true 419 section.rawBody += string(line) 420 continue 421 } 422 423 kname, offset, err := readKeyName(f.options.KeyValueDelimiters, line) 424 if err != nil { 425 // Treat as boolean key when desired, and whole line is key name. 426 if IsErrDelimiterNotFound(err) { 427 switch { 428 case f.options.AllowBooleanKeys: 429 kname, err := p.readValue(line, 430 parserBufferSize, 431 f.options.IgnoreContinuation, 432 f.options.IgnoreInlineComment, 433 f.options.UnescapeValueDoubleQuotes, 434 f.options.UnescapeValueCommentSymbols, 435 f.options.AllowPythonMultilineValues, 436 f.options.SpaceBeforeInlineComment, 437 f.options.PreserveSurroundedQuote) 438 if err != nil { 439 return err 440 } 441 key, err := section.NewBooleanKey(kname) 442 if err != nil { 443 return err 444 } 445 key.Comment = strings.TrimSpace(p.comment.String()) 446 p.comment.Reset() 447 continue 448 449 case f.options.SkipUnrecognizableLines: 450 continue 451 } 452 } 453 return err 454 } 455 456 // Auto increment. 457 isAutoIncr := false 458 if kname == "-" { 459 isAutoIncr = true 460 kname = "#" + strconv.Itoa(p.count) 461 p.count++ 462 } 463 464 value, err := p.readValue(line[offset:], 465 parserBufferSize, 466 f.options.IgnoreContinuation, 467 f.options.IgnoreInlineComment, 468 f.options.UnescapeValueDoubleQuotes, 469 f.options.UnescapeValueCommentSymbols, 470 f.options.AllowPythonMultilineValues, 471 f.options.SpaceBeforeInlineComment, 472 f.options.PreserveSurroundedQuote) 473 if err != nil { 474 return err 475 } 476 isLastValueEmpty = len(value) == 0 477 478 key, err := section.NewKey(kname, value) 479 if err != nil { 480 return err 481 } 482 key.isAutoIncrement = isAutoIncr 483 key.Comment = strings.TrimSpace(p.comment.String()) 484 p.comment.Reset() 485 lastRegularKey = key 486 } 487 return nil 488} 489