1// Copyright 2015 Unknwon 2// 3// Licensed under the Apache License, Version 2.0 (the "License"): you may 4// not use this file except in compliance with the License. You may obtain 5// a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 11// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 12// License for the specific language governing permissions and limitations 13// under the License. 14 15package ini 16 17import ( 18 "bufio" 19 "bytes" 20 "fmt" 21 "io" 22 "regexp" 23 "strconv" 24 "strings" 25 "unicode" 26) 27 28const minReaderBufferSize = 4096 29 30var pythonMultiline = regexp.MustCompile(`^([\t\f ]+)(.*)`) 31 32type parserOptions struct { 33 IgnoreContinuation bool 34 IgnoreInlineComment bool 35 AllowPythonMultilineValues bool 36 SpaceBeforeInlineComment bool 37 UnescapeValueDoubleQuotes bool 38 UnescapeValueCommentSymbols bool 39 PreserveSurroundedQuote bool 40 DebugFunc DebugFunc 41 ReaderBufferSize int 42} 43 44type parser struct { 45 buf *bufio.Reader 46 options parserOptions 47 48 isEOF bool 49 count int 50 comment *bytes.Buffer 51} 52 53func (p *parser) debug(format string, args ...interface{}) { 54 if p.options.DebugFunc != nil { 55 p.options.DebugFunc(fmt.Sprintf(format, args...)) 56 } 57} 58 59func newParser(r io.Reader, opts parserOptions) *parser { 60 size := opts.ReaderBufferSize 61 if size < minReaderBufferSize { 62 size = minReaderBufferSize 63 } 64 65 return &parser{ 66 buf: bufio.NewReaderSize(r, size), 67 options: opts, 68 count: 1, 69 comment: &bytes.Buffer{}, 70 } 71} 72 73// BOM handles header of UTF-8, UTF-16 LE and UTF-16 BE's BOM format. 74// http://en.wikipedia.org/wiki/Byte_order_mark#Representations_of_byte_order_marks_by_encoding 75func (p *parser) BOM() error { 76 mask, err := p.buf.Peek(2) 77 if err != nil && err != io.EOF { 78 return err 79 } else if len(mask) < 2 { 80 return nil 81 } 82 83 switch { 84 case mask[0] == 254 && mask[1] == 255: 85 fallthrough 86 case mask[0] == 255 && mask[1] == 254: 87 _, err = p.buf.Read(mask) 88 if err != nil { 89 return err 90 } 91 case mask[0] == 239 && mask[1] == 187: 92 mask, err := p.buf.Peek(3) 93 if err != nil && err != io.EOF { 94 return err 95 } else if len(mask) < 3 { 96 return nil 97 } 98 if mask[2] == 191 { 99 _, err = p.buf.Read(mask) 100 if err != nil { 101 return err 102 } 103 } 104 } 105 return nil 106} 107 108func (p *parser) readUntil(delim byte) ([]byte, error) { 109 data, err := p.buf.ReadBytes(delim) 110 if err != nil { 111 if err == io.EOF { 112 p.isEOF = true 113 } else { 114 return nil, err 115 } 116 } 117 return data, nil 118} 119 120func cleanComment(in []byte) ([]byte, bool) { 121 i := bytes.IndexAny(in, "#;") 122 if i == -1 { 123 return nil, false 124 } 125 return in[i:], true 126} 127 128func readKeyName(delimiters string, in []byte) (string, int, error) { 129 line := string(in) 130 131 // Check if key name surrounded by quotes. 132 var keyQuote string 133 if line[0] == '"' { 134 if len(line) > 6 && string(line[0:3]) == `"""` { 135 keyQuote = `"""` 136 } else { 137 keyQuote = `"` 138 } 139 } else if line[0] == '`' { 140 keyQuote = "`" 141 } 142 143 // Get out key name 144 var endIdx int 145 if len(keyQuote) > 0 { 146 startIdx := len(keyQuote) 147 // FIXME: fail case -> """"""name"""=value 148 pos := strings.Index(line[startIdx:], keyQuote) 149 if pos == -1 { 150 return "", -1, fmt.Errorf("missing closing key quote: %s", line) 151 } 152 pos += startIdx 153 154 // Find key-value delimiter 155 i := strings.IndexAny(line[pos+startIdx:], delimiters) 156 if i < 0 { 157 return "", -1, ErrDelimiterNotFound{line} 158 } 159 endIdx = pos + i 160 return strings.TrimSpace(line[startIdx:pos]), endIdx + startIdx + 1, nil 161 } 162 163 endIdx = strings.IndexAny(line, delimiters) 164 if endIdx < 0 { 165 return "", -1, ErrDelimiterNotFound{line} 166 } 167 return strings.TrimSpace(line[0:endIdx]), endIdx + 1, nil 168} 169 170func (p *parser) readMultilines(line, val, valQuote string) (string, error) { 171 for { 172 data, err := p.readUntil('\n') 173 if err != nil { 174 return "", err 175 } 176 next := string(data) 177 178 pos := strings.LastIndex(next, valQuote) 179 if pos > -1 { 180 val += next[:pos] 181 182 comment, has := cleanComment([]byte(next[pos:])) 183 if has { 184 p.comment.Write(bytes.TrimSpace(comment)) 185 } 186 break 187 } 188 val += next 189 if p.isEOF { 190 return "", fmt.Errorf("missing closing key quote from %q to %q", line, next) 191 } 192 } 193 return val, nil 194} 195 196func (p *parser) readContinuationLines(val string) (string, error) { 197 for { 198 data, err := p.readUntil('\n') 199 if err != nil { 200 return "", err 201 } 202 next := strings.TrimSpace(string(data)) 203 204 if len(next) == 0 { 205 break 206 } 207 val += next 208 if val[len(val)-1] != '\\' { 209 break 210 } 211 val = val[:len(val)-1] 212 } 213 return val, nil 214} 215 216// hasSurroundedQuote check if and only if the first and last characters 217// are quotes \" or \'. 218// It returns false if any other parts also contain same kind of quotes. 219func hasSurroundedQuote(in string, quote byte) bool { 220 return len(in) >= 2 && in[0] == quote && in[len(in)-1] == quote && 221 strings.IndexByte(in[1:], quote) == len(in)-2 222} 223 224func (p *parser) readValue(in []byte, bufferSize int) (string, error) { 225 226 line := strings.TrimLeftFunc(string(in), unicode.IsSpace) 227 if len(line) == 0 { 228 if p.options.AllowPythonMultilineValues && len(in) > 0 && in[len(in)-1] == '\n' { 229 return p.readPythonMultilines(line, bufferSize) 230 } 231 return "", nil 232 } 233 234 var valQuote string 235 if len(line) > 3 && string(line[0:3]) == `"""` { 236 valQuote = `"""` 237 } else if line[0] == '`' { 238 valQuote = "`" 239 } else if p.options.UnescapeValueDoubleQuotes && line[0] == '"' { 240 valQuote = `"` 241 } 242 243 if len(valQuote) > 0 { 244 startIdx := len(valQuote) 245 pos := strings.LastIndex(line[startIdx:], valQuote) 246 // Check for multi-line value 247 if pos == -1 { 248 return p.readMultilines(line, line[startIdx:], valQuote) 249 } 250 251 if p.options.UnescapeValueDoubleQuotes && valQuote == `"` { 252 return strings.Replace(line[startIdx:pos+startIdx], `\"`, `"`, -1), nil 253 } 254 return line[startIdx : pos+startIdx], nil 255 } 256 257 lastChar := line[len(line)-1] 258 // Won't be able to reach here if value only contains whitespace 259 line = strings.TrimSpace(line) 260 trimmedLastChar := line[len(line)-1] 261 262 // Check continuation lines when desired 263 if !p.options.IgnoreContinuation && trimmedLastChar == '\\' { 264 return p.readContinuationLines(line[:len(line)-1]) 265 } 266 267 // Check if ignore inline comment 268 if !p.options.IgnoreInlineComment { 269 var i int 270 if p.options.SpaceBeforeInlineComment { 271 i = strings.Index(line, " #") 272 if i == -1 { 273 i = strings.Index(line, " ;") 274 } 275 276 } else { 277 i = strings.IndexAny(line, "#;") 278 } 279 280 if i > -1 { 281 p.comment.WriteString(line[i:]) 282 line = strings.TrimSpace(line[:i]) 283 } 284 285 } 286 287 // Trim single and double quotes 288 if (hasSurroundedQuote(line, '\'') || 289 hasSurroundedQuote(line, '"')) && !p.options.PreserveSurroundedQuote { 290 line = line[1 : len(line)-1] 291 } else if len(valQuote) == 0 && p.options.UnescapeValueCommentSymbols { 292 if strings.Contains(line, `\;`) { 293 line = strings.Replace(line, `\;`, ";", -1) 294 } 295 if strings.Contains(line, `\#`) { 296 line = strings.Replace(line, `\#`, "#", -1) 297 } 298 } else if p.options.AllowPythonMultilineValues && lastChar == '\n' { 299 return p.readPythonMultilines(line, bufferSize) 300 } 301 302 return line, nil 303} 304 305func (p *parser) readPythonMultilines(line string, bufferSize int) (string, error) { 306 parserBufferPeekResult, _ := p.buf.Peek(bufferSize) 307 peekBuffer := bytes.NewBuffer(parserBufferPeekResult) 308 309 indentSize := 0 310 for { 311 peekData, peekErr := peekBuffer.ReadBytes('\n') 312 if peekErr != nil { 313 if peekErr == io.EOF { 314 p.debug("readPythonMultilines: io.EOF, peekData: %q, line: %q", string(peekData), line) 315 return line, nil 316 } 317 318 p.debug("readPythonMultilines: failed to peek with error: %v", peekErr) 319 return "", peekErr 320 } 321 322 p.debug("readPythonMultilines: parsing %q", string(peekData)) 323 324 peekMatches := pythonMultiline.FindStringSubmatch(string(peekData)) 325 p.debug("readPythonMultilines: matched %d parts", len(peekMatches)) 326 for n, v := range peekMatches { 327 p.debug(" %d: %q", n, v) 328 } 329 330 // Return if not a Python multiline value. 331 if len(peekMatches) != 3 { 332 p.debug("readPythonMultilines: end of value, got: %q", line) 333 return line, nil 334 } 335 336 // Determine indent size and line prefix. 337 currentIndentSize := len(peekMatches[1]) 338 if indentSize < 1 { 339 indentSize = currentIndentSize 340 p.debug("readPythonMultilines: indent size is %d", indentSize) 341 } 342 343 // Make sure each line is indented at least as far as first line. 344 if currentIndentSize < indentSize { 345 p.debug("readPythonMultilines: end of value, current indent: %d, expected indent: %d, line: %q", currentIndentSize, indentSize, line) 346 return line, nil 347 } 348 349 // Advance the parser reader (buffer) in-sync with the peek buffer. 350 _, err := p.buf.Discard(len(peekData)) 351 if err != nil { 352 p.debug("readPythonMultilines: failed to skip to the end, returning error") 353 return "", err 354 } 355 356 // Handle indented empty line. 357 line += "\n" + peekMatches[1][indentSize:] + peekMatches[2] 358 } 359} 360 361// parse parses data through an io.Reader. 362func (f *File) parse(reader io.Reader) (err error) { 363 p := newParser(reader, parserOptions{ 364 IgnoreContinuation: f.options.IgnoreContinuation, 365 IgnoreInlineComment: f.options.IgnoreInlineComment, 366 AllowPythonMultilineValues: f.options.AllowPythonMultilineValues, 367 SpaceBeforeInlineComment: f.options.SpaceBeforeInlineComment, 368 UnescapeValueDoubleQuotes: f.options.UnescapeValueDoubleQuotes, 369 UnescapeValueCommentSymbols: f.options.UnescapeValueCommentSymbols, 370 PreserveSurroundedQuote: f.options.PreserveSurroundedQuote, 371 DebugFunc: f.options.DebugFunc, 372 ReaderBufferSize: f.options.ReaderBufferSize, 373 }) 374 if err = p.BOM(); err != nil { 375 return fmt.Errorf("BOM: %v", err) 376 } 377 378 // Ignore error because default section name is never empty string. 379 name := DefaultSection 380 if f.options.Insensitive || f.options.InsensitiveSections { 381 name = strings.ToLower(DefaultSection) 382 } 383 section, _ := f.NewSection(name) 384 385 // This "last" is not strictly equivalent to "previous one" if current key is not the first nested key 386 var isLastValueEmpty bool 387 var lastRegularKey *Key 388 389 var line []byte 390 var inUnparseableSection bool 391 392 // NOTE: Iterate and increase `currentPeekSize` until 393 // the size of the parser buffer is found. 394 // TODO(unknwon): When Golang 1.10 is the lowest version supported, replace with `parserBufferSize := p.buf.Size()`. 395 parserBufferSize := 0 396 // NOTE: Peek 4kb at a time. 397 currentPeekSize := minReaderBufferSize 398 399 if f.options.AllowPythonMultilineValues { 400 for { 401 peekBytes, _ := p.buf.Peek(currentPeekSize) 402 peekBytesLength := len(peekBytes) 403 404 if parserBufferSize >= peekBytesLength { 405 break 406 } 407 408 currentPeekSize *= 2 409 parserBufferSize = peekBytesLength 410 } 411 } 412 413 for !p.isEOF { 414 line, err = p.readUntil('\n') 415 if err != nil { 416 return err 417 } 418 419 if f.options.AllowNestedValues && 420 isLastValueEmpty && len(line) > 0 { 421 if line[0] == ' ' || line[0] == '\t' { 422 err = lastRegularKey.addNestedValue(string(bytes.TrimSpace(line))) 423 if err != nil { 424 return err 425 } 426 continue 427 } 428 } 429 430 line = bytes.TrimLeftFunc(line, unicode.IsSpace) 431 if len(line) == 0 { 432 continue 433 } 434 435 // Comments 436 if line[0] == '#' || line[0] == ';' { 437 // Note: we do not care ending line break, 438 // it is needed for adding second line, 439 // so just clean it once at the end when set to value. 440 p.comment.Write(line) 441 continue 442 } 443 444 // Section 445 if line[0] == '[' { 446 // Read to the next ']' (TODO: support quoted strings) 447 closeIdx := bytes.LastIndexByte(line, ']') 448 if closeIdx == -1 { 449 return fmt.Errorf("unclosed section: %s", line) 450 } 451 452 name := string(line[1:closeIdx]) 453 section, err = f.NewSection(name) 454 if err != nil { 455 return err 456 } 457 458 comment, has := cleanComment(line[closeIdx+1:]) 459 if has { 460 p.comment.Write(comment) 461 } 462 463 section.Comment = strings.TrimSpace(p.comment.String()) 464 465 // Reset auto-counter and comments 466 p.comment.Reset() 467 p.count = 1 468 469 inUnparseableSection = false 470 for i := range f.options.UnparseableSections { 471 if f.options.UnparseableSections[i] == name || 472 ((f.options.Insensitive || f.options.InsensitiveSections) && strings.EqualFold(f.options.UnparseableSections[i], name)) { 473 inUnparseableSection = true 474 continue 475 } 476 } 477 continue 478 } 479 480 if inUnparseableSection { 481 section.isRawSection = true 482 section.rawBody += string(line) 483 continue 484 } 485 486 kname, offset, err := readKeyName(f.options.KeyValueDelimiters, line) 487 if err != nil { 488 // Treat as boolean key when desired, and whole line is key name. 489 if IsErrDelimiterNotFound(err) { 490 switch { 491 case f.options.AllowBooleanKeys: 492 kname, err := p.readValue(line, parserBufferSize) 493 if err != nil { 494 return err 495 } 496 key, err := section.NewBooleanKey(kname) 497 if err != nil { 498 return err 499 } 500 key.Comment = strings.TrimSpace(p.comment.String()) 501 p.comment.Reset() 502 continue 503 504 case f.options.SkipUnrecognizableLines: 505 continue 506 } 507 } 508 return err 509 } 510 511 // Auto increment. 512 isAutoIncr := false 513 if kname == "-" { 514 isAutoIncr = true 515 kname = "#" + strconv.Itoa(p.count) 516 p.count++ 517 } 518 519 value, err := p.readValue(line[offset:], parserBufferSize) 520 if err != nil { 521 return err 522 } 523 isLastValueEmpty = len(value) == 0 524 525 key, err := section.NewKey(kname, value) 526 if err != nil { 527 return err 528 } 529 key.isAutoIncrement = isAutoIncr 530 key.Comment = strings.TrimSpace(p.comment.String()) 531 p.comment.Reset() 532 lastRegularKey = key 533 } 534 return nil 535} 536