1// Copyright 2015 Unknwon 2// 3// Licensed under the Apache License, Version 2.0 (the "License"): you may 4// not use this file except in compliance with the License. You may obtain 5// a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 11// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 12// License for the specific language governing permissions and limitations 13// under the License. 14 15package ini 16 17import ( 18 "bufio" 19 "bytes" 20 "fmt" 21 "io" 22 "regexp" 23 "strconv" 24 "strings" 25 "unicode" 26) 27 28const minReaderBufferSize = 4096 29 30var pythonMultiline = regexp.MustCompile(`^([\t\f ]+)(.*)`) 31 32type parserOptions struct { 33 IgnoreContinuation bool 34 IgnoreInlineComment bool 35 AllowPythonMultilineValues bool 36 SpaceBeforeInlineComment bool 37 UnescapeValueDoubleQuotes bool 38 UnescapeValueCommentSymbols bool 39 PreserveSurroundedQuote bool 40 DebugFunc DebugFunc 41 ReaderBufferSize int 42} 43 44type parser struct { 45 buf *bufio.Reader 46 options parserOptions 47 48 isEOF bool 49 count int 50 comment *bytes.Buffer 51} 52 53func (p *parser) debug(format string, args ...interface{}) { 54 if p.options.DebugFunc != nil { 55 p.options.DebugFunc(fmt.Sprintf(format, args...)) 56 } 57} 58 59func newParser(r io.Reader, opts parserOptions) *parser { 60 size := opts.ReaderBufferSize 61 if size < minReaderBufferSize { 62 size = minReaderBufferSize 63 } 64 65 return &parser{ 66 buf: bufio.NewReaderSize(r, size), 67 options: opts, 68 count: 1, 69 comment: &bytes.Buffer{}, 70 } 71} 72 73// BOM handles header of UTF-8, UTF-16 LE and UTF-16 BE's BOM format. 74// http://en.wikipedia.org/wiki/Byte_order_mark#Representations_of_byte_order_marks_by_encoding 75func (p *parser) BOM() error { 76 mask, err := p.buf.Peek(2) 77 if err != nil && err != io.EOF { 78 return err 79 } else if len(mask) < 2 { 80 return nil 81 } 82 83 switch { 84 case mask[0] == 254 && mask[1] == 255: 85 fallthrough 86 case mask[0] == 255 && mask[1] == 254: 87 p.buf.Read(mask) 88 case mask[0] == 239 && mask[1] == 187: 89 mask, err := p.buf.Peek(3) 90 if err != nil && err != io.EOF { 91 return err 92 } else if len(mask) < 3 { 93 return nil 94 } 95 if mask[2] == 191 { 96 p.buf.Read(mask) 97 } 98 } 99 return nil 100} 101 102func (p *parser) readUntil(delim byte) ([]byte, error) { 103 data, err := p.buf.ReadBytes(delim) 104 if err != nil { 105 if err == io.EOF { 106 p.isEOF = true 107 } else { 108 return nil, err 109 } 110 } 111 return data, nil 112} 113 114func cleanComment(in []byte) ([]byte, bool) { 115 i := bytes.IndexAny(in, "#;") 116 if i == -1 { 117 return nil, false 118 } 119 return in[i:], true 120} 121 122func readKeyName(delimiters string, in []byte) (string, int, error) { 123 line := string(in) 124 125 // Check if key name surrounded by quotes. 126 var keyQuote string 127 if line[0] == '"' { 128 if len(line) > 6 && string(line[0:3]) == `"""` { 129 keyQuote = `"""` 130 } else { 131 keyQuote = `"` 132 } 133 } else if line[0] == '`' { 134 keyQuote = "`" 135 } 136 137 // Get out key name 138 endIdx := -1 139 if len(keyQuote) > 0 { 140 startIdx := len(keyQuote) 141 // FIXME: fail case -> """"""name"""=value 142 pos := strings.Index(line[startIdx:], keyQuote) 143 if pos == -1 { 144 return "", -1, fmt.Errorf("missing closing key quote: %s", line) 145 } 146 pos += startIdx 147 148 // Find key-value delimiter 149 i := strings.IndexAny(line[pos+startIdx:], delimiters) 150 if i < 0 { 151 return "", -1, ErrDelimiterNotFound{line} 152 } 153 endIdx = pos + i 154 return strings.TrimSpace(line[startIdx:pos]), endIdx + startIdx + 1, nil 155 } 156 157 endIdx = strings.IndexAny(line, delimiters) 158 if endIdx < 0 { 159 return "", -1, ErrDelimiterNotFound{line} 160 } 161 return strings.TrimSpace(line[0:endIdx]), endIdx + 1, nil 162} 163 164func (p *parser) readMultilines(line, val, valQuote string) (string, error) { 165 for { 166 data, err := p.readUntil('\n') 167 if err != nil { 168 return "", err 169 } 170 next := string(data) 171 172 pos := strings.LastIndex(next, valQuote) 173 if pos > -1 { 174 val += next[:pos] 175 176 comment, has := cleanComment([]byte(next[pos:])) 177 if has { 178 p.comment.Write(bytes.TrimSpace(comment)) 179 } 180 break 181 } 182 val += next 183 if p.isEOF { 184 return "", fmt.Errorf("missing closing key quote from '%s' to '%s'", line, next) 185 } 186 } 187 return val, nil 188} 189 190func (p *parser) readContinuationLines(val string) (string, error) { 191 for { 192 data, err := p.readUntil('\n') 193 if err != nil { 194 return "", err 195 } 196 next := strings.TrimSpace(string(data)) 197 198 if len(next) == 0 { 199 break 200 } 201 val += next 202 if val[len(val)-1] != '\\' { 203 break 204 } 205 val = val[:len(val)-1] 206 } 207 return val, nil 208} 209 210// hasSurroundedQuote check if and only if the first and last characters 211// are quotes \" or \'. 212// It returns false if any other parts also contain same kind of quotes. 213func hasSurroundedQuote(in string, quote byte) bool { 214 return len(in) >= 2 && in[0] == quote && in[len(in)-1] == quote && 215 strings.IndexByte(in[1:], quote) == len(in)-2 216} 217 218func (p *parser) readValue(in []byte, bufferSize int) (string, error) { 219 220 line := strings.TrimLeftFunc(string(in), unicode.IsSpace) 221 if len(line) == 0 { 222 if p.options.AllowPythonMultilineValues && len(in) > 0 && in[len(in)-1] == '\n' { 223 return p.readPythonMultilines(line, bufferSize) 224 } 225 return "", nil 226 } 227 228 var valQuote string 229 if len(line) > 3 && string(line[0:3]) == `"""` { 230 valQuote = `"""` 231 } else if line[0] == '`' { 232 valQuote = "`" 233 } else if p.options.UnescapeValueDoubleQuotes && line[0] == '"' { 234 valQuote = `"` 235 } 236 237 if len(valQuote) > 0 { 238 startIdx := len(valQuote) 239 pos := strings.LastIndex(line[startIdx:], valQuote) 240 // Check for multi-line value 241 if pos == -1 { 242 return p.readMultilines(line, line[startIdx:], valQuote) 243 } 244 245 if p.options.UnescapeValueDoubleQuotes && valQuote == `"` { 246 return strings.Replace(line[startIdx:pos+startIdx], `\"`, `"`, -1), nil 247 } 248 return line[startIdx : pos+startIdx], nil 249 } 250 251 lastChar := line[len(line)-1] 252 // Won't be able to reach here if value only contains whitespace 253 line = strings.TrimSpace(line) 254 trimmedLastChar := line[len(line)-1] 255 256 // Check continuation lines when desired 257 if !p.options.IgnoreContinuation && trimmedLastChar == '\\' { 258 return p.readContinuationLines(line[:len(line)-1]) 259 } 260 261 // Check if ignore inline comment 262 if !p.options.IgnoreInlineComment { 263 var i int 264 if p.options.SpaceBeforeInlineComment { 265 i = strings.Index(line, " #") 266 if i == -1 { 267 i = strings.Index(line, " ;") 268 } 269 270 } else { 271 i = strings.IndexAny(line, "#;") 272 } 273 274 if i > -1 { 275 p.comment.WriteString(line[i:]) 276 line = strings.TrimSpace(line[:i]) 277 } 278 279 } 280 281 // Trim single and double quotes 282 if (hasSurroundedQuote(line, '\'') || 283 hasSurroundedQuote(line, '"')) && !p.options.PreserveSurroundedQuote { 284 line = line[1 : len(line)-1] 285 } else if len(valQuote) == 0 && p.options.UnescapeValueCommentSymbols { 286 if strings.Contains(line, `\;`) { 287 line = strings.Replace(line, `\;`, ";", -1) 288 } 289 if strings.Contains(line, `\#`) { 290 line = strings.Replace(line, `\#`, "#", -1) 291 } 292 } else if p.options.AllowPythonMultilineValues && lastChar == '\n' { 293 return p.readPythonMultilines(line, bufferSize) 294 } 295 296 return line, nil 297} 298 299func (p *parser) readPythonMultilines(line string, bufferSize int) (string, error) { 300 parserBufferPeekResult, _ := p.buf.Peek(bufferSize) 301 peekBuffer := bytes.NewBuffer(parserBufferPeekResult) 302 303 indentSize := 0 304 for { 305 peekData, peekErr := peekBuffer.ReadBytes('\n') 306 if peekErr != nil { 307 if peekErr == io.EOF { 308 p.debug("readPythonMultilines: io.EOF, peekData: %q, line: %q", string(peekData), line) 309 return line, nil 310 } 311 312 p.debug("readPythonMultilines: failed to peek with error: %v", peekErr) 313 return "", peekErr 314 } 315 316 p.debug("readPythonMultilines: parsing %q", string(peekData)) 317 318 peekMatches := pythonMultiline.FindStringSubmatch(string(peekData)) 319 p.debug("readPythonMultilines: matched %d parts", len(peekMatches)) 320 for n, v := range peekMatches { 321 p.debug(" %d: %q", n, v) 322 } 323 324 // Return if not a Python multiline value. 325 if len(peekMatches) != 3 { 326 p.debug("readPythonMultilines: end of value, got: %q", line) 327 return line, nil 328 } 329 330 // Determine indent size and line prefix. 331 currentIndentSize := len(peekMatches[1]) 332 if indentSize < 1 { 333 indentSize = currentIndentSize 334 p.debug("readPythonMultilines: indent size is %d", indentSize) 335 } 336 337 // Make sure each line is indented at least as far as first line. 338 if currentIndentSize < indentSize { 339 p.debug("readPythonMultilines: end of value, current indent: %d, expected indent: %d, line: %q", currentIndentSize, indentSize, line) 340 return line, nil 341 } 342 343 // Advance the parser reader (buffer) in-sync with the peek buffer. 344 _, err := p.buf.Discard(len(peekData)) 345 if err != nil { 346 p.debug("readPythonMultilines: failed to skip to the end, returning error") 347 return "", err 348 } 349 350 // Handle indented empty line. 351 line += "\n" + peekMatches[1][indentSize:] + peekMatches[2] 352 } 353} 354 355// parse parses data through an io.Reader. 356func (f *File) parse(reader io.Reader) (err error) { 357 p := newParser(reader, parserOptions{ 358 IgnoreContinuation: f.options.IgnoreContinuation, 359 IgnoreInlineComment: f.options.IgnoreInlineComment, 360 AllowPythonMultilineValues: f.options.AllowPythonMultilineValues, 361 SpaceBeforeInlineComment: f.options.SpaceBeforeInlineComment, 362 UnescapeValueDoubleQuotes: f.options.UnescapeValueDoubleQuotes, 363 UnescapeValueCommentSymbols: f.options.UnescapeValueCommentSymbols, 364 PreserveSurroundedQuote: f.options.PreserveSurroundedQuote, 365 DebugFunc: f.options.DebugFunc, 366 ReaderBufferSize: f.options.ReaderBufferSize, 367 }) 368 if err = p.BOM(); err != nil { 369 return fmt.Errorf("BOM: %v", err) 370 } 371 372 // Ignore error because default section name is never empty string. 373 name := DefaultSection 374 if f.options.Insensitive { 375 name = strings.ToLower(DefaultSection) 376 } 377 section, _ := f.NewSection(name) 378 379 // This "last" is not strictly equivalent to "previous one" if current key is not the first nested key 380 var isLastValueEmpty bool 381 var lastRegularKey *Key 382 383 var line []byte 384 var inUnparseableSection bool 385 386 // NOTE: Iterate and increase `currentPeekSize` until 387 // the size of the parser buffer is found. 388 // TODO(unknwon): When Golang 1.10 is the lowest version supported, replace with `parserBufferSize := p.buf.Size()`. 389 parserBufferSize := 0 390 // NOTE: Peek 4kb at a time. 391 currentPeekSize := minReaderBufferSize 392 393 if f.options.AllowPythonMultilineValues { 394 for { 395 peekBytes, _ := p.buf.Peek(currentPeekSize) 396 peekBytesLength := len(peekBytes) 397 398 if parserBufferSize >= peekBytesLength { 399 break 400 } 401 402 currentPeekSize *= 2 403 parserBufferSize = peekBytesLength 404 } 405 } 406 407 for !p.isEOF { 408 line, err = p.readUntil('\n') 409 if err != nil { 410 return err 411 } 412 413 if f.options.AllowNestedValues && 414 isLastValueEmpty && len(line) > 0 { 415 if line[0] == ' ' || line[0] == '\t' { 416 lastRegularKey.addNestedValue(string(bytes.TrimSpace(line))) 417 continue 418 } 419 } 420 421 line = bytes.TrimLeftFunc(line, unicode.IsSpace) 422 if len(line) == 0 { 423 continue 424 } 425 426 // Comments 427 if line[0] == '#' || line[0] == ';' { 428 // Note: we do not care ending line break, 429 // it is needed for adding second line, 430 // so just clean it once at the end when set to value. 431 p.comment.Write(line) 432 continue 433 } 434 435 // Section 436 if line[0] == '[' { 437 // Read to the next ']' (TODO: support quoted strings) 438 closeIdx := bytes.LastIndexByte(line, ']') 439 if closeIdx == -1 { 440 return fmt.Errorf("unclosed section: %s", line) 441 } 442 443 name := string(line[1:closeIdx]) 444 section, err = f.NewSection(name) 445 if err != nil { 446 return err 447 } 448 449 comment, has := cleanComment(line[closeIdx+1:]) 450 if has { 451 p.comment.Write(comment) 452 } 453 454 section.Comment = strings.TrimSpace(p.comment.String()) 455 456 // Reset aotu-counter and comments 457 p.comment.Reset() 458 p.count = 1 459 460 inUnparseableSection = false 461 for i := range f.options.UnparseableSections { 462 if f.options.UnparseableSections[i] == name || 463 (f.options.Insensitive && strings.ToLower(f.options.UnparseableSections[i]) == strings.ToLower(name)) { 464 inUnparseableSection = true 465 continue 466 } 467 } 468 continue 469 } 470 471 if inUnparseableSection { 472 section.isRawSection = true 473 section.rawBody += string(line) 474 continue 475 } 476 477 kname, offset, err := readKeyName(f.options.KeyValueDelimiters, line) 478 if err != nil { 479 // Treat as boolean key when desired, and whole line is key name. 480 if IsErrDelimiterNotFound(err) { 481 switch { 482 case f.options.AllowBooleanKeys: 483 kname, err := p.readValue(line, parserBufferSize) 484 if err != nil { 485 return err 486 } 487 key, err := section.NewBooleanKey(kname) 488 if err != nil { 489 return err 490 } 491 key.Comment = strings.TrimSpace(p.comment.String()) 492 p.comment.Reset() 493 continue 494 495 case f.options.SkipUnrecognizableLines: 496 continue 497 } 498 } 499 return err 500 } 501 502 // Auto increment. 503 isAutoIncr := false 504 if kname == "-" { 505 isAutoIncr = true 506 kname = "#" + strconv.Itoa(p.count) 507 p.count++ 508 } 509 510 value, err := p.readValue(line[offset:], parserBufferSize) 511 if err != nil { 512 return err 513 } 514 isLastValueEmpty = len(value) == 0 515 516 key, err := section.NewKey(kname, value) 517 if err != nil { 518 return err 519 } 520 key.isAutoIncrement = isAutoIncr 521 key.Comment = strings.TrimSpace(p.comment.String()) 522 p.comment.Reset() 523 lastRegularKey = key 524 } 525 return nil 526} 527