1// Copyright 2015 Unknwon
2//
3// Licensed under the Apache License, Version 2.0 (the "License"): you may
4// not use this file except in compliance with the License. You may obtain
5// a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
11// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
12// License for the specific language governing permissions and limitations
13// under the License.
14
15package ini
16
17import (
18	"bufio"
19	"bytes"
20	"fmt"
21	"io"
22	"regexp"
23	"strconv"
24	"strings"
25	"unicode"
26)
27
// minReaderBufferSize is the smallest buffer size, in bytes, that newParser
// uses for its bufio.Reader; a smaller requested size is raised to this value.
// parse also uses it as the initial peek size when probing the buffer size.
const minReaderBufferSize = 4096

// pythonMultiline matches a line that starts with one or more tabs, form
// feeds or spaces. Group 1 captures that leading whitespace and group 2
// captures the remainder of the line up to, but not including, a newline.
var pythonMultiline = regexp.MustCompile(`^([\t\f ]+)(.*)`)
31
// parserOptions holds the settings that control how a parser reads values.
type parserOptions struct {
	// IgnoreContinuation disables joining of lines whose value ends with a
	// backslash.
	IgnoreContinuation          bool
	// IgnoreInlineComment disables splitting an unquoted value at an inline
	// comment marker ('#' or ';').
	IgnoreInlineComment         bool
	// AllowPythonMultilineValues lets a value continue on following lines
	// that start with whitespace (see readPythonMultilines).
	AllowPythonMultilineValues  bool
	// SpaceBeforeInlineComment requires an inline comment marker to be
	// preceded by a space (" #" or " ;") to be recognized.
	SpaceBeforeInlineComment    bool
	// UnescapeValueDoubleQuotes treats a leading '"' as a value quote and
	// replaces `\"` with `"` inside the quoted value.
	UnescapeValueDoubleQuotes   bool
	// UnescapeValueCommentSymbols replaces `\;` and `\#` with ';' and '#'
	// in unquoted values.
	UnescapeValueCommentSymbols bool
	// PreserveSurroundedQuote keeps a pair of single or double quotes that
	// surrounds an unquoted-style value instead of trimming it.
	PreserveSurroundedQuote     bool
	// DebugFunc, when non-nil, receives formatted debug messages.
	DebugFunc                   DebugFunc
	// ReaderBufferSize is the requested size of the read buffer; newParser
	// raises it to minReaderBufferSize when it is smaller.
	ReaderBufferSize            int
}
43
// parser carries the state needed while reading a single INI input.
type parser struct {
	// buf is the buffered reader all input is consumed from.
	buf     *bufio.Reader
	// options are the settings the parser was created with.
	options parserOptions

	// isEOF is set by readUntil once the underlying reader reports io.EOF.
	isEOF   bool
	// count is the next number used for auto-increment ("-") keys; it starts
	// at 1 and parse resets it to 1 on every section header.
	count   int
	// comment accumulates comment text until parse attaches it to the next
	// section or key and resets the buffer.
	comment *bytes.Buffer
}
52
53func (p *parser) debug(format string, args ...interface{}) {
54	if p.options.DebugFunc != nil {
55		p.options.DebugFunc(fmt.Sprintf(format, args...))
56	}
57}
58
59func newParser(r io.Reader, opts parserOptions) *parser {
60	size := opts.ReaderBufferSize
61	if size < minReaderBufferSize {
62		size = minReaderBufferSize
63	}
64
65	return &parser{
66		buf:     bufio.NewReaderSize(r, size),
67		options: opts,
68		count:   1,
69		comment: &bytes.Buffer{},
70	}
71}
72
73// BOM handles header of UTF-8, UTF-16 LE and UTF-16 BE's BOM format.
74// http://en.wikipedia.org/wiki/Byte_order_mark#Representations_of_byte_order_marks_by_encoding
75func (p *parser) BOM() error {
76	mask, err := p.buf.Peek(2)
77	if err != nil && err != io.EOF {
78		return err
79	} else if len(mask) < 2 {
80		return nil
81	}
82
83	switch {
84	case mask[0] == 254 && mask[1] == 255:
85		fallthrough
86	case mask[0] == 255 && mask[1] == 254:
87		_, err = p.buf.Read(mask)
88		if err != nil {
89			return err
90		}
91	case mask[0] == 239 && mask[1] == 187:
92		mask, err := p.buf.Peek(3)
93		if err != nil && err != io.EOF {
94			return err
95		} else if len(mask) < 3 {
96			return nil
97		}
98		if mask[2] == 191 {
99			_, err = p.buf.Read(mask)
100			if err != nil {
101				return err
102			}
103		}
104	}
105	return nil
106}
107
108func (p *parser) readUntil(delim byte) ([]byte, error) {
109	data, err := p.buf.ReadBytes(delim)
110	if err != nil {
111		if err == io.EOF {
112			p.isEOF = true
113		} else {
114			return nil, err
115		}
116	}
117	return data, nil
118}
119
// cleanComment returns the part of in that starts at the first '#' or ';'
// and true. When in contains neither byte it returns nil and false.
// The returned slice shares memory with in.
func cleanComment(in []byte) ([]byte, bool) {
	if start := bytes.IndexAny(in, "#;"); start >= 0 {
		return in[start:], true
	}
	return nil, false
}
127
128func readKeyName(delimiters string, in []byte) (string, int, error) {
129	line := string(in)
130
131	// Check if key name surrounded by quotes.
132	var keyQuote string
133	if line[0] == '"' {
134		if len(line) > 6 && string(line[0:3]) == `"""` {
135			keyQuote = `"""`
136		} else {
137			keyQuote = `"`
138		}
139	} else if line[0] == '`' {
140		keyQuote = "`"
141	}
142
143	// Get out key name
144	var endIdx int
145	if len(keyQuote) > 0 {
146		startIdx := len(keyQuote)
147		// FIXME: fail case -> """"""name"""=value
148		pos := strings.Index(line[startIdx:], keyQuote)
149		if pos == -1 {
150			return "", -1, fmt.Errorf("missing closing key quote: %s", line)
151		}
152		pos += startIdx
153
154		// Find key-value delimiter
155		i := strings.IndexAny(line[pos+startIdx:], delimiters)
156		if i < 0 {
157			return "", -1, ErrDelimiterNotFound{line}
158		}
159		endIdx = pos + i
160		return strings.TrimSpace(line[startIdx:pos]), endIdx + startIdx + 1, nil
161	}
162
163	endIdx = strings.IndexAny(line, delimiters)
164	if endIdx < 0 {
165		return "", -1, ErrDelimiterNotFound{line}
166	}
167	return strings.TrimSpace(line[0:endIdx]), endIdx + 1, nil
168}
169
170func (p *parser) readMultilines(line, val, valQuote string) (string, error) {
171	for {
172		data, err := p.readUntil('\n')
173		if err != nil {
174			return "", err
175		}
176		next := string(data)
177
178		pos := strings.LastIndex(next, valQuote)
179		if pos > -1 {
180			val += next[:pos]
181
182			comment, has := cleanComment([]byte(next[pos:]))
183			if has {
184				p.comment.Write(bytes.TrimSpace(comment))
185			}
186			break
187		}
188		val += next
189		if p.isEOF {
190			return "", fmt.Errorf("missing closing key quote from %q to %q", line, next)
191		}
192	}
193	return val, nil
194}
195
196func (p *parser) readContinuationLines(val string) (string, error) {
197	for {
198		data, err := p.readUntil('\n')
199		if err != nil {
200			return "", err
201		}
202		next := strings.TrimSpace(string(data))
203
204		if len(next) == 0 {
205			break
206		}
207		val += next
208		if val[len(val)-1] != '\\' {
209			break
210		}
211		val = val[:len(val)-1]
212	}
213	return val, nil
214}
215
// hasSurroundedQuote reports whether in starts and ends with the quote byte
// and contains no other occurrence of it in between.
// Strings shorter than two bytes never qualify.
func hasSurroundedQuote(in string, quote byte) bool {
	if len(in) < 2 || in[0] != quote || in[len(in)-1] != quote {
		return false
	}
	inner := in[1 : len(in)-1]
	return strings.IndexByte(inner, quote) == -1
}
223
224func (p *parser) readValue(in []byte, bufferSize int) (string, error) {
225
226	line := strings.TrimLeftFunc(string(in), unicode.IsSpace)
227	if len(line) == 0 {
228		if p.options.AllowPythonMultilineValues && len(in) > 0 && in[len(in)-1] == '\n' {
229			return p.readPythonMultilines(line, bufferSize)
230		}
231		return "", nil
232	}
233
234	var valQuote string
235	if len(line) > 3 && string(line[0:3]) == `"""` {
236		valQuote = `"""`
237	} else if line[0] == '`' {
238		valQuote = "`"
239	} else if p.options.UnescapeValueDoubleQuotes && line[0] == '"' {
240		valQuote = `"`
241	}
242
243	if len(valQuote) > 0 {
244		startIdx := len(valQuote)
245		pos := strings.LastIndex(line[startIdx:], valQuote)
246		// Check for multi-line value
247		if pos == -1 {
248			return p.readMultilines(line, line[startIdx:], valQuote)
249		}
250
251		if p.options.UnescapeValueDoubleQuotes && valQuote == `"` {
252			return strings.Replace(line[startIdx:pos+startIdx], `\"`, `"`, -1), nil
253		}
254		return line[startIdx : pos+startIdx], nil
255	}
256
257	lastChar := line[len(line)-1]
258	// Won't be able to reach here if value only contains whitespace
259	line = strings.TrimSpace(line)
260	trimmedLastChar := line[len(line)-1]
261
262	// Check continuation lines when desired
263	if !p.options.IgnoreContinuation && trimmedLastChar == '\\' {
264		return p.readContinuationLines(line[:len(line)-1])
265	}
266
267	// Check if ignore inline comment
268	if !p.options.IgnoreInlineComment {
269		var i int
270		if p.options.SpaceBeforeInlineComment {
271			i = strings.Index(line, " #")
272			if i == -1 {
273				i = strings.Index(line, " ;")
274			}
275
276		} else {
277			i = strings.IndexAny(line, "#;")
278		}
279
280		if i > -1 {
281			p.comment.WriteString(line[i:])
282			line = strings.TrimSpace(line[:i])
283		}
284
285	}
286
287	// Trim single and double quotes
288	if (hasSurroundedQuote(line, '\'') ||
289		hasSurroundedQuote(line, '"')) && !p.options.PreserveSurroundedQuote {
290		line = line[1 : len(line)-1]
291	} else if len(valQuote) == 0 && p.options.UnescapeValueCommentSymbols {
292		if strings.Contains(line, `\;`) {
293			line = strings.Replace(line, `\;`, ";", -1)
294		}
295		if strings.Contains(line, `\#`) {
296			line = strings.Replace(line, `\#`, "#", -1)
297		}
298	} else if p.options.AllowPythonMultilineValues && lastChar == '\n' {
299		return p.readPythonMultilines(line, bufferSize)
300	}
301
302	return line, nil
303}
304
305func (p *parser) readPythonMultilines(line string, bufferSize int) (string, error) {
306	parserBufferPeekResult, _ := p.buf.Peek(bufferSize)
307	peekBuffer := bytes.NewBuffer(parserBufferPeekResult)
308
309	indentSize := 0
310	for {
311		peekData, peekErr := peekBuffer.ReadBytes('\n')
312		if peekErr != nil {
313			if peekErr == io.EOF {
314				p.debug("readPythonMultilines: io.EOF, peekData: %q, line: %q", string(peekData), line)
315				return line, nil
316			}
317
318			p.debug("readPythonMultilines: failed to peek with error: %v", peekErr)
319			return "", peekErr
320		}
321
322		p.debug("readPythonMultilines: parsing %q", string(peekData))
323
324		peekMatches := pythonMultiline.FindStringSubmatch(string(peekData))
325		p.debug("readPythonMultilines: matched %d parts", len(peekMatches))
326		for n, v := range peekMatches {
327			p.debug("   %d: %q", n, v)
328		}
329
330		// Return if not a Python multiline value.
331		if len(peekMatches) != 3 {
332			p.debug("readPythonMultilines: end of value, got: %q", line)
333			return line, nil
334		}
335
336		// Determine indent size and line prefix.
337		currentIndentSize := len(peekMatches[1])
338		if indentSize < 1 {
339			indentSize = currentIndentSize
340			p.debug("readPythonMultilines: indent size is %d", indentSize)
341		}
342
343		// Make sure each line is indented at least as far as first line.
344		if currentIndentSize < indentSize {
345			p.debug("readPythonMultilines: end of value, current indent: %d, expected indent: %d, line: %q", currentIndentSize, indentSize, line)
346			return line, nil
347		}
348
349		// Advance the parser reader (buffer) in-sync with the peek buffer.
350		_, err := p.buf.Discard(len(peekData))
351		if err != nil {
352			p.debug("readPythonMultilines: failed to skip to the end, returning error")
353			return "", err
354		}
355
356		// Handle indented empty line.
357		line += "\n" + peekMatches[1][indentSize:] + peekMatches[2]
358	}
359}
360
361// parse parses data through an io.Reader.
362func (f *File) parse(reader io.Reader) (err error) {
363	p := newParser(reader, parserOptions{
364		IgnoreContinuation:          f.options.IgnoreContinuation,
365		IgnoreInlineComment:         f.options.IgnoreInlineComment,
366		AllowPythonMultilineValues:  f.options.AllowPythonMultilineValues,
367		SpaceBeforeInlineComment:    f.options.SpaceBeforeInlineComment,
368		UnescapeValueDoubleQuotes:   f.options.UnescapeValueDoubleQuotes,
369		UnescapeValueCommentSymbols: f.options.UnescapeValueCommentSymbols,
370		PreserveSurroundedQuote:     f.options.PreserveSurroundedQuote,
371		DebugFunc:                   f.options.DebugFunc,
372		ReaderBufferSize:            f.options.ReaderBufferSize,
373	})
374	if err = p.BOM(); err != nil {
375		return fmt.Errorf("BOM: %v", err)
376	}
377
378	// Ignore error because default section name is never empty string.
379	name := DefaultSection
380	if f.options.Insensitive || f.options.InsensitiveSections {
381		name = strings.ToLower(DefaultSection)
382	}
383	section, _ := f.NewSection(name)
384
385	// This "last" is not strictly equivalent to "previous one" if current key is not the first nested key
386	var isLastValueEmpty bool
387	var lastRegularKey *Key
388
389	var line []byte
390	var inUnparseableSection bool
391
392	// NOTE: Iterate and increase `currentPeekSize` until
393	// the size of the parser buffer is found.
394	// TODO(unknwon): When Golang 1.10 is the lowest version supported, replace with `parserBufferSize := p.buf.Size()`.
395	parserBufferSize := 0
396	// NOTE: Peek 4kb at a time.
397	currentPeekSize := minReaderBufferSize
398
399	if f.options.AllowPythonMultilineValues {
400		for {
401			peekBytes, _ := p.buf.Peek(currentPeekSize)
402			peekBytesLength := len(peekBytes)
403
404			if parserBufferSize >= peekBytesLength {
405				break
406			}
407
408			currentPeekSize *= 2
409			parserBufferSize = peekBytesLength
410		}
411	}
412
413	for !p.isEOF {
414		line, err = p.readUntil('\n')
415		if err != nil {
416			return err
417		}
418
419		if f.options.AllowNestedValues &&
420			isLastValueEmpty && len(line) > 0 {
421			if line[0] == ' ' || line[0] == '\t' {
422				err = lastRegularKey.addNestedValue(string(bytes.TrimSpace(line)))
423				if err != nil {
424					return err
425				}
426				continue
427			}
428		}
429
430		line = bytes.TrimLeftFunc(line, unicode.IsSpace)
431		if len(line) == 0 {
432			continue
433		}
434
435		// Comments
436		if line[0] == '#' || line[0] == ';' {
437			// Note: we do not care ending line break,
438			// it is needed for adding second line,
439			// so just clean it once at the end when set to value.
440			p.comment.Write(line)
441			continue
442		}
443
444		// Section
445		if line[0] == '[' {
446			// Read to the next ']' (TODO: support quoted strings)
447			closeIdx := bytes.LastIndexByte(line, ']')
448			if closeIdx == -1 {
449				return fmt.Errorf("unclosed section: %s", line)
450			}
451
452			name := string(line[1:closeIdx])
453			section, err = f.NewSection(name)
454			if err != nil {
455				return err
456			}
457
458			comment, has := cleanComment(line[closeIdx+1:])
459			if has {
460				p.comment.Write(comment)
461			}
462
463			section.Comment = strings.TrimSpace(p.comment.String())
464
465			// Reset auto-counter and comments
466			p.comment.Reset()
467			p.count = 1
468
469			inUnparseableSection = false
470			for i := range f.options.UnparseableSections {
471				if f.options.UnparseableSections[i] == name ||
472					((f.options.Insensitive || f.options.InsensitiveSections) && strings.EqualFold(f.options.UnparseableSections[i], name)) {
473					inUnparseableSection = true
474					continue
475				}
476			}
477			continue
478		}
479
480		if inUnparseableSection {
481			section.isRawSection = true
482			section.rawBody += string(line)
483			continue
484		}
485
486		kname, offset, err := readKeyName(f.options.KeyValueDelimiters, line)
487		if err != nil {
488			// Treat as boolean key when desired, and whole line is key name.
489			if IsErrDelimiterNotFound(err) {
490				switch {
491				case f.options.AllowBooleanKeys:
492					kname, err := p.readValue(line, parserBufferSize)
493					if err != nil {
494						return err
495					}
496					key, err := section.NewBooleanKey(kname)
497					if err != nil {
498						return err
499					}
500					key.Comment = strings.TrimSpace(p.comment.String())
501					p.comment.Reset()
502					continue
503
504				case f.options.SkipUnrecognizableLines:
505					continue
506				}
507			}
508			return err
509		}
510
511		// Auto increment.
512		isAutoIncr := false
513		if kname == "-" {
514			isAutoIncr = true
515			kname = "#" + strconv.Itoa(p.count)
516			p.count++
517		}
518
519		value, err := p.readValue(line[offset:], parserBufferSize)
520		if err != nil {
521			return err
522		}
523		isLastValueEmpty = len(value) == 0
524
525		key, err := section.NewKey(kname, value)
526		if err != nil {
527			return err
528		}
529		key.isAutoIncrement = isAutoIncr
530		key.Comment = strings.TrimSpace(p.comment.String())
531		p.comment.Reset()
532		lastRegularKey = key
533	}
534	return nil
535}
536