// Copyright 2015 Unknwon
//
// Licensed under the Apache License, Version 2.0 (the "License"): you may
// not use this file except in compliance with the License. You may obtain
// a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
// License for the specific language governing permissions and limitations
// under the License.
14
15package ini
16
17import (
18	"bufio"
19	"bytes"
20	"fmt"
21	"io"
22	"regexp"
23	"strconv"
24	"strings"
25	"unicode"
26)
27
// minReaderBufferSize is the smallest buffer size, in bytes, that newParser
// gives to the underlying bufio.Reader. Smaller requested sizes are raised to
// this value.
const minReaderBufferSize = 4096

// pythonMultiline matches a line that begins with at least one tab, form feed
// or space. Submatch 1 is that leading indentation and submatch 2 is the rest
// of the line, excluding any line break.
var pythonMultiline = regexp.MustCompile(`^([\t\f ]+)(.*)`)
31
32type parserOptions struct {
33	IgnoreContinuation          bool
34	IgnoreInlineComment         bool
35	AllowPythonMultilineValues  bool
36	SpaceBeforeInlineComment    bool
37	UnescapeValueDoubleQuotes   bool
38	UnescapeValueCommentSymbols bool
39	PreserveSurroundedQuote     bool
40	DebugFunc                   DebugFunc
41	ReaderBufferSize            int
42}
43
44type parser struct {
45	buf     *bufio.Reader
46	options parserOptions
47
48	isEOF   bool
49	count   int
50	comment *bytes.Buffer
51}
52
53func (p *parser) debug(format string, args ...interface{}) {
54	if p.options.DebugFunc != nil {
55		p.options.DebugFunc(fmt.Sprintf(format, args...))
56	}
57}
58
59func newParser(r io.Reader, opts parserOptions) *parser {
60	size := opts.ReaderBufferSize
61	if size < minReaderBufferSize {
62		size = minReaderBufferSize
63	}
64
65	return &parser{
66		buf:     bufio.NewReaderSize(r, size),
67		options: opts,
68		count:   1,
69		comment: &bytes.Buffer{},
70	}
71}
72
73// BOM handles header of UTF-8, UTF-16 LE and UTF-16 BE's BOM format.
74// http://en.wikipedia.org/wiki/Byte_order_mark#Representations_of_byte_order_marks_by_encoding
75func (p *parser) BOM() error {
76	mask, err := p.buf.Peek(2)
77	if err != nil && err != io.EOF {
78		return err
79	} else if len(mask) < 2 {
80		return nil
81	}
82
83	switch {
84	case mask[0] == 254 && mask[1] == 255:
85		fallthrough
86	case mask[0] == 255 && mask[1] == 254:
87		p.buf.Read(mask)
88	case mask[0] == 239 && mask[1] == 187:
89		mask, err := p.buf.Peek(3)
90		if err != nil && err != io.EOF {
91			return err
92		} else if len(mask) < 3 {
93			return nil
94		}
95		if mask[2] == 191 {
96			p.buf.Read(mask)
97		}
98	}
99	return nil
100}
101
102func (p *parser) readUntil(delim byte) ([]byte, error) {
103	data, err := p.buf.ReadBytes(delim)
104	if err != nil {
105		if err == io.EOF {
106			p.isEOF = true
107		} else {
108			return nil, err
109		}
110	}
111	return data, nil
112}
113
// cleanComment looks for the first comment marker ('#' or ';') in in.
//
// It returns the part of in that starts at that marker together with true, or
// nil and false when in contains no marker. The returned slice shares memory
// with in.
func cleanComment(in []byte) ([]byte, bool) {
	i := bytes.IndexAny(in, "#;")
	if i == -1 {
		return nil, false
	}
	return in[i:], true
}
121
122func readKeyName(delimiters string, in []byte) (string, int, error) {
123	line := string(in)
124
125	// Check if key name surrounded by quotes.
126	var keyQuote string
127	if line[0] == '"' {
128		if len(line) > 6 && string(line[0:3]) == `"""` {
129			keyQuote = `"""`
130		} else {
131			keyQuote = `"`
132		}
133	} else if line[0] == '`' {
134		keyQuote = "`"
135	}
136
137	// Get out key name
138	endIdx := -1
139	if len(keyQuote) > 0 {
140		startIdx := len(keyQuote)
141		// FIXME: fail case -> """"""name"""=value
142		pos := strings.Index(line[startIdx:], keyQuote)
143		if pos == -1 {
144			return "", -1, fmt.Errorf("missing closing key quote: %s", line)
145		}
146		pos += startIdx
147
148		// Find key-value delimiter
149		i := strings.IndexAny(line[pos+startIdx:], delimiters)
150		if i < 0 {
151			return "", -1, ErrDelimiterNotFound{line}
152		}
153		endIdx = pos + i
154		return strings.TrimSpace(line[startIdx:pos]), endIdx + startIdx + 1, nil
155	}
156
157	endIdx = strings.IndexAny(line, delimiters)
158	if endIdx < 0 {
159		return "", -1, ErrDelimiterNotFound{line}
160	}
161	return strings.TrimSpace(line[0:endIdx]), endIdx + 1, nil
162}
163
164func (p *parser) readMultilines(line, val, valQuote string) (string, error) {
165	for {
166		data, err := p.readUntil('\n')
167		if err != nil {
168			return "", err
169		}
170		next := string(data)
171
172		pos := strings.LastIndex(next, valQuote)
173		if pos > -1 {
174			val += next[:pos]
175
176			comment, has := cleanComment([]byte(next[pos:]))
177			if has {
178				p.comment.Write(bytes.TrimSpace(comment))
179			}
180			break
181		}
182		val += next
183		if p.isEOF {
184			return "", fmt.Errorf("missing closing key quote from '%s' to '%s'", line, next)
185		}
186	}
187	return val, nil
188}
189
190func (p *parser) readContinuationLines(val string) (string, error) {
191	for {
192		data, err := p.readUntil('\n')
193		if err != nil {
194			return "", err
195		}
196		next := strings.TrimSpace(string(data))
197
198		if len(next) == 0 {
199			break
200		}
201		val += next
202		if val[len(val)-1] != '\\' {
203			break
204		}
205		val = val[:len(val)-1]
206	}
207	return val, nil
208}
209
// hasSurroundedQuote reports whether in starts and ends with quote and
// contains that quote nowhere else.
//
// in must be at least two bytes long, so a lone quote character does not
// count; two adjacent quotes (an empty quoted string) do.
func hasSurroundedQuote(in string, quote byte) bool {
	// The first quote found after the opening one must be the final byte.
	return len(in) >= 2 && in[0] == quote && in[len(in)-1] == quote &&
		strings.IndexByte(in[1:], quote) == len(in)-2
}
217
218func (p *parser) readValue(in []byte, bufferSize int) (string, error) {
219
220	line := strings.TrimLeftFunc(string(in), unicode.IsSpace)
221	if len(line) == 0 {
222		if p.options.AllowPythonMultilineValues && len(in) > 0 && in[len(in)-1] == '\n' {
223			return p.readPythonMultilines(line, bufferSize)
224		}
225		return "", nil
226	}
227
228	var valQuote string
229	if len(line) > 3 && string(line[0:3]) == `"""` {
230		valQuote = `"""`
231	} else if line[0] == '`' {
232		valQuote = "`"
233	} else if p.options.UnescapeValueDoubleQuotes && line[0] == '"' {
234		valQuote = `"`
235	}
236
237	if len(valQuote) > 0 {
238		startIdx := len(valQuote)
239		pos := strings.LastIndex(line[startIdx:], valQuote)
240		// Check for multi-line value
241		if pos == -1 {
242			return p.readMultilines(line, line[startIdx:], valQuote)
243		}
244
245		if p.options.UnescapeValueDoubleQuotes && valQuote == `"` {
246			return strings.Replace(line[startIdx:pos+startIdx], `\"`, `"`, -1), nil
247		}
248		return line[startIdx : pos+startIdx], nil
249	}
250
251	lastChar := line[len(line)-1]
252	// Won't be able to reach here if value only contains whitespace
253	line = strings.TrimSpace(line)
254	trimmedLastChar := line[len(line)-1]
255
256	// Check continuation lines when desired
257	if !p.options.IgnoreContinuation && trimmedLastChar == '\\' {
258		return p.readContinuationLines(line[:len(line)-1])
259	}
260
261	// Check if ignore inline comment
262	if !p.options.IgnoreInlineComment {
263		var i int
264		if p.options.SpaceBeforeInlineComment {
265			i = strings.Index(line, " #")
266			if i == -1 {
267				i = strings.Index(line, " ;")
268			}
269
270		} else {
271			i = strings.IndexAny(line, "#;")
272		}
273
274		if i > -1 {
275			p.comment.WriteString(line[i:])
276			line = strings.TrimSpace(line[:i])
277		}
278
279	}
280
281	// Trim single and double quotes
282	if (hasSurroundedQuote(line, '\'') ||
283		hasSurroundedQuote(line, '"')) && !p.options.PreserveSurroundedQuote {
284		line = line[1 : len(line)-1]
285	} else if len(valQuote) == 0 && p.options.UnescapeValueCommentSymbols {
286		if strings.Contains(line, `\;`) {
287			line = strings.Replace(line, `\;`, ";", -1)
288		}
289		if strings.Contains(line, `\#`) {
290			line = strings.Replace(line, `\#`, "#", -1)
291		}
292	} else if p.options.AllowPythonMultilineValues && lastChar == '\n' {
293		return p.readPythonMultilines(line, bufferSize)
294	}
295
296	return line, nil
297}
298
299func (p *parser) readPythonMultilines(line string, bufferSize int) (string, error) {
300	parserBufferPeekResult, _ := p.buf.Peek(bufferSize)
301	peekBuffer := bytes.NewBuffer(parserBufferPeekResult)
302
303	indentSize := 0
304	for {
305		peekData, peekErr := peekBuffer.ReadBytes('\n')
306		if peekErr != nil {
307			if peekErr == io.EOF {
308				p.debug("readPythonMultilines: io.EOF, peekData: %q, line: %q", string(peekData), line)
309				return line, nil
310			}
311
312			p.debug("readPythonMultilines: failed to peek with error: %v", peekErr)
313			return "", peekErr
314		}
315
316		p.debug("readPythonMultilines: parsing %q", string(peekData))
317
318		peekMatches := pythonMultiline.FindStringSubmatch(string(peekData))
319		p.debug("readPythonMultilines: matched %d parts", len(peekMatches))
320		for n, v := range peekMatches {
321			p.debug("   %d: %q", n, v)
322		}
323
324		// Return if not a Python multiline value.
325		if len(peekMatches) != 3 {
326			p.debug("readPythonMultilines: end of value, got: %q", line)
327			return line, nil
328		}
329
330		// Determine indent size and line prefix.
331		currentIndentSize := len(peekMatches[1])
332		if indentSize < 1 {
333			indentSize = currentIndentSize
334			p.debug("readPythonMultilines: indent size is %d", indentSize)
335		}
336
337		// Make sure each line is indented at least as far as first line.
338		if currentIndentSize < indentSize {
339			p.debug("readPythonMultilines: end of value, current indent: %d, expected indent: %d, line: %q", currentIndentSize, indentSize, line)
340			return line, nil
341		}
342
343		// Advance the parser reader (buffer) in-sync with the peek buffer.
344		_, err := p.buf.Discard(len(peekData))
345		if err != nil {
346			p.debug("readPythonMultilines: failed to skip to the end, returning error")
347			return "", err
348		}
349
350		// Handle indented empty line.
351		line += "\n" + peekMatches[1][indentSize:] + peekMatches[2]
352	}
353}
354
355// parse parses data through an io.Reader.
356func (f *File) parse(reader io.Reader) (err error) {
357	p := newParser(reader, parserOptions{
358		IgnoreContinuation:          f.options.IgnoreContinuation,
359		IgnoreInlineComment:         f.options.IgnoreInlineComment,
360		AllowPythonMultilineValues:  f.options.AllowPythonMultilineValues,
361		SpaceBeforeInlineComment:    f.options.SpaceBeforeInlineComment,
362		UnescapeValueDoubleQuotes:   f.options.UnescapeValueDoubleQuotes,
363		UnescapeValueCommentSymbols: f.options.UnescapeValueCommentSymbols,
364		PreserveSurroundedQuote:     f.options.PreserveSurroundedQuote,
365		DebugFunc:                   f.options.DebugFunc,
366		ReaderBufferSize:            f.options.ReaderBufferSize,
367	})
368	if err = p.BOM(); err != nil {
369		return fmt.Errorf("BOM: %v", err)
370	}
371
372	// Ignore error because default section name is never empty string.
373	name := DefaultSection
374	if f.options.Insensitive {
375		name = strings.ToLower(DefaultSection)
376	}
377	section, _ := f.NewSection(name)
378
379	// This "last" is not strictly equivalent to "previous one" if current key is not the first nested key
380	var isLastValueEmpty bool
381	var lastRegularKey *Key
382
383	var line []byte
384	var inUnparseableSection bool
385
386	// NOTE: Iterate and increase `currentPeekSize` until
387	// the size of the parser buffer is found.
388	// TODO(unknwon): When Golang 1.10 is the lowest version supported, replace with `parserBufferSize := p.buf.Size()`.
389	parserBufferSize := 0
390	// NOTE: Peek 4kb at a time.
391	currentPeekSize := minReaderBufferSize
392
393	if f.options.AllowPythonMultilineValues {
394		for {
395			peekBytes, _ := p.buf.Peek(currentPeekSize)
396			peekBytesLength := len(peekBytes)
397
398			if parserBufferSize >= peekBytesLength {
399				break
400			}
401
402			currentPeekSize *= 2
403			parserBufferSize = peekBytesLength
404		}
405	}
406
407	for !p.isEOF {
408		line, err = p.readUntil('\n')
409		if err != nil {
410			return err
411		}
412
413		if f.options.AllowNestedValues &&
414			isLastValueEmpty && len(line) > 0 {
415			if line[0] == ' ' || line[0] == '\t' {
416				lastRegularKey.addNestedValue(string(bytes.TrimSpace(line)))
417				continue
418			}
419		}
420
421		line = bytes.TrimLeftFunc(line, unicode.IsSpace)
422		if len(line) == 0 {
423			continue
424		}
425
426		// Comments
427		if line[0] == '#' || line[0] == ';' {
428			// Note: we do not care ending line break,
429			// it is needed for adding second line,
430			// so just clean it once at the end when set to value.
431			p.comment.Write(line)
432			continue
433		}
434
435		// Section
436		if line[0] == '[' {
437			// Read to the next ']' (TODO: support quoted strings)
438			closeIdx := bytes.LastIndexByte(line, ']')
439			if closeIdx == -1 {
440				return fmt.Errorf("unclosed section: %s", line)
441			}
442
443			name := string(line[1:closeIdx])
444			section, err = f.NewSection(name)
445			if err != nil {
446				return err
447			}
448
449			comment, has := cleanComment(line[closeIdx+1:])
450			if has {
451				p.comment.Write(comment)
452			}
453
454			section.Comment = strings.TrimSpace(p.comment.String())
455
456			// Reset aotu-counter and comments
457			p.comment.Reset()
458			p.count = 1
459
460			inUnparseableSection = false
461			for i := range f.options.UnparseableSections {
462				if f.options.UnparseableSections[i] == name ||
463					(f.options.Insensitive && strings.ToLower(f.options.UnparseableSections[i]) == strings.ToLower(name)) {
464					inUnparseableSection = true
465					continue
466				}
467			}
468			continue
469		}
470
471		if inUnparseableSection {
472			section.isRawSection = true
473			section.rawBody += string(line)
474			continue
475		}
476
477		kname, offset, err := readKeyName(f.options.KeyValueDelimiters, line)
478		if err != nil {
479			// Treat as boolean key when desired, and whole line is key name.
480			if IsErrDelimiterNotFound(err) {
481				switch {
482				case f.options.AllowBooleanKeys:
483					kname, err := p.readValue(line, parserBufferSize)
484					if err != nil {
485						return err
486					}
487					key, err := section.NewBooleanKey(kname)
488					if err != nil {
489						return err
490					}
491					key.Comment = strings.TrimSpace(p.comment.String())
492					p.comment.Reset()
493					continue
494
495				case f.options.SkipUnrecognizableLines:
496					continue
497				}
498			}
499			return err
500		}
501
502		// Auto increment.
503		isAutoIncr := false
504		if kname == "-" {
505			isAutoIncr = true
506			kname = "#" + strconv.Itoa(p.count)
507			p.count++
508		}
509
510		value, err := p.readValue(line[offset:], parserBufferSize)
511		if err != nil {
512			return err
513		}
514		isLastValueEmpty = len(value) == 0
515
516		key, err := section.NewKey(kname, value)
517		if err != nil {
518			return err
519		}
520		key.isAutoIncrement = isAutoIncr
521		key.Comment = strings.TrimSpace(p.comment.String())
522		p.comment.Reset()
523		lastRegularKey = key
524	}
525	return nil
526}
527