1// Copyright 2015 Unknwon
2//
3// Licensed under the Apache License, Version 2.0 (the "License"): you may
4// not use this file except in compliance with the License. You may obtain
5// a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
11// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
12// License for the specific language governing permissions and limitations
13// under the License.
14
15package ini
16
17import (
18	"bufio"
19	"bytes"
20	"fmt"
21	"io"
22	"regexp"
23	"strconv"
24	"strings"
25	"unicode"
26)
27
// pythonMultiline matches text that starts with one or more whitespace
// characters followed by at least one non-newline character. The first
// capture group is the leading whitespace and the second is the remainder of
// the line up to, but not including, the next newline.
var pythonMultiline = regexp.MustCompile("^(\\s+)([^\n]+)")
29
// tokenType is an integer enumeration whose values are the _TOKEN_*
// constants declared below.
// NOTE(review): nothing in this part of the file references these values;
// confirm whether they are still used elsewhere in the package.
type tokenType int

// Values of tokenType. _TOKEN_INVALID is the zero value; judging by their
// names, the others label comment, section and key tokens.
const (
	_TOKEN_INVALID tokenType = iota
	_TOKEN_COMMENT
	_TOKEN_SECTION
	_TOKEN_KEY
)
38
// parser holds the state needed while reading INI data line by line.
type parser struct {
	// buf is the buffered reader wrapping the input.
	buf     *bufio.Reader
	// isEOF is set by readUntil once the underlying reader reports io.EOF.
	isEOF   bool
	// count is the number used for the next auto-increment ("-") key; it
	// starts at 1 and is reset to 1 whenever a new section header is parsed.
	count   int
	// comment accumulates comment text until it is attached to the next
	// section or key, after which it is reset.
	comment *bytes.Buffer
}
45
46func newParser(r io.Reader) *parser {
47	return &parser{
48		buf:     bufio.NewReader(r),
49		count:   1,
50		comment: &bytes.Buffer{},
51	}
52}
53
54// BOM handles header of UTF-8, UTF-16 LE and UTF-16 BE's BOM format.
55// http://en.wikipedia.org/wiki/Byte_order_mark#Representations_of_byte_order_marks_by_encoding
56func (p *parser) BOM() error {
57	mask, err := p.buf.Peek(2)
58	if err != nil && err != io.EOF {
59		return err
60	} else if len(mask) < 2 {
61		return nil
62	}
63
64	switch {
65	case mask[0] == 254 && mask[1] == 255:
66		fallthrough
67	case mask[0] == 255 && mask[1] == 254:
68		p.buf.Read(mask)
69	case mask[0] == 239 && mask[1] == 187:
70		mask, err := p.buf.Peek(3)
71		if err != nil && err != io.EOF {
72			return err
73		} else if len(mask) < 3 {
74			return nil
75		}
76		if mask[2] == 191 {
77			p.buf.Read(mask)
78		}
79	}
80	return nil
81}
82
83func (p *parser) readUntil(delim byte) ([]byte, error) {
84	data, err := p.buf.ReadBytes(delim)
85	if err != nil {
86		if err == io.EOF {
87			p.isEOF = true
88		} else {
89			return nil, err
90		}
91	}
92	return data, nil
93}
94
// cleanComment looks for the first '#' or ';' in in. When one is found it
// returns the sub-slice starting at that character and true; otherwise it
// returns nil and false.
func cleanComment(in []byte) ([]byte, bool) {
	if start := bytes.IndexAny(in, "#;"); start >= 0 {
		return in[start:], true
	}
	return nil, false
}
102
103func readKeyName(delimiters string, in []byte) (string, int, error) {
104	line := string(in)
105
106	// Check if key name surrounded by quotes.
107	var keyQuote string
108	if line[0] == '"' {
109		if len(line) > 6 && string(line[0:3]) == `"""` {
110			keyQuote = `"""`
111		} else {
112			keyQuote = `"`
113		}
114	} else if line[0] == '`' {
115		keyQuote = "`"
116	}
117
118	// Get out key name
119	endIdx := -1
120	if len(keyQuote) > 0 {
121		startIdx := len(keyQuote)
122		// FIXME: fail case -> """"""name"""=value
123		pos := strings.Index(line[startIdx:], keyQuote)
124		if pos == -1 {
125			return "", -1, fmt.Errorf("missing closing key quote: %s", line)
126		}
127		pos += startIdx
128
129		// Find key-value delimiter
130		i := strings.IndexAny(line[pos+startIdx:], delimiters)
131		if i < 0 {
132			return "", -1, ErrDelimiterNotFound{line}
133		}
134		endIdx = pos + i
135		return strings.TrimSpace(line[startIdx:pos]), endIdx + startIdx + 1, nil
136	}
137
138	endIdx = strings.IndexAny(line, delimiters)
139	if endIdx < 0 {
140		return "", -1, ErrDelimiterNotFound{line}
141	}
142	return strings.TrimSpace(line[0:endIdx]), endIdx + 1, nil
143}
144
145func (p *parser) readMultilines(line, val, valQuote string) (string, error) {
146	for {
147		data, err := p.readUntil('\n')
148		if err != nil {
149			return "", err
150		}
151		next := string(data)
152
153		pos := strings.LastIndex(next, valQuote)
154		if pos > -1 {
155			val += next[:pos]
156
157			comment, has := cleanComment([]byte(next[pos:]))
158			if has {
159				p.comment.Write(bytes.TrimSpace(comment))
160			}
161			break
162		}
163		val += next
164		if p.isEOF {
165			return "", fmt.Errorf("missing closing key quote from '%s' to '%s'", line, next)
166		}
167	}
168	return val, nil
169}
170
171func (p *parser) readContinuationLines(val string) (string, error) {
172	for {
173		data, err := p.readUntil('\n')
174		if err != nil {
175			return "", err
176		}
177		next := strings.TrimSpace(string(data))
178
179		if len(next) == 0 {
180			break
181		}
182		val += next
183		if val[len(val)-1] != '\\' {
184			break
185		}
186		val = val[:len(val)-1]
187	}
188	return val, nil
189}
190
// hasSurroundedQuote reports whether in both starts and ends with the given
// quote byte and that byte appears nowhere in between. Strings shorter than
// two bytes are never considered surrounded.
func hasSurroundedQuote(in string, quote byte) bool {
	n := len(in)
	if n < 2 || in[0] != quote || in[n-1] != quote {
		return false
	}
	// The interior must be free of the quote character.
	return strings.IndexByte(in[1:n-1], quote) == -1
}
198
// readValue extracts a value from in, which parse passes as either the part
// of the current line after the key-value delimiter or, for boolean keys, the
// whole line. Leading whitespace is skipped; an input that is empty after
// that yields "".
//
// Depending on the flags it may consume further lines from the parser's
// reader: for a quoted value with no closing quote on this line, for a
// backslash continuation, or for Python-style indented continuation lines.
// Text recognized as an inline comment is appended to p.comment.
//
// parserBufferSize is how many bytes are peeked ahead when scanning for
// Python-style continuation lines. The boolean parameters are supplied by
// parse from the load options of the corresponding names.
func (p *parser) readValue(in []byte,
	parserBufferSize int,
	ignoreContinuation, ignoreInlineComment, unescapeValueDoubleQuotes, unescapeValueCommentSymbols, allowPythonMultilines, spaceBeforeInlineComment, preserveSurroundedQuote bool) (string, error) {

	line := strings.TrimLeftFunc(string(in), unicode.IsSpace)
	if len(line) == 0 {
		return "", nil
	}

	// Detect a quoted value: `"""`, a backtick, or (only when
	// unescapeValueDoubleQuotes is set) a single double quote.
	var valQuote string
	if len(line) > 3 && string(line[0:3]) == `"""` {
		valQuote = `"""`
	} else if line[0] == '`' {
		valQuote = "`"
	} else if unescapeValueDoubleQuotes && line[0] == '"' {
		valQuote = `"`
	}

	if len(valQuote) > 0 {
		startIdx := len(valQuote)
		// The closing quote is the last occurrence of the quote token after
		// the opening one; anything following it on this line is dropped.
		pos := strings.LastIndex(line[startIdx:], valQuote)
		// Check for multi-line value
		if pos == -1 {
			return p.readMultilines(line, line[startIdx:], valQuote)
		}

		if unescapeValueDoubleQuotes && valQuote == `"` {
			return strings.Replace(line[startIdx:pos+startIdx], `\"`, `"`, -1), nil
		}
		return line[startIdx : pos+startIdx], nil
	}

	// Remember the last byte before trimming: the Python multi-line branch
	// below only runs when the input line ended with a newline.
	lastChar := line[len(line)-1]
	// Won't be able to reach here if value only contains whitespace
	line = strings.TrimSpace(line)
	trimmedLastChar := line[len(line)-1]

	// Check continuation lines when desired
	if !ignoreContinuation && trimmedLastChar == '\\' {
		return p.readContinuationLines(line[:len(line)-1])
	}

	// Check if ignore inline comment
	if !ignoreInlineComment {
		var i int
		if spaceBeforeInlineComment {
			// A comment marker only counts when preceded by a space; " #" is
			// looked up first and " ;" only if no " #" exists.
			i = strings.Index(line, " #")
			if i == -1 {
				i = strings.Index(line, " ;")
			}

		} else {
			i = strings.IndexAny(line, "#;")
		}

		if i > -1 {
			// Keep the comment for the caller and cut it off the value.
			p.comment.WriteString(line[i:])
			line = strings.TrimSpace(line[:i])
		}

	}

	// Trim single and double quotes
	if (hasSurroundedQuote(line, '\'') ||
		hasSurroundedQuote(line, '"')) && !preserveSurroundedQuote {
		line = line[1 : len(line)-1]
	} else if len(valQuote) == 0 && unescapeValueCommentSymbols {
		// valQuote is always empty here because every quoted-value path
		// returned above. Turn escaped comment symbols back into literals.
		if strings.Contains(line, `\;`) {
			line = strings.Replace(line, `\;`, ";", -1)
		}
		if strings.Contains(line, `\#`) {
			line = strings.Replace(line, `\#`, "#", -1)
		}
	} else if allowPythonMultilines && lastChar == '\n' {
		// This branch is part of the else-if chain, so it is only reached
		// when neither quote trimming nor comment-symbol unescaping applied.
		// Peek's error is ignored; whatever bytes it returned are scanned.
		parserBufferPeekResult, _ := p.buf.Peek(parserBufferSize)
		peekBuffer := bytes.NewBuffer(parserBufferPeekResult)

		val := line

		for {
			// Look at the next line in the peeked copy without consuming it
			// from the real reader yet. A final line with no trailing newline
			// ends the scan (io.EOF) and is not treated as a continuation.
			peekData, peekErr := peekBuffer.ReadBytes('\n')
			if peekErr != nil {
				if peekErr == io.EOF {
					return val, nil
				}
				return "", peekErr
			}

			peekMatches := pythonMultiline.FindStringSubmatch(string(peekData))
			if len(peekMatches) != 3 {
				return val, nil
			}

			// NOTE: Return if not a python-ini multi-line value.
			// The pattern requires at least one whitespace character, so this
			// check looks purely defensive.
			currentIdentSize := len(peekMatches[1])
			if currentIdentSize <= 0 {
				return val, nil
			}

			// NOTE: Just advance the parser reader (buffer) in-sync with the peek buffer.
			_, err := p.readUntil('\n')
			if err != nil {
				return "", err
			}

			// Continuation lines are joined with a newline, without their
			// leading indentation.
			val += fmt.Sprintf("\n%s", peekMatches[2])
		}
	}

	return line, nil
}
310
// parse reads INI data from reader line by line and adds the sections and
// keys it finds to f. Comment lines are collected and attached to the next
// section or key. It returns the first error encountered: from reading, from
// an unclosed section header, from creating a section or key, or from a line
// without a key-value delimiter that the load options do not allow.
func (f *File) parse(reader io.Reader) (err error) {
	p := newParser(reader)
	if err = p.BOM(); err != nil {
		return fmt.Errorf("BOM: %v", err)
	}

	// Keys that appear before any section header go into the default section,
	// whose name is lower-cased when the Insensitive option is set.
	name := DEFAULT_SECTION
	if f.options.Insensitive {
		name = strings.ToLower(DEFAULT_SECTION)
	}
	// Ignore error because default section name is never empty string.
	section, _ := f.NewSection(name)

	// This "last" is not strictly equivalent to "previous one" if current key is not the first nested key
	var isLastValueEmpty bool
	var lastRegularKey *Key

	var line []byte
	var inUnparseableSection bool

	// NOTE: Iterate and increase `currentPeekSize` until
	// the size of the parser buffer is found.
	// TODO(unknwon): When Golang 1.10 is the lowest version supported, replace with `parserBufferSize := p.buf.Size()`.
	parserBufferSize := 0
	// NOTE: Start by peeking 1kb and double the request on every iteration.
	currentPeekSize := 1024

	if f.options.AllowPythonMultilineValues {
		for {
			// Peek's error is deliberately ignored; only the number of bytes
			// it managed to return matters here.
			peekBytes, _ := p.buf.Peek(currentPeekSize)
			peekBytesLength := len(peekBytes)

			// Stop as soon as a larger request yields no more bytes than the
			// previous one did.
			if parserBufferSize >= peekBytesLength {
				break
			}

			currentPeekSize *= 2
			parserBufferSize = peekBytesLength
		}
	}

	for !p.isEOF {
		line, err = p.readUntil('\n')
		if err != nil {
			return err
		}

		// An indented line directly after a key with an empty value is
		// recorded as a nested value of that key. isLastValueEmpty is only
		// ever set after lastRegularKey's value has been read.
		if f.options.AllowNestedValues &&
			isLastValueEmpty && len(line) > 0 {
			if line[0] == ' ' || line[0] == '\t' {
				lastRegularKey.addNestedValue(string(bytes.TrimSpace(line)))
				continue
			}
		}

		// Skip leading whitespace and ignore lines that are blank afterwards.
		line = bytes.TrimLeftFunc(line, unicode.IsSpace)
		if len(line) == 0 {
			continue
		}

		// Comments
		if line[0] == '#' || line[0] == ';' {
			// Note: we do not care ending line break,
			// it is needed for adding second line,
			// so just clean it once at the end when set to value.
			p.comment.Write(line)
			continue
		}

		// Section
		if line[0] == '[' {
			// The section name ends at the last ']' on the line
			// (TODO: support quoted strings)
			closeIdx := bytes.LastIndexByte(line, ']')
			if closeIdx == -1 {
				return fmt.Errorf("unclosed section: %s", line)
			}

			name := string(line[1:closeIdx])
			section, err = f.NewSection(name)
			if err != nil {
				return err
			}

			// A comment following the closing bracket is added to the
			// comments collected from the preceding lines.
			comment, has := cleanComment(line[closeIdx+1:])
			if has {
				p.comment.Write(comment)
			}

			section.Comment = strings.TrimSpace(p.comment.String())

			// Reset auto-counter and comments
			p.comment.Reset()
			p.count = 1

			// Decide whether the body of this section is kept as raw text.
			inUnparseableSection = false
			for i := range f.options.UnparseableSections {
				if f.options.UnparseableSections[i] == name ||
					(f.options.Insensitive && strings.ToLower(f.options.UnparseableSections[i]) == strings.ToLower(name)) {
					inUnparseableSection = true
					// This continue only advances the inner loop, so the
					// remaining entries are still compared.
					continue
				}
			}
			continue
		}

		// Lines inside an unparseable section are appended verbatim (minus
		// the leading whitespace trimmed above) to the section's raw body.
		if inUnparseableSection {
			section.isRawSection = true
			section.rawBody += string(line)
			continue
		}

		// Note: this err shadows the named result inside the loop body; every
		// failure path below returns it explicitly.
		kname, offset, err := readKeyName(f.options.KeyValueDelimiters, line)
		if err != nil {
			// Treat as boolean key when desired, and whole line is key name.
			if IsErrDelimiterNotFound(err) {
				switch {
				case f.options.AllowBooleanKeys:
					kname, err := p.readValue(line,
						parserBufferSize,
						f.options.IgnoreContinuation,
						f.options.IgnoreInlineComment,
						f.options.UnescapeValueDoubleQuotes,
						f.options.UnescapeValueCommentSymbols,
						f.options.AllowPythonMultilineValues,
						f.options.SpaceBeforeInlineComment,
						f.options.PreserveSurroundedQuote)
					if err != nil {
						return err
					}
					key, err := section.NewBooleanKey(kname)
					if err != nil {
						return err
					}
					key.Comment = strings.TrimSpace(p.comment.String())
					p.comment.Reset()
					continue

				case f.options.SkipUnrecognizableLines:
					continue
				}
			}
			return err
		}

		// Auto increment.
		// A key named "-" is stored as "#<n>", with n counting up from 1
		// within the current section.
		isAutoIncr := false
		if kname == "-" {
			isAutoIncr = true
			kname = "#" + strconv.Itoa(p.count)
			p.count++
		}

		value, err := p.readValue(line[offset:],
			parserBufferSize,
			f.options.IgnoreContinuation,
			f.options.IgnoreInlineComment,
			f.options.UnescapeValueDoubleQuotes,
			f.options.UnescapeValueCommentSymbols,
			f.options.AllowPythonMultilineValues,
			f.options.SpaceBeforeInlineComment,
			f.options.PreserveSurroundedQuote)
		if err != nil {
			return err
		}
		isLastValueEmpty = len(value) == 0

		key, err := section.NewKey(kname, value)
		if err != nil {
			return err
		}
		key.isAutoIncrement = isAutoIncr
		// Attach the comments gathered since the previous section or key
		// (including any inline comment found by readValue) and start afresh.
		key.Comment = strings.TrimSpace(p.comment.String())
		p.comment.Reset()
		lastRegularKey = key
	}
	return nil
}
489