1# This Source Code Form is subject to the terms of the Mozilla Public
2# License, v. 2.0. If a copy of the MPL was not distributed with this
3# file, You can obtain one at http://mozilla.org/MPL/2.0/.
4
5from __future__ import absolute_import
6from __future__ import unicode_literals
7import re
8
9from .base import (
10    Entity, OffsetComment, Whitespace,
11    Parser
12)
13from six import unichr
14
15
class PropertiesEntityMixin(object):
    """Mixin adding an unescaped ``val`` on top of a raw ``raw_val``.

    The host class has to provide ``raw_val``; ``val`` is computed from it
    on every access.
    """

    # A backslash followed by one of:
    #   uni    - 'u' plus one to four hex digits,
    #   nl     - a newline plus any blanks/tabs that follow it,
    #   single - any other single character.
    escape = re.compile(r'\\((?P<uni>u[0-9a-fA-F]{1,4})|'
                        '(?P<nl>\n[ \t]*)|(?P<single>.))', re.M)
    # Single-character escapes with a dedicated replacement; every other
    # escaped character stands for itself.
    known_escapes = {'n': '\n', 'r': '\r', 't': '\t', '\\': '\\'}

    @property
    def val(self):
        """Return ``raw_val`` with all backslash escapes resolved."""

        def _replace(match):
            hex_escape = match.group('uni')
            if hex_escape:
                # Drop the leading 'u', decode the hex code point.
                return unichr(int(hex_escape[1:], 16))
            if match.group('nl'):
                # Escaped line break: the break and the indentation of
                # the following line vanish.
                return ''
            char = match.group('single')
            return self.known_escapes.get(char, char)

        return self.escape.sub(_replace, self.raw_val)
32
33
class PropertiesEntity(PropertiesEntityMixin, Entity):
    """``Entity`` combined with ``PropertiesEntityMixin``.

    Adds nothing of its own; with the mixin listed first, its ``val``
    takes precedence over any ``val`` defined by ``Entity``.
    """
36
37
class PropertiesParser(Parser):
    """Parser producing comments, whitespace and key/value entities.

    Values are scanned line by line so that a line ending in an odd number
    of backslashes continues on the next line.
    """

    Comment = OffsetComment

    def __init__(self):
        # Key (not starting with '#', '!' or whitespace), optional blanks,
        # a ':' or '=' separator, optional blanks.
        self.reKey = re.compile(
            '(?P<key>[^#! \t\r\n][^=:\n]*?)[ \t]*[:=][ \t]*', re.M)
        # One or more consecutive lines starting with '#' or '!'; the
        # newline after the last such line is not part of the match.
        self.reComment = re.compile('(?:[#!][^\n]*\n)*(?:[#!][^\n]*)', re.M)
        # Run of backslashes at the end of the searched range.
        self._escapedEnd = re.compile(r'\\+$')
        # Whitespace run that reaches a newline or the end of the content.
        self._trailingWS = re.compile(r'[ \t\r\n]*(?:\n|\Z)', re.M)
        Parser.__init__(self)

    def getNext(self, ctx, offset):
        """Return the next item found in ``ctx.contents`` at ``offset``.

        A comment block, whitespace and a key/value pair are tried in that
        order. The result is one of:

        * the comment on its own, when it starts at offset 0 and contains
          "License", when the whitespace after it holds more than one
          newline, or when no key follows it;
        * the whitespace on its own, when no comment precedes it;
        * a ``PropertiesEntity`` carrying the preceding comment and
          whitespace (either may be ``None``);
        * otherwise the result of ``self.getJunk``.
        """
        text = ctx.contents
        # Junk, if any, is reported from where this call started.
        start = offset

        comment = None
        comment_match = self.reComment.match(text, offset)
        if comment_match:
            comment = self.Comment(ctx, comment_match.span())
            if offset == 0 and 'License' in comment.val:
                # Heuristic: an early comment mentioning "License" is
                # probably a license header and should be standalone.
                return comment
            offset = comment_match.end()

        blank = None
        blank_match = self.reWhitespace.match(text, offset)
        if blank_match:
            blank = Whitespace(ctx, blank_match.span())
            offset = blank_match.end()
            if comment is None:
                return blank
            if blank.raw_val.count('\n') > 1:
                # More than one line break after it: standalone comment.
                return comment

        key_match = self.reKey.match(text, offset)
        if key_match is None:
            if comment is not None:
                return comment
            if blank is not None:
                return blank
            return self.getJunk(ctx, start, self.reKey, self.reComment)

        # Walk the value one physical line at a time. ``last_line`` tracks
        # where the final line of the value begins.
        value_start = key_match.end()
        last_line = cursor = value_start
        while True:
            newline = text.find('\n', cursor)
            if newline == -1:
                value_end = len(text)
                break
            value_end = newline
            slashes = self._escapedEnd.search(text, cursor, newline)
            # Only an odd number of trailing backslashes escapes the
            # newline; an even number is a run of escaped backslashes.
            if slashes is None or len(slashes.group()) % 2 == 0:
                break
            last_line = cursor = newline + 1

        # Trailing whitespace is not part of the value.
        trailing = self._trailingWS.search(text, last_line)
        if trailing:
            value_end = trailing.start()

        return PropertiesEntity(
            ctx, comment, blank,
            (key_match.start(), value_end),   # full span
            key_match.span('key'),
            (value_start, value_end))   # value span
117