1from __future__ import unicode_literals
2from .errors import ParseError
3
4
class ParserStream(object):
    """Cursor over a string with a secondary, resettable peek cursor.

    ``index`` is the position of the real cursor and ``peek_offset`` is the
    distance of the peek cursor from ``index``. A CRLF sequence is handled
    as one line feed: it is reported as LF and stepped over in one move.
    """

    def __init__(self, string):
        self.string = string
        self.index = 0
        self.peek_offset = 0

    def get(self, offset):
        """Return the raw character at absolute ``offset``, or None.

        None is returned for every position outside the string. Negative
        offsets count as outside as well, instead of wrapping around to the
        end of the string the way plain indexing would.
        """
        if offset < 0:
            return None
        try:
            return self.string[offset]
        except IndexError:
            return None

    def char_at(self, offset):
        """Return the character at ``offset`` with CRLF reported as LF."""
        # When the cursor is at CRLF, return LF but don't move the cursor. The
        # cursor still points to the EOL position, which in this case is the
        # beginning of the compound CRLF sequence. This ensures slices of
        # [inclusive, exclusive) continue to work properly.
        if self.get(offset) == '\r' \
                and self.get(offset + 1) == '\n':
            return '\n'

        return self.get(offset)

    @property
    def current_char(self):
        """Character under the real cursor (None past the end)."""
        return self.char_at(self.index)

    @property
    def current_peek(self):
        """Character under the peek cursor (None past the end)."""
        return self.char_at(self.index + self.peek_offset)

    def next(self):
        """Advance the real cursor by one character and return the new one.

        The peek cursor is reset. The returned value is the same as
        ``current_char`` after the move: None past the end of the input,
        and LF when the cursor lands on a CRLF sequence.
        """
        self.peek_offset = 0
        # Skip over CRLF as if it was a single character.
        if self.get(self.index) == '\r' \
                and self.get(self.index + 1) == '\n':
            self.index += 1
        self.index += 1
        # Normalize through char_at so the return value always agrees with
        # current_char, also when the new position is the start of a CRLF.
        return self.char_at(self.index)

    def peek(self):
        """Advance the peek cursor by one character and return the new one.

        The returned value is the same as ``current_peek`` after the move:
        None past the end of the input, and LF when the peek cursor lands
        on a CRLF sequence.
        """
        # Skip over CRLF as if it was a single character.
        if self.get(self.index + self.peek_offset) == '\r' \
                and self.get(self.index + self.peek_offset + 1) == '\n':
            self.peek_offset += 1
        self.peek_offset += 1
        # Normalize through char_at so the return value always agrees with
        # current_peek.
        return self.char_at(self.index + self.peek_offset)

    def reset_peek(self, offset=0):
        """Move the peek cursor to ``offset`` characters past the cursor."""
        self.peek_offset = offset

    def skip_to_peek(self):
        """Move the real cursor to the peek cursor and reset the latter."""
        self.index += self.peek_offset
        self.peek_offset = 0
59
60
# Normalized end-of-line character; ParserStream.char_at reports CRLF as LF.
EOL = '\n'
# End-of-input marker: ParserStream.get returns None past the end of input.
EOF = None
# Characters which, per is_char_pattern_continuation, cannot begin a
# continuation line of a pattern.
SPECIAL_LINE_START_CHARS = ('}', '.', '[', '*')
64
65
class FluentParserStream(ParserStream):
    """Fluent-specific lookahead, skipping and character-class helpers.

    ``peek_*`` and ``is_*`` methods move at most the peek cursor
    (``peek_offset``); ``skip_*``, ``take_*`` and ``expect_*`` methods
    advance the real cursor (``index``).
    """

    # Not read or written by this class itself; presumably maintained by the
    # parser for Syntax 0.4 compatibility -- TODO confirm against callers.
    last_comment_zero_four_syntax = False

    def peek_blank_inline(self):
        """Peek past a run of spaces and return the spaces peeked over."""
        start = self.index + self.peek_offset
        while self.current_peek == ' ':
            self.peek()
        return self.string[start:self.index + self.peek_offset]

    def skip_blank_inline(self):
        """Skip a run of spaces and return the spaces skipped."""
        blank = self.peek_blank_inline()
        self.skip_to_peek()
        return blank

    def peek_blank_block(self):
        """Peek past blank lines and return one EOL per blank line.

        A line is blank when it holds only spaces. The peek cursor is left
        at the first column of the first non-blank line, or at the end of
        the input when only blank content remains.
        """
        blank = ""
        while True:
            line_start = self.peek_offset
            self.peek_blank_inline()

            if self.current_peek == EOL:
                blank += EOL
                self.peek()
                continue

            if self.current_peek is EOF:
                # Treat the blank line at EOF as a blank block.
                return blank

            # Any other char; reset to column 1 on this line.
            self.reset_peek(line_start)
            return blank

    def skip_blank_block(self):
        """Skip blank lines and return one EOL per line skipped."""
        blank = self.peek_blank_block()
        self.skip_to_peek()
        return blank

    def peek_blank(self):
        """Peek past any mix of spaces and line ends."""
        while self.current_peek in (" ", EOL):
            self.peek()

    def skip_blank(self):
        """Skip any mix of spaces and line ends."""
        self.peek_blank()
        self.skip_to_peek()

    def expect_char(self, ch):
        """Consume ``ch`` at the cursor; raise ParseError E0003 otherwise."""
        if self.current_char == ch:
            self.next()
            return True

        raise ParseError('E0003', ch)

    def expect_line_end(self):
        """Consume a line end; raise ParseError E0003 if there is none.

        The end of the input is accepted as a line end without moving the
        cursor.
        """
        if self.current_char is EOF:
            # EOF is a valid line end in Fluent.
            return True

        if self.current_char == EOL:
            self.next()
            return True

        # Unicode Character 'SYMBOL FOR NEWLINE' (U+2424)
        raise ParseError('E0003', '\u2424')

    def take_char(self, f):
        """Consume the current character if the predicate ``f`` accepts it.

        Returns the consumed character, EOF (None) at the end of the input
        (``f`` is not called then), or False when ``f`` rejects the
        character; the cursor only moves in the first case.
        """
        ch = self.current_char
        if ch is EOF:
            return EOF
        if f(ch):
            self.next()
            return ch
        return False

    def is_char_id_start(self, ch):
        """Return True if ``ch`` is an ASCII letter (a-z or A-Z)."""
        if ch is EOF:
            return False

        cc = ord(ch)
        return (cc >= 97 and cc <= 122) or \
               (cc >= 65 and cc <= 90)

    def is_identifier_start(self):
        """Return True if the peeked character can start an identifier."""
        return self.is_char_id_start(self.current_peek)

    def is_number_start(self):
        """Return True if the cursor is at a digit or at '-' plus a digit.

        The peek cursor is reset before returning.
        """
        ch = self.peek() if self.current_char == '-' else self.current_char
        if ch is EOF:
            self.reset_peek()
            return False

        cc = ord(ch)
        is_digit = cc >= 48 and cc <= 57
        self.reset_peek()
        return is_digit

    def is_char_pattern_continuation(self, ch):
        """Return True if ``ch`` is a char other than a special line start."""
        if ch is EOF:
            return False

        return ch not in SPECIAL_LINE_START_CHARS

    def is_value_start(self):
        """Return True unless the peek cursor is at a line end or at EOF."""
        # Inline Patterns may start with any char.
        return self.current_peek is not EOF and self.current_peek != EOL

    def is_value_continuation(self):
        """Return True if the peeked line continues a pattern value.

        The line continues the value when, after optional spaces, it starts
        with '{', or when it is indented by at least one space and its first
        non-blank char is not a special line start. On True the peek cursor
        is put back to where it was on entry; on False it is left where the
        scan stopped.
        """
        column1 = self.peek_offset
        self.peek_blank_inline()

        if self.current_peek == '{':
            self.reset_peek(column1)
            return True

        if self.peek_offset - column1 == 0:
            return False

        if self.is_char_pattern_continuation(self.current_peek):
            self.reset_peek(column1)
            return True

        return False

    def is_next_line_zero_four_comment(self):
        """Return True if the next line starts with '//' (Syntax 0.4).

        Requires the peek cursor to be at a line end; the peek cursor is
        reset when the check is performed.
        """
        if self.current_peek != EOL:
            return False

        is_comment = (self.peek(), self.peek()) == ('/', '/')
        self.reset_peek()
        return is_comment

    # -1 - any
    #  0 - comment
    #  1 - group comment
    #  2 - resource comment
    def is_next_line_comment(self, level=-1):
        """Return True if the next line is a comment line of ``level``.

        The peek cursor must be at a line end, otherwise False is returned
        and nothing moves. For ``level`` 0, 1 or 2 the next line has to
        start with exactly ``level + 1`` '#' characters; for -1 it has to
        start with one to three of them. In both cases the hashes must be
        followed by a space or a line end. The peek cursor is reset once
        the next line has been inspected.
        """
        if self.current_peek != EOL:
            return False

        any_level = level == -1
        max_hashes = 3 if any_level else level + 1

        # Step over the line end, then count the leading hashes. The char
        # following them is read via current_peek, never by peeking again:
        # that keeps the peek cursor on the char right after the hashes and
        # makes a CRLF terminator compare equal to EOL.
        hashes = 0
        self.peek()
        while hashes < max_hashes and self.current_peek == '#':
            hashes += 1
            self.peek()

        if any_level:
            has_marker = hashes >= 1
        else:
            has_marker = hashes == max_hashes

        # The first char after #, ## or ###.
        is_comment = has_marker and self.current_peek in (' ', EOL)
        self.reset_peek()
        return is_comment

    def is_variant_start(self):
        """Return True if the peek cursor is at '[' or '*[' but not '[['.

        The peek cursor is put back to where it was on entry.
        """
        current_peek_offset = self.peek_offset
        if self.current_peek == '*':
            self.peek()
        if self.current_peek == '[' and self.peek() != '[':
            self.reset_peek(current_peek_offset)
            return True

        self.reset_peek(current_peek_offset)
        return False

    def is_attribute_start(self):
        """Return True if the peeked character is '.'."""
        return self.current_peek == '.'

    def skip_to_next_entry_start(self, junk_start):
        """Advance the cursor to the next line that looks like an entry.

        ``junk_start`` is compared with the position of the last line end
        before the cursor to decide whether rewinding to that line end is
        safe. Scanning stops at the first line starting with an ASCII
        letter, '-', '#', '//' or '[[', or at the end of the input.
        """
        last_newline = self.string.rfind(EOL, 0, self.index)
        if junk_start < last_newline:
            # Last seen newline is _after_ the junk start. It's safe to rewind
            # without the risk of resuming at the same broken entry.
            self.index = last_newline

        while self.current_char:
            # We're only interested in beginnings of line.
            if self.current_char != EOL:
                self.next()
                continue

            # Break if the first char in this line looks like an entry start.
            first = self.next()
            if self.is_char_id_start(first) or first == '-' or first == '#':
                break

            # Syntax 0.4 compatibility
            peek = self.peek()
            self.reset_peek()
            if (first, peek) == ('/', '/') or (first, peek) == ('[', '['):
                break

    def take_id_start(self):
        """Consume and return an ASCII letter; raise ParseError E0004."""
        if self.is_char_id_start(self.current_char):
            ret = self.current_char
            self.next()
            return ret

        raise ParseError('E0004', 'a-zA-Z')

    def take_id_char(self):
        """Consume one of a-z, A-Z, 0-9, '_' or '-' (see take_char)."""
        def closure(ch):
            cc = ord(ch)
            return ((cc >= 97 and cc <= 122) or
                    (cc >= 65 and cc <= 90) or
                    (cc >= 48 and cc <= 57) or
                    cc == 95 or cc == 45)
        return self.take_char(closure)

    def take_digit(self):
        """Consume one decimal digit (see take_char)."""
        def closure(ch):
            cc = ord(ch)
            return (cc >= 48 and cc <= 57)
        return self.take_char(closure)

    def take_hex_digit(self):
        """Consume one hexadecimal digit, either case (see take_char)."""
        def closure(ch):
            cc = ord(ch)
            return (
                (cc >= 48 and cc <= 57)   # 0-9
                or (cc >= 65 and cc <= 70)  # A-F
                or (cc >= 97 and cc <= 102))  # a-f
        return self.take_char(closure)
292