1from __future__ import unicode_literals
2from .errors import ParseError
3
4
class ParserStream(object):
    """Cursor over a string with a separate lookahead ("peek") offset.

    ``index`` is the position of the main cursor and ``peek_offset`` is the
    distance of the peek cursor ahead of it. A CRLF pair is treated as one
    line break: it reads as LF and is stepped over in a single move.
    """

    def __init__(self, string):
        self.string = string
        self.index = 0
        self.peek_offset = 0

    def get(self, offset):
        """Return the raw character at ``offset``, or None when out of range.

        Negative offsets count as out of range; they must not wrap around to
        the end of the string the way plain indexing would.
        """
        if 0 <= offset < len(self.string):
            return self.string[offset]
        return None

    def char_at(self, offset):
        """Return the character at ``offset`` with CRLF reported as LF."""
        # When the cursor is at CRLF, return LF but don't move the cursor. The
        # cursor still points to the EOL position, which in this case is the
        # beginning of the compound CRLF sequence. This ensures slices of
        # [inclusive, exclusive) continue to work properly.
        if self.get(offset) == '\r' \
                and self.get(offset + 1) == '\n':
            return '\n'

        return self.get(offset)

    @property
    def current_char(self):
        """Character under the main cursor (CRLF reads as LF)."""
        return self.char_at(self.index)

    @property
    def current_peek(self):
        """Character under the peek cursor (CRLF reads as LF)."""
        return self.char_at(self.index + self.peek_offset)

    def next(self):
        """Advance the main cursor by one character and drop any lookahead.

        Returns the raw character at the new position (None past the end);
        unlike ``current_char`` the return value is not CRLF-normalized.
        """
        self.peek_offset = 0
        # Skip over CRLF as if it was a single character.
        if self.get(self.index) == '\r' \
                and self.get(self.index + 1) == '\n':
            self.index += 1
        self.index += 1
        return self.get(self.index)

    def peek(self):
        """Advance the peek cursor by one character.

        Returns the raw character at the new peek position (None past the
        end); unlike ``current_peek`` the value is not CRLF-normalized.
        """
        # Skip over CRLF as if it was a single character.
        if self.get(self.index + self.peek_offset) == '\r' \
                and self.get(self.index + self.peek_offset + 1) == '\n':
            self.peek_offset += 1
        self.peek_offset += 1
        return self.get(self.index + self.peek_offset)

    def reset_peek(self, offset=0):
        """Move the peek cursor to ``offset`` chars ahead of the main cursor."""
        self.peek_offset = offset

    def skip_to_peek(self):
        """Move the main cursor to the peek cursor and clear the lookahead."""
        self.index += self.peek_offset
        self.peek_offset = 0
59
60
# Line terminator as the parser sees it; ParserStream.char_at reports a CRLF
# pair as this single LF.
EOL = '\n'
# End-of-input marker: ParserStream.get returns None past the end of the
# string, so the cursor properties compare against this with `is`.
EOF = None
# Characters that is_char_pattern_continuation refuses as the first char of a
# continued pattern line.
SPECIAL_LINE_START_CHARS = ('}', '.', '[', '*')
64
65
class FluentParserStream(ParserStream):
    """Fluent-specific lookahead and consumption helpers.

    ``peek_*`` methods move only the peek cursor. ``skip_*``, ``expect_*``
    and ``take_*`` methods advance the main cursor. ``is_*`` predicates never
    advance the main cursor, though some of them leave the peek cursor moved
    (noted on each method).
    """

    def peek_blank_inline(self):
        """Peek past a run of spaces and return the text that was skipped."""
        start = self.index + self.peek_offset
        while self.current_peek == ' ':
            self.peek()
        return self.string[start:self.index + self.peek_offset]

    def skip_blank_inline(self):
        """Consume a run of spaces and return it."""
        blank = self.peek_blank_inline()
        self.skip_to_peek()
        return blank

    def peek_blank_block(self):
        """Peek past consecutive blank lines.

        Returns one EOL per blank line passed. The peek cursor is left at the
        start of the first line that is not blank, or after any trailing
        spaces when the input ends.
        """
        blank_lines = 0
        while True:
            line_start = self.peek_offset
            self.peek_blank_inline()

            if self.current_peek == EOL:
                blank_lines += 1
                self.peek()
                continue

            if self.current_peek is not EOF:
                # Any other char; reset to column 1 on this line.
                self.reset_peek(line_start)

            # At EOF the trailing blank line is treated as part of the block,
            # so the peek cursor stays where it is.
            return EOL * blank_lines

    def skip_blank_block(self):
        """Consume consecutive blank lines; return one EOL per line."""
        blank = self.peek_blank_block()
        self.skip_to_peek()
        return blank

    def peek_blank(self):
        """Peek past any mix of spaces and line breaks."""
        while self.current_peek in (" ", EOL):
            self.peek()

    def skip_blank(self):
        """Consume any mix of spaces and line breaks."""
        self.peek_blank()
        self.skip_to_peek()

    def expect_char(self, ch):
        """Consume ``ch`` and return True, or raise ParseError('E0003', ch)."""
        if self.current_char != ch:
            raise ParseError('E0003', ch)

        self.next()
        return True

    def expect_line_end(self):
        """Consume a line break and return True; EOF is accepted as well.

        Raises ParseError('E0003', ...) when the current char is neither.
        """
        if self.current_char is EOF:
            # EOF is a valid line end in Fluent.
            return True

        if self.current_char == EOL:
            self.next()
            return True

        # Unicode Character 'SYMBOL FOR NEWLINE' (U+2424)
        raise ParseError('E0003', '\u2424')

    def take_char(self, f):
        """Consume the current char if the predicate ``f`` accepts it.

        Returns EOF (None) at the end of input, the consumed char when
        ``f(ch)`` is truthy, and False otherwise (nothing is consumed).
        """
        ch = self.current_char
        if ch is EOF:
            return EOF
        if f(ch):
            self.next()
            return ch
        return False

    def is_char_id_start(self, ch):
        """Return True if ``ch`` is an ASCII letter; EOF gives False."""
        if ch is EOF:
            return False

        cc = ord(ch)
        return 97 <= cc <= 122 or 65 <= cc <= 90  # a-z or A-Z

    def is_identifier_start(self):
        """Return True if the char under the peek cursor can start an id."""
        return self.is_char_id_start(self.current_peek)

    def is_number_start(self):
        """Return True if a digit, or '-' followed by a digit, is next.

        The peek cursor is reset to the main cursor before returning.
        """
        ch = self.peek() if self.current_char == '-' else self.current_char
        is_digit = ch is not EOF and 48 <= ord(ch) <= 57  # 0-9
        self.reset_peek()
        return is_digit

    def is_char_pattern_continuation(self, ch):
        """Return True unless ``ch`` is EOF or a special line-start char."""
        if ch is EOF:
            return False

        return ch not in SPECIAL_LINE_START_CHARS

    def is_value_start(self):
        """Return True if the peek cursor is on anything but EOL or EOF."""
        # Inline Patterns may start with any char.
        ch = self.current_peek
        return ch is not EOF and ch != EOL

    def is_value_continuation(self):
        """Return True if the line at the peek cursor continues a pattern.

        A line continues the pattern when, after optional indentation, it
        starts with '{', or when it is indented and its first char passes
        is_char_pattern_continuation. On True the peek cursor is restored to
        where it was on entry; on False it is left after the indentation.
        """
        column1 = self.peek_offset
        self.peek_blank_inline()

        if self.current_peek == '{':
            self.reset_peek(column1)
            return True

        if self.peek_offset == column1:
            # No indentation at all.
            return False

        if self.is_char_pattern_continuation(self.current_peek):
            self.reset_peek(column1)
            return True

        return False

    # -1 - any
    #  0 - comment
    #  1 - group comment
    #  2 - resource comment
    def is_next_line_comment(self, level=-1):
        """Return True if the line after the current EOL is a comment line.

        The peek cursor must be on an EOL, otherwise False is returned
        without touching it. With an explicit ``level`` the next line must
        start with exactly ``level + 1`` hashes; with -1 one to three hashes
        are accepted. The hashes must be followed by a space or a line end.
        The peek cursor is reset to the main cursor before returning.
        """
        if self.current_peek != EOL:
            return False

        if level == -1:
            min_hashes, max_hashes = 1, 3
        else:
            min_hashes = max_hashes = level + 1

        # Step onto the first char of the next line, then count hashes.
        hashes = 0
        self.peek()
        while hashes < max_hashes and self.current_peek == '#':
            hashes += 1
            self.peek()

        # The peek cursor is now on the first char after #, ## or ###. It is
        # read through current_peek so that a CRLF line end counts as EOL.
        is_comment = (hashes >= min_hashes
                      and self.current_peek in (' ', EOL))
        self.reset_peek()
        return is_comment

    def is_variant_start(self):
        """Return True if a variant key ('[' or '*[') is at the peek cursor.

        A doubled '[[' does not count. The peek cursor is restored to where
        it was on entry.
        """
        current_peek_offset = self.peek_offset
        if self.current_peek == '*':
            self.peek()
        found = self.current_peek == '[' and self.peek() != '['
        self.reset_peek(current_peek_offset)
        return found

    def is_attribute_start(self):
        """Return True if the char under the peek cursor is '.'."""
        return self.current_peek == '.'

    def skip_to_next_entry_start(self, junk_start):
        """Advance to the next line that looks like the start of an entry.

        ``junk_start`` is the index where the broken entry began; it is used
        to decide whether rewinding to the previous newline is safe.
        """
        last_newline = self.string.rfind(EOL, 0, self.index)
        if junk_start < last_newline:
            # Last seen newline is _after_ the junk start. It's safe to rewind
            # without the risk of resuming at the same broken entry.
            self.index = last_newline

        while self.current_char:
            # We're only interested in beginnings of line.
            if self.current_char != EOL:
                self.next()
                continue

            # Break if the first char in this line looks like an entry start.
            first = self.next()
            if self.is_char_id_start(first) or first in ('-', '#'):
                break

            # Syntax 0.4 compatibility
            peek = self.peek()
            self.reset_peek()
            if (first, peek) in (('/', '/'), ('[', '[')):
                break

    def take_id_start(self):
        """Consume and return an ASCII letter.

        Raises ParseError('E0004', 'a-zA-Z') if the current char is not one.
        """
        if self.is_char_id_start(self.current_char):
            ret = self.current_char
            self.next()
            return ret

        raise ParseError('E0004', 'a-zA-Z')

    def take_id_char(self):
        """Consume one of a-z, A-Z, 0-9, '_' or '-'; see take_char."""
        def closure(ch):
            cc = ord(ch)
            return (97 <= cc <= 122 or   # a-z
                    65 <= cc <= 90 or    # A-Z
                    48 <= cc <= 57 or    # 0-9
                    cc == 95 or cc == 45)  # _ and -
        return self.take_char(closure)

    def take_digit(self):
        """Consume one of 0-9; see take_char for the return values."""
        def closure(ch):
            return 48 <= ord(ch) <= 57
        return self.take_char(closure)

    def take_hex_digit(self):
        """Consume one of 0-9, A-F or a-f; see take_char."""
        def closure(ch):
            cc = ord(ch)
            return (
                48 <= cc <= 57         # 0-9
                or 65 <= cc <= 70      # A-F
                or 97 <= cc <= 102)    # a-f
        return self.take_char(closure)
283