from __future__ import unicode_literals
from .errors import ParseError


class ParserStream(object):
    """Cursor over a string with a separate, resettable lookahead.

    ``index`` is the committed cursor position in ``string``.
    ``peek_offset`` is the lookahead distance, relative to ``index``.
    A CRLF pair is stepped over as one position and reported as a single
    LF by every accessor except the raw ``get``.
    """

    def __init__(self, string):
        self.string = string
        self.index = 0
        self.peek_offset = 0

    def get(self, offset):
        """Return the raw character at ``offset``, or None if out of range.

        Negative offsets are out of range too; they must not wrap around to
        the end of the string the way plain Python indexing would.
        """
        if 0 <= offset < len(self.string):
            return self.string[offset]
        return None

    def char_at(self, offset):
        """Return the character at ``offset``, reporting CRLF as LF."""
        # When the cursor is at CRLF, return LF but don't move the cursor. The
        # cursor still points to the EOL position, which in this case is the
        # beginning of the compound CRLF sequence. This ensures slices of
        # [inclusive, exclusive) continue to work properly.
        if self.get(offset) == '\r' and self.get(offset + 1) == '\n':
            return '\n'

        return self.get(offset)

    @property
    def current_char(self):
        """Character under the cursor (CRLF reported as LF), None at the end."""
        return self.char_at(self.index)

    @property
    def current_peek(self):
        """Character under the lookahead (CRLF reported as LF), None at the end."""
        return self.char_at(self.index + self.peek_offset)

    def next(self):
        """Advance the cursor by one position and drop any lookahead.

        Returns the character now under the cursor, or None past the end.
        The result is CRLF-normalized so that it always agrees with
        ``current_char``.
        """
        self.peek_offset = 0
        # Skip over CRLF as if it was a single character.
        if self.get(self.index) == '\r' \
                and self.get(self.index + 1) == '\n':
            self.index += 1
        self.index += 1
        return self.current_char

    def peek(self):
        """Advance the lookahead by one position without moving the cursor.

        Returns the character now under the lookahead, or None past the end.
        The result is CRLF-normalized so that it always agrees with
        ``current_peek``.
        """
        position = self.index + self.peek_offset
        # Skip over CRLF as if it was a single character.
        if self.get(position) == '\r' and self.get(position + 1) == '\n':
            self.peek_offset += 1
        self.peek_offset += 1
        return self.current_peek

    def reset_peek(self, offset=0):
        """Set the lookahead distance to ``offset`` (default: none)."""
        self.peek_offset = offset

    def skip_to_peek(self):
        """Move the cursor to the lookahead position and clear the lookahead."""
        self.index += self.peek_offset
        self.peek_offset = 0


EOL = '\n'
EOF = None
SPECIAL_LINE_START_CHARS = ('}', '.', '[', '*')


class FluentParserStream(ParserStream):
    """ParserStream with Fluent-specific lookahead and consumption helpers."""

    # Class-level default. Nothing in this file reads or writes it;
    # NOTE(review): presumably maintained by the parser for Syntax 0.4
    # comments -- confirm against the callers.
    last_comment_zero_four_syntax = False

    def peek_blank_inline(self):
        """Peek past a run of spaces and return the spaces that were passed."""
        start = self.index + self.peek_offset
        while self.current_peek == ' ':
            self.peek()
        return self.string[start:self.index + self.peek_offset]

    def skip_blank_inline(self):
        """Consume a run of spaces and return it."""
        blank = self.peek_blank_inline()
        self.skip_to_peek()
        return blank

    def peek_blank_block(self):
        """Peek past consecutive blank lines.

        Returns one EOL per blank line passed. The lookahead is left at the
        first column of the first non-blank line, or at EOF.
        """
        blank = ""
        while True:
            line_start = self.peek_offset
            self.peek_blank_inline()

            if self.current_peek == EOL:
                blank += EOL
                self.peek()
                continue

            if self.current_peek is EOF:
                # Treat the blank line at EOF as a blank block.
                return blank

            # Any other char; reset to column 1 on this line.
            self.reset_peek(line_start)
            return blank

    def skip_blank_block(self):
        """Consume consecutive blank lines; return one EOL per line."""
        blank = self.peek_blank_block()
        self.skip_to_peek()
        return blank

    def peek_blank(self):
        """Peek past any mix of spaces and line ends."""
        while self.current_peek in (" ", EOL):
            self.peek()

    def skip_blank(self):
        """Consume any mix of spaces and line ends."""
        self.peek_blank()
        self.skip_to_peek()

    def expect_char(self, ch):
        """Consume ``ch`` and return True, or raise ParseError('E0003', ch)."""
        if self.current_char == ch:
            self.next()
            return True

        raise ParseError('E0003', ch)

    def expect_line_end(self):
        """Consume a line end and return True, or raise ParseError('E0003').

        EOF counts as a line end and is not consumed.
        """
        if self.current_char is EOF:
            # EOF is a valid line end in Fluent.
            return True

        if self.current_char == EOL:
            self.next()
            return True

        # Unicode Character 'SYMBOL FOR NEWLINE' (U+2424)
        raise ParseError('E0003', '\u2424')

    def take_char(self, f):
        """Consume the current character if the predicate ``f`` accepts it.

        Returns EOF (None) at the end of input, the consumed character when
        ``f(ch)`` is truthy, and False otherwise (nothing is consumed).
        """
        ch = self.current_char
        if ch is EOF:
            return EOF
        if f(ch):
            self.next()
            return ch
        return False

    def is_char_id_start(self, ch):
        """Return True if ``ch`` is an ASCII letter (a-z or A-Z)."""
        if ch is EOF:
            return False

        cc = ord(ch)
        return (cc >= 97 and cc <= 122) or \
               (cc >= 65 and cc <= 90)

    def is_identifier_start(self):
        """Return True if the character under the lookahead starts an identifier."""
        return self.is_char_id_start(self.current_peek)

    def is_number_start(self):
        """Return True if a digit, optionally preceded by '-', is at the cursor.

        The lookahead is reset to the cursor before returning.
        """
        ch = self.peek() if self.current_char == '-' else self.current_char
        if ch is EOF:
            self.reset_peek()
            return False

        cc = ord(ch)
        is_digit = cc >= 48 and cc <= 57
        self.reset_peek()
        return is_digit

    def is_char_pattern_continuation(self, ch):
        """Return True if ``ch`` may begin a continuation line of a pattern.

        EOF and the characters in SPECIAL_LINE_START_CHARS may not.
        """
        if ch is EOF:
            return False

        return ch not in SPECIAL_LINE_START_CHARS

    def is_value_start(self):
        """Return True unless the lookahead is at EOF or at a line end."""
        # Inline Patterns may start with any char.
        return self.current_peek is not EOF and self.current_peek != EOL

    def is_value_continuation(self):
        """Return True if the line under the lookahead continues a value.

        A line continues a value when, after optional leading spaces, it
        starts with '{', or when it is indented by at least one space and
        its first non-space character is allowed by
        ``is_char_pattern_continuation``. On True the lookahead is restored
        to where it was on entry; on False it is left where the check
        stopped.
        """
        column1 = self.peek_offset
        self.peek_blank_inline()

        if self.current_peek == '{':
            self.reset_peek(column1)
            return True

        if self.peek_offset - column1 == 0:
            # No indentation at all.
            return False

        if self.is_char_pattern_continuation(self.current_peek):
            self.reset_peek(column1)
            return True

        return False

    def is_next_line_zero_four_comment(self):
        """Return True if the lookahead is at a line end followed by '//'.

        The lookahead is reset to the cursor whenever a line end was found.
        """
        if self.current_peek != EOL:
            return False

        is_comment = (self.peek(), self.peek()) == ('/', '/')
        self.reset_peek()
        return is_comment

    # -1 - any
    #  0 - comment
    #  1 - group comment
    #  2 - resource comment
    def is_next_line_comment(self, level=-1):
        """Return True if the next line is a comment of the given level.

        The lookahead must be at a line end. The next line then has to start
        with exactly ``level + 1`` '#' characters (one to three of them when
        ``level`` is -1), followed by a space or a line end. The lookahead
        is reset to the cursor whenever a line end was found.
        """
        if self.current_peek != EOL:
            return False

        any_level = level == -1
        max_hashes = 3 if any_level else level + 1

        # Count leading '#'s while remembering the last character peeked, so
        # that the character right after the sigil is examined exactly once.
        hashes = 0
        ch = self.peek()
        while ch == '#' and hashes < max_hashes:
            hashes += 1
            ch = self.peek()
        self.reset_peek()

        if any_level:
            if hashes == 0:
                return False
        elif hashes != max_hashes:
            return False

        # The first char after #, ## or ###.
        return ch in (' ', EOL)

    def is_variant_start(self):
        """Return True if the lookahead is at '[' (optionally '*[') but not '[['.

        The lookahead is restored to where it was on entry.
        """
        current_peek_offset = self.peek_offset
        if self.current_peek == '*':
            self.peek()
        if self.current_peek == '[' and self.peek() != '[':
            self.reset_peek(current_peek_offset)
            return True

        self.reset_peek(current_peek_offset)
        return False

    def is_attribute_start(self):
        """Return True if the character under the lookahead is '.'."""
        return self.current_peek == '.'

    def skip_to_next_entry_start(self, junk_start):
        """Advance the cursor to the next line that looks like an entry start.

        A line looks like an entry start when its first character is an
        ASCII letter, '-' or '#', or when it begins with '//' or '[['
        (Syntax 0.4). Stops at EOF if no such line is found.

        ``junk_start`` is the index at which the current junk began; the
        cursor is never rewound to before it.
        """
        last_newline = self.string.rfind(EOL, 0, self.index)
        if junk_start < last_newline:
            # Last seen newline is _after_ the junk start. It's safe to rewind
            # without the risk of resuming at the same broken entry.
            self.index = last_newline

        while self.current_char:
            # We're only interested in beginnings of line.
            if self.current_char != EOL:
                self.next()
                continue

            # Break if the first char in this line looks like an entry start.
            first = self.next()
            if self.is_char_id_start(first) or first == '-' or first == '#':
                break

            # Syntax 0.4 compatibility
            peek = self.peek()
            self.reset_peek()
            if (first, peek) == ('/', '/') or (first, peek) == ('[', '['):
                break

    def take_id_start(self):
        """Consume and return an ASCII letter, or raise ParseError('E0004')."""
        if self.is_char_id_start(self.current_char):
            ret = self.current_char
            self.next()
            return ret

        raise ParseError('E0004', 'a-zA-Z')

    def take_id_char(self):
        """Consume an identifier character (a-z, A-Z, 0-9, '_' or '-').

        Returns as ``take_char`` does.
        """
        def closure(ch):
            cc = ord(ch)
            return ((cc >= 97 and cc <= 122) or
                    (cc >= 65 and cc <= 90) or
                    (cc >= 48 and cc <= 57) or
                    cc == 95 or cc == 45)
        return self.take_char(closure)

    def take_digit(self):
        """Consume a decimal digit (0-9). Returns as ``take_char`` does."""
        def closure(ch):
            cc = ord(ch)
            return (cc >= 48 and cc <= 57)
        return self.take_char(closure)

    def take_hex_digit(self):
        """Consume a hex digit (0-9, A-F, a-f). Returns as ``take_char`` does."""
        def closure(ch):
            cc = ord(ch)
            return (
                (cc >= 48 and cc <= 57)  # 0-9
                or (cc >= 65 and cc <= 70)  # A-F
                or (cc >= 97 and cc <= 102))  # a-f
        return self.take_char(closure)