"""Cursor-based character streams for parsing Fluent source text."""
from __future__ import unicode_literals
from .errors import ParseError


class ParserStream(object):
    """A string with a committed cursor and a look-ahead (peek) cursor.

    ``index`` is the committed position in ``string``. ``peek_offset`` is
    the distance from ``index`` to the look-ahead position. A CRLF pair is
    treated as a single EOL character by the cursor-moving methods.
    """

    def __init__(self, string):
        self.string = string
        self.index = 0
        self.peek_offset = 0

    def get(self, offset):
        """Return the raw character at ``offset``, or None past the end."""
        try:
            return self.string[offset]
        except IndexError:
            return None

    def char_at(self, offset):
        """Return the character at ``offset``, reporting CRLF as LF."""
        # When the cursor is at CRLF, return LF but don't move the cursor. The
        # cursor still points to the EOL position, which in this case is the
        # beginning of the compound CRLF sequence. This ensures slices of
        # [inclusive, exclusive) continue to work properly.
        if self.get(offset) == '\r' \
                and self.get(offset + 1) == '\n':
            return '\n'

        return self.get(offset)

    @property
    def current_char(self):
        """The (CRLF-normalised) character at the committed cursor."""
        return self.char_at(self.index)

    @property
    def current_peek(self):
        """The (CRLF-normalised) character at the look-ahead cursor."""
        return self.char_at(self.index + self.peek_offset)

    def next(self):
        """Advance the committed cursor by one character and reset the peek.

        Returns the raw character (see ``get``) at the new position, so a
        CRLF pair at the new position is reported as '\\r', not as EOL.
        """
        self.peek_offset = 0
        # Skip over CRLF as if it was a single character.
        if self.get(self.index) == '\r' \
                and self.get(self.index + 1) == '\n':
            self.index += 1
        self.index += 1
        return self.get(self.index)

    def peek(self):
        """Advance the look-ahead cursor by one character.

        Returns the raw character (see ``get``) at the new look-ahead
        position; use ``current_peek`` for the CRLF-normalised value.
        """
        # Skip over CRLF as if it was a single character.
        if self.get(self.index + self.peek_offset) == '\r' \
                and self.get(self.index + self.peek_offset + 1) == '\n':
            self.peek_offset += 1
        self.peek_offset += 1
        return self.get(self.index + self.peek_offset)

    def reset_peek(self, offset=0):
        """Set the look-ahead offset to ``offset`` (default: no look-ahead)."""
        self.peek_offset = offset

    def skip_to_peek(self):
        """Commit the look-ahead: move ``index`` to the peeked position."""
        self.index += self.peek_offset
        self.peek_offset = 0


EOL = '\n'
EOF = None
SPECIAL_LINE_START_CHARS = ('}', '.', '[', '*')


class FluentParserStream(ParserStream):
    """ParserStream with Fluent-specific predicates and consumers."""

    def peek_blank_inline(self):
        """Peek past spaces on the current line and return them."""
        start = self.index + self.peek_offset
        while self.current_peek == ' ':
            self.peek()
        return self.string[start:self.index + self.peek_offset]

    def skip_blank_inline(self):
        """Consume spaces on the current line and return them."""
        blank = self.peek_blank_inline()
        self.skip_to_peek()
        return blank

    def peek_blank_block(self):
        """Peek past consecutive blank lines.

        Returns one EOL per blank line that was terminated by a line break.
        On a non-blank line the peek offset is rewound to the start of that
        line.
        """
        blank = ""
        while True:
            line_start = self.peek_offset
            self.peek_blank_inline()

            if self.current_peek == EOL:
                blank += EOL
                self.peek()
                continue

            if self.current_peek is EOF:
                # Treat the blank line at EOF as a blank block.
                return blank

            # Any other char; reset to column 1 on this line.
            self.reset_peek(line_start)
            return blank

    def skip_blank_block(self):
        """Consume consecutive blank lines; see ``peek_blank_block``."""
        blank = self.peek_blank_block()
        self.skip_to_peek()
        return blank

    def peek_blank(self):
        """Peek past any run of spaces and line breaks."""
        while self.current_peek in (" ", EOL):
            self.peek()

    def skip_blank(self):
        """Consume any run of spaces and line breaks."""
        self.peek_blank()
        self.skip_to_peek()

    def expect_char(self, ch):
        """Consume ``ch`` and return True, or raise ParseError('E0003')."""
        if self.current_char == ch:
            self.next()
            return True

        raise ParseError('E0003', ch)

    def expect_line_end(self):
        """Consume a line break (or accept EOF); otherwise raise ParseError."""
        if self.current_char is EOF:
            # EOF is a valid line end in Fluent.
            return True

        if self.current_char == EOL:
            self.next()
            return True

        # Unicode Character 'SYMBOL FOR NEWLINE' (U+2424)
        raise ParseError('E0003', '\u2424')

    def take_char(self, f):
        """Consume the current character if ``f(ch)`` is truthy.

        Returns the consumed character, EOF (None) at end of input, or
        False when the predicate rejects the character.
        """
        ch = self.current_char
        if ch is EOF:
            return EOF
        if f(ch):
            self.next()
            return ch
        return False

    def is_char_id_start(self, ch):
        """Return True if ``ch`` is an ASCII letter (a-z or A-Z)."""
        if ch is EOF:
            return False

        cc = ord(ch)
        return (cc >= 97 and cc <= 122) or \
               (cc >= 65 and cc <= 90)

    def is_identifier_start(self):
        """Return True if the peeked character can start an identifier."""
        return self.is_char_id_start(self.current_peek)

    def is_number_start(self):
        """Return True if a digit, or '-' followed by a digit, is at the cursor.

        The peek offset is reset to 0 before returning.
        """
        ch = self.peek() if self.current_char == '-' else self.current_char
        if ch is EOF:
            self.reset_peek()
            return False

        cc = ord(ch)
        is_digit = cc >= 48 and cc <= 57
        self.reset_peek()
        return is_digit

    def is_char_pattern_continuation(self, ch):
        """Return True unless ``ch`` is EOF or a special line-start char."""
        if ch is EOF:
            return False

        return ch not in SPECIAL_LINE_START_CHARS

    def is_value_start(self):
        """Return True if the peeked character is neither EOF nor EOL."""
        # Inline Patterns may start with any char.
        return self.current_peek is not EOF and self.current_peek != EOL

    def is_value_continuation(self):
        """Return True if the peeked line continues a multiline value.

        On True the peek offset is restored to its value at entry. On the
        final False (indented line starting with a special char, or EOF)
        the peek offset is left after the skipped spaces.
        """
        column1 = self.peek_offset
        self.peek_blank_inline()

        if self.current_peek == '{':
            self.reset_peek(column1)
            return True

        # No indentation: not a continuation.
        if self.peek_offset - column1 == 0:
            return False

        if self.is_char_pattern_continuation(self.current_peek):
            self.reset_peek(column1)
            return True

        return False

    # -1 - any
    # 0 - comment
    # 1 - group comment
    # 2 - resource comment
    def is_next_line_comment(self, level=-1):
        """Return True if the line after the peeked EOL is a comment line.

        ``level`` selects the comment kind: 0, 1 or 2 require exactly one,
        two or three '#' characters; -1 accepts one to three. The hashes
        must be followed by a space or a line break. Returns False without
        touching the peek offset when the peeked character is not EOL;
        otherwise the peek offset is reset to 0 before returning.
        """
        if self.current_peek != EOL:
            return False

        # Step over the line break onto the first char of the next line.
        self.peek()

        max_hashes = 3 if level == -1 else level + 1
        hashes = 0
        while hashes < max_hashes and self.current_peek == '#':
            hashes += 1
            self.peek()

        if level == -1:
            right_depth = hashes > 0
        else:
            right_depth = hashes == max_hashes

        # The first char after #, ## or ###. Read it through current_peek
        # rather than peek(): this neither skips past it nor misses a CRLF
        # line break, which the raw peek() value reports as '\r'.
        is_comment = right_depth and self.current_peek in (' ', EOL)

        self.reset_peek()
        return is_comment

    def is_variant_start(self):
        """Return True if '[' or '*[' (but not '[[') is at the peek cursor.

        The peek offset is restored to its value at entry.
        """
        current_peek_offset = self.peek_offset
        if self.current_peek == '*':
            self.peek()
        if self.current_peek == '[' and self.peek() != '[':
            self.reset_peek(current_peek_offset)
            return True

        self.reset_peek(current_peek_offset)
        return False

    def is_attribute_start(self):
        """Return True if the peeked character is '.'."""
        return self.current_peek == '.'

    def skip_to_next_entry_start(self, junk_start):
        """Advance the cursor to the start of the next line that looks like
        an entry, or to EOF.

        ``junk_start`` is the index where the broken entry began; it is
        used to decide whether rewinding to the last line break is safe.
        """
        last_newline = self.string.rfind(EOL, 0, self.index)
        if junk_start < last_newline:
            # Last seen newline is _after_ the junk start. It's safe to rewind
            # without the risk of resuming at the same broken entry.
            self.index = last_newline

        while self.current_char:
            # We're only interested in beginnings of line.
            if self.current_char != EOL:
                self.next()
                continue

            # Break if the first char in this line looks like an entry start.
            first = self.next()
            if self.is_char_id_start(first) or first == '-' or first == '#':
                break

            # Syntax 0.4 compatibility
            peek = self.peek()
            self.reset_peek()
            if (first, peek) == ('/', '/') or (first, peek) == ('[', '['):
                break

    def take_id_start(self):
        """Consume and return an ASCII letter, or raise ParseError('E0004')."""
        if self.is_char_id_start(self.current_char):
            ret = self.current_char
            self.next()
            return ret

        raise ParseError('E0004', 'a-zA-Z')

    def take_id_char(self):
        """Consume an identifier char (a-z, A-Z, 0-9, '_' or '-').

        Return value follows ``take_char``.
        """
        def closure(ch):
            cc = ord(ch)
            return ((cc >= 97 and cc <= 122) or
                    (cc >= 65 and cc <= 90) or
                    (cc >= 48 and cc <= 57) or
                    cc == 95 or cc == 45)
        return self.take_char(closure)

    def take_digit(self):
        """Consume a decimal digit; return value follows ``take_char``."""
        def closure(ch):
            cc = ord(ch)
            return (cc >= 48 and cc <= 57)
        return self.take_char(closure)

    def take_hex_digit(self):
        """Consume a hexadecimal digit; return value follows ``take_char``."""
        def closure(ch):
            cc = ord(ch)
            return (
                (cc >= 48 and cc <= 57)  # 0-9
                or (cc >= 65 and cc <= 70)  # A-F
                or (cc >= 97 and cc <= 102))  # a-f
        return self.take_char(closure)