1# 2# Copyright (C) 2010-2017 Samuel Abels 3# The MIT License (MIT) 4# 5# Permission is hereby granted, free of charge, to any person obtaining 6# a copy of this software and associated documentation files 7# (the "Software"), to deal in the Software without restriction, 8# including without limitation the rights to use, copy, modify, merge, 9# publish, distribute, sublicense, and/or sell copies of the Software, 10# and to permit persons to whom the Software is furnished to do so, 11# subject to the following conditions: 12# 13# The above copyright notice and this permission notice shall be 14# included in all copies or substantial portions of the Software. 15# 16# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 19# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 20# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 21# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 22# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 23from __future__ import print_function, absolute_import 24import re 25from .string import String 26 27# Matches any opening parenthesis that is neither preceeded by a backslash 28# nor has a "?:" or "?<" appended. 29bracket_re = re.compile(r'(?<!\\)\((?!\?[:<])', re.I) 30 31modifier_grammar = ( 32 ('modifier', r'[i]'), 33 ('invalid_char', r'.'), 34) 35 36modifier_grammar_c = [] 37for thetype, regex in modifier_grammar: 38 modifier_grammar_c.append((thetype, re.compile(regex, re.M | re.S))) 39 40 41class Regex(String): 42 43 def __init__(self, lexer, parser, parent): 44 self.delimiter = lexer.token()[1] 45 # String parser collects the regex. 46 String.__init__(self, lexer, parser, parent) 47 self.n_groups = len(bracket_re.findall(self.string)) 48 self.flags = 0 49 50 # Collect modifiers. 51 lexer.set_grammar(modifier_grammar_c) 52 while lexer.current_is('modifier'): 53 if lexer.next_if('modifier', 'i'): 54 self.flags = self.flags | re.I 55 else: 56 modifier = lexer.token()[1] 57 error = 'Invalid regular expression modifier "%s"' % modifier 58 lexer.syntax_error(error, self) 59 lexer.restore_grammar() 60 61 # Compile the regular expression. 62 try: 63 re.compile(self.string, self.flags) 64 except Exception as e: 65 error = 'Invalid regular expression %s: %s' % ( 66 repr(self.string), e) 67 lexer.syntax_error(error, self) 68 69 def _escape(self, token): 70 char = token[1] 71 if char == self.delimiter: 72 return char 73 return token 74 75 def value(self, context): 76 pattern = String.value(self, context)[0] 77 return re.compile(pattern, self.flags) 78 79 def dump(self, indent=0): 80 print((' ' * indent) + self.name, self.string) 81