"""Python string parsers with escape characters

Python-string-like operation as much as possible, this includes:
    support for single and double-quoted strings
    support for triple-quoted versions of the same
    support for special character escapes as seen in 8-bit python strings
    support for octal and hexadecimal character escapes


    string_single_quote
    string_double_quote
    string_triple_single
    string_triple_double
        Individual string types with the above features

    string
        Any of the above string types, in a simple FirstOf group
        with the triple-quoted types first, then the single quoted
        i.e. generated with this grammar:

        string_triple_double/string_triple_single/string_double_quote/string_single_quote


Interpreters:
    StringInterpreter
        Interprets any/all of the above as a normal (non-Raw) Python
        regular (non-unicode) string.  Hopefully the action is identical
        to doing eval( matchedString, {},{}), without the negative security
        implications of that approach.  Note that you need to make the
        interpreter available under each name you use directly in your
        grammar, so if you use string_single_quote and string_double_quote
        directly, then you need to add:
            string_single_quote = myStringInterpreterInstance
            string_double_quote = myStringInterpreterInstance
        to your processor class.
"""

from simpleparse.parser import Parser
from simpleparse import common, objectgenerator
# NOTE(review): chartypes is never called here; it is presumably imported for
# its side effects (the grammar below references octdigit / hexdigit, which
# are not defined in this file).  The assert just keeps the name "used".
from simpleparse.common import chartypes
assert chartypes
# NOTE(review): the wildcard is expected to supply DispatchProcessor, dispatch
# and dispatchList, all of which are used below.  It is kept as-is because
# other modules may rely on names re-exported through this one.
from simpleparse.dispatchprocessor import *

# Library of productions defined by this module, keyed by production name.
c = {}

# Grammar shared by all four string flavours.  Each flavour appends its own
# definitions of delimiter, nondelimiter, char_no_quote and
# string_special_escapes (see _stringTypeData).
stringDeclaration = r"""
# note that non-delimiter can never be hit by non-triple strings
str := delimiter, (char_no_quote/escaped_char/backslash_char/nondelimiter)*,delimiter

escaped_char := '\\',( string_special_escapes / ('x',hex_escaped_char) / octal_escaped_char )
octal_escaped_char := octdigit, octdigit?, octdigit?
hex_escaped_char := hexdigit,hexdigit

backslash_char := "\\" # i.e. a backslash preceding a non-special char

"""

# (production name, flavour-specific grammar fragment) pairs.  The fragments
# are ordinary (non-raw) Python strings, so each "\\\\" below reaches the
# grammar as two backslash characters.
_stringTypeData = [
    ("string_double_quote", """
<delimiter> := '"'
nondelimiter := -'"'
char_no_quote := -[\\\\"]+
string_special_escapes := [\\\\abfnrtv"]
"""),
    ("string_single_quote", """
<delimiter> := "'"
nondelimiter := -"'"
char_no_quote := -[\\\\']+
string_special_escapes := [\\\\abfnrtv']
"""),
    ("string_triple_single", """
nondelimiter := -"'''"
<delimiter> := "'''"
char_no_quote := -[\\\\']+
string_special_escapes := [\\\\abfnrtv']
"""),
    ("string_triple_double", '''
nondelimiter := -'"""'
<delimiter> := '"""'
char_no_quote := -[\\\\"]+
string_special_escapes := [\\\\abfnrtv"]
'''),
]

# Build one parser per flavour; every flavour is exposed through its "str"
# production under the flavour's own name.
for name, partial in _stringTypeData:
    _p = Parser(stringDeclaration + partial)
    c[name] = objectgenerator.LibraryElement(
        generator=_p._generator,
        production="str",
    )
common.share(c)

# The combined "string" production refers to the four flavours by name.
# NOTE(review): "string" is added to c only after the common.share(c) call
# above; confirm it is published as intended (e.g. by a later share call
# outside this view, or because share keeps a reference to c).
_p = Parser("""
string := string_triple_double/string_triple_single/string_double_quote/string_single_quote
""")
c["string"] = objectgenerator.LibraryElement(
    generator=_p._generator,
    production="string",
)


class StringInterpreter(DispatchProcessor):
    """Processor for converting parsed string values to their "intended" value

    Basically this processor handles de-escaping and stripping the
    surrounding quotes, so that you get the string as a Python string
    value.  You use the processor by creating an instance of
    StringInterpreter() as an item in another processor's
    methodSource object (often the Parser itself).

    For example:

        class MyProcessor( DispatchProcessor ):
            string = StringInterpreter()

            # following would be used if you have, for instance,
            # used string_single_quote in an area where double
            # or triple-quoted strings are not allowed, but have
            # used string in another area.
            string_single_quote = string

    Every handler receives ``info``, a 4-tuple unpacked as
    ``(tag, left, right, sublist)``, and ``buffer``, the text that the
    ``left``/``right`` offsets index into.
    """

    def string(self, info, buffer):
        """Dispatch any of the string types and return the result"""
        _tag, _left, _right, children = info
        # Only the first child is dispatched; it is expected to be the
        # concrete string flavour that matched.
        return dispatch(self, children[0], buffer)

    def string_single_quote(self, info, buffer):
        """Return the concatenation of the values of all child matches.

        The delimiter production is declared as ``<delimiter>`` in the
        grammars above and no handler for it exists here, so the joined
        result is built only from the content productions.
        """
        _tag, _left, _right, children = info
        return "".join(dispatchList(self, children, buffer))

    # All four flavours are interpreted the same way.
    string_double_quote = string_single_quote
    string_triple_single = string_single_quote
    string_triple_double = string_single_quote

    def char_no_quote(self, info, buffer):
        """Return the matched text verbatim (no de-escaping required)."""
        _tag, left, right, _children = info
        return buffer[left:right]

    nondelimiter = char_no_quote

    def escaped_char(self, info, buffer):
        """Return the character(s) produced by one backslash escape.

        The work is delegated to the handlers of the child productions
        (string_special_escapes, hex_escaped_char or octal_escaped_char).
        """
        _tag, _left, _right, children = info
        return "".join(dispatchList(self, children, buffer))

    def octal_escaped_char(self, info, buffer):
        """Return the character whose code is the matched octal digits."""
        _tag, left, right, _children = info
        return chr(int(buffer[left:right], 8))

    def hex_escaped_char(self, info, buffer):
        """Return the character whose code is the matched hex digits."""
        _tag, left, right, _children = info
        return chr(int(buffer[left:right], 16))

    def backslash_char(self, info, buffer):
        """Return a literal backslash (one that did not start an escape)."""
        return "\\"

    def string_special_escapes(self, info, buffer):
        """Maps "special" escapes to the corresponding characters

        Raises KeyError if the matched text is not a key of
        ``specialescapedmap``.
        """
        _tag, left, right, _children = info
        return self.specialescapedmap[buffer[left:right]]

    # Escape letter (the text following the backslash) -> resulting text.
    # NOTE(review): none of the string_special_escapes character classes
    # defined in this module include a newline, so the '\n' entry looks
    # unreachable through these grammars; it is kept in case other grammars
    # reuse this interpreter.
    specialescapedmap = {
        'a': '\a',
        'b': '\b',
        'f': '\f',
        'n': '\n',
        'r': '\r',
        't': '\t',
        'v': '\v',
        '\\': '\\',
        '\n': '',
        '"': '"',
        "'": "'",
    }
