1"""Python string parsers with escape characters
2
Provides Python-string-like operation as far as possible; this includes:
4    support for single and double-quoted strings
5    support for triple-quoted versions of the same
6    support for special character escapes as seen in 8-bit python strings
    support for octal and hexadecimal character escapes
8
9
10    string_single_quote
11    string_double_quote
12    string_triple_single
13    string_triple_double
14        Individual string types with the above features
15
16    string
17        Any of the above string types, in a simple FirstOf group
18        with the triple-quoted types first, then the single quoted
19        i.e. generated with this grammar:
20
21        string_triple_double/string_triple_single/string_double_quote/string_single_quote
22
23
24Interpreters:
25    StringInterpreter
26        Interprets any/all of the above as a normal (non-Raw) Python
27        regular (non-unicode) string.  Hopefully the action is identical
28        to doing eval( matchedString, {},{}), without the negative security
29        implications of that approach.  Note that you need to make the
30        interpreter available under each name you use directly in your
31        grammar, so if you use string_single_quote and string_double_quote
32        directly, then you need to add:
33            string_single_quote = myStringInterpreterInstance
34            string_double_quote = myStringInterpreterInstance
35        to your processor class.
36"""
37
38from simpleparse.parser import Parser
39from simpleparse import common, objectgenerator
40from simpleparse.common import chartypes
41assert chartypes
42from simpleparse.dispatchprocessor import *
43
# Mapping of production name -> library element built by this module.
c = dict()

# Grammar text common to every string flavour.  It deliberately leaves
# ``delimiter``, ``nondelimiter``, ``char_no_quote`` and
# ``string_special_escapes`` undefined; a per-flavour fragment is
# concatenated onto this text to supply them.
stringDeclaration = r"""
# note that non-delimiter can never be hit by non-triple strings
str              :=  delimiter, (char_no_quote/escaped_char/backslash_char/nondelimiter)*,delimiter

escaped_char        :=  '\\',( string_special_escapes / ('x',hex_escaped_char) / octal_escaped_char )
octal_escaped_char  :=  octdigit, octdigit?, octdigit?
hex_escaped_char    :=  hexdigit,hexdigit

backslash_char      :=  "\\" # i.e. a backslash preceding a non-special char

"""
57
# One (production name, grammar fragment) pair per string flavour.  Each
# fragment defines the four productions that differ between flavours:
# delimiter, nondelimiter, char_no_quote and string_special_escapes.
# NOTE: these are non-raw Python literals, so every "\\\\" below is two
# backslash characters in the resulting grammar text.
_stringTypeData = [
    (
        "string_double_quote",
        """
<delimiter>                :=  '"'
nondelimiter               :=  -'"'
char_no_quote              :=  -[\\\\"]+
string_special_escapes     := [\\\\abfnrtv"]
""",
    ),
    (
        "string_single_quote",
        """
<delimiter>                :=  "'"
nondelimiter               :=  -"'"
char_no_quote              :=  -[\\\\']+
string_special_escapes     := [\\\\abfnrtv']
""",
    ),
    (
        "string_triple_single",
        """
nondelimiter               :=  -"'''"
<delimiter>                :=  "'''"
char_no_quote              :=  -[\\\\']+
string_special_escapes     := [\\\\abfnrtv']
""",
    ),
    (
        "string_triple_double",
        '''
nondelimiter               :=  -'"""'
<delimiter>                :=  '"""'
char_no_quote              :=  -[\\\\"]+
string_special_escapes     := [\\\\abfnrtv"]
''',
    ),
]
84
# Compile one parser per string flavour (shared declaration + the
# flavour-specific fragment) and register its "str" production under the
# flavour's public name.
for name, partial in _stringTypeData:
    _p = Parser(stringDeclaration + partial)
    c[name] = objectgenerator.LibraryElement(
        generator=_p._generator, production="str"
    )

# The concrete string types are shared before the combined grammar below
# is compiled; that grammar refers to each of them by name.
# NOTE(review): presumably the share is what lets those names resolve --
# keep this ordering.
common.share(c)

# "string" tries the triple-quoted forms first, then the single-character
# delimited forms.
_p = Parser(
    """
string :=  string_triple_double/string_triple_single/string_double_quote/string_single_quote
"""
)
c["string"] = objectgenerator.LibraryElement(
    generator=_p._generator, production="string"
)
99
class StringInterpreter(DispatchProcessor):
    """Turn matched string productions into the text they denote.

    Each handler receives a ``(tag, left, right, sublist)`` match tuple and
    the buffer that was parsed, and returns a Python string: literal runs
    are copied from the buffer, escape sequences are replaced by the
    character they stand for, and the pieces are joined together.

    An instance is normally installed as an attribute of another
    processor, under every production name used directly in the grammar.

    For example:

        class MyProcessor( DispatchProcessor ):
            string = StringInterpreter()

            # following would be used if you have, for instance,
            # used string_single_quote in an area where double
            # or triple-quoted strings are not allowed, but have
            # used string in another area.
            string_single_quote = string
    """

    # Character following the backslash -> text it is replaced with.
    specialescapedmap = {
        'a': '\a',
        'b': '\b',
        'f': '\f',
        'n': '\n',
        'r': '\r',
        't': '\t',
        'v': '\v',
        '\\': '\\',
        '\n': '',
        '"': '"',
        "'": "'",
    }

    def string(self, info, buffer):
        """Dispatch the first child match and return its value."""
        _tag, _start, _stop, children = info
        first_child = children[0]
        return dispatch(self, first_child, buffer)

    def string_single_quote(self, info, buffer):
        """Join the dispatched values of all child matches into one string."""
        _tag, _start, _stop, children = info
        pieces = dispatchList(self, children, buffer)
        return "".join(pieces)

    # Every quoting flavour is interpreted the same way.
    string_double_quote = string_single_quote
    string_triple_single = string_single_quote
    string_triple_double = string_single_quote

    def char_no_quote(self, info, buffer):
        """Return the matched span of the buffer unchanged."""
        _tag, start, stop, _children = info
        return buffer[start:stop]

    nondelimiter = char_no_quote

    def escaped_char(self, info, buffer):
        """Join the dispatched values of the escape's child matches."""
        _tag, _start, _stop, children = info
        pieces = dispatchList(self, children, buffer)
        return "".join(pieces)

    def octal_escaped_char(self, info, buffer):
        """Return the character whose code is the matched octal digits."""
        _tag, start, stop, _children = info
        digits = buffer[start:stop]
        return chr(int(digits, 8))

    def hex_escaped_char(self, info, buffer):
        """Return the character whose code is the matched hex digits."""
        _tag, start, stop, _children = info
        digits = buffer[start:stop]
        return chr(int(digits, 16))

    def backslash_char(self, info, buffer):
        """Return a single literal backslash."""
        return "\\"

    def string_special_escapes(self, info, buffer):
        """Look the matched text up in ``specialescapedmap``.

        Raises KeyError if the matched text is not a key of the map.
        """
        _tag, start, stop, _children = info
        escape_key = buffer[start:stop]
        return self.specialescapedmap[escape_key]
168
169