1'''Default SimpleParse EBNF grammar as a generator with productions
2
3This module defines the original SimpleParse
4grammar.  It uses the generator objects directly
5as this is the first grammar being written.
6'''
7from simpleparse.objectgenerator import *
8from simpleparse import generator, baseparser
9from simpleparse.dispatchprocessor import *
10
11try:
12    _unichr = unichr
13    _unicode = unicode
14except NameError:
15    _unichr = chr
16    _unicode = str
17
# Shared Name references reused by the production definitions below.
#
# Original author's note: whitespace is handled slightly differently
# because of a bug with NULL-matching repeating groups, so all of the
# "ts" references were meant to be written as "ts?".
# NOTE(review): the reference below does not pass optional=1, while the
# "ts" production itself is declared optional and repeating -- confirm
# whether the note above is still accurate.
whitespace = Name (value = "ts", report = 0)
element_token = Name( value = "element_token" )
literal = Name ( value = "literal")
group = Name ( value = "group")
characterrange = Name ( value = "range")
name = Name ( value = "name")
27
28
# Generator holding the grammar defined in this module; every
# addDefinition call below registers one production under its name.
SPGenerator = generator.Generator ()

SPGenerator.addDefinition(
    "declarationset",  # declarationset := declaration+
    Name (value = "declaration", repeating = 1),
)



SPGenerator.addDefinition (
    "declaration",  # ts, (unreportedname/expandedname/name), ts, ':', ':'?, '=', seq_group
    SequentialGroup (
        children = [
            whitespace,
            # the three ways of writing the production name: <name>, >name<, name
            FirstOfGroup (
                children = [
                    Name (value = "unreportedname", ),
                    Name (value = "expandedname", ),
                    Name (value = "name", ),
                ],
            ),
            whitespace,
            # the define operator, accepted as either ":=" or "::="
            Literal (value = ":"),
            Literal (value = ":", optional=1),
            Literal (value = "=",),
            Name( value = "seq_group"),
        ],
    )
)
58
SPGenerator.addDefinition (
    "group",  # >group< := '(', seq_group, ')'
    SequentialGroup (
        children = [
            Literal (value ="("),
            Name( value= "seq_group"),
            Literal (value =")"),
        ],
        # expanded = 1 is what the ">name<" declaration form sets
        expanded = 1,
    )
)

# Alternatives allowed at each position of a seq_group:
# (error_on_fail / fo_group / element_token).  The same FirstOfGroup
# object is referenced twice by the seq_group definition below.
_seq_children = FirstOfGroup(
    children = [
        Name(value="error_on_fail"),
        Name(value="fo_group"),
        Name(value="element_token"),
    ],
)

SPGenerator.addDefinition (
    "seq_group",  # ts, _seq_children, (ts, seq_indicator, ts, _seq_children)*, ts
    SequentialGroup (
        children = [
            whitespace,
            _seq_children,
            SequentialGroup(
                children = [
                    whitespace,
                    Name( value="seq_indicator"),
                    whitespace,
                    _seq_children,
                ],
                repeating = 1, optional = 1,
            ),
            whitespace,
        ],
    ),
)

SPGenerator.addDefinition (
    "fo_group",  # element_token, (ts, fo_indicator, ts, element_token)+
    SequentialGroup (
        children = [
            element_token,
            SequentialGroup(
                children = [
                    whitespace,
                    Name( value="fo_indicator"),
                    whitespace,
                    element_token,
                ],
                # no optional flag: at least one "/ element_token" is required
                repeating = 1,
            ),
        ],
    )
)
SPGenerator.addDefinition (
    "seq_indicator",  # <seq_indicator> := ','
    Literal(value = ",", report=0 ),
)
SPGenerator.addDefinition (
    "fo_indicator",  # <fo_indicator> := '/'
    Literal(value = "/", report=0 ),
)
124
SPGenerator.addDefinition (
    # lookahead_indicator?, ts, negpos_indicator?, ts,
    # (literal/range/group/name), ts, occurence_indicator?, ts, error_on_fail?
    "element_token",
    SequentialGroup (
        children = [
            Name (value = "lookahead_indicator", optional = 1),
            whitespace,
            Name (value = "negpos_indicator", optional = 1),
            whitespace,
            # the base element that the surrounding indicators modify
            FirstOfGroup (
                children = [
                    literal,
                    characterrange,
                    group,
                    name,
                ]
            ),
            whitespace,
            Name (value = "occurence_indicator", optional = 1),
            whitespace,
            Name (value = "error_on_fail", optional = 1),
        ]
    )
)

SPGenerator.addDefinition (
    "negpos_indicator",  # [-+]
    Range (value = "+-" )
)
SPGenerator.addDefinition (
    "lookahead_indicator",  # "?"
    Literal(value = "?" ),
)

SPGenerator.addDefinition (
    "occurence_indicator",  # [+*?]
    Range (value = "+*?" ),
)
SPGenerator.addDefinition (
    "error_on_fail",  # "!", (ts, literal)?
    SequentialGroup (
        children = [
            Literal (value ="!"),
            # optional literal following the "!" (used as the error message
            # by SPGrammarProcessor.error_on_fail)
            SequentialGroup (
                children = [
                    whitespace,
                    Name( value="literal"),
                ],
                optional = 1,
            ),
        ],
    ),
)
177
SPGenerator.addDefinition (
    "unreportedname",  # '<', ts, name, ts, '>'
    SequentialGroup (
        children = [
            Literal (value ="<"),
            whitespace,
            name,
            whitespace,
            Literal (value =">"),
        ]
    )
)
SPGenerator.addDefinition (
    "expandedname",  # '>', ts, name, ts, '<'
    SequentialGroup (
        children = [
            Literal (value =">"),
            whitespace,
            name,
            whitespace,
            Literal (value ="<"),
        ]
    )
)

SPGenerator.addDefinition (
    "name",  # [a-zA-Z_], [a-zA-Z0-9_]*
    SequentialGroup (
        children = [
            Range(value ='abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_'),
            Range(value ='abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_0123456789', optional= 1, repeating= 1),
        ]
    )
)

SPGenerator.addDefinition (
    "ts", # ( [ \011-\015]+ / comment )*
    FirstOfGroup (
        children = [
            Range(value =' \011\012\013\014\015', repeating=1),
            Name( value = "comment" ),
        ],
        repeating = 1, optional=1,
    )
)
SPGenerator.addDefinition (
    "comment", # '#', -'\n'*, '\n'
    SequentialGroup (
        children = [
            Literal ( value ="#"),
            Literal (value ="\n", negative = 1, repeating = 1, optional=1),
            # the terminating newline is mandatory
            Literal (value = "\n",),
        ],
    ),
)
233
SPGenerator.addDefinition (
    "literalDecorator", # literalDecorator    :=  [c]
    Range( value = 'c' )
)

SPGenerator.addDefinition (
    # literalDecorator?, ( ("'",(CHARNOSNGLQUOTE/ESCAPEDCHAR)*,"'")  /  ('"',(CHARNODBLQUOTE/ESCAPEDCHAR)*,'"') )
    "literal",
    SequentialGroup(
        children = [
            Name( value = 'literalDecorator', optional=1 ),
            FirstOfGroup (
                children = [
                    # single-quoted form
                    SequentialGroup (
                        children = [
                            Literal (value ="'"),
                            FirstOfGroup (
                                children = [
                                    Name (value = "CHARNOSNGLQUOTE"),
                                    Name (value = "ESCAPEDCHAR"),
                                ],
                                optional = 1, repeating = 1,
                            ),
                            Literal (value ="'"),
                        ],
                    ),
                    # double-quoted form
                    SequentialGroup (
                        children = [
                            Literal (value ='"'),
                            FirstOfGroup (
                                children = [
                                    Name (value = "CHARNODBLQUOTE"),
                                    Name (value = "ESCAPEDCHAR"),
                                ],
                                optional = 1, repeating = 1,
                            ),
                            Literal (value ='"'),
                        ],
                    )
                ],
            ),
        ],
    )
)
277
SPGenerator.addDefinition (
    "range",   # '[',CHARBRACE?,CHARDASH?, (CHARRANGE/CHARNOBRACE)*, CHARDASH?,']'
    SequentialGroup (
        children =[
            Literal (value ="["),
            # a "]" and/or "-" directly after the opening bracket are
            # matched as CHARBRACE / CHARDASH
            Name (value ="CHARBRACE",optional = 1),
            Name (value ="CHARDASH",optional = 1),
            FirstOfGroup(
                children = [
                    # CHARRANGE is tried before the single-character form
                    Name (value ="CHARRANGE"),
                    Name (value ="CHARNOBRACE"),
                ],
                optional = 1, repeating = 1,
            ),
            Name (value ="CHARDASH",optional = 1),
            Literal (value ="]"),
        ],
    )
)
SPGenerator.addDefinition (
    "CHARBRACE",  # CHARBRACE := ']'
    Literal (value = "]"),
)
SPGenerator.addDefinition (
    "CHARDASH",  # CHARDASH := '-'
    Literal (value = "-"),
)
SPGenerator.addDefinition (
    "CHARRANGE",   # CHARRANGE           :=  CHARNOBRACE, '-', CHARNOBRACE
    SequentialGroup (
        children =[
            Name (value ="CHARNOBRACE"),
            Literal (value ="-"),
            Name (value ="CHARNOBRACE"),
        ],
    ),
)
SPGenerator.addDefinition (
    "CHARNOBRACE",   # CHARNOBRACE         :=  ESCAPEDCHAR/CHAR
    FirstOfGroup(
        children =[
            Name (value ="ESCAPEDCHAR"),
            Name (value ="CHAR"),
        ],
    ),
)
SPGenerator.addDefinition (
    "CHAR",  # CHAR := -']'   (written as -[]] in the textual grammar)
    Literal (
        value ="]",
        negative = 1,
    ),
)
331
SPGenerator.addDefinition (
    # '\\',( SPECIALESCAPEDCHAR / ([xX],HEXESCAPEDCHAR) / OCTALESCAPEDCHAR / ([uU],UNICODEESCAPEDCHAR) )
    "ESCAPEDCHAR",
    SequentialGroup (
        children =[
            Literal (value ="\\"),
            FirstOfGroup(
                children = [
                    Name (value ="SPECIALESCAPEDCHAR"),
                    SequentialGroup(
                        children = [
                            Range( value = 'xX' ),
                            Name( value="HEXESCAPEDCHAR"),
                        ]
                    ),
                    Name (value ="OCTALESCAPEDCHAR"),
                    SequentialGroup(
                        children = [
                            Range( value='uU'),
                            Name( value='UNICODEESCAPEDCHAR' ),
                        ],
                    ),
                ],
            ),
        ],
    )
)

SPGenerator.addDefinition (
    "SPECIALESCAPEDCHAR",  # [\\abfnrtv"']
    Range(value ='\\abfnrtv"\''),
)

SPGenerator.addDefinition (
    "OCTALESCAPEDCHAR",   # [0-7],[0-7]?,[0-7]?
    SequentialGroup (
        children =[
            Range (value ="01234567"),
            Range (value ="01234567", optional = 1),
            Range (value ="01234567", optional = 1),
        ],
    )
)
SPGenerator.addDefinition (
    "HEXESCAPEDCHAR",   # [0-9a-fA-F],[0-9a-fA-F]
    SequentialGroup (
        children =[
            Range (value ="0123456789abcdefABCDEF"),
            Range (value ="0123456789abcdefABCDEF"),
        ],
    )
)
SPGenerator.addDefinition(
    # four hex digits, optionally followed by four more
    "UNICODEESCAPEDCHAR",
    SequentialGroup(
        children=[
            Range (value ="0123456789abcdefABCDEF"),
            Range (value ="0123456789abcdefABCDEF"),
            Range (value ="0123456789abcdefABCDEF"),
            Range (value ="0123456789abcdefABCDEF"),
            SequentialGroup(
                children = [
                    Range (value ="0123456789abcdefABCDEF"),
                    Range (value ="0123456789abcdefABCDEF"),
                    Range (value ="0123456789abcdefABCDEF"),
                    Range (value ="0123456789abcdefABCDEF"),
                ],
                optional = True,
            )
        ]
    )
)

SPGenerator.addDefinition (
    "CHARNODBLQUOTE",  # -[\\"]+
    Range(value ='\\"', negative = 1, repeating = 1),
)
SPGenerator.addDefinition (
    "CHARNOSNGLQUOTE",  # -[\\']+
    Range(value ="\\'", negative = 1, repeating = 1),
)
412
# Textual (EBNF) form of the grammar that is built programmatically on
# SPGenerator above.  It is not referenced by the code visible in this
# module, and it is not an exact transcription of the generator
# definitions: e.g. unreportedname/expandedname here omit the ts that the
# generator versions allow around the name, and ESCAPEDCHAR here accepts
# only a lowercase 'x' where the generator accepts [xX].
declaration = r"""declarationset      :=  declaration+
declaration         :=  ts, (unreportedname/expandedname/name) ,ts,':',':'?,'=',seq_group

element_token       :=  lookahead_indicator?, ts, negpos_indicator?,ts, (literal/range/group/name),ts, occurence_indicator?, ts, error_on_fail?

negpos_indicator    :=  [-+]
lookahead_indicator :=  "?"
occurence_indicator :=  [+*?]
error_on_fail       :=  "!", (ts,literal)?

>group<             :=  '(',seq_group, ')'
seq_group           :=  ts,(error_on_fail/fo_group/element_token),
                          (ts, seq_indicator, ts,
                              (error_on_fail/fo_group/element_token)
                          )*, ts

fo_group            :=  element_token, (ts, fo_indicator, ts, element_token)+


# following two are likely something peoples might want to
# replace in many instances...
<fo_indicator>      :=  "/"
<seq_indicator>     :=  ','

unreportedname      :=  '<', name, '>'
expandedname        :=  '>', name, '<'
name                :=  [a-zA-Z_],[a-zA-Z0-9_]*
<ts>                :=  ( [ \011-\015]+ / comment )*
comment             :=  '#',-'\n'*,'\n'
literal             :=  literalDecorator?,("'",(CHARNOSNGLQUOTE/ESCAPEDCHAR)*,"'")  /  ('"',(CHARNODBLQUOTE/ESCAPEDCHAR)*,'"')
literalDecorator    :=  [c]



range               :=  '[',CHARBRACE?,CHARDASH?, (CHARRANGE/CHARNOBRACE)*, CHARDASH?,']'
CHARBRACE           :=  ']'
CHARDASH            :=  '-'
CHARRANGE           :=  CHARNOBRACE, '-', CHARNOBRACE
CHARNOBRACE         :=  ESCAPEDCHAR/CHAR
CHAR                :=  -[]]
ESCAPEDCHAR         :=  '\\',( SPECIALESCAPEDCHAR / ('x',HEXESCAPEDCHAR) / ([uU],UNICODEESCAPEDCHAR) / OCTALESCAPEDCHAR  )
SPECIALESCAPEDCHAR  :=  [\\abfnrtv"']
OCTALESCAPEDCHAR    :=  [0-7],[0-7]?,[0-7]?
HEXESCAPEDCHAR      :=  [0-9a-fA-F],[0-9a-fA-F]
CHARNODBLQUOTE      :=  -[\\"]+
CHARNOSNGLQUOTE     :=  -[\\']+
UNICODEESCAPEDCHAR  := [0-9a-fA-F],[0-9a-fA-F],[0-9a-fA-F],[0-9a-fA-F],([0-9a-fA-F],[0-9a-fA-F],[0-9a-fA-F],[0-9a-fA-F])?
"""
461
462### Now the interpreter objects...
class Parser(baseparser.BaseParser):
    """Compile a SimpleParse-format EBNF grammar into a generator.

    The EBNF text handed to the constructor is parsed immediately and
    the resulting SimpleParse generator object is left on
    self.generator.

    One instance corresponds to one EBNF text, so the standard Parser
    API is bent quite a bit: buildTagger always produces the tagger for
    the EBNF grammar itself (from the module-level SPGenerator), not for
    the grammar that has just been compiled.
    """
    _rootProduction = 'declarationset'

    def __init__( self, ebnf, prebuilts=(), methodSource=None, definitionSources=() ):
        """Parse ebnf and store the generated grammar on self.generator

        ebnf -- grammar text in simpleparse EBNF format
        prebuilts, definitionSources -- passed unchanged to the
            SPGrammarProcessor which builds the generator
        methodSource -- accepted, but not used by this constructor

        Raises ValueError if parsing stops before the end of ebnf.
        """
        grammarProcessor = SPGrammarProcessor( prebuilts, definitionSources )
        _success, _tags, stopped = self.parse(
            ebnf, self._rootProduction, processor=grammarProcessor,
        )
        total = len(ebnf)
        if stopped == total:
            # whole text consumed, the processor's generator is complete
            self.generator = grammarProcessor.generator
            return
        lineNumber = lines(0, stopped, ebnf)
        unparsed = ebnf[stopped:stopped+100]
        raise ValueError(
            """Unable to complete parsing of the EBNF, stopped at line %s (%s chars of %s)
Unparsed:\n%s..."""%(lineNumber, stopped, total, unparsed)
        )

    def buildTagger( self, name=None, processor = None ):
        """Return the tag-table used to parse EBNF text for this parser

        Delegates to the module-level SPGenerator rather than to
        self.generator.
        """
        tagger = SPGenerator.buildParser( name, processor )
        return tagger
489
class SPGrammarProcessor( DispatchProcessor ):
    """Processing object for post-processing an EBNF into a new generator

    Each method named after a production of the grammar receives the
    (tag, left, right, children) tuple matched for that production plus
    the parsed buffer.  declaration registers finished productions on
    self.generator; the other methods build and return the element
    tokens and string fragments those productions are assembled from.
    """
    ### top level
    def __init__( self, prebuilts=(), definitionSources=() ):
        """Create the target generator and seed it with external definitions

        prebuilts -- iterable of (name, table) pairs; ElementToken
            instances are registered as-is, anything else is wrapped
            in a Prebuilt
        definitionSources -- iterable of sources, each passed to the
            generator's addDefinitionSource
        """
        self.generator = generator.Generator()
        for (name, table) in prebuilts:
            if isinstance( table, ElementToken):
                self.generator.addDefinition( name, table)
            else:
                self.generator.addDefinition( name, Prebuilt(value=table))
        for source in definitionSources:
            self.generator.addDefinitionSource( source )

    def declaration( self, info, buffer):
        '''Base declaration from the grammar, a "production" or "rule"

        Builds the element token for the right-hand side, sets its
        report/expanded flags from the way the name was written
        (<name>, >name< or name) and adds it to self.generator.
        While the right-hand side is being processed the production's
        name is available as self.currentProduction.
        '''
        (tag, left, right, sublist) = info
        name = sublist[0]
        expanded = 0
        if name[0] == "unreportedname":
            name = name[3][0]
            # note that the info is stored in the wrong place :(
            report = 0
        elif name[0] == 'expandedname':
            report = 1
            expanded = 1
            name = name[3][0]
        else:
            report = 1
        name = getString( name, buffer )
        self.currentProduction = name
        try:
            content = dispatch( self, sublist[1], buffer )
            content.report = report
            content.expanded = expanded
            self.generator.addDefinition(
                name,
                content,
            )
        finally:
            # always clear the marker, so a failure while processing this
            # production does not leave a stale name on the processor
            del self.currentProduction

    ### element configuration
    def element_token( self, info, buffer):
        '''get the children, then configure

        Dispatches every child; the indicator children supply the
        negative/optional/repeating/lookahead/errorOnFail settings, any
        other child is taken as the base element.  Returns the
        configured base element.
        '''
        (tag, left, right, sublist) = info
        base = None
        negative = 0
        optional = 0
        repeating = 0
        lookahead = 0
        errorOnFail = None
        for tup in sublist:
            result = dispatch( self, tup, buffer )
            if tup[0] == 'negpos_indicator':
                negative = result
            elif tup[0] == 'occurence_indicator':
                optional, repeating = result
            elif tup[0] == 'lookahead_indicator':
                lookahead = result
            elif tup[0] == 'error_on_fail':
                # we do some extra work here: the "expected" text is the
                # element_token source up to the start of the "!" marker
                errorOnFail = result
                self._config_error_on_fail( errorOnFail, (tag,left,tup[1],[]), buffer )
            else:
                base = result
        base.optional = optional
        base.negative = negative
        base.repeating = repeating
        base.lookahead = lookahead
        if errorOnFail:
            base.errorOnFail = errorOnFail
        return base

    ### generator-node-builders
    def seq_group( self, info, buffer):
        """Process a sequential-group into a SequentialGroup element token

        A stand-alone error_on_fail child is not added to the group;
        instead every element following it receives a configured copy
        as its errorOnFail attribute.  A group with a single element
        returns that element directly.

        Raises ValueError if the group contains no element at all.
        """
        (tag, left, right, sublist) = info
        children = dispatchList( self, sublist, buffer )
        errorOnFail = None
        result = []
        for (item,tup) in zip(children,sublist):
            if isinstance( item, ErrorOnFail ):
                errorOnFail = item
            else:
                if errorOnFail:
                    item.errorOnFail = errorOnFail.copy()
                    self._config_error_on_fail(
                        item.errorOnFail,
                        tup,
                        buffer
                    )
                result.append( item )
        if len(result) == 1:
            # single-item sequential group (very common)
            return result[0]
        elif not result:
            raise ValueError( """SequentialGroup on line %s doesn't have an element-token child! grammar was %s"""%( lines(0,left, buffer), buffer[left:left+25]))
        base = SequentialGroup(
            children = result,
        )
        return base
    def fo_group( self, info, buffer):
        """Process a first-of-group into a FirstOf element token"""
        (tag, left, right, sublist) = info
        children = dispatchList( self, sublist, buffer )
        if len(children) == 1:
            # this should never happen, but if it does, we can deal with it I suppose...
            return children[0]
        base = FirstOfGroup(
            children = children
        )
        return base

    def literal( self, info, buffer):
        '''Turn a literal result into a literal generator

        A leading literalDecorator child selects CILiteral instead of
        Literal; the remaining children are joined to form the value.
        '''
        (tag, left, right, sublist) = info
        if sublist and sublist[0][0] == 'literalDecorator':
            # right now only have the one decorator...
            sublist = sublist[1:]
            classObject = CILiteral
        else:
            classObject = Literal
        elements = dispatchList( self, sublist, buffer)
        ### Should check for CILiteral with non-CI string or single-character value!
        return classObject( value = "".join(elements) )

    def range( self, info, buffer):
        """Build a Range whose value is the joined result of the children"""
        # NOTE: a CharSet-based ("new syntax") version of this builder was
        # sketched here in disabled code but never implemented.
        (tag, left, right, sublist) = info
        return Range(
            value = ''.join(dispatchList( self, sublist, buffer)),
        )
    def name( self, tup, buffer):
        """Build a Name element referring to the matched production name"""
        return Name(
            value = getString(tup, buffer),
        )
    ### simple translators
    # occurence indicator -> (optional, repeating)
    occurenceIndicatorMap = {
        '*': (1,1),
        '+': (0,1),
        '?': (1,0),
    }
    def occurence_indicator( self, tup, buffer):
        '''Return optional, repeating as a tuple of true/false values'''
        value = getString(tup, buffer)
        return self.occurenceIndicatorMap[value]
    def lookahead_indicator( self, tup, buffer ):
        """If present, the lookahead indicator just says "yes", so just return 1"""
        return 1
    def error_on_fail( self, info, buffer ):
        """If present, we are going to make the current object an errorOnFail type,

        If there's a string literal child, then we use it to create the
        "message" attribute of the errorOnFail object.  A "c" decorator
        on that literal is ignored.
        """
        (tag,left,right,children) = info
        err = ErrorOnFail()
        if children:
            (tag,left,right,children) = children[0]
            # The grammar lets the message literal carry a literalDecorator
            # child; this class has no handler for it, so dispatching on it
            # would fail.  Case-insensitivity means nothing for a message,
            # so the decorator is dropped.
            fragments = [
                child for child in children
                if child[0] != 'literalDecorator'
            ]
            message = "".join(dispatchList( self, fragments, buffer))
            err.message = message
        return err
    def _config_error_on_fail( self, errorOnFail, tup, buffer ):
        """Configure an error-on-fail instance for a given child tuple

        Sets "expected" to the source text spanned by tup and, when a
        production is currently being processed, "production" to its
        name.
        """
        # what we expected to find...
        errorOnFail.expected = buffer[tup[1]:tup[2]]
        if hasattr( self, "currentProduction"):
            errorOnFail.production = self.currentProduction


    # negpos indicator -> value for the "negative" flag
    negposIndicatorMap = {
        '+': 0,
        '-': 1,
    }
    def negpos_indicator( self, tup, buffer ):
        '''return whether indicates negative'''
        value = getString(tup, buffer)
        return self.negposIndicatorMap[value]

    def CHARNODBLQUOTE( self, tup, buffer):
        """Return the matched text unchanged (also serves CHAR and CHARNOSNGLQUOTE)"""
        return getString(tup, buffer)
    CHAR = CHARNOSNGLQUOTE = CHARNODBLQUOTE
    def ESCAPEDCHAR( self, info, buffer):
        """Return the character(s) produced by the escape's children"""
        (tag, left, right, sublist) = info
        return "".join(dispatchList( self, sublist, buffer))
    # character following the backslash -> character it stands for
    specialescapedmap = {
    'a':'\a',
    'b':'\b',
    'f':'\f',
    'n':'\n',
    'r':'\r',
    't':'\t',
    'v':'\v',
    '\\':'\\',
    '"':'"',
    "'":"'",
    }
    def SPECIALESCAPEDCHAR( self, tup, buffer):
        """Translate a single-character escape via specialescapedmap"""
        return self.specialescapedmap[ getString(tup, buffer)]
    def OCTALESCAPEDCHAR(self, tup, buffer):
        """Decode the matched octal digits into a character"""
        return chr(int( getString(tup, buffer), 8 ))
    def HEXESCAPEDCHAR( self, tup , buffer):
        """Decode the matched hex digits into a character"""
        return chr(int( getString(tup, buffer), 16 ))
    def CHARNOBRACE( self, info, buffer):
        """Return the joined result of the children (ESCAPEDCHAR or CHAR)"""
        (tag, left, right, sublist) = info
        return "".join(dispatchList( self, sublist, buffer))
    def CHARRANGE( self, info, buffer):
        '''Create a string from first to second item

        The end points are swapped if given in descending order; the
        result contains every character from the lower to the higher
        end point inclusive.

        Raises ValueError if exactly one end point is a unicode value.
        '''
        (tag, left, right, sublist) = info
        first,second = dispatchList( self, sublist, buffer)
        if second < first:
            second, first = first, second
        if isinstance( first, _unicode ) or isinstance( second, _unicode ):
            _chr = _unichr
            if not (isinstance( second, _unicode ) and isinstance( first, _unicode )):
                raise ValueError( 'Range %s uses one unicode and one string escape, cannot mix'%(buffer[left:right]) )
        else:
            _chr = chr
        first, second = list(map( ord, (first,second) ))
        return u''.join([_chr(u) for u in range(first,second+1)])
    def CHARDASH( self, tup , buffer):
        """A dash inside a range stands for itself"""
        return '-'
    def CHARBRACE( self, tup , buffer):
        """A closing brace in the CHARBRACE position stands for itself"""
        return ']'

    def UNICODEESCAPEDCHAR( self, info, buffer):
        """Decode a unicode-escaped hex character into a character value"""
        (tag, left, right, sublist) = info
        char = _unichr(int( buffer[left:right], 16 ))
        return char
726