examples/page/lemon.peg

# -*- text -*-
## PE grammar for the grammar specification language accepted by the
## LEMON parser generator.


## Questions:
## Are directives always at the beginning of a line ?
## Or can they start within a line ?

## The ifdef/ifndef/endif directives are not documented.
## Neither are the command-line options.


# Grammar header: names the grammar and declares LemonGrammar as its
# start expression.
PEG pg::peg::lemon (LemonGrammar)

# A complete input is leading whitespace, one or more statements, and
# then the end of the input.
LemonGrammar <- SPACE Statement+ EOF ;

# A statement is either a %-directive or a grammar rule. Directives
# are tried first. Whitespace after the statement is consumed here.
Statement <- ( Directive / Rule ) SPACE ;

# Rule: the left-hand symbol with an optional label, the '::='
# operator, the right-hand side, the terminating '.', and then an
# optional bracketed precedence symbol and an optional code block.
Rule <-
    Identifier Label?
    ASSIGN Definition DOT
    Precedence? Codeblock? ;

# Right-hand side: zero or more symbols, each optionally labelled.
Definition <- ( Identifier Label? )* ;

# Label attached to a symbol: '(' name ')'.
Label <- LPAREN Identifier RPAREN ;

# Precedence marker following a rule: '[' name ']'.
Precedence <- LBRACKET Identifier RBRACKET ;

# A directive is the '%' introducer followed directly by one of the
# known directive forms. The alternatives are an ordered choice and
# are tried top to bottom.
Directive <-
    DINTRO
    ( Code
    / DefaultDestructor
    / DefaultType
    / Destructor
    / ExtraArgument
    / Include
    / Left
    / Name
    / Nonassoc
    / ParseAccept
    / ParseFailure
    / Right
    / StackOverflow
    / Stacksize
    / StartSymbol
    / SyntaxError
    / TokenDestructor
    / TokenPrefix
    / TokenType
    / Type
    / Fallback
    ) ;

# Bodies of the individual directives. Each starts with its keyword
# token (the '%' introducer is matched by Directive), followed by the
# arguments of the directive.

# Directives whose only argument is a code block.
Code              <- DCODE    Codeblock;
DefaultType       <- DDEFTYPE Codeblock;
ExtraArgument     <- DEXTRA   Codeblock;
Include           <- DINCL    Codeblock;
ParseAccept       <- DPACC    Codeblock;
ParseFailure      <- DPFAIL   Codeblock;
StackOverflow     <- DSTKOVER Codeblock;
SyntaxError       <- DSYNERR  Codeblock;
TokenType         <- DTOKTYPE Codeblock;

# LEMON's %default_destructor and %token_destructor take just a code
# block, without a symbol name. The identifier is therefore optional;
# it is still accepted so that input which parsed before keeps
# parsing.
DefaultDestructor <- DDEFDEST Identifier? Codeblock;
TokenDestructor   <- DTOKDEST Identifier? Codeblock;

# Directives naming one symbol, followed by a code block.
Destructor        <- DDEST    Identifier Codeblock;
Type              <- DTYPE    Identifier Codeblock;

# Directives taking a single name or number.
Name              <- DNAME    Identifier;
StartSymbol       <- DSTART   Identifier;
TokenPrefix       <- DTOKPFX  Identifier;
Stacksize         <- DSTKSZ   NaturalNumber;

# Directives taking a non-empty list of symbols terminated by '.'.
Left              <- DLEFT    Identifier+ DOT;
Right             <- DRIGHT   Identifier+ DOT;
Nonassoc          <- DNON     Identifier+ DOT;
Fallback          <- DFALLBK  Identifier+ DOT;


# Lexical layer

# A brace-delimited block of embedded code. Nested blocks and
# comments are matched as units; any other character that is not a
# closing brace is consumed one at a time.
match: Codeblock <-
    LBRACE
    ( Codeblock
    / C_COMMENT
    / Cplusplus_COMMENT
    / ( !RBRACE . )
    )*
    RBRACE ;

       # Identifiers: a letter or underscore, followed by any number
       # of letters, digits and underscores. Identifier additionally
       # consumes the whitespace after the name.
       Identifier <- Ident SPACE ;
match: Ident      <- ( <alpha> / '_' ) ( <alnum> / '_' )* ;

       # Natural numbers: one or more decimal digits. NaturalNumber
       # additionally consumes the whitespace after the digits.
       NaturalNumber <- NatNum SPACE ;
match: NatNum        <- [0-9]+ ;

# Punctuation tokens, all declared void. Each of these consumes the
# whitespace following it.
void: ASSIGN   <- '::=' SPACE ;
void: DOT      <- '.'   SPACE ;
void: LPAREN   <- '('   SPACE ;
void: RPAREN   <- ')'   SPACE ;
void: LBRACKET <- '['   SPACE ;
void: RBRACKET <- ']'   SPACE ;

# The braces delimiting a code block do not consume whitespace.
void: LBRACE   <- '{' ;
void: RBRACE   <- '}' ;

# The character introducing a directive. No whitespace is consumed
# after it, i.e. the keyword has to follow immediately.
void: DINTRO   <- '%' ;

# Directive keywords, all declared void. Each of them consumes the
# whitespace following the keyword.
void: DCODE    <- 'code'               SPACE ;
void: DDEFDEST <- 'default_destructor' SPACE ;
void: DDEFTYPE <- 'default_type'       SPACE ;
void: DDEST    <- 'destructor'         SPACE ;
void: DEXTRA   <- 'extra_argument'     SPACE ;
void: DINCL    <- 'include'            SPACE ;
void: DLEFT    <- 'left'               SPACE ;
void: DNAME    <- 'name'               SPACE ;
void: DNON     <- 'nonassoc'           SPACE ;
void: DPACC    <- 'parse_accept'       SPACE ;
void: DPFAIL   <- 'parse_failure'      SPACE ;
void: DRIGHT   <- 'right'              SPACE ;
void: DSTKOVER <- 'stack_overflow'     SPACE ;
void: DSTKSZ   <- 'stack_size'         SPACE ;
void: DSTART   <- 'start_symbol'       SPACE ;
void: DSYNERR  <- 'syntax_error'       SPACE ;
void: DTOKDEST <- 'token_destructor'   SPACE ;
void: DTOKPFX  <- 'token_prefix'       SPACE ;
void: DTOKTYPE <- 'token_type'         SPACE ;
void: DTYPE    <- 'type'               SPACE ;
void: DFALLBK  <- 'fallback'           SPACE ;

# Keywords of the conditional directives. They are not reached
# through the regular Directive rule (and its DINTRO), so the '%' is
# part of the keyword literal itself.
void: DIFDEF  <- '%ifdef'  SPACE ;
void: DIFNDEF <- '%ifndef' SPACE ;
void: DENDIF  <- '%endif'  SPACE ;

# Conditional directives. They are used in the parse.y file of
# SQLite, but the LEMON documentation does not describe them. For
# now they are handled as whitespace (they are alternatives of the
# SPACE rule).

Ifdef  <- DIFDEF  Identifier ;
Ifndef <- DIFNDEF Identifier ;
Endif  <- DENDIF ;


# Whitespace

# Whitespace, declared void: any run, possibly empty, of blanks,
# tabs, line breaks, C and C++ style comments, and the conditional
# directives (Ifndef, Ifdef, Endif).
void: SPACE <-
    ( [ \t\n\r]
    / C_COMMENT
    / Cplusplus_COMMENT
    / Ifndef
    / Ifdef
    / Endif
    )* ;

# C-style comment: everything from '/*' up to and including the
# first '*/' after it.
C_COMMENT  <- CCOM_OPEN (!CCOM_CLOSE .)* CCOM_CLOSE;
CCOM_OPEN  <- '/*';
CCOM_CLOSE <- '*/';

# C++-style comment: '//' up to and including the end of the line.
# The end of the input is accepted as terminator as well, so that a
# comment on the last line of a file without a final line break
# still matches.
Cplusplus_COMMENT <- '//' (!EOL .)* (EOL / EOF);

# End of line: CR LF, a lone CR, or a lone LF.
EOL               <- '\r\n' / '\r' / '\n';

# End of input: succeeds only where no further character can be read.
EOF <- !. ;

# Terminator of the grammar specification.
END;