// This file is part of PyANTLR. See LICENSE.txt for license
// details..........Copyright (C) Wolfgang Haefelinger, 2004.
//
// $Id$

header {

    // import language specific stuff
    // need to import my local module defining super classes etc.
    import asn1
}
options {
    language="Python";
}

/* Lexer for ASN.1 source text.
**
** The generated scanner derives from asn1.CharScanner (a project-local
** class, not visible in this file). The rules below read the following
** members from the scanner instance:
**   self.state_with_syntax   - gates LL_BRACKET / RR_BRACKET
**   self.altcomment          - gates the non-standard comment styles
**   self.chr_ws_erase(...)   - post-processes character strings
** Token types named TOKEN_* come from the imported ASN1 vocabulary.
*/
class asn1_l extends Lexer("asn1.CharScanner");

options {
    k = 3;
    charVocabulary = '\3'..'\377';
    caseSensitive=true;
    testLiterals = true;
    codeGenMakeSwitchThreshold = 2;
    codeGenBitsetTestThreshold = 2;
    importVocab=ASN1;
}

tokens {
    DOTDOT;
    ELLIPSIS;
}

/* Operators and punctuation. */
ASSIGN_OP    : "::=" ;
BAR          : '|'   ;
COLON        : ':'   ;
COMMA        : ','   ;
DOT          : '.'   ;
DOTDOT       : ".."  ;
ELLIPSIS     : "..." ;
EXCLAMATION  : '!'   ;
INTERSECTION : '^'   ;
LESS         : '<'   ;
L_BRACE      : '{'   ;
L_BRACKET    : '['   ;
/* "[[" is only a token while self.state_with_syntax is False. */
LL_BRACKET   : { self.state_with_syntax==False }? "[[" ;
L_PAREN      : '('   ;
MINUS        : '-'   ;
PLUS         : '+'   ;
R_BRACE      : '}'   ;
R_BRACKET    : ']'   ;
/* "]]" is only a token while self.state_with_syntax is False. */
RR_BRACKET   : { self.state_with_syntax==False }? "]]" ;
R_PAREN      : ')'   ;
SEMI         : ';'   ;
AT           : '@'   ;


/* These are whitespace (without newline) characters according to X.680:2002 */
protected
WSchr
    : '\t'      // horizontal tab (HT)  '\t'  0x09   9
    | ' '       // space (SP)           ' '   0x20  32
    ;

/* Same as WSchr - just ignore consumed character */
protected
WSign
    : WSchr { $setText("") }
    ;

/* the end of line */
protected
EOLchr
    : (
        options {
            generateAmbigWarnings = false;
        }
        : '\r''\n'
        | '\r'
        | '\n'
//      | '\v'      // vertical tab (VT)        0x0b  11
//      | '\f'      // form feed (FF)     '\f'  0x0c  12
      )
      {
          $newline
      }
    ;

/* like EOLchr but we ignore the consumed symbol */
protected
EOLign
    : EOLchr {
          $setText("")
      }
    ;

/* like EOLchr but we normalize the consumed symbol to "\n" */
protected
EOLnrm
    : EOLchr { $setText("\n") }
    ;

/* upper (ASCII) case characters */
protected
UPCHR
    : 'A' .. 'Z'
    ;

/* lower (ASCII) case characters */
protected
LOCHR
    : 'a' .. 'z'
    ;

/* what's an (arabic) digit */
protected
DIGIT
    : '0' .. '9'
    ;

/* what's a (roman) letter - yes, the name sucks a bit */
protected
CHR
    : UPCHR | LOCHR
    ;

/* what's allowed in an identifier */
protected
IDCHR
    : CHR | '-' | DIGIT
    ;


/* a binary digit */
protected
BINCHR
    : ('0'|'1')
    ;

/* a hex digit */
protected
HEXCHR
    : ('0'..'9')
    | ('A'..'F')
    | ('a'..'f')
    ;

/* a binary string; embedded whitespace and line ends are dropped from the text */
protected
BINSTR
    : "'" (BINCHR|WSign|EOLign)+ "'B" ;

/* a hex string; embedded whitespace and line ends are dropped from the text */
protected
HEXSTR
    : "'" (HEXCHR|WSign|EOLign)+ "'H" ;

/* escape character in character strings: a doubled quote yields one quote */
protected
CHResc
    : '"' '"' { $setText("\"") }
    ;


/* define which input symbols we can skip (so called whitespace) */
WS
    : ( WSchr | EOLchr )+ { $skip }
    ;


/* A number is a sequence of digits - note that we deliberately allow
** here for tokens like '001' etc.
*/
TOKEN_NUMBER
    : (DIGIT)+
    ;


/* What's an identifier.
**
** The first six alternatives fold two-word keywords separated by
** whitespace ("BIT STRING", "OCTET STRING", ...) into a single token.
** Otherwise the token type depends on the case of the characters:
**   TOKEN_WORD  - starts with an upper case letter, no lower case letter seen
**   TOKEN_Word  - starts with an upper case letter, some lower case letter seen
**   TOKEN_word  - starts with a lower case letter
*/
ID
{ lowchrseen=False }
    : ("BIT" WS "STRING") => "BIT" WS "STRING" {
          $setType(TOKEN_BIT_STRING)
      }
    | ("OCTET" WS "STRING") => "OCTET" WS "STRING" {
          $setType(TOKEN_OCTET_STRING)
      }
    | ("OBJECT" WS "IDENTIFIER") => "OBJECT" WS "IDENTIFIER" {
          $setType(TOKEN_OBJECT_IDENTIFIER)
      }
    | ("ENCODED" WS "BY") => "ENCODED" WS "BY" {
          $setType(TOKEN_ENCODED_BY)
      }
    | ("CONSTRAINED" WS "BY") => "CONSTRAINED" WS "BY" {
          $setType(TOKEN_CONSTRAINED_BY)
      }
    | ("DEFINED" WS "BY") => "DEFINED" WS "BY" {
          $setType(TOKEN_DEFINED_BY)
      }
    | UPCHR ( LOCHR{lowchrseen=True}|UPCHR|DIGIT|'-')* {
          $setType(TOKEN_Word)
          if not lowchrseen:
              $setType(TOKEN_WORD)
      }
    | LOCHR ( IDCHR )* {
          $setType(TOKEN_word)
      }
    ;

/* What's a field: '&' followed by an identifier. The token type follows
** the same case convention as rule ID (TOKEN_FIELD / TOKEN_Field /
** TOKEN_field).
*/
FIELD
{ lowchrseen=False }
    : '&' UPCHR ( LOCHR{lowchrseen=True}|UPCHR|DIGIT|'-')* {
          $setType(TOKEN_Field)
          if not lowchrseen:
              $setType(TOKEN_FIELD)
      }
    | '&' LOCHR ( IDCHR )* { $setType(TOKEN_field) }
    ;



/* an octet string is either a bit string or a hex string */
OCTSTR
    : (BINSTR)=>BINSTR { $setType(TOKEN_BSTRING) }
    | HEXSTR           { $setType(TOKEN_HSTRING) }
    ;


/* A character string: this rule is not 100% correct as it will not
** ignore ws before and after eol. This is best handled via a
** language specific function. Note that rule EOLnrm will
** replace any eol character by \n to simplify text processing.
** In contrast, ws is not normalized as ws can't be ignored in general.
**
** The matched text is passed through self.chr_ws_erase(), which is
** defined outside this file.
*/
TOKEN_CSTRING
    : '"' (CHResc | EOLnrm | ~('"'|'\r'|'\n'))* '"' {
          s = self.chr_ws_erase($getText,"\n","\t ")
          $setText(s)
      }
    ;



/* ASN.1 has kind of a tricky comment rule: A comment starts with "--"
** and ends either with a "--" or with an eol character. Nesting of
** comments is therefore not possible, i.e.
**   -- not visible -- visible -- not visible
** The really ugly thing about this is that you can't just comment out
** a line (regardless of its content) by prefixing the line with
** "--". For example assume you have this line:
**   one INTEGER ::= 1 -- sample integer
** Then have this:
**   -- one INTEGER ::= 1 -- sample integer
** This will hide the ASN.1 and just make the comment visible!
*/

COMMENT
    :
      "--"
      (
        // a single '-' belongs to the comment, "--" terminates it
        ~('-'|'\n'|'\r') | {self.LA(2) != '-'}? '-'
      )*
      {
          // consume the closing "--" if the comment did not end at eol
          if self.LA(1) == '-':
              self.match("--")
          $skip
      }
    ;

/* Non-standard comment styles, skipped like COMMENT. They are only
** recognised while the scanner's altcomment flag is true. The flag is
** read from the scanner instance, like state_with_syntax above.
** NOTE(review): assumes asn1.CharScanner (or whoever configures the
** lexer) provides self.altcomment - confirm against that class.
*/
ALTCOMMENT
    : { self.altcomment == True }?
      ( ALTCOMMENT1
      | ALTCOMMENT2
      | ALTCOMMENT3
      )
      {
          $skip
      }
    ;

/* Due to problematic ASN.1 commentaries we have an alternative -
** "//" starts a comment that eats up everything till end of line
** (as in C++ and Java).
*/

protected
ALTCOMMENT1
    :
      { self.altcomment == True }? "//" (~('\n'|'\r'))*
      {
          pass
      }
    ;

/* We also allow for typical C comments albeit not nested ones */
protected
ALTCOMMENT2
    : "/*"
      (
        options {
            greedy=false;
        }
        : '\r' ( options { warnWhenFollowAmbig=false; } : '\n')? { $newline }
        | '\n' { $newline }
        | .
      )*
      "*/"
      {
          pass
      }
    ;

/* And as homage to the master of style, Niklaus Wirth, we also allow
** comments a la PASCAL */
protected
ALTCOMMENT3
    : "{*"
      (
        options {
            greedy=false;
        }
        : '\r' ( options { warnWhenFollowAmbig=false; } : '\n')? { $newline }
        | '\n' { $newline }
        | .
      )*
      "*}"
      {
          pass
      }
    ;
