%{

#include <string.h>
#include <errno.h>
#include <assert.h>

#include "asn1parser.h"
#include "asn1p_y.h"

int asn1p_lex(void);
void asn1p_lexer_hack_push_opaque_state(void);		/* Used in .y */
void asn1p_lexer_hack_enable_with_syntax(void);		/* Used in .y */
void asn1p_lexer_hack_push_encoding_control(void);	/* Used in .y */

/*
 * Replace the default fatal error handler of the generated scanner:
 * report the current line and token text, then terminate the process.
 */
#define	YY_FATAL_ERROR(msg)	do {			\
		fprintf(stderr,				\
			"lexer error at line %d, "	\
			"text \"%s\"\n",		\
			yylineno, yytext);		\
		exit(1);				\
	} while(0)

int asn1p_lexer_pedantic_1990 = 0;
int asn1p_lexer_types_year = 0;
int asn1p_lexer_constructs_year = 0;

int asn1p_as_pointer;

static asn1c_integer_t _lex_atoi(const char *ptr);
static double          _lex_atod(const char *ptr);

/*
 * Check that the type is defined in the year of the standard chosen.
 * Always true when asn1p_lexer_types_year is 0 (no year selected).
 */
#define	TYPE_LIFETIME(fyr, lyr)					\
	(!asn1p_lexer_types_year				\
	|| ((fyr) && (fyr) <= asn1p_lexer_types_year)		\
	|| ((lyr) && (lyr) > asn1p_lexer_types_year))

/*
 * Check that the construction (or concept, i.e. CLASS) is defined in
 * a given year.
 * Always true when asn1p_lexer_constructs_year is 0 (no year selected).
 */
#define	CONSTRUCT_LIFETIME(fyr, lyr)				\
	(!asn1p_lexer_constructs_year				\
	|| ((fyr) && (fyr) <= asn1p_lexer_constructs_year)	\
	|| ((lyr) && (lyr) > asn1p_lexer_constructs_year))

/*
 * Append quoted string.
 * Allocates a new NUL-terminated buffer holding the previous contents of
 * asn1p_lval.tv_opaque followed by (tlen) bytes of (text), frees the old
 * buffer and stores the new one. Makes the enclosing yylex() return -1
 * if the allocation fails.
 */
#define	QAPPEND(text, tlen)	do {				\
		char *prev_text = asn1p_lval.tv_opaque.buf;	\
		int prev_len = asn1p_lval.tv_opaque.len;	\
		char *p;					\
								\
		p = malloc((tlen) + prev_len + 1);		\
		if(p == NULL) return -1;			\
								\
		if(prev_text) memcpy(p, prev_text, prev_len);	\
		memcpy(p + prev_len, (text), (tlen));		\
		p[prev_len + (tlen)] = '\0';			\
								\
		free(asn1p_lval.tv_opaque.buf);			\
		asn1p_lval.tv_opaque.buf = p;			\
		asn1p_lval.tv_opaque.len = (tlen) + prev_len;	\
	} while(0)

%}

%option	never-interactive
%option	noinput
%option	noyywrap stack
/* Performance penalty is OK */
%option yylineno
/* Controlled from within application */
%option debug

%pointer

%x dash_comment
%x idash_comment
%x cpp_comment
%x quoted
%x opaque
%x encoding_control
%x with_syntax

/* Newline */
NL	[\r\v\f\n]
/* White-space */
WSP	[\t\r\v\f\n ]

%%

-{3,}/[\r\n]	/* Immediately terminated long comment */
-{3,}/[^-\r\n]	yy_push_state(idash_comment);	/* Incorrect, but acceptable */
<idash_comment>{
	-{3,}	yy_pop_state();	/* Acceptable end of comment */
}

--<[ \t]*ASN1C.RepresentAsPointer[ \t]*>--	asn1p_as_pointer = 1;

<INITIAL,with_syntax>--		yy_push_state(dash_comment);
<dash_comment,idash_comment>{

	{NL}	yy_pop_state();

	--	yy_pop_state();	/* End of comment */
	-	/* Eat single dash */
	[^\r\v\f\n-]+	/* Eat */
}

<INITIAL,cpp_comment,with_syntax>"/*"		yy_push_state(cpp_comment);
<cpp_comment>{
	[^*/<]	/* Eat */
	"*/"	yy_pop_state();
	.	/* Eat */
}


	/*
	 * This state is being set from the corresponding .y module when
	 * higher-level data is necessary to make proper parsing of the
	 * underlying data. Thus, we enter the <opaque> state and save
	 * everything for later processing.
	 */
<opaque>{

	"{"	{
			/* Nested brace: one more <opaque> level on the stack */
			yy_push_state(opaque);
			asn1p_lval.tv_opaque.buf = strdup(yytext);
			asn1p_lval.tv_opaque.len = yyleng;
			return TOK_opaque;
		}

	"}"	{
			yy_pop_state();
			asn1p_lval.tv_opaque.buf = strdup(yytext);
			asn1p_lval.tv_opaque.len = yyleng;
			return TOK_opaque;
		}

	[^{}:=]+	{
			asn1p_lval.tv_opaque.buf = strdup(yytext);
			asn1p_lval.tv_opaque.len = yyleng;
			return TOK_opaque;
		}

	"::="	{
			fprintf(stderr,
				"ASN.1 Parser syncronization failure: "
				"\"%s\" at line %d must not appear "
				"inside value definition\n",
				yytext, yylineno);
			return -1;
		}

	[:=]	{
			asn1p_lval.tv_opaque.buf = strdup(yytext);
			asn1p_lval.tv_opaque.len = yyleng;
			return TOK_opaque;
		}

}

\"[^\"]*		{
		/* Opening quote: start a fresh buffer without the quote itself */
		asn1p_lval.tv_opaque.buf = 0;
		asn1p_lval.tv_opaque.len = 0;
		QAPPEND(yytext+1, yyleng-1);
		yy_push_state(quoted);
	}
<quoted>{

	\"\"	{ QAPPEND(yytext, yyleng-1); }	/* Add a single quote */
	[^\"]+	{ QAPPEND(yytext, yyleng); }

	\"	{
		yy_pop_state();
		/* The closing quote is not appended to the string. */

		/*
		 * yytext holds only the closing quote here, so the
		 * newline check has to look at the accumulated string.
		 */
		if(asn1p_lexer_pedantic_1990
		&& memchr(asn1p_lval.tv_opaque.buf, '\n',
				asn1p_lval.tv_opaque.len)) {
			fprintf(stderr, "%s: "
			"Newlines are prohibited by ASN.1:1990\n",
			asn1p_lval.tv_opaque.buf);
			return -1;
		}

		return TOK_cstring;
	}

}

<encoding_control>{
	ENCODING-CONTROL	{
			/* Push the keyword back (in reverse) for the outer state */
			const char *s = "ENCODING-CONTROL";
			const char *p = s + sizeof("ENCODING-CONTROL") - 2;
			for(; p >= s; p--) unput(*p);
			yy_pop_state();
		}
	END	unput('D'); unput('N'); unput('E'); yy_pop_state();
	[^{} \t\r\v\f\n]+	/* Eat */
	[[:alnum:]]+	/* Eat */
	.	/* Eat everything else */
	"\n"	/* Eat */
}

'[0-9A-F \t\r\v\f\n]+'H {
		/* " \t\r\n" weren't allowed in ASN.1:1990. */
		/*
		 * Copy the text, as the 'B' rule below does: with %pointer
		 * yytext refers to the scanner buffer and is overwritten
		 * by the next scan.
		 */
		asn1p_lval.tv_str = strdup(yytext);
		return TOK_hstring;
	}

'[01 \t\r\v\f\n]+'B	{
		/* " \t\r\n" weren't allowed in ASN.1:1990. */
		asn1p_lval.tv_str = strdup(yytext);
		return TOK_bstring;
	}


-[1-9][0-9]*	{
		asn1p_lval.a_int = _lex_atoi(yytext);
		if(errno == ERANGE)
			return -1;
		return TOK_number_negative;
	}

[1-9][0-9]*	{
		asn1p_lval.a_int = _lex_atoi(yytext);
		if(errno == ERANGE)
			return -1;
		return TOK_number;
	}

"0"	{
		asn1p_lval.a_int = _lex_atoi(yytext);
		if(errno == ERANGE)
			return -1;
		return TOK_number;
	}

[-+]?[0-9]+[.]?([eE][-+]?)?[0-9]+ {
		asn1p_lval.a_dbl = _lex_atod(yytext);
		if(errno == ERANGE)
			return -1;
		return TOK_realnumber;
	}

ABSENT			return TOK_ABSENT;
ALL			return TOK_ALL;
ANY			{
				/* Appeared in 1990, removed in 1997 */
				if(TYPE_LIFETIME(1990, 1997))
					return TOK_ANY;
				fprintf(stderr, "Keyword \"%s\" at line %d "
					"is obsolete\n", yytext, yylineno);
				REJECT;
			}
APPLICATION		return TOK_APPLICATION;
AUTOMATIC		return TOK_AUTOMATIC;
BEGIN			return TOK_BEGIN;
BIT			return TOK_BIT;
BMPString		{
				if(TYPE_LIFETIME(1994, 0))
					return TOK_BMPString;
				REJECT;
			}
BOOLEAN			return TOK_BOOLEAN;
BY			return TOK_BY;
CHARACTER		return TOK_CHARACTER;
CHOICE			return TOK_CHOICE;
CLASS			return TOK_CLASS;
COMPONENT		return TOK_COMPONENT;
COMPONENTS		return TOK_COMPONENTS;
CONSTRAINED		return TOK_CONSTRAINED;
CONTAINING		return TOK_CONTAINING;
DEFAULT			return TOK_DEFAULT;
DEFINED			{
				/* Appeared in 1990, removed in 1997 */
				if(TYPE_LIFETIME(1990, 1997))
					return TOK_DEFINED;
				fprintf(stderr, "Keyword \"%s\" at line %d "
					"is obsolete\n", yytext, yylineno);
				/* Deprecated since */
				REJECT;
			}
DEFINITIONS		return TOK_DEFINITIONS;
EMBEDDED		return TOK_EMBEDDED;
ENCODED			return TOK_ENCODED;
ENCODING-CONTROL	return TOK_ENCODING_CONTROL;
END			return TOK_END;
ENUMERATED		return TOK_ENUMERATED;
EXCEPT			return TOK_EXCEPT;
EXPLICIT		return TOK_EXPLICIT;
EXPORTS			return TOK_EXPORTS;
EXTENSIBILITY		return TOK_EXTENSIBILITY;
EXTERNAL		return TOK_EXTERNAL;
FALSE			return TOK_FALSE;
FROM			return TOK_FROM;
GeneralizedTime		return TOK_GeneralizedTime;
GeneralString		return TOK_GeneralString;
GraphicString		return TOK_GraphicString;
IA5String		return TOK_IA5String;
IDENTIFIER		return TOK_IDENTIFIER;
IMPLICIT		return TOK_IMPLICIT;
IMPLIED			return TOK_IMPLIED;
IMPORTS			return TOK_IMPORTS;
INCLUDES		return TOK_INCLUDES;
INSTANCE		return TOK_INSTANCE;
INSTRUCTIONS		return TOK_INSTRUCTIONS;
INTEGER			return TOK_INTEGER;
INTERSECTION		return TOK_INTERSECTION;
ISO646String		return TOK_ISO646String;
MAX			return TOK_MAX;
MIN			return TOK_MIN;
MINUS-INFINITY		return TOK_MINUS_INFINITY;
NULL			return TOK_NULL;
NumericString		return TOK_NumericString;
OBJECT			return TOK_OBJECT;
ObjectDescriptor	return TOK_ObjectDescriptor;
OCTET			return TOK_OCTET;
OF			return TOK_OF;
OPTIONAL		return TOK_OPTIONAL;
PATTERN			return TOK_PATTERN;
PDV			return TOK_PDV;
PLUS-INFINITY		return TOK_PLUS_INFINITY;
PRESENT			return TOK_PRESENT;
PrintableString		return TOK_PrintableString;
PRIVATE			return TOK_PRIVATE;
REAL			return TOK_REAL;
RELATIVE-OID		return TOK_RELATIVE_OID;
SEQUENCE		return TOK_SEQUENCE;
SET			return TOK_SET;
SIZE			return TOK_SIZE;
STRING			return TOK_STRING;
SYNTAX			return TOK_SYNTAX;
T61String		return TOK_T61String;
TAGS			return TOK_TAGS;
TeletexString		return TOK_TeletexString;
TRUE			return TOK_TRUE;
UNION			return TOK_UNION;
UNIQUE			return TOK_UNIQUE;
UNIVERSAL		return TOK_UNIVERSAL;
UniversalString		{
				if(TYPE_LIFETIME(1994, 0))
					return TOK_UniversalString;
				REJECT;
			}
UTCTime			return TOK_UTCTime;
UTF8String		{
				if(TYPE_LIFETIME(1994, 0))
					return TOK_UTF8String;
				REJECT;
			}
VideotexString		return TOK_VideotexString;
VisibleString		return TOK_VisibleString;
WITH			return TOK_WITH;


<INITIAL,with_syntax>&[A-Z][A-Za-z0-9]*([-][A-Za-z0-9]+)*	{
		asn1p_lval.tv_str = strdup(yytext);
		return TOK_typefieldreference;
	}

<INITIAL,with_syntax>&[a-z][a-zA-Z0-9]*([-][a-zA-Z0-9]+)*	{
		asn1p_lval.tv_str = strdup(yytext);
		return TOK_valuefieldreference;
	}


[a-z][a-zA-Z0-9]*([-][a-zA-Z0-9]+)*	{
		asn1p_lval.tv_str = strdup(yytext);
		return TOK_identifier;
	}

	/*
	 * objectclassreference
	 */
[A-Z][A-Z0-9]*([-][A-Z0-9]+)*	{
		asn1p_lval.tv_str = strdup(yytext);
		return TOK_capitalreference;
	}

	/*
	 * typereference, modulereference
	 * NOTE: TOK_objectclassreference must be combined
	 * with this token to produce true typereference.
	 */
[A-Z][A-Za-z0-9]*([-][A-Za-z0-9]+)*	{
		asn1p_lval.tv_str = strdup(yytext);
		return TOK_typereference;
	}

"::="		return TOK_PPEQ;

"..."		return TOK_ThreeDots;
".."		return TOK_TwoDots;

<with_syntax>{

	[A-Z][A-Za-z0-9]*([-][A-Za-z0-9]+)*	{
			asn1p_lval.tv_str = strdup(yytext);
			return TOK_Literal;
		}

	","	{
			asn1p_lval.tv_str = strdup(yytext);
			return TOK_Literal;
		}

	"{"	{
			/* Nested brace: one more <with_syntax> level on the stack */
			yy_push_state(with_syntax);
			asn1p_lval.tv_str = strdup(yytext);
			return TOK_Literal;
		}

	"["	return '[';
	"]"	return ']';

	{WSP}+	{
			asn1p_lval.tv_opaque.buf = strdup(yytext);
			asn1p_lval.tv_opaque.len = yyleng;
			return TOK_whitespace;
		}

	"}"	{
			yy_pop_state();
			/* Only the brace that leaves <with_syntax> is a real '}' */
			if(YYSTATE != with_syntax)
				return '}';
			asn1p_lval.tv_str = strdup(yytext);
			return TOK_Literal;
		}

}


{WSP}+	/* Ignore whitespace */


[{][\t\r\v\f\n ]*[0-7][,][\t\r\v\f\n ]*[0-9]+[\t\r\v\f\n ]*[}]	{
		/* Tuple: pick TableColumn and TableRow out of the matched text. */
		asn1c_integer_t part[2] = { -1, -1 };
		const char *cur = yytext;
		int idx;
		for(idx = 0; idx < 2; idx++) {
			while(*cur && (*cur < '0' || *cur > '9'))
				cur++;	/* Skip up to the next number */
			if(*cur)
				part[idx] = _lex_atoi(cur);
			while(*cur >= '0' && *cur <= '9')
				cur++;	/* Skip digits */
		}
		if(part[0] < 0 || part[0] > 7) {
			fprintf(stderr, "%s at line %d: X.680:2003, #37.14 "
				"mandates 0..7 range for Tuple's TableColumn\n",
				yytext, yylineno);
			return -1;
		}
		if(part[1] < 0 || part[1] > 15) {
			fprintf(stderr, "%s at line %d: X.680:2003, #37.14 "
				"mandates 0..15 range for Tuple's TableRow\n",
				yytext, yylineno);
			return -1;
		}
		/* Column goes above the four bits of the row */
		asn1p_lval.a_int = (part[0] << 4) + part[1];
		return TOK_tuple;
	}

[{][\t\r\v\f\n ]*[0-9]+[,][\t\r\v\f\n ]*[0-9]+[,][\t\r\v\f\n ]*[0-9]+[,][\t\r\v\f\n ]*[0-9]+[\t\r\v\f\n ]*[}]	{
		/* Quadruple: pick Group, Plane, Row, Cell out of the matched text. */
		asn1c_integer_t part[4] = { -1, -1, -1, -1 };
		const char *cur = yytext;
		int idx;
		for(idx = 0; idx < 4; idx++) {
			while(*cur && (*cur < '0' || *cur > '9'))
				cur++;	/* Skip up to the next number */
			if(*cur)
				part[idx] = _lex_atoi(cur);
			while(*cur >= '0' && *cur <= '9')
				cur++;	/* Skip digits */
		}
		if(part[0] < 0 || part[0] > 127) {
			fprintf(stderr, "%s at line %d: X.680:2003, #37.12 "
				"mandates 0..127 range for Quadruple's Group\n",
				yytext, yylineno);
			return -1;
		}
		if(part[1] < 0 || part[1] > 255) {
			fprintf(stderr, "%s at line %d: X.680:2003, #37.12 "
				"mandates 0..255 range for Quadruple's Plane\n",
				yytext, yylineno);
			return -1;
		}
		if(part[2] < 0 || part[2] > 255) {
			fprintf(stderr, "%s at line %d: X.680:2003, #37.12 "
				"mandates 0..255 range for Quadruple's Row\n",
				yytext, yylineno);
			return -1;
		}
		if(part[3] < 0 || part[3] > 255) {
			fprintf(stderr, "%s at line %d: X.680:2003, #37.12 "
				"mandates 0..255 range for Quadruple's Cell\n",
				yytext, yylineno);
			return -1;
		}
		/* One byte per component, Group in the top byte */
		asn1p_lval.a_int = (part[0] << 24) | (part[1] << 16)
				| (part[2] << 8) | part[3];
		return TOK_quadruple;
	}

"[["		return TOK_VBracketLeft;
"]]"		return TOK_VBracketRight;

[(){},;:|!.&@\[\]^]	return yytext[0];

	/*
	 * NOTE(review): the unescaped "[]" inside the class below looks like
	 * it may close the character class early, which would make the rest
	 * of the line part of the pattern -- confirm against flex's
	 * character class rules before relying on this rule.
	 */
[^A-Za-z0-9:=,{}<.@()[]'\"|&^*;!-]	{
		/* Fatal when TYPE_LIFETIME(1994, 0) holds, a warning otherwise */
		if(TYPE_LIFETIME(1994, 0))
			fprintf(stderr, "ERROR: ");
		fprintf(stderr,
		"Symbol '%c' at line %d is prohibited "
		"by ASN.1:1994 and ASN.1:1997\n",
			yytext[0], yylineno);
		if(TYPE_LIFETIME(1994, 0))
			return -1;
	}

<*>.	{
		fprintf(stderr,
			"Unexpected token at line %d: \"%s\"\n",
			yylineno, yytext);
		/* Unwind every pushed start condition before bailing out */
		while(YYSTATE != INITIAL)
			yy_pop_state();
		if(0) {
			yy_top_state();		/* Just to use this function. */
			yy_fatal_error("Parse error");
		}
		return -1;
	}

<*><<EOF>>      {
		/* Leave the scanner in INITIAL so it can be reused */
		while(YYSTATE != INITIAL)
			yy_pop_state();
		yyterminate();
	}


%%

/*
 * Very dirty but wonderful hack allowing to rule states from within .y file.
 * Pushes the <opaque> start condition.
 */
void asn1p_lexer_hack_push_opaque_state(void) { yy_push_state(opaque); }

/*
 * Another hack which disables recognizing some tokens when inside WITH SYNTAX.
 * Pushes the <with_syntax> start condition.
 */
void asn1p_lexer_hack_enable_with_syntax(void) { yy_push_state(with_syntax); }

/* Yet another: pushes the <encoding_control> start condition. */
void asn1p_lexer_hack_push_encoding_control(void) {
	yy_push_state(encoding_control);
}

/*
 * Convert the number at (ptr) using asn1p_atoi().
 * On conversion failure a diagnostic is printed and errno is set to ERANGE;
 * the callers test errno after the call, so it is cleared up front to keep
 * a stale ERANGE from rejecting a valid number.
 * Returns the converted value, or 0 if asn1p_atoi() failed without
 * storing one.
 */
static asn1c_integer_t
_lex_atoi(const char *ptr) {
	asn1c_integer_t value = 0;
	errno = 0;
	if(asn1p_atoi(ptr, &value)) {
		fprintf(stderr,
			"Value \"%s\" at line %d is too large "
			"for this compiler! Please contact the asn1c author.\n",
			ptr, yylineno);
		errno = ERANGE;
	}
	return value;
}

/*
 * Convert the number at (ptr) using strtod().
 * If strtod() reports any error, a diagnostic is printed and errno is
 * normalized to ERANGE, which is what the callers test for.
 * Returns whatever strtod() returned.
 */
static double
_lex_atod(const char *ptr) {
	double value;
	errno = 0;
	value = strtod(ptr, 0);
	if(errno) {
		fprintf(stderr,
			"Value \"%s\" at line %d is outside of `double` range "
			"in this compiler! Please contact the asn1c author.\n",
			ptr, yylineno);
		errno = ERANGE;
	}
	return value;
}
