1 %{
2 
3 #include <string.h>
4 #include <errno.h>
5 #include <assert.h>
6 
7 #include "asn1parser.h"
8 #include "asn1p_y.h"
9 
10 int asn1p_lex(void);
11 void asn1p_lexer_hack_push_opaque_state(void);		/* Used in .y */
12 void asn1p_lexer_hack_enable_with_syntax(void);		/* Used in .y */
13 void asn1p_lexer_hack_push_encoding_control(void);	/* Used in .y */
14 
/*
 * Fatal-error hook for the generated scanner: print the current line
 * number and token text to stderr, then terminate with exit status 1.
 * The msg argument is accepted but not printed.
 */
#define	YY_FATAL_ERROR(msg)	do {					\
		fprintf(stderr,						\
			"lexer error at line %d, text \"%s\"\n",	\
			yylineno, yytext);				\
		exit(1);						\
	} while(0)
22 
/* When non-zero, the closing-quote rule applies its ASN.1:1990 newline check. */
int asn1p_lexer_pedantic_1990 = 0;
/* Year consulted by TYPE_LIFETIME(); 0 makes that check always pass. */
int asn1p_lexer_types_year = 0;
/* Year consulted by CONSTRUCT_LIFETIME(); 0 makes that check always pass. */
int asn1p_lexer_constructs_year = 0;

/* Set to 1 when the ASN1C.RepresentAsPointer pragma comment is scanned; never reset in this file. */
int asn1p_as_pointer;
28 
29 static asn1c_integer_t _lex_atoi(const char *ptr);
30 static double          _lex_atod(const char *ptr);
31 
/*
 * Check that the type is defined in the chosen year of the standard.
 *
 * fyr: year the type appeared (0 = not specified).
 * lyr: year the type was removed (0 = not specified).
 *
 * Evaluates to non-zero when no year is selected
 * (asn1p_lexer_types_year == 0), when fyr is set and is not later than
 * the selected year, or when lyr is set and is later than the selected
 * year. Arguments are parenthesized so that expressions can be passed
 * safely; each argument may be evaluated twice.
 *
 * NOTE(review): the three clauses are OR-ed, so a type with both years
 * set (e.g. 1990, 1997) is accepted for every selected year. Confirm
 * whether a closed [fyr, lyr) interval was intended.
 */
#define	TYPE_LIFETIME(fyr, lyr)					\
	(!asn1p_lexer_types_year				\
	|| ((fyr) && (fyr) <= asn1p_lexer_types_year)		\
	|| ((lyr) && (lyr)  > asn1p_lexer_types_year))
39 
/*
 * Check that the construction (or concept, i.e. CLASS) is defined in
 * a given year.
 *
 * fyr: year the construct appeared (0 = not specified).
 * lyr: year the construct was removed (0 = not specified).
 *
 * Evaluates to non-zero when no year is selected
 * (asn1p_lexer_constructs_year == 0), when fyr is set and is not later
 * than the selected year, or when lyr is set and is later than the
 * selected year. Arguments are parenthesized so that expressions can be
 * passed safely; each argument may be evaluated twice.
 */
#define	CONSTRUCT_LIFETIME(fyr, lyr)				\
	(!asn1p_lexer_constructs_year				\
	|| ((fyr) && (fyr) <= asn1p_lexer_constructs_year)	\
	|| ((lyr) && (lyr)  > asn1p_lexer_constructs_year))
48 
/*
 * Grow the quoted-string accumulator in asn1p_lval.tv_opaque by tlen
 * bytes taken from text. A fresh NUL-terminated buffer is allocated,
 * the previous contents (if any) and the new bytes are copied into it,
 * and the previous buffer is released. On allocation failure the
 * enclosing function returns -1.
 */
#define	QAPPEND(text, tlen)	do {					\
		char *old_buf = asn1p_lval.tv_opaque.buf;		\
		int old_len = asn1p_lval.tv_opaque.len;			\
		char *joined = malloc((tlen) + old_len + 1);		\
									\
		if(!joined) return -1;					\
									\
		if(old_buf) memcpy(joined, old_buf, old_len);		\
		memcpy(joined + old_len, text, tlen);			\
		joined[old_len + (tlen)] = '\0';			\
									\
		free(old_buf);						\
		asn1p_lval.tv_opaque.buf = joined;			\
		asn1p_lval.tv_opaque.len = (tlen) + old_len;		\
	} while(0)
68 
69 %}
70 
%option	never-interactive
%option	noinput
/* "stack" provides yy_push_state()/yy_pop_state(), used throughout the rules */
%option	noyywrap stack
/* Performance penalty is OK */
%option yylineno
/* Controlled from within application */
%option debug

/* yytext is a pointer into the scan buffer rather than a private array */
%pointer

/*
 * Exclusive start conditions:
 *   dash_comment      - inside a double-dash comment
 *   idash_comment     - inside a comment opened by three or more dashes
 *   cpp_comment       - inside a C-style comment
 *   quoted            - inside a double-quoted character string
 *   opaque            - pushed by asn1p_lexer_hack_push_opaque_state()
 *   encoding_control  - pushed by asn1p_lexer_hack_push_encoding_control()
 *   with_syntax       - pushed by asn1p_lexer_hack_enable_with_syntax()
 */
%x dash_comment
%x idash_comment
%x cpp_comment
%x quoted
%x opaque
%x encoding_control
%x with_syntax

/* Newline */
NL	[\r\v\f\n]
/* White-space */
WSP	[\t\r\v\f\n ]
93 
94 %%
95 
	/*
	 * Runs of three or more dashes. A run directly followed by an end
	 * of line is discarded as a complete comment. A run followed by any
	 * other non-dash character opens an idash_comment, which a further
	 * run of three or more dashes may close.
	 */
-{3,}/[\r\n]	/* Immediately terminated long comment */
-{3,}/[^-\r\n]	yy_push_state(idash_comment);	/* Incorrect, but acceptable */
<idash_comment>{
	-{3,}	yy_pop_state(); /* Acceptable end of comment */
}

	/*
	 * Pragma spelled as a comment: sets asn1p_as_pointer.
	 * NOTE(review): the dot in ASN1C.RepresentAsPointer is an unescaped
	 * regex dot, so it matches any character except newline; confirm
	 * whether a literal dot was meant.
	 */
--<[ \t]*ASN1C.RepresentAsPointer[ \t]*>--	asn1p_as_pointer = 1;

	/*
	 * Ordinary comment: opened by a double dash in INITIAL or
	 * with_syntax; closed by the next double dash or by an end of line.
	 * Both comment states share the rules below.
	 */
<INITIAL,with_syntax>--		yy_push_state(dash_comment);
<dash_comment,idash_comment>{

	{NL}	yy_pop_state();

	--	yy_pop_state();	/* End of comment */
	-	/* Eat single dash */
	[^\r\v\f\n-]+	/* Eat */
}
113 
	/*
	 * C-style comment. The opening rule is also active inside
	 * cpp_comment itself, so every nested opener pushes another level
	 * and each closer pops exactly one level.
	 */
<INITIAL,cpp_comment,with_syntax>"/*"		yy_push_state(cpp_comment);
<cpp_comment>{
	[^*/<]	/* Eat */
	"*/"	yy_pop_state();
	.	/* Eat */
}
120 
121 
	/*
	 * This state is set from the corresponding .y module when
	 * higher-level data is necessary to properly parse the
	 * underlying data. Thus, we enter the <opaque> state and save
	 * everything for later processing.
	 *
	 * Every chunk is returned as TOK_opaque with a strdup()'ed copy of
	 * the matched text. An opening brace pushes one more opaque level
	 * and a closing brace pops one, so the state ends when the braces
	 * balance. The strdup() results are not checked for NULL here.
	 */
<opaque>{

	"{"	{
			yy_push_state(opaque);
			asn1p_lval.tv_opaque.buf = strdup(yytext);
			asn1p_lval.tv_opaque.len = yyleng;
			return TOK_opaque;
		}

	"}"	{
			yy_pop_state();
			asn1p_lval.tv_opaque.buf = strdup(yytext);
			asn1p_lval.tv_opaque.len = yyleng;
			return TOK_opaque;
		}

	[^{}:=]+	{
			asn1p_lval.tv_opaque.buf = strdup(yytext);
			asn1p_lval.tv_opaque.len = yyleng;
			return TOK_opaque;
		}

	"::="	{
			/* An assignment inside opaque data means the parser and lexer disagree */
			fprintf(stderr,
				"ASN.1 Parser syncronization failure: "
				"\"%s\" at line %d must not appear "
				"inside value definition\n",
				yytext, yylineno);
			return -1;
		}

	[:=]	{
			/* A lone colon or equals sign is ordinary opaque data */
			asn1p_lval.tv_opaque.buf = strdup(yytext);
			asn1p_lval.tv_opaque.len = yyleng;
			return TOK_opaque;
		}

	}
166 
167 \"[^\"]*		{
168 			asn1p_lval.tv_opaque.buf = 0;
169 			asn1p_lval.tv_opaque.len = 0;
170 			QAPPEND(yytext+1, yyleng-1);
171 			yy_push_state(quoted);
172 		}
173 <quoted>{
174 
175 	\"\"	{ QAPPEND(yytext, yyleng-1); }	/* Add a single quote */
176 	[^\"]+	{ QAPPEND(yytext, yyleng); }
177 
178 	\"	{
179 			yy_pop_state();
180 			/* Do not append last quote:
181 			// QAPPEND(yytext, yyleng); */
182 
183 			if(asn1p_lexer_pedantic_1990
184 			&& strchr(yytext, '\n')) {
185 				fprintf(stderr, "%s: "
186 				"Newlines are prohibited by ASN.1:1990\n",
187 				asn1p_lval.tv_opaque.buf);
188 				return -1;
189 			}
190 
191 			return TOK_cstring;
192 		}
193 
194 	}
195 
	/*
	 * Inside an encoding control section everything is discarded until
	 * an ENCODING-CONTROL or END keyword shows up. That keyword is
	 * pushed back with unput(), last character first so that it is
	 * re-read in its original order, and the state is popped so that the
	 * enclosing state's rules see the keyword again.
	 * NOTE(review): the [[:alnum:]]+ rule appears to be shadowed by the
	 * broader rule listed just before it; confirm it is still needed.
	 */
<encoding_control>{
	ENCODING-CONTROL	{
			const char *s = "ENCODING-CONTROL";
			const char *p = s + sizeof("ENCODING-CONTROL") - 2;
			for(; p >= s; p--) unput(*p);
			yy_pop_state();
		}
	END	unput('D'); unput('N'); unput('E'); yy_pop_state();
	[^{} \t\r\v\f\n]+
	[[:alnum:]]+
	.	/* Eat everything else */
	"\n"
	}
209 
	/*
	 * Hexadecimal ('...'H) and binary ('...'B) string literals. The
	 * token text, including the apostrophes and the trailing letter, is
	 * handed over unconverted in tv_str.
	 * NOTE(review): the hstring rule passes yytext itself, which points
	 * into the scan buffer, while the bstring rule passes a strdup()'ed
	 * copy. Confirm against the grammar which form its consumer expects
	 * before unifying the two.
	 */
'[0-9A-F \t\r\v\f\n]+'H {
		/* " \t\r\n" weren't allowed in ASN.1:1990. */
		asn1p_lval.tv_str = yytext;
		return TOK_hstring;
	}

'[01 \t\r\v\f\n]+'B	{
		/* " \t\r\n" weren't allowed in ASN.1:1990. */
		asn1p_lval.tv_str = strdup(yytext);
		return TOK_bstring;
	}
221 
222 
	/*
	 * Numeric literals. Integers go through _lex_atoi() and reals
	 * through _lex_atod(); both helpers report a failed conversion by
	 * setting errno to ERANGE, in which case the rule returns -1.
	 * Negative integers, positive integers and a lone zero are separate
	 * rules; the real-number rule needs at least two digits and, on
	 * equal-length matches, loses to the integer rules listed before it.
	 */
-[1-9][0-9]*	{
		asn1p_lval.a_int = _lex_atoi(yytext);
		if(errno == ERANGE)
			return -1;
		return TOK_number_negative;
	}

[1-9][0-9]*	{
		asn1p_lval.a_int = _lex_atoi(yytext);
		if(errno == ERANGE)
			return -1;
		return TOK_number;
	}

"0"	{
		asn1p_lval.a_int = _lex_atoi(yytext);
		if(errno == ERANGE)
			return -1;
		return TOK_number;
	}

[-+]?[0-9]+[.]?([eE][-+]?)?[0-9]+ {
		asn1p_lval.a_dbl = _lex_atod(yytext);
		if(errno == ERANGE)
			return -1;
		return TOK_realnumber;
	}
250 
	/*
	 * Reserved words. Each one maps straight to its token, except for
	 * those guarded by TYPE_LIFETIME(): when the guard fails the rule
	 * falls back through REJECT, letting the next best matching rule
	 * take the text.
	 * NOTE(review): with TYPE_LIFETIME() as currently defined, the
	 * (1990, 1997) guard evaluates true for every year value, so the
	 * "is obsolete" branches of ANY and DEFINED look unreachable;
	 * confirm the intended behaviour.
	 */
ABSENT			return TOK_ABSENT;
ALL			return TOK_ALL;
ANY			{
				/* Appeared in 1990, removed in 1997 */
				if(TYPE_LIFETIME(1990, 1997))
					return TOK_ANY;
				fprintf(stderr, "Keyword \"%s\" at line %d "
					"is obsolete\n", yytext, yylineno);
				REJECT;
			}
APPLICATION		return TOK_APPLICATION;
AUTOMATIC		return TOK_AUTOMATIC;
BEGIN			return TOK_BEGIN;
BIT			return TOK_BIT;
BMPString		{
				if(TYPE_LIFETIME(1994, 0))
					return TOK_BMPString;
				REJECT;
			}
BOOLEAN			return TOK_BOOLEAN;
BY			return TOK_BY;
CHARACTER		return TOK_CHARACTER;
CHOICE			return TOK_CHOICE;
CLASS			return TOK_CLASS;
COMPONENT		return TOK_COMPONENT;
COMPONENTS		return TOK_COMPONENTS;
CONSTRAINED		return TOK_CONSTRAINED;
CONTAINING		return TOK_CONTAINING;
DEFAULT			return TOK_DEFAULT;
DEFINED			{
				/* Appeared in 1990, removed in 1997 */
				if(TYPE_LIFETIME(1990, 1997))
					return TOK_DEFINED;
				fprintf(stderr, "Keyword \"%s\" at line %d "
					"is obsolete\n", yytext, yylineno);
				/* Deprecated since */
				REJECT;
			}
DEFINITIONS		return TOK_DEFINITIONS;
EMBEDDED		return TOK_EMBEDDED;
ENCODED			return TOK_ENCODED;
ENCODING-CONTROL	return TOK_ENCODING_CONTROL;
END			return TOK_END;
ENUMERATED		return TOK_ENUMERATED;
EXCEPT			return TOK_EXCEPT;
EXPLICIT		return TOK_EXPLICIT;
EXPORTS			return TOK_EXPORTS;
EXTENSIBILITY		return TOK_EXTENSIBILITY;
EXTERNAL		return TOK_EXTERNAL;
FALSE			return TOK_FALSE;
FROM			return TOK_FROM;
GeneralizedTime		return TOK_GeneralizedTime;
GeneralString		return TOK_GeneralString;
GraphicString		return TOK_GraphicString;
IA5String		return TOK_IA5String;
IDENTIFIER		return TOK_IDENTIFIER;
IMPLICIT		return TOK_IMPLICIT;
IMPLIED			return TOK_IMPLIED;
IMPORTS			return TOK_IMPORTS;
INCLUDES		return TOK_INCLUDES;
INSTANCE		return TOK_INSTANCE;
INSTRUCTIONS		return TOK_INSTRUCTIONS;
INTEGER			return TOK_INTEGER;
INTERSECTION		return TOK_INTERSECTION;
ISO646String		return TOK_ISO646String;
MAX			return TOK_MAX;
MIN			return TOK_MIN;
MINUS-INFINITY		return TOK_MINUS_INFINITY;
NULL			return TOK_NULL;
NumericString		return TOK_NumericString;
OBJECT			return TOK_OBJECT;
ObjectDescriptor	return TOK_ObjectDescriptor;
OCTET			return TOK_OCTET;
OF			return TOK_OF;
OPTIONAL		return TOK_OPTIONAL;
PATTERN			return TOK_PATTERN;
PDV			return TOK_PDV;
PLUS-INFINITY		return TOK_PLUS_INFINITY;
PRESENT			return TOK_PRESENT;
PrintableString		return TOK_PrintableString;
PRIVATE			return TOK_PRIVATE;
REAL			return TOK_REAL;
RELATIVE-OID		return TOK_RELATIVE_OID;
SEQUENCE		return TOK_SEQUENCE;
SET			return TOK_SET;
SIZE			return TOK_SIZE;
STRING			return TOK_STRING;
SYNTAX			return TOK_SYNTAX;
T61String		return TOK_T61String;
TAGS			return TOK_TAGS;
TeletexString		return TOK_TeletexString;
TRUE			return TOK_TRUE;
UNION			return TOK_UNION;
UNIQUE			return TOK_UNIQUE;
UNIVERSAL		return TOK_UNIVERSAL;
UniversalString		{
				if(TYPE_LIFETIME(1994, 0))
					return TOK_UniversalString;
				REJECT;
			}
UTCTime			return TOK_UTCTime;
UTF8String		{
				if(TYPE_LIFETIME(1994, 0))
					return TOK_UTF8String;
				REJECT;
			}
VideotexString		return TOK_VideotexString;
VisibleString		return TOK_VisibleString;
WITH			return TOK_WITH;
360 
361 
	/*
	 * Field references start with an ampersand: an upper-case first
	 * letter gives a type field reference, a lower-case one a value
	 * field reference. Both are also recognised inside with_syntax.
	 * All rules in this group hand over a strdup()'ed copy of the text.
	 */
<INITIAL,with_syntax>&[A-Z][A-Za-z0-9]*([-][A-Za-z0-9]+)*	{
		asn1p_lval.tv_str = strdup(yytext);
		return TOK_typefieldreference;
	}

<INITIAL,with_syntax>&[a-z][a-zA-Z0-9]*([-][a-zA-Z0-9]+)*	{
		asn1p_lval.tv_str = strdup(yytext);
		return TOK_valuefieldreference;
	}


	/*
	 * identifier: starts with a lower-case letter
	 */
[a-z][a-zA-Z0-9]*([-][a-zA-Z0-9]+)*	{
		asn1p_lval.tv_str = strdup(yytext);
		return TOK_identifier;
	}

	/*
	 * objectclassreference
	 * (upper-case letters and digits only; listed before the
	 * typereference rule so that it wins equal-length matches)
	 */
[A-Z][A-Z0-9]*([-][A-Z0-9]+)*	{
		asn1p_lval.tv_str = strdup(yytext);
		return TOK_capitalreference;
	}

	/*
	 * typereference, modulereference
	 * NOTE: TOK_objectclassreference must be combined
	 * with this token to produce true typereference.
	 */
[A-Z][A-Za-z0-9]*([-][A-Za-z0-9]+)*	{
		asn1p_lval.tv_str = strdup(yytext);
		return TOK_typereference;
	}

"::="		return TOK_PPEQ;

"..."		return TOK_ThreeDots;
".."		return TOK_TwoDots;
400 
	/*
	 * WITH SYNTAX body, entered via asn1p_lexer_hack_enable_with_syntax().
	 * Capitalised words, commas and nested braces come back as
	 * TOK_Literal, square brackets as themselves, and whitespace is
	 * significant here (TOK_whitespace). Braces are tracked on the state
	 * stack: the closing brace that leaves with_syntax is returned as a
	 * plain '}' instead of a literal.
	 */
<with_syntax>{

	[A-Z][A-Za-z0-9]*([-][A-Za-z0-9]+)*	{
				asn1p_lval.tv_str = strdup(yytext);
				return TOK_Literal;
			}

	","		{
				asn1p_lval.tv_str = strdup(yytext);
				return TOK_Literal;
			}

	"{"		{
				yy_push_state(with_syntax);
				asn1p_lval.tv_str = strdup(yytext);
				return TOK_Literal;
			}

	"["		return '[';
	"]"		return ']';

	{WSP}+		{
			asn1p_lval.tv_opaque.buf = strdup(yytext);
			asn1p_lval.tv_opaque.len = yyleng;
			return TOK_whitespace;
			}

	"}"		{
				yy_pop_state();
				/* Still nested: the brace is part of the syntax text */
				if(YYSTATE == with_syntax) {
					asn1p_lval.tv_str = strdup(yytext);
					return TOK_Literal;
				} else {
					return '}';
				}
			}

}
439 
440 
{WSP}+	/* Ignore whitespace */


	/*
	 * Tuple: two numbers in braces (TableColumn, TableRow), packed into
	 * a_int as (column << 4) + row. The numbers are picked out of the
	 * matched text by scanning for digit runs. The pattern already
	 * limits the column to a single digit 0..7, so only the row range
	 * check can actually fail.
	 */
[{][\t\r\v\f\n ]*[0-7][,][\t\r\v\f\n ]*[0-9]+[\t\r\v\f\n ]*[}]	{
		asn1c_integer_t v1 = -1, v2 = -1;
		char *p;
		/* First digit run: the column */
		for(p = yytext; *p; p++)
			if(*p >= '0' && *p <= '9')
			{ v1 = _lex_atoi(p); break; }
		while(*p >= '0' && *p <= '9') p++;	/* Skip digits */
		/* Second digit run: the row */
		for(; *p; p++) if(*p >= '0' && *p <= '9')
			{ v2 = _lex_atoi(p); break; }
		if(v1 < 0 || v1 > 7) {
			fprintf(stderr, "%s at line %d: X.680:2003, #37.14 "
				"mandates 0..7 range for Tuple's TableColumn\n",
				yytext, yylineno);
			return -1;
		}
		if(v2 < 0 || v2 > 15) {
			fprintf(stderr, "%s at line %d: X.680:2003, #37.14 "
				"mandates 0..15 range for Tuple's TableRow\n",
				yytext, yylineno);
			return -1;
		}
		asn1p_lval.a_int = (v1 << 4) + v2;
		return TOK_tuple;
	}
468 
	/*
	 * Quadruple: four numbers in braces (Group, Plane, Row, Cell). Each
	 * is extracted from the matched text by scanning for the next digit
	 * run, range-checked, and then packed into a_int as
	 * (group << 24) | (plane << 16) | (row << 8) | cell.
	 */
[{][\t\r\v\f\n ]*[0-9]+[,][\t\r\v\f\n ]*[0-9]+[,][\t\r\v\f\n ]*[0-9]+[,][\t\r\v\f\n ]*[0-9]+[\t\r\v\f\n ]*[}]	{
		asn1c_integer_t v1 = -1, v2 = -1, v3 = -1, v4 = -1;
		char *p;
		for(p = yytext; *p; p++)
			if(*p >= '0' && *p <= '9')
			{ v1 = _lex_atoi(p); break; }
		while(*p >= '0' && *p <= '9') p++;	/* Skip digits */
		for(; *p; p++) if(*p >= '0' && *p <= '9')
			{ v2 = _lex_atoi(p); break; }
		while(*p >= '0' && *p <= '9') p++;
		for(; *p; p++) if(*p >= '0' && *p <= '9')
			{ v3 = _lex_atoi(p); break; }
		while(*p >= '0' && *p <= '9') p++;
		for(; *p; p++) if(*p >= '0' && *p <= '9')
			{ v4 = _lex_atoi(p); break; }
		if(v1 < 0 || v1 > 127) {
			fprintf(stderr, "%s at line %d: X.680:2003, #37.12 "
				"mandates 0..127 range for Quadruple's Group\n",
				yytext, yylineno);
			return -1;
		}
		if(v2 < 0 || v2 > 255) {
			fprintf(stderr, "%s at line %d: X.680:2003, #37.12 "
				"mandates 0..255 range for Quadruple's Plane\n",
				yytext, yylineno);
			return -1;
		}
		if(v3 < 0 || v3 > 255) {
			fprintf(stderr, "%s at line %d: X.680:2003, #37.12 "
				"mandates 0..255 range for Quadruple's Row\n",
				yytext, yylineno);
			return -1;
		}
		if(v4 < 0 || v4 > 255) {
			fprintf(stderr, "%s at line %d: X.680:2003, #37.12 "
				"mandates 0..255 range for Quadruple's Cell\n",
				yytext, yylineno);
			return -1;
		}
		asn1p_lval.a_int = (v1 << 24) | (v2 << 16) | (v3 << 8) | v4;
		return TOK_quadruple;
	}
511 
512 
	/*
	 * Double square brackets get their own tokens; the remaining
	 * single-character punctuation is returned as its own character
	 * code.
	 */
"[["        return TOK_VBracketLeft;
"]]"        return TOK_VBracketRight;

[(){},;:|!.&@\[\]^]	return yytext[0];

	/*
	 * Diagnostic for symbols outside the permitted character set; it is
	 * fatal (returns -1) only when TYPE_LIFETIME(1994, 0) holds.
	 * NOTE(review): the bracket expression below contains an unescaped
	 * "[]" pair, so the character class appears to end at that first
	 * closing bracket and the rest of the line is parsed as further
	 * pattern text. Confirm against the flex documentation whether the
	 * brackets should be escaped as in the punctuation rule above.
	 */
[^A-Za-z0-9:=,{}<.@()[]'\"|&^*;!-] {
		if(TYPE_LIFETIME(1994, 0))
			fprintf(stderr, "ERROR: ");
		fprintf(stderr,
		"Symbol '%c' at line %d is prohibited "
		"by ASN.1:1994 and ASN.1:1997\n",
			yytext[0], yylineno);
		if(TYPE_LIFETIME(1994, 0))
			return -1;
	}
528 
	/*
	 * Catch-all for every start condition: report the offending
	 * character, unwind the state stack back to INITIAL and fail
	 * with -1.
	 */
<*>.	{
		fprintf(stderr,
			"Unexpected token at line %d: \"%s\"\n",
			yylineno, yytext);
		while(YYSTATE != INITIAL)
			yy_pop_state();
		/* Never executed: only references the two functions so they count as used */
		if(0) {
			yy_top_state();	/* Just to use this function. */
			yy_fatal_error("Parse error");
		}
		return -1;
}

	/*
	 * End of input in any start condition: unwind the state stack back
	 * to INITIAL before terminating the scan.
	 */
<*><<EOF>>      {
		while(YYSTATE != INITIAL)
			yy_pop_state();
		yyterminate();
	}
547 
548 
549 %%
550 
551 /*
552  * Very dirty but wonderful hack allowing to rule states from within .y file.
553  */
554 void asn1p_lexer_hack_push_opaque_state() { yy_push_state(opaque); }
555 
556 /*
557  * Another hack which disables recognizing some tokens when inside WITH SYNTAX.
558  */
559 void asn1p_lexer_hack_enable_with_syntax() { yy_push_state(with_syntax); }
560 
561 /* Yet another */
562 void asn1p_lexer_hack_push_encoding_control() {
563 	yy_push_state(encoding_control);
564 }
565 
566 static asn1c_integer_t
567 _lex_atoi(const char *ptr) {
568 	asn1c_integer_t value;
569 	if(asn1p_atoi(ptr, &value)) {
570 		fprintf(stderr,
571 			"Value \"%s\" at line %d is too large "
572 			"for this compiler! Please contact the asn1c author.\n",
573 			ptr, yylineno);
574 		errno = ERANGE;
575 	}
576 	return value;
577 }
578 
579 static double
580 _lex_atod(const char *ptr) {
581 	double value;
582 	errno = 0;
583 	value = strtod(ptr, 0);
584 	if(errno) {
585 		fprintf(stderr,
586 			"Value \"%s\" at line %d is outside of `double` range "
587 			"in this compiler! Please contact the asn1c author.\n",
588 			ptr, yylineno);
589 		errno = ERANGE;
590 	}
591 	return value;
592 }
593 
594