1
2
3 /*
4 * This software was developed by U.S. Government employees as part of
5 * their official duties and is not subject to copyright.
6 *
7 * $Log: lexact.c,v $
8 * Revision 1.10 1997/09/24 20:05:38 dar
9 * scanner went into infinite loop with unmatched single quote at eol
10 *
11 * Revision 1.10 1997/09/24 15:56:35 libes
12 * scanner went into infinite loop with unmatched single quote at eol
13 *
14 * Revision 1.9 1997/01/21 20:07:11 dar
15 * made C++ compatible
16 *
17 * Revision 1.8 1997/01/21 19:51:14 libes
18 * add POSIX portability
19 *
20 * Revision 1.7 1995/04/05 13:55:40 clark
21 * CADDETC preval
22 *
23 * Revision 1.6 1994/11/29 20:55:34 clark
24 * fix inline comment bug
25 *
26 * Revision 1.5 1994/11/22 18:32:39 clark
27 * Part 11 IS; group reference
28 *
29 * Revision 1.4 1994/11/10 19:20:03 clark
30 * Update to IS
31 *
32 * Revision 1.3 1994/05/11 19:51:24 libes
33 * numerous fixes
34 *
35 * Revision 1.2 1993/10/15 18:48:48 libes
36 * CADDETC certified
37 *
38 * Revision 1.7 1993/02/22 21:48:00 libes
39 * added decl for strdup
40 *
41 * Revision 1.6 1993/01/19 22:44:17 libes
42 * *** empty log message ***
43 *
44 * Revision 1.5 1992/08/27 23:40:58 libes
45 * remove connection between scanner and rest of EXPRESSinitializations
46 *
47 * Revision 1.4 1992/08/18 17:13:43 libes
48 * rm'd extraneous error messages
49 *
50 * Revision 1.3 1992/06/08 18:06:57 libes
51 * prettied up interface to print_objects_when_running
52 */
53
54 #include <sc_memmgr.h>
55 #include <stdlib.h>
56 #include <ctype.h>
57 #include "express/lexact.h"
58 #include "string.h"
59 #include "express/linklist.h"
60 #include "stack.h"
61 #include "express/hash.h"
62 #include "express/express.h"
63 #include "express/dict.h"
64 #include "express/memory.h"
65 #include "token_type.h"
66 #include "expparse.h"
67 #include "expscan.h"
68
69 extern YYSTYPE yylval;
70
71 Scan_Buffer SCAN_buffers[SCAN_NESTING_DEPTH];
72 int SCAN_current_buffer = 0;
73 char * SCANcurrent;
74
75 Error ERROR_include_file = ERROR_none;
76 Error ERROR_unmatched_close_comment = ERROR_none;
77 Error ERROR_unmatched_open_comment = ERROR_none;
78 Error ERROR_unterminated_string = ERROR_none;
79 Error ERROR_encoded_string_bad_digit = ERROR_none;
80 Error ERROR_encoded_string_bad_count = ERROR_none;
81 Error ERROR_bad_identifier = ERROR_none;
82 Error ERROR_unexpected_character = ERROR_none;
83 Error ERROR_nonascii_char;
84
85
86 extern int yylineno;
87
88 #define SCAN_COMMENT_LENGTH 256
89 static char last_comment_[256] = "";
90 static char * last_comment = 0;
91
92 /* keyword lookup table */
93
94 static Hash_Table keyword_dictionary;
95
96 static struct keyword_entry {
97 char * key;
98 int token;
99 } keywords[] = {
100 { "ABS", TOK_BUILTIN_FUNCTION },
101 { "ABSTRACT", TOK_ABSTRACT },
102 { "ACOS", TOK_BUILTIN_FUNCTION },
103 { "AGGREGATE", TOK_AGGREGATE },
104 { "ALIAS", TOK_ALIAS },
105 { "AND", TOK_AND },
106 { "ANDOR", TOK_ANDOR },
107 { "ARRAY", TOK_ARRAY },
108 { "AS", TOK_AS },
109 { "ASIN", TOK_BUILTIN_FUNCTION },
110 { "ATAN", TOK_BUILTIN_FUNCTION },
111 { "BAG", TOK_BAG },
112 { "BEGIN", TOK_BEGIN },
113 { "BINARY", TOK_BINARY },
114 { "BLENGTH", TOK_BUILTIN_FUNCTION },
115 { "BOOLEAN", TOK_BOOLEAN },
116 { "BY", TOK_BY },
117 { "CASE", TOK_CASE },
118 { "CONST_E", TOK_E },
119 { "CONSTANT", TOK_CONSTANT },
120 { "COS", TOK_BUILTIN_FUNCTION },
121 { "DERIVE", TOK_DERIVE },
122 { "DIV", TOK_DIV },
123 { "ELSE", TOK_ELSE },
124 { "END", TOK_END },
125 { "END_ALIAS", TOK_END_ALIAS },
126 { "END_CASE", TOK_END_CASE },
127 { "END_CONSTANT", TOK_END_CONSTANT },
128 { "END_ENTITY", TOK_END_ENTITY },
129 { "END_FUNCTION", TOK_END_FUNCTION },
130 { "END_IF", TOK_END_IF },
131 { "END_LOCAL", TOK_END_LOCAL },
132 { "END_PROCEDURE", TOK_END_PROCEDURE },
133 { "END_REPEAT", TOK_END_REPEAT },
134 { "END_RULE", TOK_END_RULE },
135 { "END_SCHEMA", TOK_END_SCHEMA },
136 { "END_TYPE", TOK_END_TYPE },
137 { "ENTITY", TOK_ENTITY },
138 { "ENUMERATION", TOK_ENUMERATION },
139 { "ESCAPE", TOK_ESCAPE },
140 { "EXISTS", TOK_BUILTIN_FUNCTION },
141 { "EXP", TOK_BUILTIN_FUNCTION },
142 { "FALSE", TOK_LOGICAL_LITERAL },
143 { "FIXED", TOK_FIXED },
144 { "FOR", TOK_FOR },
145 { "FORMAT", TOK_BUILTIN_FUNCTION },
146 { "FROM", TOK_FROM },
147 { "FUNCTION", TOK_FUNCTION },
148 { "GENERIC", TOK_GENERIC },
149 { "HIBOUND", TOK_BUILTIN_FUNCTION },
150 { "HIINDEX", TOK_BUILTIN_FUNCTION },
151 { "IF", TOK_IF },
152 { "IN", TOK_IN },
153 { "INCLUDE", TOK_INCLUDE },
154 { "INSERT", TOK_BUILTIN_PROCEDURE },
155 { "INTEGER", TOK_INTEGER },
156 { "INVERSE", TOK_INVERSE },
157 { "LENGTH", TOK_BUILTIN_FUNCTION },
158 { "LIKE", TOK_LIKE },
159 { "LIST", TOK_LIST },
160 { "LOBOUND", TOK_BUILTIN_FUNCTION },
161 { "LOCAL", TOK_LOCAL },
162 { "LOG", TOK_BUILTIN_FUNCTION },
163 { "LOG10", TOK_BUILTIN_FUNCTION },
164 { "LOG2", TOK_BUILTIN_FUNCTION },
165 { "LOGICAL", TOK_LOGICAL },
166 { "LOINDEX", TOK_BUILTIN_FUNCTION },
167 { "MOD", TOK_MOD },
168 { "NOT", TOK_NOT },
169 { "NUMBER", TOK_NUMBER },
170 { "NVL", TOK_BUILTIN_FUNCTION },
171 { "ODD", TOK_BUILTIN_FUNCTION },
172 { "OF", TOK_OF },
173 { "ONEOF", TOK_ONEOF },
174 { "OPTIONAL", TOK_OPTIONAL },
175 { "OR", TOK_OR },
176 { "OTHERWISE", TOK_OTHERWISE },
177 { "PI", TOK_PI },
178 { "PROCEDURE", TOK_PROCEDURE },
179 { "QUERY", TOK_QUERY },
180 { "REAL", TOK_REAL },
181 { "REFERENCE", TOK_REFERENCE },
182 { "REMOVE", TOK_BUILTIN_PROCEDURE },
183 { "REPEAT", TOK_REPEAT },
184 { "RETURN", TOK_RETURN },
185 { "ROLESOF", TOK_BUILTIN_FUNCTION },
186 { "RULE", TOK_RULE },
187 { "SCHEMA", TOK_SCHEMA },
188 { "SELECT", TOK_SELECT },
189 { "SELF", TOK_SELF },
190 { "SET", TOK_SET },
191 { "SIN", TOK_BUILTIN_FUNCTION },
192 { "SIZEOF", TOK_BUILTIN_FUNCTION },
193 { "SKIP", TOK_SKIP },
194 { "SQRT", TOK_BUILTIN_FUNCTION },
195 { "STRING", TOK_STRING },
196 { "SUBTYPE", TOK_SUBTYPE },
197 { "SUPERTYPE", TOK_SUPERTYPE },
198 { "TAN", TOK_BUILTIN_FUNCTION },
199 { "THEN", TOK_THEN },
200 { "TO", TOK_TO },
201 { "TRUE", TOK_LOGICAL_LITERAL },
202 { "TYPE", TOK_TYPE },
203 { "TYPEOF", TOK_BUILTIN_FUNCTION },
204 { "UNIQUE", TOK_UNIQUE },
205 { "UNKNOWN", TOK_LOGICAL_LITERAL },
206 { "UNTIL", TOK_UNTIL },
207 { "USE", TOK_USE },
208 { "USEDIN", TOK_BUILTIN_FUNCTION },
209 { "VALUE", TOK_BUILTIN_FUNCTION },
210 { "VALUE_IN", TOK_BUILTIN_FUNCTION },
211 { "VALUE_UNIQUE", TOK_BUILTIN_FUNCTION },
212 { "VAR", TOK_VAR },
213 { "WHERE", TOK_WHERE },
214 { "WHILE", TOK_WHILE },
215 { "XOR", TOK_XOR },
216 { 0, 0}
217 };
218
SCANpush_buffer(char * filename,FILE * fp)219 static void SCANpush_buffer( char * filename, FILE * fp ) {
220 SCANbuffer.savedPos = SCANcurrent;
221 SCANbuffer.lineno = yylineno;
222 yylineno = 1;
223 ++SCAN_current_buffer;
224 #ifdef keep_nul
225 SCANbuffer.numRead = 0;
226 #else
227 *( SCANcurrent = SCANbuffer.text ) = '\0';
228 #endif
229 SCANbuffer.readEof = false;
230 SCANbuffer.file = fp;
231 SCANbuffer.filename = current_filename = filename;
232 }
233
SCANpop_buffer()234 static void SCANpop_buffer() {
235 if( SCANbuffer.file != NULL ) {
236 fclose( SCANbuffer.file );
237 }
238 --SCAN_current_buffer;
239 SCANcurrent = SCANbuffer.savedPos;
240 yylineno = SCANbuffer.lineno + 1; /* DEL */
241 current_filename = SCANbuffer.filename;
242 }
243
SCANinitialize(void)244 void SCANinitialize( void ) {
245 struct keyword_entry * k;
246
247 keyword_dictionary = HASHcreate( 100 ); /* not exact */
248 for( k = keywords; k->key; k++ ) {
249 DICTdefine( keyword_dictionary, k->key, ( Generic )k, 0, OBJ_UNKNOWN );
250 /* not "unknown", but certainly won't be looked up by type! */
251 }
252
253 /* set up errors on first time through */
254 if( ERROR_include_file == ERROR_none ) {
255 ERROR_include_file =
256 ERRORcreate( "Could not open include file `%s'.", SEVERITY_ERROR );
257 ERROR_unmatched_close_comment =
258 ERRORcreate( "unmatched close comment", SEVERITY_ERROR );
259 ERROR_unmatched_open_comment =
260 ERRORcreate( "unmatched open comment", SEVERITY_ERROR );
261 ERROR_unterminated_string =
262 ERRORcreate( "unterminated string literal", SEVERITY_ERROR );
263 ERROR_encoded_string_bad_digit = ERRORcreate(
264 "non-hex digit (%c) in encoded string literal", SEVERITY_ERROR );
265 ERROR_encoded_string_bad_count = ERRORcreate(
266 "number of digits (%d) in encoded string literal is not divisible by 8", SEVERITY_ERROR );
267 ERROR_bad_identifier = ERRORcreate(
268 "identifier (%s) cannot start with underscore", SEVERITY_ERROR );
269 ERROR_unexpected_character = ERRORcreate(
270 "character (%c) is not a valid lexical element by itself", SEVERITY_ERROR );
271 ERROR_nonascii_char = ERRORcreate(
272 "character (0x%x) is not in the EXPRESS character set", SEVERITY_ERROR );
273 }
274 }
275
276 /** Clean up the Scan module */
SCANcleanup(void)277 void SCANcleanup( void ) {
278 ERRORdestroy( ERROR_include_file );
279 ERRORdestroy( ERROR_unmatched_close_comment );
280 ERRORdestroy( ERROR_unmatched_open_comment );
281 ERRORdestroy( ERROR_unterminated_string );
282 ERRORdestroy( ERROR_encoded_string_bad_digit );
283 ERRORdestroy( ERROR_encoded_string_bad_count );
284 ERRORdestroy( ERROR_bad_identifier );
285 ERRORdestroy( ERROR_unexpected_character );
286 ERRORdestroy( ERROR_nonascii_char );
287 }
288
SCANprocess_real_literal(const char * yytext)289 int SCANprocess_real_literal( const char * yytext ) {
290 sscanf( yytext, "%lf", &( yylval.rVal ) );
291 return TOK_REAL_LITERAL;
292 }
293
SCANprocess_integer_literal(const char * yytext)294 int SCANprocess_integer_literal( const char * yytext ) {
295 sscanf( yytext, "%d", &( yylval.iVal ) );
296 return TOK_INTEGER_LITERAL;
297 }
298
SCANprocess_binary_literal(const char * yytext)299 int SCANprocess_binary_literal( const char * yytext ) {
300 yylval.binary = SCANstrdup( yytext + 1 ); /* drop '%' prefix */
301 return TOK_BINARY_LITERAL;
302 }
303
SCANprocess_logical_literal(char * string)304 int SCANprocess_logical_literal( char * string ) {
305 switch( string[0] ) {
306 case 'T':
307 yylval.logical = Ltrue;
308 break;
309 case 'F':
310 yylval.logical = Lfalse;
311 break;
312 default:
313 yylval.logical = Lunknown;
314 break;
315 /* default will actually be triggered by 'UNKNOWN' keyword */
316 }
317 sc_free( string );
318 return TOK_LOGICAL_LITERAL;
319 }
320
SCANprocess_identifier_or_keyword(const char * yytext)321 int SCANprocess_identifier_or_keyword( const char * yytext ) {
322 char * test_string, * dest;
323 const char * src;
324 struct keyword_entry * k;
325 int len;
326
327 /* make uppercase copy */
328 len = strlen( yytext );
329 dest = test_string = ( char * )sc_malloc( len + 1 );
330 for( src = yytext; *src; src++, dest++ ) {
331 *dest = ( islower( *src ) ? toupper( *src ) : *src );
332 }
333 *dest = '\0';
334
335 /* check for language keywords */
336 k = ( struct keyword_entry * )DICTlookup( keyword_dictionary, test_string );
337 if( k ) {
338 switch( k->token ) {
339 case TOK_BUILTIN_FUNCTION:
340 case TOK_BUILTIN_PROCEDURE:
341 break;
342 case TOK_LOGICAL_LITERAL:
343 return SCANprocess_logical_literal( test_string );
344 default:
345 sc_free( test_string );
346 return k->token;
347 }
348 }
349 /* now we have an identifier token */
350 yylval.symbol = SYMBOLcreate( test_string, yylineno, current_filename );
351 if( k ) {
352 /* built-in function/procedure */
353 return( k->token );
354 } else {
355 /* plain identifier */
356 /* translate back to lower-case */
357 SCANlowerize( test_string );
358 return TOK_IDENTIFIER;
359 }
360 }
361
SCANprocess_string(const char * yytext)362 int SCANprocess_string( const char * yytext ) {
363 char * s, *d; /* source, destination */
364
365 /* strip off quotes */
366 yylval.string = SCANstrdup( yytext + 1 ); /* remove 1st single quote */
367
368 /* change pairs of quotes to single quotes */
369 for( s = d = yylval.string; *s; ) {
370 if( *s != '\'' ) {
371 *d++ = *s++;
372 } else if( 0 == strncmp( s, "''", 2 ) ) {
373 *d++ = '\'';
374 s += 2;
375 } else if( *s == '\'' ) {
376 /* trailing quote */
377 *s = '\0';
378 /* if string was unterminated, there will be no */
379 /* quote to remove in which case the scanner has */
380 /* already complained about it */
381 }
382 }
383 *d = '\0';
384
385 return TOK_STRING_LITERAL;
386 }
387
SCANprocess_encoded_string(const char * yytext)388 int SCANprocess_encoded_string( const char * yytext ) {
389 char * s; /* source */
390 int count;
391
392 /* strip off quotes */
393 yylval.string = SCANstrdup( yytext + 1 ); /* remove 1st double quote */
394
395 s = strrchr( yylval.string, '"' );
396 if( s ) {
397 *s = '\0'; /* remove last double quote */
398 }
399 /* if string was unterminated, there will be no quote to remove */
400 /* in which case the scanner has already complained about it */
401
402 count = 0;
403 for( s = yylval.string; *s; s++, count++ ) {
404 if( !isxdigit( *s ) ) {
405 ERRORreport_with_line( ERROR_encoded_string_bad_digit, yylineno, *s );
406 }
407 }
408
409 if( 0 != ( count % 8 ) ) {
410 ERRORreport_with_line( ERROR_encoded_string_bad_count, yylineno, count );
411 }
412
413 return TOK_STRING_LITERAL_ENCODED;
414 }
415
SCANprocess_semicolon(const char * yytext,int commentp)416 int SCANprocess_semicolon( const char * yytext, int commentp ) {
417
418 if( commentp ) {
419 strcpy( last_comment_, strchr( yytext, '-' ) );
420 yylval.string = last_comment_;
421 } else {
422 yylval.string = last_comment;
423 }
424
425 if( last_comment ) {
426 last_comment = 0;
427 }
428
429 return TOK_SEMICOLON;
430 }
431
SCANsave_comment(const char * yytext)432 void SCANsave_comment( const char * yytext ) {
433 strncpy( last_comment_ , yytext, SCAN_COMMENT_LENGTH - 1 );
434 last_comment = last_comment_;
435 }
436
SCANread(void)437 bool SCANread( void ) {
438 int numRead;
439 bool done;
440
441 do {
442 /* this loop is guaranteed to terminate, since buffer[0] is on yyin */
443 while( SCANbuffer.file == NULL ) {
444 SCANpop_buffer();
445 if( SCANtext_ready ) {
446 return true;
447 }
448 }
449
450 /* now we have a file buffer */
451
452 /* check for more stuff already buffered */
453 if( SCANtext_ready ) {
454 return true;
455 }
456
457 /* check whether we've seen eof on this file */
458 if( !SCANbuffer.readEof ) {
459 numRead = fread( SCANbuffer.text, sizeof( char ),
460 SCAN_BUFFER_SIZE, SCANbuffer.file );
461 if( numRead < SCAN_BUFFER_SIZE ) {
462 SCANbuffer.readEof = true;
463 }
464 #ifdef keep_nul
465 SCANbuffer.numRead = numRead;
466 #else
467 SCANbuffer.text[numRead] = '\0';
468 #endif
469 SCANcurrent = SCANbuffer.text;
470 }
471
472 if( !( done = SCANtext_ready ) ) {
473 if( SCAN_current_buffer == 0 ) {
474 done = true;
475 fclose( SCANbuffer.file ); /* close yyin */
476 SCANbuffer.file = NULL;
477 } else {
478 SCANpop_buffer();
479 }
480 }
481 } while( !done );
482 return SCANtext_ready;
483 }
484
485
SCANinclude_file(char * filename)486 void SCANinclude_file( char * filename ) {
487 extern int print_objects_while_running;
488 FILE * fp;
489
490 if( ( fp = fopen( filename, "r" ) ) == NULL ) {
491 ERRORreport_with_line( ERROR_include_file, yylineno );
492 } else {
493 if( print_objects_while_running & OBJ_SCHEMA_BITS ) {
494 fprintf( stderr, "parse: including %s at line %d of %s\n",
495 filename, yylineno, SCANbuffer.filename );
496 }
497 SCANpush_buffer( filename, fp );
498 }
499 }
500
/**
 * Convert a string to lower case in place.
 *
 * Only characters for which isupper() is true are written, so a string
 * without upper-case letters is not modified at all.  Characters are
 * converted to unsigned char before being handed to <ctype.h> functions,
 * which are undefined for negative values other than EOF.
 *
 * \param s NUL-terminated, writable string
 */
void SCANlowerize( char * s ) {
    for( ; *s; s++ ) {
        const unsigned char c = ( unsigned char )*s;

        if( isupper( c ) ) {
            *s = ( char )tolower( c );
        }
    }
}
508
/**
 * Convert a string to upper case in place.
 *
 * Only characters for which islower() is true are written, so a string
 * without lower-case letters is not modified at all.  Characters are
 * converted to unsigned char before being handed to <ctype.h> functions,
 * which are undefined for negative values other than EOF.
 *
 * \param s NUL-terminated, writable string
 */
void SCANupperize( char * s ) {
    for( ; *s; s++ ) {
        const unsigned char c = ( unsigned char )*s;

        if( islower( c ) ) {
            *s = ( char )toupper( c );
        }
    }
}
516
/**
 * Duplicate a string into memory obtained from sc_malloc().
 *
 * \param s NUL-terminated string to copy; must not be null
 * \return the new copy, or null if the allocation failed
 */
char * SCANstrdup( const char * s ) {
    const size_t bytes = strlen( s ) + 1; /* text plus terminating NUL */
    char * copy = ( char * )sc_malloc( bytes );

    if( copy ) {
        memcpy( copy, s, bytes );
    }
    return copy;
}
526
SCANtell()527 long SCANtell() {
528 return yylineno;
529 }
530