1 
2 
3 /*
4  * This software was developed by U.S. Government employees as part of
5  * their official duties and is not subject to copyright.
6  *
7  * $Log: lexact.c,v $
8  * Revision 1.10  1997/09/24 20:05:38  dar
9  * scanner went into infinite loop with unmatched single quote at eol
10  *
11  * Revision 1.10  1997/09/24 15:56:35  libes
12  * scanner went into infinite loop with unmatched single quote at eol
13  *
14  * Revision 1.9  1997/01/21 20:07:11  dar
15  * made C++ compatible
16  *
17  * Revision 1.8  1997/01/21  19:51:14  libes
18  * add POSIX portability
19  *
20  * Revision 1.7  1995/04/05  13:55:40  clark
21  * CADDETC preval
22  *
23  * Revision 1.6  1994/11/29  20:55:34  clark
24  * fix inline comment bug
25  *
26  * Revision 1.5  1994/11/22  18:32:39  clark
27  * Part 11 IS; group reference
28  *
29  * Revision 1.4  1994/11/10  19:20:03  clark
30  * Update to IS
31  *
32  * Revision 1.3  1994/05/11  19:51:24  libes
33  * numerous fixes
34  *
35  * Revision 1.2  1993/10/15  18:48:48  libes
36  * CADDETC certified
37  *
38  * Revision 1.7  1993/02/22  21:48:00  libes
39  * added decl for strdup
40  *
41  * Revision 1.6  1993/01/19  22:44:17  libes
42  * *** empty log message ***
43  *
44  * Revision 1.5  1992/08/27  23:40:58  libes
45  * remove connection between scanner and rest of EXPRESSinitializations
46  *
47  * Revision 1.4  1992/08/18  17:13:43  libes
48  * rm'd extraneous error messages
49  *
50  * Revision 1.3  1992/06/08  18:06:57  libes
51  * prettied up interface to print_objects_when_running
52  */
53 
#include <sc_memmgr.h>
#include <stdlib.h>
#include <ctype.h>
#include <limits.h>
#include <stdio.h>
#include "express/lexact.h"
#include "string.h"
#include "express/linklist.h"
#include "stack.h"
#include "express/hash.h"
#include "express/express.h"
#include "express/dict.h"
#include "express/memory.h"
#include "token_type.h"
#include "expparse.h"
#include "expscan.h"
68 
69 extern YYSTYPE yylval;
70 
71 Scan_Buffer SCAN_buffers[SCAN_NESTING_DEPTH];
72 int     SCAN_current_buffer = 0;
73 char    *   SCANcurrent;
74 
75 Error       ERROR_include_file              = ERROR_none;
76 Error       ERROR_unmatched_close_comment   = ERROR_none;
77 Error       ERROR_unmatched_open_comment    = ERROR_none;
78 Error       ERROR_unterminated_string       = ERROR_none;
79 Error       ERROR_encoded_string_bad_digit  = ERROR_none;
80 Error       ERROR_encoded_string_bad_count  = ERROR_none;
81 Error       ERROR_bad_identifier            = ERROR_none;
82 Error       ERROR_unexpected_character      = ERROR_none;
83 Error       ERROR_nonascii_char;
84 
85 
86 extern int      yylineno;
87 
88 #define SCAN_COMMENT_LENGTH 256
89 static char     last_comment_[256] = "";
90 static char   *  last_comment = 0;
91 
92 /* keyword lookup table */
93 
94 static Hash_Table   keyword_dictionary;
95 
96 static struct keyword_entry {
97     char * key;
98     int  token;
99 } keywords[] = {
100     { "ABS",        TOK_BUILTIN_FUNCTION },
101     { "ABSTRACT",       TOK_ABSTRACT },
102     { "ACOS",       TOK_BUILTIN_FUNCTION },
103     { "AGGREGATE",      TOK_AGGREGATE },
104     { "ALIAS",      TOK_ALIAS },
105     { "AND",        TOK_AND },
106     { "ANDOR",      TOK_ANDOR },
107     { "ARRAY",      TOK_ARRAY },
108     { "AS",         TOK_AS },
109     { "ASIN",       TOK_BUILTIN_FUNCTION },
110     { "ATAN",       TOK_BUILTIN_FUNCTION },
111     { "BAG",        TOK_BAG },
112     { "BEGIN",      TOK_BEGIN },
113     { "BINARY",     TOK_BINARY },
114     { "BLENGTH",        TOK_BUILTIN_FUNCTION },
115     { "BOOLEAN",        TOK_BOOLEAN },
116     { "BY",         TOK_BY },
117     { "CASE",       TOK_CASE },
118     { "CONST_E",        TOK_E },
119     { "CONSTANT",       TOK_CONSTANT },
120     { "COS",        TOK_BUILTIN_FUNCTION },
121     { "DERIVE",     TOK_DERIVE },
122     { "DIV",        TOK_DIV },
123     { "ELSE",       TOK_ELSE },
124     { "END",        TOK_END },
125     { "END_ALIAS",      TOK_END_ALIAS },
126     { "END_CASE",       TOK_END_CASE },
127     { "END_CONSTANT",   TOK_END_CONSTANT },
128     { "END_ENTITY",     TOK_END_ENTITY },
129     { "END_FUNCTION",   TOK_END_FUNCTION },
130     { "END_IF",     TOK_END_IF },
131     { "END_LOCAL",      TOK_END_LOCAL },
132     { "END_PROCEDURE",  TOK_END_PROCEDURE },
133     { "END_REPEAT",     TOK_END_REPEAT },
134     { "END_RULE",       TOK_END_RULE },
135     { "END_SCHEMA",     TOK_END_SCHEMA },
136     { "END_TYPE",       TOK_END_TYPE },
137     { "ENTITY",     TOK_ENTITY },
138     { "ENUMERATION",    TOK_ENUMERATION },
139     { "ESCAPE",     TOK_ESCAPE },
140     { "EXISTS",     TOK_BUILTIN_FUNCTION },
141     { "EXP",        TOK_BUILTIN_FUNCTION },
142     { "FALSE",      TOK_LOGICAL_LITERAL },
143     { "FIXED",      TOK_FIXED },
144     { "FOR",        TOK_FOR },
145     { "FORMAT",     TOK_BUILTIN_FUNCTION },
146     { "FROM",       TOK_FROM },
147     { "FUNCTION",       TOK_FUNCTION },
148     { "GENERIC",        TOK_GENERIC },
149     { "HIBOUND",        TOK_BUILTIN_FUNCTION },
150     { "HIINDEX",        TOK_BUILTIN_FUNCTION },
151     { "IF",         TOK_IF },
152     { "IN",         TOK_IN },
153     { "INCLUDE",        TOK_INCLUDE },
154     { "INSERT",     TOK_BUILTIN_PROCEDURE },
155     { "INTEGER",        TOK_INTEGER },
156     { "INVERSE",        TOK_INVERSE },
157     { "LENGTH",     TOK_BUILTIN_FUNCTION },
158     { "LIKE",       TOK_LIKE },
159     { "LIST",       TOK_LIST },
160     { "LOBOUND",        TOK_BUILTIN_FUNCTION },
161     { "LOCAL",      TOK_LOCAL },
162     { "LOG",        TOK_BUILTIN_FUNCTION },
163     { "LOG10",      TOK_BUILTIN_FUNCTION },
164     { "LOG2",       TOK_BUILTIN_FUNCTION },
165     { "LOGICAL",        TOK_LOGICAL },
166     { "LOINDEX",        TOK_BUILTIN_FUNCTION },
167     { "MOD",        TOK_MOD },
168     { "NOT",        TOK_NOT },
169     { "NUMBER",     TOK_NUMBER },
170     { "NVL",        TOK_BUILTIN_FUNCTION },
171     { "ODD",        TOK_BUILTIN_FUNCTION },
172     { "OF",         TOK_OF },
173     { "ONEOF",      TOK_ONEOF },
174     { "OPTIONAL",       TOK_OPTIONAL },
175     { "OR",         TOK_OR },
176     { "OTHERWISE",      TOK_OTHERWISE },
177     { "PI",         TOK_PI },
178     { "PROCEDURE",      TOK_PROCEDURE },
179     { "QUERY",      TOK_QUERY },
180     { "REAL",       TOK_REAL },
181     { "REFERENCE",      TOK_REFERENCE },
182     { "REMOVE",     TOK_BUILTIN_PROCEDURE },
183     { "REPEAT",     TOK_REPEAT },
184     { "RETURN",     TOK_RETURN },
185     { "ROLESOF",        TOK_BUILTIN_FUNCTION },
186     { "RULE",       TOK_RULE },
187     { "SCHEMA",     TOK_SCHEMA },
188     { "SELECT",     TOK_SELECT },
189     { "SELF",       TOK_SELF },
190     { "SET",        TOK_SET },
191     { "SIN",        TOK_BUILTIN_FUNCTION },
192     { "SIZEOF",     TOK_BUILTIN_FUNCTION },
193     { "SKIP",       TOK_SKIP },
194     { "SQRT",       TOK_BUILTIN_FUNCTION },
195     { "STRING",     TOK_STRING },
196     { "SUBTYPE",        TOK_SUBTYPE },
197     { "SUPERTYPE",      TOK_SUPERTYPE },
198     { "TAN",        TOK_BUILTIN_FUNCTION },
199     { "THEN",       TOK_THEN },
200     { "TO",         TOK_TO },
201     { "TRUE",       TOK_LOGICAL_LITERAL },
202     { "TYPE",       TOK_TYPE },
203     { "TYPEOF",     TOK_BUILTIN_FUNCTION },
204     { "UNIQUE",     TOK_UNIQUE },
205     { "UNKNOWN",        TOK_LOGICAL_LITERAL },
206     { "UNTIL",      TOK_UNTIL },
207     { "USE",        TOK_USE },
208     { "USEDIN",     TOK_BUILTIN_FUNCTION },
209     { "VALUE",      TOK_BUILTIN_FUNCTION },
210     { "VALUE_IN",       TOK_BUILTIN_FUNCTION },
211     { "VALUE_UNIQUE",   TOK_BUILTIN_FUNCTION },
212     { "VAR",        TOK_VAR },
213     { "WHERE",      TOK_WHERE },
214     { "WHILE",      TOK_WHILE },
215     { "XOR",        TOK_XOR },
216     { 0,            0}
217 };
218 
SCANpush_buffer(char * filename,FILE * fp)219 static void SCANpush_buffer( char * filename, FILE * fp ) {
220     SCANbuffer.savedPos = SCANcurrent;
221     SCANbuffer.lineno = yylineno;
222     yylineno = 1;
223     ++SCAN_current_buffer;
224 #ifdef keep_nul
225     SCANbuffer.numRead = 0;
226 #else
227     *( SCANcurrent = SCANbuffer.text ) = '\0';
228 #endif
229     SCANbuffer.readEof = false;
230     SCANbuffer.file = fp;
231     SCANbuffer.filename = current_filename = filename;
232 }
233 
SCANpop_buffer()234 static void SCANpop_buffer() {
235     if( SCANbuffer.file != NULL ) {
236         fclose( SCANbuffer.file );
237     }
238     --SCAN_current_buffer;
239     SCANcurrent = SCANbuffer.savedPos;
240     yylineno = SCANbuffer.lineno + 1;   /* DEL */
241     current_filename = SCANbuffer.filename;
242 }
243 
SCANinitialize(void)244 void SCANinitialize( void ) {
245     struct keyword_entry * k;
246 
247     keyword_dictionary = HASHcreate( 100 ); /* not exact */
248     for( k = keywords; k->key; k++ ) {
249         DICTdefine( keyword_dictionary, k->key, ( Generic )k, 0, OBJ_UNKNOWN );
250         /* not "unknown", but certainly won't be looked up by type! */
251     }
252 
253     /* set up errors on first time through */
254     if( ERROR_include_file == ERROR_none ) {
255         ERROR_include_file =
256             ERRORcreate( "Could not open include file `%s'.", SEVERITY_ERROR );
257         ERROR_unmatched_close_comment =
258             ERRORcreate( "unmatched close comment", SEVERITY_ERROR );
259         ERROR_unmatched_open_comment =
260             ERRORcreate( "unmatched open comment", SEVERITY_ERROR );
261         ERROR_unterminated_string =
262             ERRORcreate( "unterminated string literal", SEVERITY_ERROR );
263         ERROR_encoded_string_bad_digit = ERRORcreate(
264                                              "non-hex digit (%c) in encoded string literal", SEVERITY_ERROR );
265         ERROR_encoded_string_bad_count = ERRORcreate(
266                                              "number of digits (%d) in encoded string literal is not divisible by 8", SEVERITY_ERROR );
267         ERROR_bad_identifier = ERRORcreate(
268                                    "identifier (%s) cannot start with underscore", SEVERITY_ERROR );
269         ERROR_unexpected_character = ERRORcreate(
270                                          "character (%c) is not a valid lexical element by itself", SEVERITY_ERROR );
271         ERROR_nonascii_char = ERRORcreate(
272                                   "character (0x%x) is not in the EXPRESS character set", SEVERITY_ERROR );
273     }
274 }
275 
276 /** Clean up the Scan module */
SCANcleanup(void)277 void SCANcleanup( void ) {
278     ERRORdestroy( ERROR_include_file );
279     ERRORdestroy( ERROR_unmatched_close_comment );
280     ERRORdestroy( ERROR_unmatched_open_comment );
281     ERRORdestroy( ERROR_unterminated_string );
282     ERRORdestroy( ERROR_encoded_string_bad_digit );
283     ERRORdestroy( ERROR_encoded_string_bad_count );
284     ERRORdestroy( ERROR_bad_identifier );
285     ERRORdestroy( ERROR_unexpected_character );
286     ERRORdestroy( ERROR_nonascii_char );
287 }
288 
SCANprocess_real_literal(const char * yytext)289 int SCANprocess_real_literal( const char * yytext ) {
290     sscanf( yytext, "%lf", &( yylval.rVal ) );
291     return TOK_REAL_LITERAL;
292 }
293 
SCANprocess_integer_literal(const char * yytext)294 int SCANprocess_integer_literal( const char * yytext ) {
295     sscanf( yytext, "%d", &( yylval.iVal ) );
296     return TOK_INTEGER_LITERAL;
297 }
298 
SCANprocess_binary_literal(const char * yytext)299 int SCANprocess_binary_literal( const char * yytext ) {
300     yylval.binary = SCANstrdup( yytext + 1 ); /* drop '%' prefix */
301     return TOK_BINARY_LITERAL;
302 }
303 
SCANprocess_logical_literal(char * string)304 int SCANprocess_logical_literal( char * string ) {
305     switch( string[0] ) {
306         case 'T':
307             yylval.logical = Ltrue;
308             break;
309         case 'F':
310             yylval.logical = Lfalse;
311             break;
312         default:
313             yylval.logical = Lunknown;
314             break;
315             /* default will actually be triggered by 'UNKNOWN' keyword */
316     }
317     sc_free( string );
318     return TOK_LOGICAL_LITERAL;
319 }
320 
SCANprocess_identifier_or_keyword(const char * yytext)321 int SCANprocess_identifier_or_keyword( const char * yytext ) {
322     char * test_string, * dest;
323     const char * src;
324     struct keyword_entry * k;
325     int len;
326 
327     /* make uppercase copy */
328     len = strlen( yytext );
329     dest = test_string = ( char * )sc_malloc( len + 1 );
330     for( src = yytext; *src; src++, dest++ ) {
331         *dest = ( islower( *src ) ? toupper( *src ) : *src );
332     }
333     *dest = '\0';
334 
335     /* check for language keywords */
336     k = ( struct keyword_entry * )DICTlookup( keyword_dictionary, test_string );
337     if( k ) {
338         switch( k->token ) {
339             case TOK_BUILTIN_FUNCTION:
340             case TOK_BUILTIN_PROCEDURE:
341                 break;
342             case TOK_LOGICAL_LITERAL:
343                 return SCANprocess_logical_literal( test_string );
344             default:
345                 sc_free( test_string );
346                 return k->token;
347         }
348     }
349     /* now we have an identifier token */
350     yylval.symbol = SYMBOLcreate( test_string, yylineno, current_filename );
351     if( k ) {
352         /* built-in function/procedure */
353         return( k->token );
354     } else {
355         /* plain identifier */
356         /* translate back to lower-case */
357         SCANlowerize( test_string );
358         return TOK_IDENTIFIER;
359     }
360 }
361 
SCANprocess_string(const char * yytext)362 int SCANprocess_string( const char * yytext ) {
363     char * s, *d;   /* source, destination */
364 
365     /* strip off quotes */
366     yylval.string = SCANstrdup( yytext + 1 ); /* remove 1st single quote */
367 
368     /* change pairs of quotes to single quotes */
369     for( s = d = yylval.string; *s; ) {
370         if( *s != '\'' ) {
371             *d++ = *s++;
372         } else if( 0 == strncmp( s, "''", 2 ) ) {
373             *d++ = '\'';
374             s += 2;
375         } else if( *s == '\'' ) {
376             /* trailing quote */
377             *s = '\0';
378             /* if string was unterminated, there will be no */
379             /* quote to remove in which case the scanner has */
380             /* already complained about it */
381         }
382     }
383     *d = '\0';
384 
385     return TOK_STRING_LITERAL;
386 }
387 
SCANprocess_encoded_string(const char * yytext)388 int SCANprocess_encoded_string( const char * yytext ) {
389     char * s;   /* source */
390     int count;
391 
392     /* strip off quotes */
393     yylval.string = SCANstrdup( yytext + 1 ); /* remove 1st double quote */
394 
395     s = strrchr( yylval.string, '"' );
396     if( s ) {
397         *s = '\0';    /* remove last double quote */
398     }
399     /* if string was unterminated, there will be no quote to remove */
400     /* in which case the scanner has already complained about it */
401 
402     count = 0;
403     for( s = yylval.string; *s; s++, count++ ) {
404         if( !isxdigit( *s ) ) {
405             ERRORreport_with_line( ERROR_encoded_string_bad_digit, yylineno, *s );
406         }
407     }
408 
409     if( 0 != ( count % 8 ) ) {
410         ERRORreport_with_line( ERROR_encoded_string_bad_count, yylineno, count );
411     }
412 
413     return TOK_STRING_LITERAL_ENCODED;
414 }
415 
SCANprocess_semicolon(const char * yytext,int commentp)416 int SCANprocess_semicolon( const char * yytext, int commentp ) {
417 
418     if( commentp ) {
419         strcpy( last_comment_, strchr( yytext, '-' ) );
420         yylval.string = last_comment_;
421     } else {
422         yylval.string = last_comment;
423     }
424 
425     if( last_comment ) {
426         last_comment = 0;
427     }
428 
429     return TOK_SEMICOLON;
430 }
431 
SCANsave_comment(const char * yytext)432 void SCANsave_comment( const char * yytext ) {
433     strncpy( last_comment_ , yytext, SCAN_COMMENT_LENGTH - 1 );
434     last_comment = last_comment_;
435 }
436 
SCANread(void)437 bool SCANread( void ) {
438     int     numRead;
439     bool done;
440 
441     do {
442         /* this loop is guaranteed to terminate, since buffer[0] is on yyin */
443         while( SCANbuffer.file == NULL ) {
444             SCANpop_buffer();
445             if( SCANtext_ready ) {
446                 return true;
447             }
448         }
449 
450         /* now we have a file buffer */
451 
452         /* check for more stuff already buffered */
453         if( SCANtext_ready ) {
454             return true;
455         }
456 
457         /* check whether we've seen eof on this file */
458         if( !SCANbuffer.readEof ) {
459             numRead = fread( SCANbuffer.text, sizeof( char ),
460                              SCAN_BUFFER_SIZE, SCANbuffer.file );
461             if( numRead < SCAN_BUFFER_SIZE ) {
462                 SCANbuffer.readEof = true;
463             }
464 #ifdef keep_nul
465             SCANbuffer.numRead = numRead;
466 #else
467             SCANbuffer.text[numRead] = '\0';
468 #endif
469             SCANcurrent = SCANbuffer.text;
470         }
471 
472         if( !( done = SCANtext_ready ) ) {
473             if( SCAN_current_buffer == 0 ) {
474                 done = true;
475                 fclose( SCANbuffer.file ); /* close yyin */
476                 SCANbuffer.file = NULL;
477             } else {
478                 SCANpop_buffer();
479             }
480         }
481     } while( !done );
482     return SCANtext_ready;
483 }
484 
485 
SCANinclude_file(char * filename)486 void SCANinclude_file( char * filename ) {
487     extern int print_objects_while_running;
488     FILE * fp;
489 
490     if( ( fp = fopen( filename, "r" ) ) == NULL ) {
491         ERRORreport_with_line( ERROR_include_file, yylineno );
492     } else {
493         if( print_objects_while_running & OBJ_SCHEMA_BITS ) {
494             fprintf( stderr, "parse: including %s at line %d of %s\n",
495                      filename, yylineno, SCANbuffer.filename );
496         }
497         SCANpush_buffer( filename, fp );
498     }
499 }
500 
/**
 * Convert a string to lower case in place.
 *
 * \param s NUL-terminated string to modify
 */
void SCANlowerize( char * s ) {
    for( ; *s; s++ ) {
        /* <ctype.h> functions need an unsigned char value; a negative plain char is undefined */
        /* tolower() returns characters that are not upper case unchanged */
        *s = ( char )tolower( ( unsigned char )*s );
    }
}
508 
/**
 * Convert a string to upper case in place.
 *
 * \param s NUL-terminated string to modify
 */
void SCANupperize( char * s ) {
    for( ; *s; s++ ) {
        /* <ctype.h> functions need an unsigned char value; a negative plain char is undefined */
        /* toupper() returns characters that are not lower case unchanged */
        *s = ( char )toupper( ( unsigned char )*s );
    }
}
516 
/**
 * Duplicate a string into storage obtained from sc_malloc().
 *
 * \param s NUL-terminated string to copy
 * \return the copy, or a null pointer if sc_malloc() returned one
 */
char * SCANstrdup( const char * s ) {
    char * copy = ( char * )sc_malloc( strlen( s ) + 1 );

    if( copy ) {
        strcpy( copy, s );
    }
    return copy;
}
526 
SCANtell()527 long SCANtell() {
528     return yylineno;
529 }
530