1 /* $Id: keywords.c,v 1.13 2003/06/05 00:19:59 landrito Exp $
2 
3 	Routines to distinguish keywords from identifiers.
4 
5 
6 Copyright (c) 2001 by Robert K. Moniot.
7 
8 Permission is hereby granted, free of charge, to any person
9 obtaining a copy of this software and associated documentation
10 files (the "Software"), to deal in the Software without
11 restriction, including without limitation the rights to use,
12 copy, modify, merge, publish, distribute, sublicense, and/or
13 sell copies of the Software, and to permit persons to whom the
14 Software is furnished to do so, subject to the following
15 conditions:
16 
17 The above copyright notice and this permission notice shall be
18 included in all copies or substantial portions of the
19 Software.
20 
21 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
22 KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
23 WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
24 PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
25 COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
26 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
27 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
28 SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
29 
30 Acknowledgement: the above permission notice is what is known
31 as the "MIT License."
32 */
33 
34 
35 #include <stdio.h>
36 #include <ctype.h>
37 #include <string.h>
38 
39 #include "ftnchek.h"
40 #include "symtab.h"
41 #include "tokdefs.h"
42 #include "forlex.h"
43 
44 extern int in_attrbased_typedecl; /* shared with fortran.y */
45 
46 PROTO( PRIVATE int is_keyword,( int i ));
47 
48 /*
49 
50   Keyword definition table holds names of keywords and flags used
51   to differentiate them from identifiers.
52 
53 */
54 
55 #define IK 0x01	/* Allowed only in initial keyword of a statement (can be
56 		   preceded only by non-EK keywords) */
57 #define NP 0x02	/* Never followed by ( or =  */
58 #define MP 0x04	/* Must be followed by ( */
59 #define NI 0x08	/* Disallowed in logical IF */
60 #define EK 0x10	/* Cannot be followed by IK keyword: turns off initial_flag */
61 #define TY 0x20	/* Data type name */
62 #define NA 0x40	/* Never followed by alphabetic.  Put this onto any that
63 		   can stand alone or be followed by a second keyword, so
64 		   that the longer form will be taken if present. */
65 #define CN 0x80 /* Can be preceded by construct-name and colon.  (Only set
66 		   this flag if there is support in the parser.) */
67 #define MB 0x100/* Blanks mandatory between two words (in free form) */
68 #define TK 0x200/* Type name that can be followed by kind-spec or len-spec */
69 
70 				/* Bisection search done at each
71 				   length step requires fixed-length
72 				   strings for keyword names.  Macro
73 				   below identifies which keyword is
74 				   the longest.
75 				 */
76 #define LONGEST_KEYWORD "DOUBLEPRECISION"
77 
78 				/* Number of keywords in table */
79 #define NUM_KEYWORDS (sizeof(keywords)/sizeof(keywords[0]))
80 
81 	/* Macro to determine whether a token class C is that of a data
82 	   type (for purposes of is_keyword) */
83 #ifndef OLDDEF
84 #define is_a_type_token(C) (((C)>=keytab_offset&&\
85 	        (unsigned)((C)-keytab_offset)<keytab_size)?\
86 		  (keywords[keytab_index[(C)-keytab_offset]].context&TY):FALSE)
87 #else
88 				/* This is a simpler defn that will work
89 				   for is_keyword's needs. */
90 #define is_a_type_token(C) ((C)>=tok_BYTE && ((C)<=tok_REAL))
91 #endif
92 
93 		/* Keyword list must be maintained in alphabetical
94 		   order.  New keywords can be added so long as their
95 		   context info is specified.  No other source code
96 		   changes are necessary, but of course new keywords
97 		   won't be recognized by the parser till you add
98 		   productions to fortran.y.  Also, if IK flag is not
99 		   set, is_keyword will have to look at it specially.
100 		   Field split_pos is 0 if keyword cannot be split; otherwise
101 		   it is the length of the first moiety.  END BLOCK DATA
102 		   is the only 3-word keyword, and the implementation happens
103 		   to check its "END" based on ENDSUBROUTINE's split_pos so OK.
104 		 */
105 PRIVATE struct {
106 	char name[sizeof(LONGEST_KEYWORD)];
107 	short tclass,		/* token class */
108 	      context;		/* local-context flags */
109 	short split_pos;	/* where keyword may have space */
110 } keywords[]={
111 {"ACCEPT",	tok_ACCEPT,	IK | EK,			0},
112 {"ALLOCATABLE", tok_ALLOCATABLE,        IK | NI | EK | NP,      0},
113 {"ALLOCATE",	tok_ALLOCATE,	IK | MP | NI | EK,		0},
114 {"ASSIGN",	tok_ASSIGN,	IK | NP | EK | NA,		0},
115 {"BACKSPACE",	tok_BACKSPACE,	IK | EK,			0},
116 {"BLOCKDATA",	tok_BLOCKDATA,	IK | EK | NP | NI,		5},
117 {"BYTE",	tok_BYTE,	IK | NI | EK | TY,		0},
118 {"CALL",	tok_CALL,	IK | NP | EK,			0},
119 {"CASE",	tok_CASE,	IK | MP | NI | EK | NA,		0},
120 {"CASEDEFAULT",	tok_CASEDEFAULT,IK | NP | NI | EK | MB,		4},
121 {"CHARACTER",	tok_CHARACTER,	IK | NI | EK | TY | TK,		0},
122 {"CLOSE",	tok_CLOSE,	IK | EK | MP | NA,		0},
123 {"COMMON",	tok_COMMON,	IK | NP | NI | EK,		0},
124 {"COMPLEX",	tok_COMPLEX,	IK | NI | EK | TY | TK,		0},
125 {"CONTINUE",	tok_CONTINUE,	IK | NP | EK | NA,		0},
126 {"CYCLE",	tok_CYCLE,	IK | NP | EK,			0},
127 {"DATA",	tok_DATA,	IK | NI | EK,			0},
128 {"DEALLOCATE",	tok_DEALLOCATE,	IK | MP | NI | EK,		0},
129 {"DIMENSION",	tok_DIMENSION,	IK | NP | NI | EK,		0},
130 {"DO",		tok_DO,		IK | NP | NI | EK | CN,		0},
131 {"DOUBLECOMPLEX",tok_DOUBLECOMPLEX,	IK | NI | EK | TY,	6},
132 {"DOUBLEPRECISION",tok_DOUBLEPRECISION,	IK | NI | EK | TY,	6},
133 {"DOWHILE",	tok_DOWHILE,	IK | NI | EK | MB,		2},
134 {"ELSE",	tok_ELSE,	IK | NP | NI,			0},
135 #if 0	/* ELSEIF not lexed: lexes ELSE and IF separately */
136 {"ELSEIF",	tok_ELSEIF,	IK | NI | EK | MP | NA,		4},
137 #endif
138 {"END",		tok_END,	IK | NP | NI | NA,		0},
139 {"ENDBLOCKDATA",tok_ENDBLOCKDATA,IK | NP | NI | EK,		8},
140 {"ENDDO",	tok_ENDDO,	IK | NP | NI | EK,		3},
141 {"ENDFILE",	tok_ENDFILE,	IK | EK,			3},
142 {"ENDFUNCTION",	tok_ENDFUNCTION,IK | NP | NI | EK,		3},
143 {"ENDIF",	tok_ENDIF,	IK | NP | NI | EK,		3},
144 {"ENDPROGRAM",	tok_ENDPROGRAM,	IK | NP | NI | EK,		3},
145 {"ENDSELECT",	tok_ENDSELECT,	IK | NP | NI | EK,		3},
146 {"ENDSUBROUTINE",tok_ENDSUBROUTINE,	IK | NP | NI | EK,	3},
147 {"ENTRY",	tok_ENTRY,	IK | NP | NI | EK,		0},
148 {"EQUIVALENCE",	tok_EQUIVALENCE,IK | NI | EK | MP | NA,		0},
149 {"EXIT",	tok_EXIT,	IK | NP | EK,			0},
150 {"EXTERNAL",	tok_EXTERNAL,	IK | NP | NI | EK,		0},
151 {"FORMAT",	tok_FORMAT,	IK | NI | EK | MP | NA,		0},
152 {"FUNCTION",	tok_FUNCTION,	NP | NI | EK,			0},
153 {"GOTO",	tok_GOTO,	IK | EK,			2},
154 {"IF",		tok_IF,		IK | NI | EK | MP | NA | CN,	0},
155 {"IMPLICIT",	tok_IMPLICIT,	IK | NP | NI,			0},
156 {"INCLUDE",	tok_INCLUDE,	IK | NP | NI | EK | NA,		0},
157 {"INQUIRE",	tok_INQUIRE,	IK | EK | MP | NA,		0},
158 {"INTEGER",	tok_INTEGER,	IK | NI | EK | TY | TK,		0},
159 {"INTRINSIC",	tok_INTRINSIC,	IK | NP | NI | EK,		0},
160 {"LOGICAL",	tok_LOGICAL,	IK | NI | EK | TY | TK,		0},
161 {"NAMELIST",	tok_NAMELIST,	IK | NP | NI | EK,		0},
162 {"NONE",	tok_NONE,	IK | NI | EK | TY | NA,		0},
163 {"NULLIFY",	tok_NULLIFY,	IK | MP | NI | EK,		0},
164 {"OPEN",	tok_OPEN,	IK | EK | MP | NA,		0},
165 {"PARAMETER",	tok_PARAMETER,	IK | NI | EK | MP | NA,		0},
166 {"PAUSE",	tok_PAUSE,	IK | NP | EK,			0},
167 #ifdef ALLOW_CRAY_POINTERS
168 {"POINTER",     tok_POINTER,    IK | NI | EK,			0},
169 #endif
170 {"PRINT",	tok_PRINT,	IK | EK,			0},
171 {"PROGRAM",	tok_PROGRAM,	IK | NP | NI | EK,		0},
172 {"READ",	tok_READ,	IK | EK,			0},
173 {"REAL",	tok_REAL,	IK | NI | EK | TY | TK,		0},
174 {"RETURN",	tok_RETURN,	IK | EK,			0},
175 {"REWIND",	tok_REWIND,	IK | EK,			0},
176 {"SAVE",	tok_SAVE,	IK | NP | NI | EK,		0},
177 {"SELECTCASE",	tok_SELECTCASE,	IK | MP | NI | EK | NA | CN | MB,6},
178 {"STOP",	tok_STOP,	IK | NP | EK,			0},
179 {"SUBROUTINE",	tok_SUBROUTINE,	IK | NP | NI | EK,		0},
180 {"TARGET",      tok_TARGET,     IK | NI | EK | NP,              0},
181 {"THEN",	tok_THEN,	IK | NP | EK,			0},
182 {"TO",		tok_TO,		NI | EK,			0},
183 {"TYPE",	tok_TYPE,	IK | EK,			0},
184 {"WHILE",	tok_WHILE,	NI | EK | MP | NA,		0},
185 {"WRITE",	tok_WRITE,	IK | EK | MP | NA,		0},
186 };
187 
188 
189 		/* Lookup table to allow index in keywords table of
190 		   a given keyword to be found by its token number.
191 		   Initialized by init_keyhashtab. */
192 PRIVATE short
193   keytab_offset,	/* lowest keyword token number */
194   *keytab_index;	/* array of keyword indices  */
195 PRIVATE unsigned
196   keytab_size;		/* Number of elements in keytab_index */
197 
198 
199 	/* get_identifier reads a string of characters satisfying
200 	   isidletter.  As they are read and as long as they are
201 	   alphabetic, it looks for a match to a keyword, and
202 	   whenever one is found, checks with is_keyword to see
203 	   if the context is right.  If so, it returns the keyword.
204 	   Otherwise it keeps going and eventually returns the id.
205 	 */
206 void
207 #if HAVE_STDC
get_identifier(Token * token)208 get_identifier(Token *token)
209 #else /* K&R style */
210 get_identifier(token)
211 	Token *token;
212 #endif /* HAVE_STDC */
213 {
214 	int c,		/* Uppercase version of current letter */
215 	    preceding_c,/* Char preceding latest id */
216 	    has_embedded_space,	/* Spaces inside keyword or id */
217 	    split_pos,		/* Where space allowed */
218 	    kwd_has_embedded_space, /* Keyword does not follow freeform rules*/
219 	    kwd_not_separated,	/* keyword followed by alphanumeric w/o spc */
220 	    kwd_split_pos,	/* for MB keyword: actual location of blank */
221 	    kwd_not_split,	/* for checking MB keywords */
222 	    space_seen_lately,	/* Flag for catching embedded space */
223 	    lo,hi,	/* Indices in keyword table where match may be */
224 	    keywd_class;/* Class number returned by is_keyword */
225 	unsigned klen;	/* Length of id read so far (after keyword test) */
226 	int possible_keyword;
227 
228 	token->tclass = tok_identifier;
229 	keywd_class = FALSE;
230 
231 	klen = 0;
232 	lo = 0;
233 	hi = NUM_KEYWORDS-1;
234 	split_pos = 0;
235 
236 	/* Define shorthand for the keyword letter under study */
237 #define KN(i) keywords[i].name
238 #define KL(i) keywords[i].name[klen]
239 
240 	possible_keyword = TRUE;
241 	preceding_c = prev_char;
242 	has_embedded_space = kwd_has_embedded_space = kwd_not_split = FALSE;
243 	kwd_not_separated = FALSE;
244 	kwd_split_pos = 0;
245 	space_seen_lately = FALSE;
246 
247 			/* set stmt class in case is_keyword not invoked */
248 	if(initial_flag && !in_attrbased_typedecl)
249 	  curr_stmt_class = tok_identifier;
250 
251 			/* This loop gets  letter [letter|digit]* forms */
252 	while(isidletter(curr_char) || isadigit(curr_char)) {
253 	  c = makeupper(curr_char); /* Get the next char of id */
254 	  if(src_text_len < MAX_SRC_TEXT)
255 	    src_text_buf[src_text_len++] = (int)makeupper(curr_char);
256 
257 	  if(space_seen_lately) {
258 	    has_embedded_space = TRUE;
259 				/* If space occurs, it is OK if located where
260 				   it is allowed in keyword pair.  If split
261 				   where it's OK, record for checking MB.
262 				 */
263 	    if( klen != split_pos )
264 		kwd_has_embedded_space = TRUE;
265 	    else
266 		kwd_split_pos = split_pos;
267 	  }
268 	  bi_advance();		/* Pull in the next character */
269 
270 	  space_seen_lately = iswhitespace(prev_char);
271 
272 				/* As long as it may yet be a keyword,
273 				   keep track of whether to invoke is_keyword.
274 				 */
275 	  if(possible_keyword) {
276 
277 	    if(!isaletter(c)	/* If not alphabetic, cannot be keyword */
278 	       || klen >= sizeof(keywords[0].name)-1) /* or overlength */
279 	    {
280 #ifdef DEBUG_IS_KEYWORD
281 if(debug_lexer && getenv("BISECTION")) {
282 src_text_buf[src_text_len] = '\0';
283 (void)fprintf(list_fd,"\n%s not a keyword because",src_text_buf);
284 if(!isaletter(c))
285   (void)fprintf(list_fd," non-letter at %c",c);
286 if(klen >= sizeof(keywords[0].name)-1)
287   (void)fprintf(list_fd,"length %d >= max %d",klen,sizeof(keywords[0].name)-1);
288 }
289 #endif
290 	      possible_keyword = FALSE;
291 	    }
292 	    else {
293 	      int mid;
294 #ifdef DEBUG_IS_KEYWORD
295 if(debug_lexer && getenv("BISECTION")) {
296 (void)fprintf(list_fd,"\nklen=%d c=%c",klen,c);
297 (void)fprintf(list_fd,"\nBisecting [lo,hi]=[%d,%d] \"%s\"..\"%s\"",
298 	   lo,hi,KN(lo),KN(hi));
299 }
300 #endif
301 				/* Bisect lo .. hi looking for match
302 				   on characters found so far. */
303 	      while(lo <= hi) {
304 		mid = (lo + hi)/2;
305 		if( KL(mid) < c ) {	/* No match in lower half */
306 		  lo = mid+1;
307 		}
308 		else if( KL(mid) > c ) {/* No match in upper half */
309 		  hi = mid-1;
310 		}
311 		else {		/* Match at midpoint: Bisect each
312 				   half to find the new subinterval. */
313 		  int midlo=mid, midhi=mid;
314 				/* Bisect lo .. mid */
315 		  while( lo < midlo-1 &&  KL(lo) != c) {
316 		    mid = (lo + midlo)/2;
317 		    if(  KL(mid) < c ) {
318 		      lo = mid+1;
319 		    }
320 		    else {	/* equal */
321 		      midlo = mid;
322 		    }
323 		  }
324 		  if( KL(lo) != c )
325 		    lo = midlo;
326 				/* Bisect mid .. hi */
327 		  while( midhi < hi-1 && KL(hi) != c ) {
328 		    mid = (midhi + hi)/2;
329 		    if( KL(mid) > c ) {
330 		      hi = mid-1;
331 		    }
332 		    else {	/* equal */
333 		      midhi = mid;
334 		    }
335 		  }
336 		  if( KL(hi) != c )
337 		    hi = midhi;
338 
339 		  break;	/* After bisecting each half, we are done */
340 		}		/* end else KL(mid) == c */
341 	      }			/* end while(lo <= hi) */
342 
343 	      klen++;		/* Now increment the length */
344 
345 #ifdef DEBUG_IS_KEYWORD
346 if(debug_lexer && getenv("BISECTION")) {
347 (void)fprintf(list_fd,"\nNew [lo,hi]=[%d,%d] \"%s\"..\"%s\"",
348 	   lo,hi,KN(lo),KN(hi));
349 }
350 #endif
351 			/* If range is null, a match has been ruled out. */
352 	      if(lo > hi) {
353 #ifdef DEBUG_IS_KEYWORD
354 if(debug_lexer && getenv("BISECTION")) {
355 src_text_buf[src_text_len] = '\0';
356 (void)fprintf(list_fd,"\nKeyword ruled out for %s at length %d since lo %d > hi %d",
357 	   src_text_buf,klen,lo,hi);
358 }
359 #endif
360 		possible_keyword = FALSE;
361 	      }
362 			/* If length of first keyword in range is equal
363 			   to the new length, then we have a match at
364 			   this point.  Check it out with is_keyword.
365 			 */
366 	      else if(KN(lo)[klen] == '\0') {
367 		if( (keywd_class = is_keyword(lo)) != FALSE) {
368 		  token->tclass = keywd_class;	/* It's a keyword */
369 		  token->value.string = NULL;
370 		  token->src_text = KN(lo);
371 				/* Keyword butting against following token
372 				   is warned about separately if token is an
373 				   identifier.  Here we catch cases like
374 				   BACKSPACE6 where next token is a number.
375 				 */
376 		  kwd_not_separated = (!space_seen_lately && isdigit(curr_char));
377 				/* If keyword is required to have a space
378 				   between parts, check if it does.
379 				 */
380 		  if( keywords[lo].context & MB )
381 		      kwd_not_split = (kwd_split_pos != keywords[lo].split_pos);
382 		  break;	/* Quit the input loop */
383 		}
384 		else if(lo == hi) {	/* Match is unique and ruled out */
385 		  possible_keyword = FALSE;
386 		}
387 	      }
388 	    }/* end else isaletter(c) */
389 				/* Longest match gives split pos if any.
390 				   This fails for DO UBLE...
391 				 */
392 	    split_pos = (hi >= 0)? keywords[hi].split_pos: 0;
393 
394 	  }/* end if(possible_keyword) */
395 	}/* end while(isidletter || isadigit) */
396 
397 	if(keywd_class == FALSE) {		/* it is an identifier */
398 
399 				/* Identifier: find its hashtable entry or
400 				   create a new entry.	*/
401 		    int h;
402 		    Lsymtab *symt;
403 #ifdef ALLOW_TYPELESS_CONSTANTS
404 				/* Watch out for const like X'nnn' */
405 		    if(src_text_len == 1 && isaquote(curr_char)) {
406 				/* Read the string, append the trailing quote
407 				   then invoke routine to interpret it. */
408 		      get_string(token);
409 #ifndef LEX_RAWSTRINGS
410 		      if(src_text_len < MAX_SRC_TEXT)
411 			src_text_buf[src_text_len++] = quote_char;
412 #endif
413 		      get_binary_const(token,src_text_buf[0],space_seen_lately);
414 		      return;
415 		    }
416 #endif
417 
418 		    if(src_text_len < MAX_SRC_TEXT)
419 		      src_text_buf[src_text_len] = '\0';
420 		    token->value.integer = h = hash_lookup(src_text_buf);
421 		    token->src_text = hashtab[h].name;
422 				/* If it is an array give it a special token
423 				   class, so that arrays can be distinguished
424 				   from functions in the grammar. */
425 		    if((symt=hashtab[h].loc_symtab) != NULL
426 		       && symt->array_var) {
427 		      token->tclass = tok_array_identifier;
428 
429 	  }
430 	}
431 	else {			/* It is a keyword */
432 	    has_embedded_space = kwd_has_embedded_space;
433 	}
434 
435 				/* Check identifiers for being juxtaposed
436 				   to keywords or having internal space.
437 				   Keywords are warned about if they are
438 				   split where they are not allowed to be.
439 				   Special case: ELSEIF is never a problem.
440 				 */
441 
442 	if(( ((pretty_no_space || (free_form && f90_freeform_space))
443 		 && (isidletter(preceding_c) || isadigit(preceding_c) ||
444 		     kwd_not_separated)
445 	      && !(prev_token_class==tok_ELSE && keywd_class==tok_IF) )
446 	  || ((pretty_extra_space || (free_form && f90_freeform_space))
447 		   && has_embedded_space) ) ) {
448 	    if(token->tclass==tok_identifier || token->tclass==tok_array_identifier) {
449 	      space_violation(token->line_num,token->col_num,"identifier");
450 	      msg_tail(hashtab[token->value.integer].name);
451 	    }
452 	    else {
453 	      space_violation(token->line_num,token->col_num,"keyword");
454 	      msg_tail(keywords[keytab_index[keywd_class-keytab_offset]].name);
455 	    }
456 	  if(has_embedded_space)
457 	    msg_tail("has embedded space");
458 	  else
459 	    msg_tail("not clearly separated from context");
460 	}
461 	if( keywd_class != FALSE ) {
462 	  if( free_form && f90_freeform_space && kwd_not_split ) {
463 	    space_violation(token->line_num,token->col_num,"keyword");
464 	    msg_tail(keywords[keytab_index[keywd_class-keytab_offset]].name);
465 	    msg_tail("lacks required space between parts");
466 	  }
467 	}
468 #ifdef DEBUG_FORLEX
469 	if(debug_lexer){
470 	    switch((int)(token->tclass)) {
471 		case tok_identifier:
472 			(void)fprintf(list_fd,"\nIdentifier:\t\t%s",
473 				      token->src_text);
474 			break;
475 		case tok_array_identifier:
476 			(void)fprintf(list_fd,"\nArray_identifier:\t%s",
477 				      token->src_text);
478 			break;
479 		default:
480 			(void)fprintf(list_fd,"\nKeyword:\t\ttok_%s",
481 				      token->src_text);
482 			break;
483 	    }
484 	}
485 #endif
486 } /* get_identifier */
487 
488 /*  iskeyword:
489 
490 	Determines (to the best of its current ability) whether a
491 	given identifier is a keyword or not.  Hopefully now no
492 	keywords are reserved.  For keywords that can be split, like
493 	DO WHILE, the longest form of the keyword is gotten, i.e. this
494 	yields tok_DOWHILE not tok_DO then tok_WHILE.  There are some
495 	exceptions (NA flag not present) like IMPLICIT.
496 
497 	Method uses context from start of statement up to and
498 	including the character following the putative keyword to
499 	eliminate as many cases as possible.  Any non-IK keywords
500 	(those that need not be in the initial series of keywords of
501 	statement) have special code to handle them.  The rest are
502 	handed off to looking_at_keywd which tries to see if it is an
503 	assignment statement.
504 
505 	Note that some rules that could be used if F77 Standard were
506 	adhered to strictly are not used here.  The idea is to allow
507 	extensions, and leave catching syntax errors in the parser.
508 	For example, specification-statement keywords are not excluded
509 	after the first executable statement has been seen.  The status
510 	of a variable as declared array or character type is not consulted
511 	in ruling out an assignment statement if following parentheses
512 	are present.  Etc.  */
513 
514 
515 		/* Macro to test if all the specified bits are set */
516 #define MATCH(CONTEXT) ((keywords[i].context & (CONTEXT)) == (CONTEXT))
517 
518 
519 PRIVATE int
520 #if HAVE_STDC
is_keyword(int i)521 is_keyword(int i)
522            			/* Index in keywords table */
523 #else /* K&R style */
524 is_keyword(i)
525      int i;			/* Index in keywords table */
526 #endif /* HAVE_STDC */
527 {
528   int ans = FALSE;
529   int putative_keyword_class;	/* Class of the supposed keyword */
530   extern int 	construct_name_seen,	/* helper variables set by parser */
531 		stmt_sequence_no;	/* shared with fortran.y */
532 
533   while(iswhitespace(curr_char))	      /* Move to lookahead char */
534     advance();
535 
536 #ifdef DEBUG_IS_KEYWORD
537   if(debug_lexer){
538     (void)fprintf(list_fd,
539 		"\nkeyword %s: initialflag=%d implicitflag=%d ",
540 	    keywords[i].name,initial_flag,implicit_flag);
541     (void)fprintf(list_fd,
542 		"context=%o, next char=%c %o",keywords[i].context,
543 						curr_char,curr_char);
544   }
545 #endif
546 
547   putative_keyword_class = keywords[i].tclass;
548 
549   if( !initial_flag && MATCH(IK) ) {
550 			/* Dispose of keywords which can only occur in initial
551 			   part of statement, if found elsewhere. One exception
552 			   is something with a construct-name tacked on in
553 			   front of it, which is a cinch.
554 			*/
555     if( (ans = (construct_name_seen && MATCH(CN))) ) {
556       if( putative_keyword_class == tok_DO &&
557 	  looking_at_keywd(tok_WHILE) ) {
558 	WHILE_expected = TRUE;
559       }
560     }
561 
562   }
563 
564 #if 0 /* This does not work: curr_stmt_class not cleared beforehand */
565   else if(curr_stmt_class == tok_IF && MATCH(NI)) {
566 			/* Dispose of keywords which cannot occur in stmt
567 			   field of logical IF if that is where we are.
568 			 */
569     ans = FALSE;
570   }
571 #endif
572 
573   else if(MATCH(NA) && isalpha(curr_char)) {
574 			/* Dispose of keywords which cannot be followed
575 			   by alphabetic character if that is so.
576 
577 			   Handle variant unparenthesized PARAMETER stmt.
578 			   Reject if it follows a stmt fun or executable stmt.
579 			 */
580     if(putative_keyword_class != tok_PARAMETER
581        || stmt_sequence_no > SEQ_STMT_FUN) {
582       ans = FALSE;
583     }
584     else {		  /* non-paren form _should_ look like an assignment */
585       ans = ! looking_at_keywd(putative_keyword_class);
586     }
587   }
588 
589   else if(putative_keyword_class == tok_TO) {/* A non-IK case */
590 				/* TO following GO is handled with GO.
591 				   Here identify it if it follows a label
592 				   in ASSIGN statement.
593 				 */
594     ans = ( curr_stmt_class == tok_ASSIGN
595 	   && prev_token_class == tok_integer_const);
596 
597   }
598   else if(putative_keyword_class == tok_FUNCTION /* A non-IK case */
599     && (stmt_sequence_no != 0 /* not the first statement of module */
600 
601 	|| !(initial_flag  /* if not initial can only be preceded by type */
602 	     || is_a_type_token(curr_stmt_class)) )) {
603     ans = FALSE; /* otherwise it will be handled correctly by looking_at */
604   }
605   else if(putative_keyword_class == tok_WHILE) { /* A non-IK case */
606 	 /* Only occurs in DO label [,] WHILE and constructname: DO WHILE  */
607     ans = WHILE_expected;
608     WHILE_expected = FALSE;
609   }
610 		/* Remaining cases are IK in initial part */
611 
612 			/*   Eliminate those which can never be followed
613 			     by '(' or '=' if that is what we have.
614 			     Exception for DIMENSION in attr-based type
615 			     declaration, which must be followed by left paren.
616 			 */
617   else if(MATCH(NP) &&
618 	  ((curr_char == '('
619 	    && !(in_attrbased_typedecl && putative_keyword_class == tok_DIMENSION))
620 	   || curr_char == '=') ) {
621     ans = FALSE;
622   }
623 
624 			/* Likewise with those that must be followed by
625 			   '(' but aren't.
626 			   Exception for PARAMETER in attr-based type
627 			   declaration, which must be followed by comma or
628 			   double colon.
629 			*/
630   else if(MATCH(MP) && curr_char != '(') {
631     ans = (in_attrbased_typedecl && putative_keyword_class == tok_PARAMETER
632 	&& (curr_char == ',' || curr_char == ':'));
633   }
634 
635 
636 				/* END DO: handle its DO here */
637   else if( putative_keyword_class == tok_DO && curr_char == EOS ) {
638 	/* Also must have prev_token_class == tok_END, but
639 	   no need to check since end-of-statement suffices. */
640     ans = TRUE;
641   }
642 
643 
644 				/* Other type names always follow the word
645 				   IMPLICIT */
646   else if( implicit_flag ) {
647     ans =  MATCH(TY);
648   }
649 
650   else {
651 		     /* Remaining cases are keywords that must be in
652 			initial position. If followed by '=' must be an
653 			identifier.  If followed by '(' then may be an array
654 			or character lvalue, so use looking_at to scan ahead
655 			to see if this is an assignment statement. */
656       ans =  looking_at_keywd(putative_keyword_class);
657   }
658 
659 
660 			/* Save initial token class for use by parser.
661 			   Either set it to keyword token or to id for
662 			   assignment stmt. */
663   if(initial_flag && !in_attrbased_typedecl) {
664     curr_stmt_class = (ans? keywords[i].tclass: tok_identifier);
665   }
666 
667 
668 		/* Turn off the initial-keyword flag if this is a
669 		   keyword that cannot be followed by another keyword
670 		   or if it is not a keyword.
671 		*/
672   if(ans) {
673     if(keywords[i].context & EK) {
674 	/* In case lookahead to :: was not done or failed, a type keyword
675 	   followed immediately by comma must be attr-based form.
676 	   If TK type keyword is followed by parenthesis, it signals a
677 	   parentheized KIND or LEN spec, which needs a special left paren
678 	   to avoid shift/reduce conflict in the grammar.
679 	 */
680       if( MATCH(TY) ) {
681 	if( curr_char == ',' ) {
682 	  in_attrbased_typedecl = TRUE;
683 	}
684 	else if( MATCH(TK) && curr_char == '(' ) {
685 	  if(! (implicit_flag && looking_at_implicit_list()) )
686 	    need_special_lparen = TRUE;
687 	}
688       }
689       if( !in_attrbased_typedecl )
690 	initial_flag = FALSE;
691     }
692     return keywords[i].tclass;
693   }
694   else {	/* If no more letters follow, then keyword here
695 		   is ruled out.  Turn off initial_flag. */
696     if( ! isalpha(curr_char) )
697       initial_flag = FALSE;
698 
699     return 0;	/* Not found in list */
700   }
701 }/* End of is_keyword */
702 
703 
704 /*    init_keyhashtab:
705 */
706 		/* Hashing is no longer used.  This guy now only
707 		   initializes the table of indices that allow
708 		   keywords to be looked up by their token class*/
709 void
init_keyhashtab(VOID)710 init_keyhashtab(VOID)
711 {
712   unsigned i;
713   int k,kmin,kmax;
714   kmin = kmax = keywords[0].tclass;	/* Find min and max token classes */
715   for(i=1; i<NUM_KEYWORDS; i++) {
716     k = keywords[i].tclass;
717     if(k < kmin)  kmin = k;
718     if(k > kmax)  kmax = k;
719   }
720 
721   keytab_offset = kmin;	/* Index table from [kmin..kmax] -> [0..size-1] */
722   keytab_size = (unsigned) (kmax-kmin+1);
723   if( (keytab_index=(short *)calloc(keytab_size,sizeof(keytab_index[0])))
724      == (short *)NULL) {
725     oops_message(OOPS_FATAL,NO_LINE_NUM,NO_COL_NUM,
726 	   "cannot allocate space for keytab_index");
727   }
728 
729 				/* Now fill in the lookup table, indexed
730 				   by class - offset */
731   for(i=0; i<NUM_KEYWORDS; i++) {
732     k = keywords[i].tclass;
733     keytab_index[k - keytab_offset] = i;
734   }
735 }
736 
737 /* keytok_name: given token class number, returns pointer to its name.
738    Works only for keyword tokens.  This is inefficient, but it is only
739    used in error messages.
740  */
741 
742 char *
keytok_name(int tclass)743 keytok_name(int tclass)
744 {
745     int i;
746     for(i=0; i<NUM_KEYWORDS; i++) {
747 	if( keywords[i].tclass == tclass ) {
748 	    return keywords[i].name;
749 	}
750     }
751     return "noname";		/* not in the list */
752 }
753