1 /* $Id: forlex.c,v 1.46 2004/12/31 17:22:16 moniot Exp $
2 
3 	Tokenizing routines for Fortran program checker.
4 */
5 
6 /*
7 
8 
9 Copyright (c) 2001 by Robert K. Moniot.
10 
11 Permission is hereby granted, free of charge, to any person
12 obtaining a copy of this software and associated documentation
13 files (the "Software"), to deal in the Software without
14 restriction, including without limitation the rights to use,
15 copy, modify, merge, publish, distribute, sublicense, and/or
16 sell copies of the Software, and to permit persons to whom the
17 Software is furnished to do so, subject to the following
18 conditions:
19 
20 The above copyright notice and this permission notice shall be
21 included in all copies or substantial portions of the
22 Software.
23 
24 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
25 KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
26 WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
27 PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
28 COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
29 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
30 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
31 SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
32 
33 Acknowledgement: the above permission notice is what is known
34 as the "MIT License."
35 */
36 
37 
38 #include <stdio.h>
39 #include <ctype.h>
40 #include <string.h>
41 
42 	/* Some older mac compilers need compat.h to use memset.  If
43 	   needed, add  `` -d MAC_MPW ''  to compilation options.
44 	 */
45 #ifdef MAC_MPW
46 #include <compat.h>
47 #endif
48 
49 #include "ftnchek.h"
50 #define FORLEX
51 #include "symtab.h"
52 #include "tokdefs.h"
53 #include "forlex.h"
54 
55 /* toascii() is widely supported, but in case it isn't, define it here.
56    We need it mainly in order to avoid bounds violation in legal_chars array.
57    On EBCDIC systems, toascii() should convert from an EBCDIC code
58    to the ASCII code for the same character, because the legal_chars array is
59    ordered according to the ASCII collating sequence.
60  */
61 #ifndef toascii
62 #define toascii(C) ((C) & 0177)
63 #endif
64 
65 extern int complex_const_allowed,    /* shared flags operated by fortran.y */
66 	   inside_format,
67 	   integer_context;
68 
69 
70 
71 PROTO(PRIVATE void make_legal_char,( char *s ));
72 
73 
74 
75 
76 /*
77 
78 Part I. yylex()
79 
80    Shared functions defined:
81 	yylex()			Returns next token.  Called from yyparse().
82 	implied_id_token(t,s)	Creates token for blank common declaration.
83 	get_binary_const(t, c ) Creates token for binary constant
84 	get_string(t)		Creates token for a string
85 
86 Note: compilation options LEX_STORE_STRINGS and LEX_STORE_HOLLERITHS:
87   Define the macro name LEX_STORE_STRINGS to build a version of ftnchek that
88   stores string constants, and LEX_STORE_HOLLERITHS to store hollerith
89   constants.  Now that INCLUDE statements are supported, strings must
90   be stored.  Holleriths are not used, so they need not be stored.
91 */
92 #ifndef LEX_STORE_STRINGS
93 #define LEX_STORE_STRINGS
94 #endif
95 
96 #ifdef DEBUG_FORLEX		/* For maintaining the program */
97 #define LEX_STORE_HOLLERITHS
98 #endif
99 
100 #ifdef DEBUG_FORLEX
101 #include <math.h>		/* Only used for pow() in debug mode */
102 #endif
103 
104 PRIVATE int closeup_saw_whitespace;
105 
106 	/* The following macro says whether a given character is legal,
107 	 * i.e. one of the stream control chars or a valid ANSI Fortran
108 	 * character.  Lower case letters are considered legal too.
109 	 * Nondigits in columns 1-6 (except EOF,EOS) are illegal in fixed form.
110 	 * Hopefully this works for EBCDIC too.
111 	 */
112 #define islegal(C) ( ((C) == EOF) || ((C) == EOS) || \
113 	( (col_num >= 6 || free_form || isdigit(C)) && \
114 	 (toascii((int)(C)) >= toascii(' ') && \
115 	  legal_chars[toascii((int)(C))-toascii(' ')] == (C))) )
116 		/* Array has x where ASCII character is not valid.
117 		   This defn is not exactly standard f77, since it includes
118 		   supported extensions: $ in format,
119 		   <> in variable formats, and " in strings.
120 		   Since strlen of array is 96, indexing by [toascii(c)-32]
121 		   is always in bounds.
122 		 */
123 PRIVATE char f77_legal_chars[]=
124 " x\"x$xx'()*+,-./0123456789:x<=>xx\
125 ABCDEFGHIJKLMNOPQRSTUVWXYZxxxxxxabcdefghijklmnopqrstuvwxyzxxxxx";
126 
127 		/* This is the working copy of list of legal chars, with
128 <                  any chars in idletter_list made legal.
129 		 */
130 PRIVATE char legal_chars[sizeof(f77_legal_chars)];
131 
132 				/* Routine to fix up list of legal chars */
make_legal_char_list(VOID)133 void make_legal_char_list(VOID)
134 {
135     int i;
136 			/* Start with the f77 list */
137     (void)strcpy(legal_chars,f77_legal_chars);
138 
139 			/* Verify idletter_list has only punctuation chars.
140 			   If violators, reset to default of "$_"
141 			 */
142     for(i=0; idletter_list[i] != '\0'; i++) {
143       if( !ispunct(idletter_list[i]) ) {
144 	(void)fprintf(stderr,
145 	"\n%cidentifier setting specifies invalid character %c: setting ignored",
146 #ifdef OPTION_PREFIX_SLASH
147 			  '/',
148 #else
149 			  '-',
150 #endif
151 		idletter_list[i]);
152 	idletter_list = DEF_IDLETTER_LIST;	/* restore to default */
153 	break;
154       }
155     }
156 			/* Add nonstd nonalpha chars allowed in identifiers */
157     make_legal_char(idletter_list);
158 
159 }
160 
161 
162 
163 
164 		/* Routines to alter the default status of characters,
165 		   to support various extensions to f77. */
166 
167 PRIVATE void
168 #if HAVE_STDC
make_legal_char(char * s)169 make_legal_char(char *s)
170 #else /* K&R style */
171 make_legal_char(s)
172      char *s;			/* List of legal chars */
173 #endif /* HAVE_STDC */
174 {
175   int i;
176   while( *s != '\0' ) {
177     i = toascii((int)(*s));
178     if(i >= toascii(' ') && i <= toascii('~')) {
179       legal_chars[i-toascii(' ')] = *s;
180     }
181     s++;
182   }
183 }
184 
185 #if 0
186 		/* Routines to alter the default status of characters,
187 		   to support various extensions to f77. Not used now.*/
188 
189 PROTO(void make_illegal_char,( char *s ));
190 
191 
192 void
193 make_illegal_char(s)
194      char *s;			/* List of illegal chars */
195 {
196   int i;
197   while( *s != '\0' ) {
198     i = toascii((int)(*s));
199     if(i >= toascii(' ') && i <= toascii('~')) {
200 	legal_chars[i-toascii(' ')] = ( (*s != 'x')? 'x': 'X');
201     }
202     s++;
203   }
204 }
205 #endif
206 
207 
208 		/* local functions defined */
209 
210 
211 PROTO(PRIVATE void closeup,( void ));
212 
213 
214 PROTO(PRIVATE void get_complex_const,( Token *token ));
215 
216 #ifdef ALLOW_UNIX_CPP
217 PROTO(PRIVATE void get_cpp_directive,( void ));
218 #endif
219 
220 PROTO(PRIVATE void get_dot,( Token *token ));
221 
222 PROTO(PRIVATE void get_dotted_keyword,( Token *token));
223 
224 PROTO(PRIVATE void get_edit_descriptor,( Token *token ));
225 
226 PROTO(PRIVATE void get_hollerith,( Token *token, int n ));
227 
228 PROTO(PRIVATE void get_illegal_token,( Token *token ));
229 
230 PROTO(PRIVATE void get_label,( Token *token ));
231 
232 PROTO(PRIVATE void get_letter,( Token *token ));
233 
234 PROTO(PRIVATE void get_number,( Token *token ));
235 
236 PROTO(PRIVATE void get_punctuation,( Token *token ));
237 
238 PROTO(PRIVATE void get_simple_punctuation,( Token *token ));
239 
240 PROTO(PRIVATE int f90_relop, ( Token *token, int *multichar ) );
241 
242 /* Define zero_struct to set a struct to zero. It works around the differing availability
243    of memset and bzero.   This is used to initialize a token.
244  */
245 #if HAVE_MEMSET
246 #define zero_struct(sptr,struct_size)    (void)memset((sptr),0,struct_size)
247 #else
248 #if HAVE_BZERO
249 #define zero_struct(sptr,struct_size)    bzero((char *)(sptr),struct_size)
250 #else
251 PROTO(PRIVATE void zero_struct, (void *sptr, int struct_size) );
252 PRIVATE void
zero_struct(void * sptr,int struct_size)253 zero_struct(void *sptr, int struct_size)
254 {
255      int i;
256      for(i=0; i<struct_size; i++) {
257 	  ((char *)sptr)[i] = 0;
258      }
259 }
260 #endif
261 #endif
262 
263 		/*  Gets next token for Yacc.  Return value is token.class,
264 		 *  and a copy of the token is stored in yylval.
265 		 */
266 int
yylex(VOID)267 yylex(VOID)
268 {
269     Token token;
270     extern int in_attrbased_typedecl; /* shared with fortran.y */
271 
272 		/* Initialize token fields to scratch. */
273     zero_struct(&token,sizeof(token));
274 
275     src_text_len = 0;
276 
277     if(curr_char == EOF) {
278 	token.tclass = EOF;
279 	token.line_num = line_num;
280 	token.col_num = col_num;
281     }
282     else /* not EOF */ {
283 
284 
285 		/* Skip leading spaces, and give error message if non-ANSI
286 		 * characters are found.
287 		 */
288 
289 	while(iswhitespace(curr_char) || (! islegal(curr_char))  ) {
290 	  if(!iswhitespace(curr_char)) {
291 #ifdef ALLOW_UNIX_CPP
292 	    if(curr_char == '#' && col_num == 1) {
293 	       get_cpp_directive();	/* turn # line into EOS */
294 	       break;
295 	    }
296 	    else
297 #endif
298 		lex_error("Illegal character");
299 	  }
300 	  advance();
301 	}
302 
303 	token.line_num = line_num;
304 	token.col_num = col_num;
305 
306 	closeup_saw_whitespace = FALSE;
307 
308 	if(inside_format) {	/* Handle format stuff here to avoid trouble */
309 	  get_edit_descriptor(&token);
310 	}
311 	else if(isadigit(curr_char)) {
312 			/* Identify label:
313 			      Fixed form: Number in cols 1-5.
314 			      Free form:  Number at start of statement.
315 			 */
316 	    if( (free_form)? (initial_flag && !in_attrbased_typedecl): (col_num < 6))
317 			get_label(&token);      /* Stmt label */
318 		else
319 			get_number(&token);     /* Numeric or hollerith const */
320 	}
321 	else if(isidletter(curr_char)) {
322 		if(implicit_letter_flag)
323 			get_letter(&token);	/* letter in IMPLICIT list */
324 		else
325 			get_identifier(&token); /* Identifier or keyword */
326 	}
327 	else if(isaquote(curr_char)) {
328 			get_string(&token);	/* Quoted string */
329 	}
330 	else if(curr_char == '.') {
331 			get_dot(&token);	 /* '.' lead-in */
332 	}
333 	else {
334 			get_punctuation(&token);  /* Punctuation character or EOS */
335 	}
336     }/*end not EOF*/
337 
338     if(token.tclass == EOS) {
339 	implicit_flag=FALSE;	/* in case of errors, reset flags */
340 	implicit_letter_flag = FALSE;
341 	WHILE_expected = FALSE;
342     }
343 
344 
345     prev_token_class = token.tclass;
346 
347     yylval = token;
348     return token.tclass;
349 
350 } /* yylex */
351 
352 	/* closeup: Advances input stream till next_char is nonspace.  Fudges
353 	   things so that curr_char remains as it was.
354 	*/
355 PRIVATE void
closeup(VOID)356 closeup(VOID)
357 {
358   int
359     save_curr_char = curr_char,
360     save_prev_char = prev_char;
361   LINENO_t
362     save_line_num = line_num;
363   COLNO_t
364     save_col_num = col_num;
365 
366   int next_space = iswhitespace(next_char);
367 
368   closeup_saw_whitespace = next_space; /* Record for free-format warnings */
369 
370   while(next_space) {
371     advance();
372     next_space = iswhitespace(next_char);
373   }
374 
375   curr_char = save_curr_char;
376   prev_char = save_prev_char;
377   line_num = save_line_num;
378   col_num = save_col_num;
379 }
380 
381 
382 	/* Fills argument with token for an identifer, as if an identifer
383 	 * with name given by string s had been lexed.  This will
384 	 * be called by parser when blank common declaration is seen,
385 	 * and when a main prog without program statement is found,
386 	 * and when an unnamed block data statement is found,
387 	 * so processing of named and unnamed cases can be handled uniformly.
388 	*/
389 void
390 #if HAVE_STDC
implied_id_token(Token * t,char * s)391 implied_id_token(Token *t, char *s)
392 #else /* K&R style */
393 implied_id_token(t,s)
394 	Token *t;
395 	char *s;
396 #endif /* HAVE_STDC */
397 {
398 	int h;
399 	unsigned long hnum;
400 
401 	hnum = hash(s);
402 	while( h=hnum%HASHSZ, hashtab[h].name != NULL &&
403 		strcmp(hashtab[h].name,s) != 0)
404 			hnum = rehash(hnum);
405 	if(hashtab[h].name == NULL) {	/* not seen before */
406 		hashtab[h].name = s;
407 		hashtab[h].loc_symtab = NULL;
408 		hashtab[h].glob_symtab = NULL;
409 		hashtab[h].com_loc_symtab = NULL;
410 		hashtab[h].com_glob_symtab = NULL;
411 	}
412 	t->tclass = tok_identifier;
413 	t->value.integer = h;
414 	t->src_text = new_src_text("",0);
415 } /* implied_id_token */
416 
417 #ifdef ALLOW_UNIX_CPP
418 		/* This does not create a token but just performs the
419 		   actions needed when a cpp directive is seen.  It
420 		   advances curr_char to the EOS.  The setting of
421 		   filename is delayed to this point because it is not
422 		   stored in tokens but is external, so changing it
423 		   must wait till the previous statement is fully
424 		   parsed and any error messages printed and arg or
425 		   com list headers completed.
426 		 */
427 PRIVATE void
get_cpp_directive(VOID)428 get_cpp_directive(VOID)
429 {
430   if(next_filename != (char *)NULL) {
431 
432 		/* A #line directive on first line of toplevel source file
433 		   gives name of real original file.  Replace our idea
434 		   of top_filename with that. But ignore an initial
435 		   # 1 "" since that means cpp was working with stdin,
436 		   probably from ftnpp.  Likewise ignore # 1 "stdin"
437 		   which is a variant form, e.g. from fpp.
438 		   Nowadays cpp may indicate stdin by "<stdin>" or suchlike,
439 		   which we also ignore.
440 		*/
441     if( cpp_start_of_file ) {
442       if( next_filename[0] != '\0' &&
443 	  next_filename[0] != '<' &&
444 	  strcmp(next_filename,"stdin") != 0 ) {
445 	top_filename = next_filename;
446 	current_filename = next_filename;
447 	cpp_start_of_file = FALSE;
448       }
449     }
450     else {
451       if( cpp_inc_depth > 0 &&
452 	  next_filename == cpp_include_stack[cpp_inc_depth-1].filename ) {
453 	--cpp_inc_depth;
454       }
455       else {
456 	if( cpp_inc_depth == 0 ) {
457 	  top_file_line_num = next_top_file_line_num;
458 	}
459 				/* Avoid overrun, but it will recover
460 				   even if max depth is exceeded.
461 				*/
462 	if(cpp_inc_depth < MAX_INCLUDE_DEPTH)
463 	  cpp_include_stack[cpp_inc_depth++].filename = current_filename;
464       }
465       current_filename = next_filename;
466     }
467 
468 
469   }
470   if(f77_unix_cpp || f90_unix_cpp || !cpp_handled) {
471     nonstandard(line_num,col_num,f90_unix_cpp,0);
472     msg_tail(": preprocessor directive");
473     if(!cpp_handled)
474       msg_tail("(not processed)");
475   }
476 
477   do {			/* Skip to end of directive.  It will become an EOS */
478     advance();
479   } while( curr_char != EOS);
480 
481 }/*get_cpp_directive*/
482 #endif
483 
484 PRIVATE void
485 #if HAVE_STDC
get_dot(Token * token)486 get_dot(Token *token)
487 #else /* K&R style */
488 get_dot(token)
489 	Token *token;
490 #endif /* HAVE_STDC */
491 {
492 	if(src_text_len < MAX_SRC_TEXT)
493 	  src_text_buf[src_text_len++] = curr_char;
494 
495 	closeup();		/* Advance till nonspace char in next_char */
496 
497 	if(isadigit(next_char))
498 		get_number(token);		/* Numeric const */
499 	else if(isaletter(next_char))
500 		get_dotted_keyword(token);	/* .EQ. etc. */
501 	else
502 		get_simple_punctuation(token);	/* "." out of place */
503 }
504 
505 
506 PRIVATE const struct {
507 	char *name;
508 	int tclass,tsubclass;
509  } dotted_keywords[]={
510 			{".EQ.",tok_relop,relop_EQ},
511 			{".NE.",tok_relop,relop_NE},
512 			{".LE.",tok_relop,relop_LE},
513 			{".LT.",tok_relop,relop_LT},
514 			{".GE.",tok_relop,relop_GE},
515 			{".GT.",tok_relop,relop_GT},
516 			{".AND.",tok_AND,0},
517 			{".OR.",tok_OR,0},
518 			{".NOT.",tok_NOT,0},
519 			{".FALSE.",tok_logical_const,FALSE},
520 			{".TRUE.",tok_logical_const,TRUE},
521 			{".EQV.",tok_EQV,0},
522 			{".NEQV.",tok_NEQV,0},
523 			{NULL,0,0}
524 		    };
525 
526 
527 PRIVATE void
528 #if HAVE_STDC
get_dotted_keyword(Token * token)529 get_dotted_keyword(Token *token)
530 #else /* K&R style */
531 get_dotted_keyword(token)
532 	Token *token;
533 #endif /* HAVE_STDC */
534 {
535 	int i,
536 	    has_embedded_space,	/* Spaces inside keyword */
537 	    space_seen_lately;	/* Flag for catching embedded space */
538 	initial_flag = FALSE;
539 				/* Watch for embedded space, but not
540 				   (in fixed form)
541 				   between dots and letters of keyword.
542 				   I.e.  ". eq ." is OK, but not ".e q." */
543 	has_embedded_space = FALSE;
544 
545 	bi_advance();      /* gobble the initial '.' */
546 
547 	space_seen_lately = (!free_form? FALSE:
548 			     closeup_saw_whitespace);
549 
550 	while(isaletter(curr_char)) {
551 
552 	   if(src_text_len < MAX_SRC_TEXT)
553 	     src_text_buf[src_text_len++] = (char)makeupper(curr_char);
554 
555 	  if(space_seen_lately)
556 	    has_embedded_space = TRUE;
557 
558 	   bi_advance();
559 
560 	   space_seen_lately = iswhitespace(prev_char);
561 	}
562 
563 			/* Free form complains about space before last dot */
564 	if(free_form && space_seen_lately)
565 	    has_embedded_space = TRUE;
566 
567 	if(src_text_len < MAX_SRC_TEXT)
568 	  src_text_buf[src_text_len++] = '.'; /* make it complete */
569 
570 	if(curr_char != '.') {
571 	    lex_error("Badly formed logical/relational operator or constant");
572 	}
573 	else {
574 		advance();      /* gobble the final '.' */
575 	}
576 	if( (pretty_extra_space || (free_form && f90_freeform_space))
577 	   && has_embedded_space) {
578 	      space_violation(token->line_num,token->col_num,
579 			"keyword has embedded space");
580 	}
581 
582 	for(i=0; dotted_keywords[i].name != NULL; i++) {
583 	  if(strncmp(src_text_buf+1, /* only compare the significant parts */
584 		     dotted_keywords[i].name+1,
585 		     src_text_len-2) == 0) {
586 	    token->tclass = dotted_keywords[i].tclass;
587 	    token->tsubclass = dotted_keywords[i].tsubclass;
588 	    token->value.string = token->src_text = dotted_keywords[i].name;
589 #ifdef DEBUG_FORLEX
590 			if(debug_lexer)
591 			   (void)fprintf(list_fd,"\nDotted keyword:\t\t%s",
592 						token->src_text);
593 #endif
594 			return;
595 		}
596 	}
597 			/* Match not found: signal an error */
598 	lex_error("Unknown logical/relational operator or constant");
599 	get_illegal_token(token);
600 
601 } /* get_dotted_keyword */
602 
603 PRIVATE void
604 #if HAVE_STDC
get_edit_descriptor(Token * token)605 get_edit_descriptor(Token *token)
606 #else /* K&R style */
607 get_edit_descriptor(token)
608 	Token *token;
609 #endif /* HAVE_STDC */
610 {
611     int c;
612     long repeat_spec;
613     int Ee_allowed=FALSE;	/* true if edit descr can have Ee after w.d */
614 
615     if(isadigit(curr_char)) {	/* Digit: repeat spec or holl or kP or nX */
616       repeat_spec = 0;
617       do {
618 	if(src_text_len < MAX_SRC_TEXT)
619 	  src_text_buf[src_text_len++] = curr_char;
620 	repeat_spec = repeat_spec*10L + (long)BCD(curr_char);
621 	if( makeupper(next_char) == 'H' )
622 	  inside_hollerith = TRUE;/* get ready for hollerith*/
623 	bi_advance();
624       } while(isadigit(curr_char));
625 
626       if( makeupper(curr_char) == 'H' ) {
627 				/* nH... pass off to hollerith routine */
628 	get_hollerith(token, (int)repeat_spec);
629 	return;
630       }
631       else {
632 				/* Otherwise it is a repeat spec or the
633 				   numeric part of kP or nX which we treat
634 				   as repeat specs too */
635 	token->tclass = tok_integer_const;
636 	token->value.integer = repeat_spec;
637 	token->src_text = new_src_text(src_text_buf,src_text_len);
638 #ifdef DEBUG_FORLEX
639 if(debug_lexer)
640 (void)fprintf(list_fd,"\nInteger const:\t\t%ld (from %s)",
641 	      repeat_spec,
642               token->src_text);
643 #endif
644       }
645     }/* end if digit */
646 
647     else if(isaletter(curr_char)) {
648       c = makeupper(curr_char);
649       if(src_text_len < MAX_SRC_TEXT)
650 	src_text_buf[src_text_len++] = c;
651       bi_advance();
652       switch(c) {
653 
654 	case 'P':		/* P of kP  k seen previously */
655 	  if(prev_token_class != tok_integer_const) {
656 	    if(f77_format_extensions || f90_format_extensions){
657 	      nonstandard(token->line_num,token->col_num,f90_format_extensions,0);
658 	      msg_tail(": P must follow a number");
659 	    }
660 	  }
661 	  break;
662 
663 	case 'X':		/* X or nX */
664 	  break;
665 
666 	case 'S':		/* S or SP or SS */
667 	  c = makeupper(curr_char);
668 	  if(c == 'S' || c == 'P') {
669 	    if(src_text_len < MAX_SRC_TEXT)
670 	      src_text_buf[src_text_len++] = c;
671 	    bi_advance();
672 	  }
673 	  break;
674 
675 	case 'B':		/* BN or BZ */
676 	  c = makeupper(curr_char);
677 	  if(c == 'N' || c == 'Z') {
678 	    if(src_text_len < MAX_SRC_TEXT)
679 	      src_text_buf[src_text_len++] = c;
680 	    bi_advance();
681 	  }
682 	  else {
683 	    if(f77_format_extensions){
684 	      nonstandard(token->line_num,token->col_num,0,0);
685 	      msg_tail(": N or Z expected after B");
686 	    }
687 	    goto get_w_d;	/* F90 has Bw.d: allow that */
688 	  }
689 	  break;
690 
691 	case 'T':		/* Tc or TLc or TRc */
692 	  c = makeupper(curr_char);
693 	  if(c == 'L' || c == 'R') {
694 	    if(src_text_len < MAX_SRC_TEXT)
695 	      src_text_buf[src_text_len++] = c;
696 	    bi_advance();
697 	  }
698 
699 	case 'E':		/* In F90, E can be followed by N or S  */
700 	  c = makeupper(curr_char);
701 	  if( c == 'N' || c == 'S' ) {
702 	    if(src_text_len < MAX_SRC_TEXT)
703 	      src_text_buf[src_text_len++] = c;
704 	    bi_advance();
705 	    if(f77_format_extensions){
706 	      nonstandard(token->line_num,token->col_num,0,0);
707 	    }
708 	  }
709 	  Ee_allowed = TRUE;
710 	  goto get_w_d;
711 
712 
713 	case 'O':	/* These are OK in f90 but not f77 */
714 	case 'Z':
715 	  if(f77_format_extensions){
716 	    nonstandard(token->line_num,token->col_num,0,0);
717 	  }
718 	  goto get_w_d;
719 
720 				/* Iw, Fw.c and similar forms */
721 
722 	case 'G':
723 	  Ee_allowed = TRUE;	/* OK in F90 to have Ee trailer */
724 	  /*FALLTHRU*/
725 	case 'A':
726 	case 'D':
727 	case 'F':
728 	case 'I':
729 	case 'L':
730 get_w_d:				/* Get the w field if any */
731 	  while( isadigit(curr_char) ){
732 	    if(src_text_len < MAX_SRC_TEXT)
733 	      src_text_buf[src_text_len++] = curr_char;
734 	    bi_advance();
735 	  }
736 			/* Include any dot followed by number (e.g. F10.5)
737 			*/
738 	  if( curr_char == '.' ) {
739 	    do {
740 	      if(src_text_len < MAX_SRC_TEXT)
741 		src_text_buf[src_text_len++] = curr_char;
742 	      bi_advance();
743 	    } while( isadigit(curr_char) );
744 	  }
745 				/* w.d can sometimes be followed by Ee */
746 	  if( Ee_allowed && (c=makeupper(curr_char)) == 'E' ) {
747 	    if(src_text_len < MAX_SRC_TEXT)
748 	      src_text_buf[src_text_len++] = c;
749 	    bi_advance();
750 	    while( isadigit(curr_char) ){
751 	      if(src_text_len < MAX_SRC_TEXT)
752 		src_text_buf[src_text_len++] = curr_char;
753 	      bi_advance();
754 	    }
755 	  }
756 	  break;
757 
758 	default:
759 	  if(f77_format_extensions || f90_format_extensions) {
760 	    nonstandard(token->line_num,token->col_num,f90_format_extensions,0);
761 	    msg_tail(": edit descriptor");
762 	    src_text_buf[src_text_len++] = '\0';
763 	    msg_tail(src_text_buf);
764 	  }
765 	  goto get_w_d;
766       }/*end switch*/
767 
768       token->tclass = tok_edit_descriptor;
769       token->value.string = NULL;
770       token->src_text = new_src_text(src_text_buf,src_text_len);
771 
772 #ifdef DEBUG_FORLEX
773 if(debug_lexer)
774 (void)fprintf(list_fd,"\nEdit descriptor:\t%s",token->src_text);
775 #endif
776     }/*end else if isaletter*/
777 
778 			/* Apostrophe or quote mark means a string. */
779     else if( isaquote(curr_char) ) {
780       get_string(token);
781     }
782 				/* Otherwise it is mere punctuation. Handle
783 				   it here ourself to avoid complications. */
784     else {
785       src_text_buf[src_text_len++] = curr_char;
786       get_simple_punctuation(token);
787     }
788 }
789 
790 PRIVATE void
791 #if HAVE_STDC
get_hollerith(Token * token,int n)792 get_hollerith(Token *token, int n)  /* Gets string of form nHaaaa */
793 #else /* K&R style */
794 get_hollerith(token,n)  /* Gets string of form nHaaaa */
795 	Token *token;
796 	int n;
797 #endif /* HAVE_STDC */
798 {
799 	int i;
800 	LINENO_t last_line_num;
801 	COLNO_t last_col_num;
802 
803 		/* strsize = length of only the string being defined
804 		   fullsize = length of whole hollerith const, which includes
805 		   length spec already stored in src_text_buf plus the
806 		   H plus the text plus final nul. */
807 	int strsize=n,
808 	    leadin=src_text_len+1,
809 	    fullsize=leadin+strsize+1;
810 	char *s;
811 
812 	initial_flag = FALSE;
813 
814 	s = new_src_text_alloc(fullsize);
815 
816 	for(i=0; i<src_text_len; i++) /* Copy the leadin already saved */
817 	  s[i] = src_text_buf[i];
818 	s[i++] = 'H';		/* store the 'H' */
819 
820 	if(n==1)
821 	  inside_hollerith=FALSE;/* turn off flag ahead of next_char */
822 	advance();/* Gobble the 'H' */
823 
824 	last_col_num = col_num;
825 	last_line_num = line_num;
826 
827 	for(i=0; i<n; i++) {
828 	  while(curr_char == EOL) {
829 			/* Treat short line as if extended with blanks */
830 	    COLNO_t col;
831 	    for(col=last_col_num; i<n && col<(COLNO_t)max_stmt_col; i++,col++) {
832 		s[leadin+i] = ' ';
833 	    }
834 	    last_col_num = col_num;
835 	    advance();
836 	  }
837 	  if(i==n) break;
838 
839 	  if(curr_char == EOS || curr_char == EOF) {
840 	    COLNO_t col;
841 	    for(col=last_col_num; i<n && col<(COLNO_t)max_stmt_col; i++,col++) {
842 	      if(i < strsize)
843 		s[leadin+i] = ' ';
844 	    }
845 	    if(i < n) {		/* If it did not fill up */
846 	      syntax_error((LINENO_t)last_line_num,(COLNO_t)last_col_num,
847 			   "Hollerith constant ends prematurely");
848 	      strsize=i;
849 	    }
850 	    break;
851 	  }
852 	  else {
853 	    s[leadin+i] = curr_char;
854 	    last_col_num = col_num;
855 	    last_line_num = line_num;
856 	    if(i==n-2)/* turn flag off ahead of next_char*/
857 	      inside_hollerith = FALSE;
858 	    advance();
859 	  }
860 	}
861 
862 	if(strsize > 0)
863 	  s[leadin+strsize] = '\0';
864 
865 	inside_hollerith = FALSE;
866 	token->tclass = tok_hollerith;
867 	token->value.string = s + leadin;
868 	token->size = n;
869 	token->src_text = s;
870 #ifdef DEBUG_FORLEX
871 	if(debug_lexer)
872 		(void)fprintf(list_fd,"\nHollerith:\t\t%s (from %s)",
873 			      token->value.string,
874 			      token->src_text);
875 #endif
876 
877 } /* get_hollerith */
878 
879 
880 
881 PRIVATE void
882 #if HAVE_STDC
get_illegal_token(Token * token)883 get_illegal_token(Token *token)	/* Handle an illegal input situation */
884 #else /* K&R style */
885 get_illegal_token(token)	/* Handle an illegal input situation */
886 	Token *token;
887 #endif /* HAVE_STDC */
888 {
889 	token->tclass = tok_illegal;
890 	token->src_text = new_src_text("",0);
891 #ifdef DEBUG_FORLEX
892 	if(debug_lexer)
893 	     (void)fprintf(list_fd,"\nILLEGAL TOKEN");
894 #endif
895 
896 } /* get_illegal_token */
897 
898 
899 
900 		/* Read a label from label field. */
901 PRIVATE void
902 #if HAVE_STDC
get_label(Token * token)903 get_label(Token *token)
904 #else /* K&R style */
905 get_label(token)
906 	Token *token;
907 #endif /* HAVE_STDC */
908 {
909     int value=0;
910     int space_seen=FALSE, has_embedded_space=FALSE;
911     if( !free_form ) {
912 	while( isadigit(curr_char) && col_num < 6 ) {
913 	  if(space_seen)
914 	    has_embedded_space = TRUE;
915 	  value = value*10 + BCD(curr_char);
916 	  src_text_buf[src_text_len++] = curr_char;
917 	  advance();
918 	  while(curr_char==' ' && col_num < 6) {
919 	    space_seen = TRUE;
920 	    advance();
921 	  }
922 	}
923 	if((pretty_extra_space || (free_form && f90_freeform_space))
924 	   && has_embedded_space) {
925 	      space_violation(token->line_num,token->col_num,
926 			"label has embedded space");
927 	}
928     }
929     else {			/* free form */
930 	int numdigits=0;
931 	while( isadigit(curr_char) ) {
932 	    value = value*10 + BCD(curr_char);
933 	    if(src_text_len < MAX_SRC_TEXT)
934 		src_text_buf[src_text_len++] = curr_char;
935 	    ++numdigits;
936 	    advance();
937 	}
938 				/* label can have only up to 5 digits */
939 	if( numdigits > 5 && misc_warn) {
940 	    syntax_error(token->line_num,token->col_num,
941 			 "statement label exceeds 5 digits");
942 	}
943     }
944     token->tclass = tok_label;
945     token->value.integer = value;
946     token->src_text = new_src_text(src_text_buf,src_text_len);
947 #ifdef DEBUG_FORLEX
948 	if(debug_lexer)
949 		(void)fprintf(list_fd,"\nLabel:\t\t\t%d (from %s)",
950 			      value,
951 			      token->src_text);
952 #endif
953 
954 } /* get_label */
955 
956 
957 PRIVATE void
958 #if HAVE_STDC
get_letter(Token * token)959 get_letter(Token *token)		/* Gets letter in IMPLICIT list */
960 #else /* K&R style */
961 get_letter(token)		/* Gets letter in IMPLICIT list */
962 	Token *token;
963 #endif /* HAVE_STDC */
964 {
965 	token->tclass = tok_letter;
966 	token->tsubclass = src_text_buf[src_text_len++] = makeupper(curr_char);
967 	token->src_text = new_src_text(src_text_buf,src_text_len);
968 
969 #ifdef DEBUG_FORLEX
970     if(debug_lexer)
971 	(void)fprintf(list_fd,"\nLetter:\t\t\t%s",token->src_text);
972 #endif
973 
974 	advance();
975 
976 } /* get_letter */
977 
978 
979 	/* get_number reads a number and determines data type: integer,
980 	 * real, or double precision.
981 	 */
982 /* This belongs in ftnchek.h, perhaps.  Defines number of significant
983    figures that are reasonable for a single-precision real constant.
984    Works out to 9 for wordsize=4, 21 for wordsize=8. These allow
985    for a couple of extra digits for rounding. Used in -trunc warning. */
986 #define REAL_SIGFIGS (local_wordsize==0? 8: (local_wordsize-1)*3)
987 
988 PRIVATE int getting_complex_const=FALSE;
989 
990 PRIVATE void
991 #if HAVE_STDC
get_number(Token * token)992 get_number(Token *token)
993 #else /* K&R style */
994 get_number(token)
995 	Token *token;
996 #endif /* HAVE_STDC */
997 {
998 	DBLVAL dvalue,leftside,rightside,pwr_of_ten;
999 	int exponent,datatype,c;
1000 #ifdef DEBUG_FORLEX
1001 	int expsign;
1002 #endif
1003 	int numdigits,	/* Count of digits in integer, significant or not */
1004 	    sigfigs;	/* Count of significant digits */
1005 
1006 			/* For freeform warnings, this gets set when we
1007 			   arrive here via a leading '.', otherwise is false.
1008 			 */
1009 	int space_seen_lately = closeup_saw_whitespace;
1010 	int has_embedded_space = FALSE;
1011 
1012 	initial_flag = FALSE;
1013 
1014 	leftside = (DBLVAL)0;
1015 	numdigits = sigfigs = 0;
1016 	datatype = tok_integer_const;
1017 	while(isadigit(curr_char)) {
1018 		if(space_seen_lately)
1019 		  has_embedded_space = TRUE;
1020 		leftside = leftside*(DBLVAL)10 + (DBLVAL)BCD(curr_char);
1021 		++numdigits;
1022 			/* Do not count leading zeroes as significant */
1023 		if(sigfigs > 0 || curr_char != '0')
1024 		  ++sigfigs;
1025 		if( !integer_context && makeupper(next_char) == 'H' )
1026 		  inside_hollerith = TRUE;/* get ready for hollerith*/
1027 
1028 		if(src_text_len < MAX_SRC_TEXT)
1029 		  src_text_buf[src_text_len++] = curr_char;
1030 				/* Embedded space is worth preserving since
1031 				   it is often used in long numbers.  Any
1032 				   amount of blanks + tabs -> 1 blank.
1033 				   Exception: integer_context says upcoming
1034 				   item is a label or datatype length spec. */
1035 		if(! integer_context &&
1036 		   (next_char == ' ' || next_char == '\t')) {
1037 		  if(src_text_len < MAX_SRC_TEXT)
1038 		    src_text_buf[src_text_len++] = ' ';
1039 		}
1040 
1041 		bi_advance();
1042 		space_seen_lately = iswhitespace(prev_char);
1043 	}
1044 
1045 		/* If context specifies integer expected, skip to end.
1046 		   Otherwise scan on ahead for more. */
1047     if( integer_context) {
1048         if(numdigits == 0) {
1049 	    lex_error("integer expected");
1050 	    advance();	/* gobble something to avoid infinite loop */
1051 	}
1052     }
1053     else {/* not integer_context */
1054 	if( makeupper(curr_char) == 'H' ){      /* nnH means hollerith */
1055 		if(leftside == (DBLVAL)0) {
1056 			lex_error("Zero-length hollerith constant");
1057 			inside_hollerith = FALSE;
1058 			advance();
1059 			get_illegal_token(token);
1060 		}
1061 		else {
1062 			if(src_text_buf[src_text_len-1] == ' ')
1063 			  --src_text_len;
1064 			get_hollerith(token, (int)leftside);
1065 		}
1066 		return;
1067 	}
1068 
1069 	rightside = (DBLVAL)0;
1070 	pwr_of_ten = (DBLVAL)1;
1071 	closeup();		/* Pull in the lookahead character */
1072 
1073 	if( curr_char == '.' &&
1074 				/* don't be fooled by 1.eq.N or
1075 				   I.eq.1.and. etc */
1076 	   !looking_at_relop() ) {
1077 		datatype = tok_real_const;
1078 		if( space_seen_lately )
1079 		    has_embedded_space = TRUE;
1080 		if(numdigits > 0) /* if dot is initial it is already stored */
1081 		  if(src_text_len < MAX_SRC_TEXT)
1082 		    src_text_buf[src_text_len++] = curr_char;
1083 		bi_advance();
1084 		space_seen_lately = closeup_saw_whitespace || iswhitespace(prev_char);
1085 		closeup_saw_whitespace = FALSE;
1086 
1087 		while(isadigit(curr_char)) {
1088 			if( space_seen_lately )
1089 			    has_embedded_space = TRUE;
1090 			rightside = rightside*(DBLVAL)10 + (DBLVAL)BCD(curr_char);
1091 			++numdigits; /* not used past here, but maintain it anyway */
1092 			if(sigfigs > 0 || curr_char != '0')
1093 			  ++sigfigs;
1094 			pwr_of_ten /= (DBLVAL)10;
1095 
1096 			if(src_text_len < MAX_SRC_TEXT)
1097 			  src_text_buf[src_text_len++] = curr_char;
1098 			if(next_char == ' ' || next_char == '\t')
1099 			  if(src_text_len < MAX_SRC_TEXT)
1100 			    src_text_buf[src_text_len++] = ' ';
1101 
1102 			bi_advance();
1103 			space_seen_lately = iswhitespace(prev_char);
1104 		}
1105 	}
1106 #ifdef DEBUG_FORLEX
1107 if(debug_lexer)
1108 	dvalue = leftside + rightside*pwr_of_ten;
1109 else
1110 #endif
1111 	dvalue = (DBLVAL)0;
1112 
1113 	exponent = 0;
1114 #ifdef DEBUG_FORLEX
1115 	expsign = 1;
1116 #endif
1117 		/* Integer followed by E or D gives a real/d.p constant.
1118 		   We also accept Q for quad (real*16) constants. */
1119 
1120 	space_seen_lately = space_seen_lately || closeup_saw_whitespace;
1121 
1122 	if( ( (c = makeupper(curr_char)) == 'E' || c == 'D' || c == 'Q') )
1123 	{
1124 		datatype = ((c == 'E')? tok_real_const:
1125 			    ((c == 'D')? tok_dp_const:
1126 			    tok_quad_const));
1127 		if( space_seen_lately )
1128 		  has_embedded_space = TRUE;
1129 		if(src_text_len < MAX_SRC_TEXT)
1130 		  src_text_buf[src_text_len++] = c;
1131 		bi_advance();
1132 		space_seen_lately = iswhitespace(prev_char);
1133 		if(curr_char == '+') {
1134 #ifdef DEBUG_FORLEX
1135 			expsign = 1;
1136 #endif
1137 			if(src_text_len < MAX_SRC_TEXT)
1138 			  src_text_buf[src_text_len++] = curr_char;
1139 			bi_advance();
1140 			space_seen_lately = space_seen_lately || iswhitespace(prev_char);
1141 		}
1142 		else if(curr_char == '-') {
1143 #ifdef DEBUG_FORLEX
1144 			expsign = -1;
1145 #endif
1146 			if( iswhitespace(prev_char) )
1147 			  has_embedded_space = TRUE;
1148 			if(src_text_len < MAX_SRC_TEXT)
1149 			  src_text_buf[src_text_len++] = curr_char;
1150 			bi_advance();
1151 			space_seen_lately = space_seen_lately || iswhitespace(prev_char);
1152 		}
1153 		if(!isadigit(curr_char)) {
1154 			lex_error("Badly formed real constant");
1155 		}
1156 		else while(isadigit(curr_char)) {
1157 			if( space_seen_lately )
1158 			  has_embedded_space = TRUE;
1159 			exponent = exponent*10 + (curr_char-'0');
1160 			if(src_text_len < MAX_SRC_TEXT)
1161 			  src_text_buf[src_text_len++] = curr_char;
1162 			bi_advance();
1163 			space_seen_lately = iswhitespace(prev_char);
1164 		}
1165 
1166 	/*  Compute real value only if debugging. If it exceeds max magnitude,
1167 	    computing it may cause crash. At this time, value of real const
1168 	    is not used for anything. */
1169 #ifdef DEBUG_FORLEX
1170 if(debug_lexer)
1171 		  dvalue *= pow(10.0, (double)(exponent*expsign));
1172 else
1173 #endif
1174 		  dvalue = (DBLVAL)0;
1175 
1176 	}
1177     }/* end if(!integer_context) */
1178 
1179         if(src_text_buf[src_text_len-1] == ' ')	/* remove any trailing blank */
1180 	  --src_text_len;
1181 
1182 	token->tclass = datatype;
1183 				/* If this is part of complex const,
1184 				   do not store src_text but arrange
1185 				   so debugging works. */
1186 	if(!getting_complex_const) {
1187 	  token->src_text = new_src_text(src_text_buf,src_text_len);
1188 	}
1189 #ifdef DEBUG_FORLEX
1190 	  else {
1191 	    src_text_buf[src_text_len] = '\0';
1192 	    token->src_text = src_text_buf;
1193 	  }
1194 #endif
1195 
1196 	if( free_form && (pretty_extra_space || f90_freeform_space)
1197 	     && has_embedded_space ) {
1198 	      space_violation(token->line_num,token->col_num,
1199 			"Numeric constant has embedded space");
1200 	}
1201 
1202 	switch(datatype) {
1203 	   case tok_integer_const:
1204 		token->value.integer = (long)leftside;
1205 #ifdef DEBUG_FORLEX
1206 if(debug_lexer)
1207 (void)fprintf(list_fd,"\nInteger const:\t\t%ld (from %s)",
1208 	      token->value.integer,
1209 	      token->src_text);
1210 #endif
1211 		break;
1212 	   case tok_real_const:
1213 			/* store single as double lest it overflow */
1214 		token->value.dbl = dvalue;
1215 		if(trunc_sigfigs && sigfigs >= REAL_SIGFIGS) {
1216 		  warning(token->line_num,token->col_num,
1217 	"Single-precision real constant has more digits than are stored");
1218 		}
1219 #ifdef DEBUG_FORLEX
1220 if(debug_lexer)
1221 (void)fprintf(list_fd,"\nReal const:\t\t%g (from %s)",
1222 	      (double)token->value.dbl,
1223 	      token->src_text);
1224 #endif
1225 		break;
1226 	   case tok_dp_const:
1227 		token->value.dbl = dvalue;
1228 #ifdef DEBUG_FORLEX
1229 if(debug_lexer)
1230 (void)fprintf(list_fd,"\nDouble const:\t\t%g (from %s)",
1231 	      (double)token->value.dbl,
1232 	      token->src_text);
1233 #endif
1234 		break;
1235 	   case tok_quad_const:
1236 			/* store quad as double in case host doesn't do quad */
1237 		token->value.dbl = dvalue;
1238 #ifdef DEBUG_FORLEX
1239 if(debug_lexer)
1240 (void)fprintf(list_fd,"\nQuad const:\t\t%g (from %s)",
1241 	      (double)token->value.dbl,
1242 	      token->src_text);
1243 #endif
1244 		break;
1245 	}
1246 
1247 } /* get_number */
1248 
1249      /* get_complex_constant reads an entity of the form (num,num)
1250       where num is any [signed] numeric constant.  It will only be
1251       called when looking_at() has guaranteed that there is one there.
1252       The token receives the real part as a number.  The imaginary part
1253       is not stored.  Whitespace is allowed between ( and num, around
1254       the comma, and between num and ) but not within num. */
1255 
1256 PRIVATE void
1257 #if HAVE_STDC
get_complex_const(Token * token)1258 get_complex_const(Token *token)
1259 #else /* K&R style */
1260 get_complex_const(token)
1261 	Token *token;
1262 #endif /* HAVE_STDC */
1263 {
1264 	Token imag_part;	/* temporary to hold imag part */
1265 #ifdef DEBUG_FORLEX
1266 	double sign=(DBLVAL)1;
1267 #endif
1268 	int dble_size=FALSE;	/* flag to set if parts are D floats */
1269 	int imag_dble_size;	/* if imaginary part D float */
1270 	LINENO_t comma_line_num;
1271 	COLNO_t comma_col_num;
1272 	getting_complex_const = TRUE;
1273 	initial_flag = FALSE;
1274 
1275 
1276 
1277 	bi_advance();	/* skip over the initial paren (already stored) */
1278 
1279 
1280 	if(curr_char == '+' || curr_char == '-') {
1281 #ifdef DEBUG_FORLEX
1282 	  if(curr_char == '-') sign = (DBLVAL)(-1);
1283 #endif
1284 	  if(src_text_len < MAX_SRC_TEXT)
1285 	    src_text_buf[src_text_len++] = curr_char;
1286 
1287 	  bi_advance();
1288 	}
1289 
1290 #ifdef DEBUG_FORLEX
1291 if(debug_lexer){
1292 (void)fprintf(list_fd,"\nComplex const:(");
1293 if(sign < 0.0) (void)fprintf(list_fd," -");
1294 }
1295 #endif
1296 	closeup_saw_whitespace = FALSE;
1297 	get_number(token);
1298 	switch((short)token->tclass) {
1299 	   case tok_integer_const:
1300 #ifdef DEBUG_FORLEX
1301 if(debug_lexer)
1302 		token->value.dbl = sign*(double)token->value.integer;
1303 else
1304 #endif
1305 		token->value.dbl = (DBLVAL)0;
1306 		break;
1307 	   case tok_dp_const:
1308 		dble_size=TRUE;
1309 			/*FALLTHRU*/
1310 	   case tok_real_const:
1311 #ifdef DEBUG_FORLEX
1312 if(debug_lexer)
1313 		token->value.dbl = sign*token->value.dbl;
1314 else
1315 #endif
1316 		token->value.dbl = (DBLVAL)0;
1317 		break;
1318 	}
1319 
1320 	while(iswhitespace(curr_char))
1321 	  advance();
1322 
1323 
1324 	comma_line_num = line_num;
1325 	comma_col_num = col_num;
1326 
1327 	if(src_text_len < MAX_SRC_TEXT)
1328 	  src_text_buf[src_text_len++] = curr_char;
1329 	if(next_char == ' ' || next_char == '\t') /* preserve space after , */
1330 	  if(src_text_len < MAX_SRC_TEXT)
1331 	    src_text_buf[src_text_len++] = ' ';
1332 
1333 	bi_advance();		/* skip over the comma */
1334 
1335 	if(curr_char == '+' || curr_char == '-') {
1336 #ifdef DEBUG_FORLEX
1337 	     if(curr_char == '-') sign = (DBLVAL)(-1);
1338 #endif
1339 	     if(src_text_len < MAX_SRC_TEXT)
1340 		src_text_buf[src_text_len++] = curr_char;
1341 
1342 	     bi_advance();
1343 	}
1344 #ifdef DEBUG_FORLEX
1345 if(debug_lexer){
1346 (void)fprintf(list_fd,"\n,");
1347 if(sign < 0.0) (void)fprintf(list_fd," -");
1348 }
1349 #endif
1350 	closeup_saw_whitespace = FALSE;
1351 		/* Initialize imag_part token fields. */
1352 	zero_struct(&imag_part,sizeof(imag_part));
1353 	imag_part.line_num = line_num;
1354 	imag_part.col_num = col_num;
1355 	get_number(&imag_part);
1356 	imag_dble_size = (imag_part.tclass == tok_dp_const);
1357 
1358 	if(dble_size != imag_dble_size) {
1359 	    warning(comma_line_num,comma_col_num,
1360 		  "different precision in real and imaginary parts");
1361 	}
1362 	else if(f77_double_complex) {
1363 	  if(dble_size)
1364 	    warning(token->line_num,token->col_num,
1365 		  "nonstandard double precision complex constant");
1366 	}
1367 
1368 	dble_size = (dble_size || imag_dble_size);
1369 
1370 	while(iswhitespace(curr_char))
1371 	   advance();
1372 
1373 
1374 	if(src_text_len < MAX_SRC_TEXT)
1375 	  src_text_buf[src_text_len++] = curr_char;
1376 
1377 	advance();	/* skip over final paren */
1378 
1379 	if(dble_size)
1380 	  token->tclass = tok_dcomplex_const;
1381 	else
1382 	  token->tclass = tok_complex_const;
1383 
1384 	token->src_text = new_src_text(src_text_buf,src_text_len);
1385 
1386 #ifdef DEBUG_FORLEX
1387 if(debug_lexer) {
1388 (void)fprintf(list_fd,"\n\t\t\tsource text=%s",
1389 	      token->src_text);
1390 (void)fprintf(list_fd,"\n)");
1391 }
1392 #endif
1393 
1394 	getting_complex_const = FALSE;
1395 }
1396 
1397 #ifdef ALLOW_TYPELESS_CONSTANTS
1398 		/* Routine to get constants of the forms:
1399 		     B'nnnn' (f90std)  'nnnn'B (nonf90)  -- binary
1400 		     O'nnnn' (f90std)  'nnnn'O (nonf90)  -- octal
1401 		     Z'nnnn' (f90std)  X'nnnn' 'nnnn'X 'nnnn'Z (nonf90) -- hex
1402 		   No check of whether digits are less than base.
1403 		   Warning is issued here instead of in parser since constant
1404 		   looks like a normal integer by the time the parser sees it.
1405 		 */
1406 void
get_binary_const(Token * token,int c,int space_seen_lately)1407 get_binary_const(Token *token, int c, int space_seen_lately)
1408 			/* c is base character: madeupper'ed by caller */
1409 {
1410   long value=0;
1411   int base,digit;
1412   int badly_formed=FALSE;
1413   int i,j;			/* indices in src_text_buf for repacking */
1414 
1415   if(c == 'O')  base = 8;
1416   else if(c == 'X' || c == 'Z')  base = 16;
1417   else if(c == 'B') base = 2;
1418   else {
1419     syntax_error(token->line_num,token->col_num,
1420 		 "Unknown base for typeless constant -- octal assumed");
1421     base = 8;
1422   }
1423 				/* F90 allows initial B, O, Z but not X */
1424   if( c == 'X' && f90_typeless_constants ) {
1425     nonstandard(token->line_num,token->col_num,f90_typeless_constants,0);
1426   }
1427 
1428 				/* Advance i to starting digit */
1429   i = 0;
1430   while( ! isaquote(src_text_buf[i]) ) {
1431     ++i;
1432   }
1433   j = ++i;	/* Input = Output to start */
1434 
1435 				/* Scan the string, moving chars down
1436 				   to change multi spaces to single
1437 				   blanks, and converting digits. */
1438   while( ! isaquote(src_text_buf[i]) ) {
1439     digit=src_text_buf[i++];
1440     if( ishex(digit) ){
1441       value = value*base + HEX(digit);
1442       src_text_buf[j++] = digit;
1443     }
1444     else {			/* Anything else should be space */
1445       if( isspace(digit) ) {
1446 	if( free_form )
1447 	    space_seen_lately = TRUE; /* blanks not OK in free form */
1448 	src_text_buf[j++] = ' ';
1449 	while( isspace(src_text_buf[i]) ) {
1450 	  ++i;
1451 	}
1452       }
1453       else {
1454 	  badly_formed = TRUE;
1455       }
1456     }
1457   }
1458   if( badly_formed ) {
1459       syntax_error(token->line_num,token->col_num,
1460 		   "badly formed typeless constant");
1461   }
1462   else if((pretty_extra_space || (free_form && f90_freeform_space))
1463 	  && space_seen_lately) {
1464       space_violation(token->line_num,token->col_num,
1465 		"typeless constant has embedded space");
1466   }
1467 
1468   while(i < src_text_len)
1469     src_text_buf[j++] = src_text_buf[i++]; /* Copy the rest over */
1470 
1471   src_text_len = j;
1472 
1473   token->tclass = tok_integer_const;
1474   token->value.integer = value;
1475   token->src_text = new_src_text(src_text_buf,src_text_len);
1476 
1477   if(f77_typeless_constants) {
1478     nonstandard(token->line_num,token->col_num,0,0);
1479   }
1480 
1481 #ifdef DEBUG_FORLEX
1482 if(debug_lexer)
1483 (void)fprintf(list_fd,"\nInteger const:\t\t%ld (from %s)",
1484 	      token->value.integer,
1485 	      token->src_text);
1486 #endif
1487 
1488 }/*get_binary_const*/
1489 
1490 #endif/*ALLOW_TYPELESS_CONSTANTS*/
1491 
1492 
1493 PRIVATE void
1494 #if HAVE_STDC
get_punctuation(Token * token)1495 get_punctuation(Token *token)
1496 #else /* K&R style */
1497 get_punctuation(token)
1498 	Token *token;
1499 #endif /* HAVE_STDC */
1500 {
1501 	int multichar,	   /* Flags To catch spaces inside multi-char token */
1502 	    space_seen_lately;
1503 	extern int in_attrbased_typedecl;	/* shared with fortran.y */
1504 	multichar = FALSE;
1505 
1506 	src_text_buf[src_text_len++] = curr_char;
1507 
1508 		/* If lexing attr-based type decl, turn off the flag when
1509 		   the double colon is reached...
1510 		 */
1511 	if( in_attrbased_typedecl && curr_char == ':' ) {
1512 	  in_attrbased_typedecl = FALSE;
1513 	}
1514 		/* ...whereas turn initial_flag back on if a comma is found. */
1515 	if( in_attrbased_typedecl && curr_char == ',' ) {
1516 	  initial_flag = TRUE;
1517 	}
1518 
1519 	if( !in_attrbased_typedecl )
1520 	  initial_flag = FALSE;
1521 
1522 	space_seen_lately = iswhitespace(next_char);
1523 
1524 	closeup();
1525 
1526 	if(curr_char == '*' && next_char == '*') {
1527 		token->tclass = tok_power;
1528 		multichar = TRUE;
1529 		advance();
1530 		src_text_buf[src_text_len++] = curr_char;
1531 	}
1532 	else if(curr_char == '/' && next_char == '/' ) {
1533 		/* If this is COMMON / / list, then embedded space is OK */
1534 		if( prev_token_class == tok_COMMON ) {
1535 		    space_seen_lately = FALSE;
1536 		}
1537 			/* Otherwise it is concatenation operator */
1538 		else {
1539 		    extern int in_assignment_stmt;
1540 				/* for obscure rule check */
1541 		    if(in_assignment_stmt)
1542 			make_true(IN_ASSIGN,token->TOK_flags);
1543 		}
1544 		token->tclass = tok_concat;
1545 		multichar = TRUE;
1546 		advance();
1547 		src_text_buf[src_text_len++] = curr_char;
1548 	}
1549                                 /* recognize F90 pointer assignment => */
1550         else if(curr_char == '=' && next_char == '>') {
1551                 token->tclass = tok_pointer_assignment;
1552                 multichar = TRUE;
1553                 advance();
1554 		src_text_buf[src_text_len++] = curr_char;
1555 	}
1556 				/* recognize F90 rel-ops here */
1557 	else if( f90_relop(token,&multichar) ) {
1558 	  token->tclass = tok_relop;
1559 	  if(f77_relops) {
1560 	    nonstandard(token->line_num,token->col_num,0,0);
1561 	    msg_tail("for relational operator");
1562 	  }
1563 	}
1564 		/* paren can be the start of complex constant if everything
1565 		   is just right. Maybe more tests needed here. */
1566 	else if(complex_const_allowed && curr_char == '(' &&
1567 	     (  (prev_token_class<256 && ispunct(prev_token_class))
1568 	      || prev_token_class == tok_relop
1569 	      || prev_token_class == tok_power )
1570 	     && looking_at_cplx()) {
1571 		get_complex_const(token);
1572 		return;
1573 	}
1574 	else {
1575 			/* Provide special left parenthesis to avoid s/r
1576 			   conflict in grammar.
1577 			 */
1578 	  if( need_special_lparen ) {
1579 	      /* ASSERT ( curr_char == '(' ) */
1580 	    token->tclass = tok_lparen;
1581 	    need_special_lparen = FALSE;
1582 	  }
1583 	  else {
1584 	    token->tclass = curr_char;
1585 	  }
1586 	}
1587 
1588 	token->src_text = new_src_text(src_text_buf,src_text_len);
1589 
1590 	if((pretty_extra_space || (free_form && f90_freeform_space))
1591 	   && multichar && space_seen_lately) {
1592 	      space_violation(token->line_num,token->col_num,
1593 			"multi-character operator has embedded space");
1594 	}
1595 
1596 	advance();
1597 
1598 #ifdef DEBUG_FORLEX
1599 if(debug_lexer) {
1600 	if(token->tclass == EOS)
1601 		(void)fprintf(list_fd,"\n\t\t\tEOS");
1602 	else {
1603 		(void)fprintf(list_fd,"\nPunctuation:\t\t");
1604 		if(token->tclass == tok_lparen)
1605 		    (void)fprintf(list_fd,"special ");
1606 		(void)fprintf(list_fd,"%s",token->src_text);
1607 	}
1608  }
1609 #endif
1610 } /* get_punctuation */
1611 
1612 
1613 PRIVATE void
1614 #if HAVE_STDC
get_simple_punctuation(Token * token)1615 get_simple_punctuation(Token *token)
1616 #else /* K&R style */
1617 get_simple_punctuation(token)
1618 	Token *token;
1619 #endif /* HAVE_STDC */
1620 {
1621 		/* Like get_punctuation but lacks special cases.  Just
1622 		   gets the punctuation character. Text is already in
1623 		   src_text_buf. */
1624 
1625 	token->tclass = curr_char;
1626 	token->src_text = new_src_text(src_text_buf,src_text_len);
1627 	advance();
1628 #ifdef DEBUG_FORLEX
1629 if(debug_lexer) {
1630 	if(token->tclass == EOS)
1631 		(void)fprintf(list_fd,"\n\t\t\tEOS");
1632 	else
1633 		(void)fprintf(list_fd,"\nPunctuation:\t\t%s",token->src_text);
1634 }
1635 #endif
1636 } /* get_simple_punctuation */
1637 
1638 PRIVATE int
1639 #if HAVE_STDC
f90_relop(Token * token,int * multichar)1640 f90_relop(Token *token, int *multichar)
1641 #else /* K&R style */
1642 f90_relop(token, multichar)
1643      Token *token;
1644      int *multichar;
1645 #endif /* HAVE_STDC */
1646 {
1647   *multichar = FALSE;
1648   if( curr_char == '>' ) {
1649     if( next_char == '=' ) {
1650       token->tsubclass = relop_GE;
1651       token->src_text = ">=";
1652       goto twochar_relop;
1653     }
1654     else {
1655       token->tsubclass = relop_GT;
1656       token->value.string = ">";
1657       return TRUE;
1658     }
1659   }
1660 
1661   if( curr_char == '<' ) {
1662     if( next_char == '=' ) {
1663       token->tsubclass = relop_LE;
1664       token->value.string = "<=";
1665       goto twochar_relop;
1666     }
1667     else {
1668       token->tsubclass = relop_LT;
1669       token->value.string = "<";
1670       return TRUE;
1671     }
1672   }
1673 
1674   if( curr_char == '=' && next_char == '=' ) {
1675       token->tsubclass = relop_EQ;
1676       token->value.string = "==";
1677       goto twochar_relop;
1678   }
1679 
1680   if( curr_char == '/'  && next_char == '=' ) {
1681       token->tsubclass = relop_NE;
1682       token->value.string = "/=";
1683       goto twochar_relop;
1684   }
1685 
1686   return FALSE;
1687 
1688 			/* Two-character relops: need to gobble 2nd char */
1689 twochar_relop:
1690   *multichar = TRUE;
1691   advance();
1692   src_text_buf[src_text_len++] = curr_char;
1693   return TRUE;
1694 }
1695 
1696 void
1697 #if HAVE_STDC
get_string(Token * token)1698 get_string(Token *token)       /* Gets string of form 'aaaa' */
1699 #else /* K&R style */
1700 get_string(token)       /* Gets string of form 'aaaa' */
1701 	Token *token;
1702 #endif /* HAVE_STDC */
1703 {
1704 	int len;
1705 	COLNO_t last_col_num;
1706 	int has_backslash = FALSE; /* for portability check */
1707 
1708 	quote_char = curr_char; /* remember the delimiter */
1709 	initial_flag = FALSE;
1710 	inside_string = TRUE;
1711 	last_col_num=col_num;
1712 	src_text_buf[src_text_len++] = curr_char; /* store leading quote */
1713 	advance();      /* Gobble leading quote */
1714 	len = 0;
1715 	for(;;) {
1716 		while(curr_char == EOL) {
1717 			/* Fixed form: treat short line as if extended with
1718 			   blanks to 72 columns.  Free form: line ends at EOL */
1719 		  if( ! free_form ) {
1720 		    COLNO_t col;
1721 		    for(col=last_col_num; col<max_stmt_col; col++) {
1722 
1723 		      if(src_text_len < MAX_SRC_TEXT)
1724 			src_text_buf[src_text_len++] = ' ';
1725 
1726 		      ++len;
1727 		    }
1728 		  }
1729 		  last_col_num=col_num;
1730 		  advance();
1731 		}
1732 		if(curr_char == EOS || curr_char == EOF) {
1733 			lex_error("Closing quote missing from string");
1734 			break;
1735 		}
1736 		if(curr_char == quote_char) {
1737 			inside_string = FALSE;/* assume so for now */
1738 
1739 /* If LEX_RAWSTRINGS defined, stores doubled quotes and final quote.
1740    Otherwise initial quote is stored and doubled quotes are reduced to one. */
1741 #ifdef LEX_RAWSTRINGS
1742 				/* Store the quote */
1743 			if(src_text_len < MAX_SRC_TEXT)
1744 			  src_text_buf[src_text_len++] = curr_char;
1745 #endif
1746 
1747 			last_col_num=col_num;
1748 			advance();
1749 
1750 			if(curr_char == quote_char){/* '' becomes ' in string */
1751 				inside_string = TRUE; /* not a closing quote */
1752 
1753 				if(src_text_len < MAX_SRC_TEXT)
1754 				  src_text_buf[src_text_len++] = curr_char;
1755 
1756 				++len;
1757 				last_col_num=col_num;
1758 				advance();
1759 			}
1760 			else {
1761 				break;  /* It was a closing quote after all */
1762 			}
1763 		}
1764 		else {		/* ordinary character within quotes */
1765 			int value=curr_char;
1766 
1767 			if(curr_char == '\\') {
1768 			  if(!has_backslash) {/* only warn once per string */
1769 			    if(port_backslash)
1770 			      nonportable(line_num,col_num,
1771 			   "backslash treated incompatibly by some compilers");
1772 			  }
1773 			  has_backslash = TRUE;
1774 
1775 #ifdef ALLOW_UNIX_BACKSLASH	/* This has problems: undigesting
1776 				   a string gets complicated. */
1777 			  if(source_unix_backslash) {
1778 			    if(f77_unix_backslash || f90_unix_backslash) {
1779 			      nonstandard(line_num,col_num,f90_unix_backslash,0);
1780 			      msg_tail(": backslash escape sequence");
1781 			    }
1782 #ifdef LEX_RAWSTRINGS
1783 				/* Store the backslash */
1784 			    if(src_text_len < MAX_SRC_TEXT)
1785 			      src_text_buf[src_text_len++] = curr_char;
1786 #endif
1787 			    inside_string = FALSE;/* so inline_comment works */
1788 			    advance(); /* gobble the backslash */
1789 			    inside_string = TRUE;
1790 #ifdef LEX_RAWSTRINGS
1791 			    value = curr_char;
1792 #else /* !LEX_RAWSTRINGS*/
1793 			    if(isadigit(curr_char)) { /* \octal digits */
1794 			      value = BCD(curr_char);
1795 			      while(isadigit(next_char)) {
1796 				advance();
1797 				value = value*8 + BCD(curr_char);
1798 			      }
1799 			    }
1800 			    else if(curr_char == 'x') {
1801 			      advance(); /* gobble the 'x' */
1802 			      value = HEX(curr_char);
1803 			      while(ishex(next_char)) {
1804 				advance();
1805 				value = value*16 + HEX(curr_char);
1806 			      }
1807 			    }/* end if octal or hex */
1808 			    else switch(curr_char) {
1809 #if __STDC__ + 0
1810 			      case 'a': value = '\a'; break; /* alarm */
1811 #else
1812 			      case 'a': value = '\007'; break; /* alarm */
1813 #endif
1814 			      case 'b': value = '\b'; break; /* backspace */
1815 			      case 'f': value = '\f'; break; /* formfeed */
1816 			      case 'n': value = '\n'; break; /* newline */
1817 			      case 'r': value = '\r'; break; /* carr return */
1818 			      case 't': value = '\t'; break; /* h tab */
1819 			      case 'v': value = '\v'; break; /* v tab */
1820 			      case EOS: value = '\n'; break; /* a no-no */
1821 				/* All others: \c --> c */
1822 			      default:  value = curr_char; break;
1823 			    }
1824 #endif /* !LEX_RAWSTRINGS*/
1825 			  }/* end if source_unix_backslash */
1826 #endif /*ALLOW_UNIX_BACKSLASH*/
1827 
1828 			}/* end if curr_char == backslash */
1829 
1830 			if(src_text_len < MAX_SRC_TEXT)
1831 			  src_text_buf[src_text_len++] = value;
1832 
1833 			++len;
1834 			last_col_num=col_num;
1835 			advance();
1836 		}
1837 	}
1838 
1839 #ifdef ALLOW_TYPELESS_CONSTANTS
1840 				/* Watch for const like 'nnn'X */
1841 	if(!inside_format) {
1842 	  int space_seen_lately = iswhitespace(curr_char);
1843 	  while(iswhitespace(curr_char))
1844 	    advance();
1845 	  if(isaletter(curr_char)) {
1846 	    int c=makeupper(curr_char);
1847 #ifndef LEX_RAWSTRINGS
1848 	    if(src_text_len < MAX_SRC_TEXT)
1849 	      src_text_buf[src_text_len++] = quote_char;
1850 #endif
1851 	    if(src_text_len < MAX_SRC_TEXT)
1852 	      src_text_buf[src_text_len++] = c;
1853 	    advance();		/* Gobble the base character */
1854 
1855 			/* F90 does not allow forms 'ddd'[BOZ].
1856 			   Suppress message here if letter is not in [BOZ]
1857 			   since that gets a warning in get_binary_const
1858 			*/
1859 	    if( f90_typeless_constants && (c=='Z' || c=='O' || c=='B') ) {
1860 	      nonstandard(token->line_num,token->col_num,f90_typeless_constants,0);
1861 	    }
1862 
1863 	    get_binary_const(token,c,space_seen_lately);
1864 	    return;
1865 	  }
1866 	}
1867 #endif /*ALLOW_TYPELESS_CONSTANTS*/
1868 
1869 	if(len == 0) {
1870 		warning(line_num,col_num,
1871 			"Zero-length string not allowed\n");
1872 		len = 1;
1873 	}
1874 
1875 	if(quote_char != '\'') { /* Warn if quote is used instead of apost */
1876 	  if(f77_quotemarks) {
1877 	    nonstandard(token->line_num,token->col_num,0,0);
1878 	    msg_tail(": character string should be delimited by apostrophes");
1879 	  }
1880 	}
1881 
1882 	inside_string = FALSE;
1883 
1884 	token->tclass = tok_string;
1885 	token->size = len;
1886 	token->src_text = new_src_text(src_text_buf,src_text_len);
1887 #ifdef LEX_RAWSTRINGS
1888 	token->value.string = token->src_text; /* Includes the initial quote */
1889 #else
1890 	token->value.string = token->src_text+1; /* Skips the initial quote */
1891 #endif
1892 				/* Under -port warn if char size > 255 */
1893 	if(port_long_string) {
1894 	  if(len > 255)
1895 	    nonportable(line_num,col_num,
1896 			"character constant length exceeds 255");
1897 	}
1898 
1899 #ifdef DEBUG_FORLEX
1900 	if(debug_lexer
1901 	   && src_text_buf[0] == quote_char) { /* skip if doing X'nnnn' */
1902 		(void)fprintf(list_fd,"\nString:\t\t\t%s",token->value.string);
1903 		(void)fprintf(list_fd,"\n\t\t(from\t%s)",token->src_text);
1904 	}
1905 #endif
1906 
1907 } /* get_string */
1908 
1909 
1910 		/* This routine is called when -pretty=extra-space or
1911 		   missing-space are in effect, or when in free form
1912 		   mode.  It figures out the right kind of warning to issue.
1913 		*/
space_violation(LINENO_t lineno,COLNO_t colno,const char * s)1914 void space_violation( LINENO_t lineno, COLNO_t colno, const char *s )
1915 {
1916     if(free_form && f90_freeform_space) {
1917 	syntax_error(lineno,colno,s);
1918     }
1919     else {
1920 	ugly_code(lineno,colno,s);
1921     }
1922 }
1923 /* End of Forlex module */
1924