1 /* $Id: forlex.c,v 1.46 2004/12/31 17:22:16 moniot Exp $
2
3 Tokenizing routines for Fortran program checker.
4 */
5
6 /*
7
8
9 Copyright (c) 2001 by Robert K. Moniot.
10
11 Permission is hereby granted, free of charge, to any person
12 obtaining a copy of this software and associated documentation
13 files (the "Software"), to deal in the Software without
14 restriction, including without limitation the rights to use,
15 copy, modify, merge, publish, distribute, sublicense, and/or
16 sell copies of the Software, and to permit persons to whom the
17 Software is furnished to do so, subject to the following
18 conditions:
19
20 The above copyright notice and this permission notice shall be
21 included in all copies or substantial portions of the
22 Software.
23
24 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
25 KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
26 WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
27 PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
28 COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
29 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
30 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
31 SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
32
33 Acknowledgement: the above permission notice is what is known
34 as the "MIT License."
35 */
36
37
38 #include <stdio.h>
39 #include <ctype.h>
40 #include <string.h>
41
42 /* Some older mac compilers need compat.h to use memset. If
43 needed, add `` -d MAC_MPW '' to compilation options.
44 */
45 #ifdef MAC_MPW
46 #include <compat.h>
47 #endif
48
49 #include "ftnchek.h"
50 #define FORLEX
51 #include "symtab.h"
52 #include "tokdefs.h"
53 #include "forlex.h"
54
55 /* toascii() is widely supported, but in case it isn't, define it here.
56 We need it mainly in order to avoid bounds violation in legal_chars array.
57 On EBCDIC systems, toascii() should convert from an EBCDIC code
58 to the ASCII code for the same character, because the legal_chars array is
59 ordered according to the ASCII collating sequence.
60 */
61 #ifndef toascii
62 #define toascii(C) ((C) & 0177)
63 #endif
64
65 extern int complex_const_allowed, /* shared flags operated by fortran.y */
66 inside_format,
67 integer_context;
68
69
70
71 PROTO(PRIVATE void make_legal_char,( char *s ));
72
73
74
75
76 /*
77
78 Part I. yylex()
79
80 Shared functions defined:
81 yylex() Returns next token. Called from yyparse().
82 implied_id_token(t,s) Creates token for blank common declaration.
83 get_binary_const(t, c ) Creates token for binary constant
84 get_string(t) Creates token for a string
85
86 Note: compilation options LEX_STORE_STRINGS and LEX_STORE_HOLLERITHS:
87 Define the macro name LEX_STORE_STRINGS to build a version of ftnchek that
88 stores string constants, and LEX_STORE_HOLLERITHS to store hollerith
89 constants. Now that INCLUDE statements are supported, strings must
90 be stored. Holleriths are not used, so they need not be stored.
91 */
92 #ifndef LEX_STORE_STRINGS
93 #define LEX_STORE_STRINGS
94 #endif
95
96 #ifdef DEBUG_FORLEX /* For maintaining the program */
97 #define LEX_STORE_HOLLERITHS
98 #endif
99
100 #ifdef DEBUG_FORLEX
101 #include <math.h> /* Only used for pow() in debug mode */
102 #endif
103
104 PRIVATE int closeup_saw_whitespace;
105
106 /* The following macro says whether a given character is legal,
107 * i.e. one of the stream control chars or a valid ANSI Fortran
108 * character. Lower case letters are considered legal too.
109 * Nondigits in columns 1-6 (except EOF,EOS) are illegal in fixed form.
110 * Hopefully this works for EBCDIC too.
111 */
112 #define islegal(C) ( ((C) == EOF) || ((C) == EOS) || \
113 ( (col_num >= 6 || free_form || isdigit(C)) && \
114 (toascii((int)(C)) >= toascii(' ') && \
115 legal_chars[toascii((int)(C))-toascii(' ')] == (C))) )
116 /* Array has x where ASCII character is not valid.
117 This defn is not exactly standard f77, since it includes
118 supported extensions: $ in format,
119 <> in variable formats, and " in strings.
120 Since strlen of array is 96, indexing by [toascii(c)-32]
121 is always in bounds.
122 */
123 PRIVATE char f77_legal_chars[]=
124 " x\"x$xx'()*+,-./0123456789:x<=>xx\
125 ABCDEFGHIJKLMNOPQRSTUVWXYZxxxxxxabcdefghijklmnopqrstuvwxyzxxxxx";
126
127 /* This is the working copy of list of legal chars, with
128 < any chars in idletter_list made legal.
129 */
130 PRIVATE char legal_chars[sizeof(f77_legal_chars)];
131
132 /* Routine to fix up list of legal chars */
make_legal_char_list(VOID)133 void make_legal_char_list(VOID)
134 {
135 int i;
136 /* Start with the f77 list */
137 (void)strcpy(legal_chars,f77_legal_chars);
138
139 /* Verify idletter_list has only punctuation chars.
140 If violators, reset to default of "$_"
141 */
142 for(i=0; idletter_list[i] != '\0'; i++) {
143 if( !ispunct(idletter_list[i]) ) {
144 (void)fprintf(stderr,
145 "\n%cidentifier setting specifies invalid character %c: setting ignored",
146 #ifdef OPTION_PREFIX_SLASH
147 '/',
148 #else
149 '-',
150 #endif
151 idletter_list[i]);
152 idletter_list = DEF_IDLETTER_LIST; /* restore to default */
153 break;
154 }
155 }
156 /* Add nonstd nonalpha chars allowed in identifiers */
157 make_legal_char(idletter_list);
158
159 }
160
161
162
163
164 /* Routines to alter the default status of characters,
165 to support various extensions to f77. */
166
167 PRIVATE void
168 #if HAVE_STDC
make_legal_char(char * s)169 make_legal_char(char *s)
170 #else /* K&R style */
171 make_legal_char(s)
172 char *s; /* List of legal chars */
173 #endif /* HAVE_STDC */
174 {
175 int i;
176 while( *s != '\0' ) {
177 i = toascii((int)(*s));
178 if(i >= toascii(' ') && i <= toascii('~')) {
179 legal_chars[i-toascii(' ')] = *s;
180 }
181 s++;
182 }
183 }
184
185 #if 0
186 /* Routines to alter the default status of characters,
187 to support various extensions to f77. Not used now.*/
188
189 PROTO(void make_illegal_char,( char *s ));
190
191
192 void
193 make_illegal_char(s)
194 char *s; /* List of illegal chars */
195 {
196 int i;
197 while( *s != '\0' ) {
198 i = toascii((int)(*s));
199 if(i >= toascii(' ') && i <= toascii('~')) {
200 legal_chars[i-toascii(' ')] = ( (*s != 'x')? 'x': 'X');
201 }
202 s++;
203 }
204 }
205 #endif
206
207
208 /* local functions defined */
209
210
211 PROTO(PRIVATE void closeup,( void ));
212
213
214 PROTO(PRIVATE void get_complex_const,( Token *token ));
215
216 #ifdef ALLOW_UNIX_CPP
217 PROTO(PRIVATE void get_cpp_directive,( void ));
218 #endif
219
220 PROTO(PRIVATE void get_dot,( Token *token ));
221
222 PROTO(PRIVATE void get_dotted_keyword,( Token *token));
223
224 PROTO(PRIVATE void get_edit_descriptor,( Token *token ));
225
226 PROTO(PRIVATE void get_hollerith,( Token *token, int n ));
227
228 PROTO(PRIVATE void get_illegal_token,( Token *token ));
229
230 PROTO(PRIVATE void get_label,( Token *token ));
231
232 PROTO(PRIVATE void get_letter,( Token *token ));
233
234 PROTO(PRIVATE void get_number,( Token *token ));
235
236 PROTO(PRIVATE void get_punctuation,( Token *token ));
237
238 PROTO(PRIVATE void get_simple_punctuation,( Token *token ));
239
240 PROTO(PRIVATE int f90_relop, ( Token *token, int *multichar ) );
241
242 /* Define zero_struct to set a struct to zero. It works around the differing availability
243 of memset and bzero. This is used to initialize a token.
244 */
245 #if HAVE_MEMSET
246 #define zero_struct(sptr,struct_size) (void)memset((sptr),0,struct_size)
247 #else
248 #if HAVE_BZERO
249 #define zero_struct(sptr,struct_size) bzero((char *)(sptr),struct_size)
250 #else
251 PROTO(PRIVATE void zero_struct, (void *sptr, int struct_size) );
252 PRIVATE void
zero_struct(void * sptr,int struct_size)253 zero_struct(void *sptr, int struct_size)
254 {
255 int i;
256 for(i=0; i<struct_size; i++) {
257 ((char *)sptr)[i] = 0;
258 }
259 }
260 #endif
261 #endif
262
263 /* Gets next token for Yacc. Return value is token.class,
264 * and a copy of the token is stored in yylval.
265 */
266 int
yylex(VOID)267 yylex(VOID)
268 {
269 Token token;
270 extern int in_attrbased_typedecl; /* shared with fortran.y */
271
272 /* Initialize token fields to scratch. */
273 zero_struct(&token,sizeof(token));
274
275 src_text_len = 0;
276
277 if(curr_char == EOF) {
278 token.tclass = EOF;
279 token.line_num = line_num;
280 token.col_num = col_num;
281 }
282 else /* not EOF */ {
283
284
285 /* Skip leading spaces, and give error message if non-ANSI
286 * characters are found.
287 */
288
289 while(iswhitespace(curr_char) || (! islegal(curr_char)) ) {
290 if(!iswhitespace(curr_char)) {
291 #ifdef ALLOW_UNIX_CPP
292 if(curr_char == '#' && col_num == 1) {
293 get_cpp_directive(); /* turn # line into EOS */
294 break;
295 }
296 else
297 #endif
298 lex_error("Illegal character");
299 }
300 advance();
301 }
302
303 token.line_num = line_num;
304 token.col_num = col_num;
305
306 closeup_saw_whitespace = FALSE;
307
308 if(inside_format) { /* Handle format stuff here to avoid trouble */
309 get_edit_descriptor(&token);
310 }
311 else if(isadigit(curr_char)) {
312 /* Identify label:
313 Fixed form: Number in cols 1-5.
314 Free form: Number at start of statement.
315 */
316 if( (free_form)? (initial_flag && !in_attrbased_typedecl): (col_num < 6))
317 get_label(&token); /* Stmt label */
318 else
319 get_number(&token); /* Numeric or hollerith const */
320 }
321 else if(isidletter(curr_char)) {
322 if(implicit_letter_flag)
323 get_letter(&token); /* letter in IMPLICIT list */
324 else
325 get_identifier(&token); /* Identifier or keyword */
326 }
327 else if(isaquote(curr_char)) {
328 get_string(&token); /* Quoted string */
329 }
330 else if(curr_char == '.') {
331 get_dot(&token); /* '.' lead-in */
332 }
333 else {
334 get_punctuation(&token); /* Punctuation character or EOS */
335 }
336 }/*end not EOF*/
337
338 if(token.tclass == EOS) {
339 implicit_flag=FALSE; /* in case of errors, reset flags */
340 implicit_letter_flag = FALSE;
341 WHILE_expected = FALSE;
342 }
343
344
345 prev_token_class = token.tclass;
346
347 yylval = token;
348 return token.tclass;
349
350 } /* yylex */
351
352 /* closeup: Advances input stream till next_char is nonspace. Fudges
353 things so that curr_char remains as it was.
354 */
355 PRIVATE void
closeup(VOID)356 closeup(VOID)
357 {
358 int
359 save_curr_char = curr_char,
360 save_prev_char = prev_char;
361 LINENO_t
362 save_line_num = line_num;
363 COLNO_t
364 save_col_num = col_num;
365
366 int next_space = iswhitespace(next_char);
367
368 closeup_saw_whitespace = next_space; /* Record for free-format warnings */
369
370 while(next_space) {
371 advance();
372 next_space = iswhitespace(next_char);
373 }
374
375 curr_char = save_curr_char;
376 prev_char = save_prev_char;
377 line_num = save_line_num;
378 col_num = save_col_num;
379 }
380
381
382 /* Fills argument with token for an identifer, as if an identifer
383 * with name given by string s had been lexed. This will
384 * be called by parser when blank common declaration is seen,
385 * and when a main prog without program statement is found,
386 * and when an unnamed block data statement is found,
387 * so processing of named and unnamed cases can be handled uniformly.
388 */
389 void
390 #if HAVE_STDC
implied_id_token(Token * t,char * s)391 implied_id_token(Token *t, char *s)
392 #else /* K&R style */
393 implied_id_token(t,s)
394 Token *t;
395 char *s;
396 #endif /* HAVE_STDC */
397 {
398 int h;
399 unsigned long hnum;
400
401 hnum = hash(s);
402 while( h=hnum%HASHSZ, hashtab[h].name != NULL &&
403 strcmp(hashtab[h].name,s) != 0)
404 hnum = rehash(hnum);
405 if(hashtab[h].name == NULL) { /* not seen before */
406 hashtab[h].name = s;
407 hashtab[h].loc_symtab = NULL;
408 hashtab[h].glob_symtab = NULL;
409 hashtab[h].com_loc_symtab = NULL;
410 hashtab[h].com_glob_symtab = NULL;
411 }
412 t->tclass = tok_identifier;
413 t->value.integer = h;
414 t->src_text = new_src_text("",0);
415 } /* implied_id_token */
416
417 #ifdef ALLOW_UNIX_CPP
418 /* This does not create a token but just performs the
419 actions needed when a cpp directive is seen. It
420 advances curr_char to the EOS. The setting of
421 filename is delayed to this point because it is not
422 stored in tokens but is external, so changing it
423 must wait till the previous statement is fully
424 parsed and any error messages printed and arg or
425 com list headers completed.
426 */
427 PRIVATE void
get_cpp_directive(VOID)428 get_cpp_directive(VOID)
429 {
430 if(next_filename != (char *)NULL) {
431
432 /* A #line directive on first line of toplevel source file
433 gives name of real original file. Replace our idea
434 of top_filename with that. But ignore an initial
435 # 1 "" since that means cpp was working with stdin,
436 probably from ftnpp. Likewise ignore # 1 "stdin"
437 which is a variant form, e.g. from fpp.
438 Nowadays cpp may indicate stdin by "<stdin>" or suchlike,
439 which we also ignore.
440 */
441 if( cpp_start_of_file ) {
442 if( next_filename[0] != '\0' &&
443 next_filename[0] != '<' &&
444 strcmp(next_filename,"stdin") != 0 ) {
445 top_filename = next_filename;
446 current_filename = next_filename;
447 cpp_start_of_file = FALSE;
448 }
449 }
450 else {
451 if( cpp_inc_depth > 0 &&
452 next_filename == cpp_include_stack[cpp_inc_depth-1].filename ) {
453 --cpp_inc_depth;
454 }
455 else {
456 if( cpp_inc_depth == 0 ) {
457 top_file_line_num = next_top_file_line_num;
458 }
459 /* Avoid overrun, but it will recover
460 even if max depth is exceeded.
461 */
462 if(cpp_inc_depth < MAX_INCLUDE_DEPTH)
463 cpp_include_stack[cpp_inc_depth++].filename = current_filename;
464 }
465 current_filename = next_filename;
466 }
467
468
469 }
470 if(f77_unix_cpp || f90_unix_cpp || !cpp_handled) {
471 nonstandard(line_num,col_num,f90_unix_cpp,0);
472 msg_tail(": preprocessor directive");
473 if(!cpp_handled)
474 msg_tail("(not processed)");
475 }
476
477 do { /* Skip to end of directive. It will become an EOS */
478 advance();
479 } while( curr_char != EOS);
480
481 }/*get_cpp_directive*/
482 #endif
483
484 PRIVATE void
485 #if HAVE_STDC
get_dot(Token * token)486 get_dot(Token *token)
487 #else /* K&R style */
488 get_dot(token)
489 Token *token;
490 #endif /* HAVE_STDC */
491 {
492 if(src_text_len < MAX_SRC_TEXT)
493 src_text_buf[src_text_len++] = curr_char;
494
495 closeup(); /* Advance till nonspace char in next_char */
496
497 if(isadigit(next_char))
498 get_number(token); /* Numeric const */
499 else if(isaletter(next_char))
500 get_dotted_keyword(token); /* .EQ. etc. */
501 else
502 get_simple_punctuation(token); /* "." out of place */
503 }
504
505
506 PRIVATE const struct {
507 char *name;
508 int tclass,tsubclass;
509 } dotted_keywords[]={
510 {".EQ.",tok_relop,relop_EQ},
511 {".NE.",tok_relop,relop_NE},
512 {".LE.",tok_relop,relop_LE},
513 {".LT.",tok_relop,relop_LT},
514 {".GE.",tok_relop,relop_GE},
515 {".GT.",tok_relop,relop_GT},
516 {".AND.",tok_AND,0},
517 {".OR.",tok_OR,0},
518 {".NOT.",tok_NOT,0},
519 {".FALSE.",tok_logical_const,FALSE},
520 {".TRUE.",tok_logical_const,TRUE},
521 {".EQV.",tok_EQV,0},
522 {".NEQV.",tok_NEQV,0},
523 {NULL,0,0}
524 };
525
526
527 PRIVATE void
528 #if HAVE_STDC
get_dotted_keyword(Token * token)529 get_dotted_keyword(Token *token)
530 #else /* K&R style */
531 get_dotted_keyword(token)
532 Token *token;
533 #endif /* HAVE_STDC */
534 {
535 int i,
536 has_embedded_space, /* Spaces inside keyword */
537 space_seen_lately; /* Flag for catching embedded space */
538 initial_flag = FALSE;
539 /* Watch for embedded space, but not
540 (in fixed form)
541 between dots and letters of keyword.
542 I.e. ". eq ." is OK, but not ".e q." */
543 has_embedded_space = FALSE;
544
545 bi_advance(); /* gobble the initial '.' */
546
547 space_seen_lately = (!free_form? FALSE:
548 closeup_saw_whitespace);
549
550 while(isaletter(curr_char)) {
551
552 if(src_text_len < MAX_SRC_TEXT)
553 src_text_buf[src_text_len++] = (char)makeupper(curr_char);
554
555 if(space_seen_lately)
556 has_embedded_space = TRUE;
557
558 bi_advance();
559
560 space_seen_lately = iswhitespace(prev_char);
561 }
562
563 /* Free form complains about space before last dot */
564 if(free_form && space_seen_lately)
565 has_embedded_space = TRUE;
566
567 if(src_text_len < MAX_SRC_TEXT)
568 src_text_buf[src_text_len++] = '.'; /* make it complete */
569
570 if(curr_char != '.') {
571 lex_error("Badly formed logical/relational operator or constant");
572 }
573 else {
574 advance(); /* gobble the final '.' */
575 }
576 if( (pretty_extra_space || (free_form && f90_freeform_space))
577 && has_embedded_space) {
578 space_violation(token->line_num,token->col_num,
579 "keyword has embedded space");
580 }
581
582 for(i=0; dotted_keywords[i].name != NULL; i++) {
583 if(strncmp(src_text_buf+1, /* only compare the significant parts */
584 dotted_keywords[i].name+1,
585 src_text_len-2) == 0) {
586 token->tclass = dotted_keywords[i].tclass;
587 token->tsubclass = dotted_keywords[i].tsubclass;
588 token->value.string = token->src_text = dotted_keywords[i].name;
589 #ifdef DEBUG_FORLEX
590 if(debug_lexer)
591 (void)fprintf(list_fd,"\nDotted keyword:\t\t%s",
592 token->src_text);
593 #endif
594 return;
595 }
596 }
597 /* Match not found: signal an error */
598 lex_error("Unknown logical/relational operator or constant");
599 get_illegal_token(token);
600
601 } /* get_dotted_keyword */
602
603 PRIVATE void
604 #if HAVE_STDC
get_edit_descriptor(Token * token)605 get_edit_descriptor(Token *token)
606 #else /* K&R style */
607 get_edit_descriptor(token)
608 Token *token;
609 #endif /* HAVE_STDC */
610 {
611 int c;
612 long repeat_spec;
613 int Ee_allowed=FALSE; /* true if edit descr can have Ee after w.d */
614
615 if(isadigit(curr_char)) { /* Digit: repeat spec or holl or kP or nX */
616 repeat_spec = 0;
617 do {
618 if(src_text_len < MAX_SRC_TEXT)
619 src_text_buf[src_text_len++] = curr_char;
620 repeat_spec = repeat_spec*10L + (long)BCD(curr_char);
621 if( makeupper(next_char) == 'H' )
622 inside_hollerith = TRUE;/* get ready for hollerith*/
623 bi_advance();
624 } while(isadigit(curr_char));
625
626 if( makeupper(curr_char) == 'H' ) {
627 /* nH... pass off to hollerith routine */
628 get_hollerith(token, (int)repeat_spec);
629 return;
630 }
631 else {
632 /* Otherwise it is a repeat spec or the
633 numeric part of kP or nX which we treat
634 as repeat specs too */
635 token->tclass = tok_integer_const;
636 token->value.integer = repeat_spec;
637 token->src_text = new_src_text(src_text_buf,src_text_len);
638 #ifdef DEBUG_FORLEX
639 if(debug_lexer)
640 (void)fprintf(list_fd,"\nInteger const:\t\t%ld (from %s)",
641 repeat_spec,
642 token->src_text);
643 #endif
644 }
645 }/* end if digit */
646
647 else if(isaletter(curr_char)) {
648 c = makeupper(curr_char);
649 if(src_text_len < MAX_SRC_TEXT)
650 src_text_buf[src_text_len++] = c;
651 bi_advance();
652 switch(c) {
653
654 case 'P': /* P of kP k seen previously */
655 if(prev_token_class != tok_integer_const) {
656 if(f77_format_extensions || f90_format_extensions){
657 nonstandard(token->line_num,token->col_num,f90_format_extensions,0);
658 msg_tail(": P must follow a number");
659 }
660 }
661 break;
662
663 case 'X': /* X or nX */
664 break;
665
666 case 'S': /* S or SP or SS */
667 c = makeupper(curr_char);
668 if(c == 'S' || c == 'P') {
669 if(src_text_len < MAX_SRC_TEXT)
670 src_text_buf[src_text_len++] = c;
671 bi_advance();
672 }
673 break;
674
675 case 'B': /* BN or BZ */
676 c = makeupper(curr_char);
677 if(c == 'N' || c == 'Z') {
678 if(src_text_len < MAX_SRC_TEXT)
679 src_text_buf[src_text_len++] = c;
680 bi_advance();
681 }
682 else {
683 if(f77_format_extensions){
684 nonstandard(token->line_num,token->col_num,0,0);
685 msg_tail(": N or Z expected after B");
686 }
687 goto get_w_d; /* F90 has Bw.d: allow that */
688 }
689 break;
690
691 case 'T': /* Tc or TLc or TRc */
692 c = makeupper(curr_char);
693 if(c == 'L' || c == 'R') {
694 if(src_text_len < MAX_SRC_TEXT)
695 src_text_buf[src_text_len++] = c;
696 bi_advance();
697 }
698
699 case 'E': /* In F90, E can be followed by N or S */
700 c = makeupper(curr_char);
701 if( c == 'N' || c == 'S' ) {
702 if(src_text_len < MAX_SRC_TEXT)
703 src_text_buf[src_text_len++] = c;
704 bi_advance();
705 if(f77_format_extensions){
706 nonstandard(token->line_num,token->col_num,0,0);
707 }
708 }
709 Ee_allowed = TRUE;
710 goto get_w_d;
711
712
713 case 'O': /* These are OK in f90 but not f77 */
714 case 'Z':
715 if(f77_format_extensions){
716 nonstandard(token->line_num,token->col_num,0,0);
717 }
718 goto get_w_d;
719
720 /* Iw, Fw.c and similar forms */
721
722 case 'G':
723 Ee_allowed = TRUE; /* OK in F90 to have Ee trailer */
724 /*FALLTHRU*/
725 case 'A':
726 case 'D':
727 case 'F':
728 case 'I':
729 case 'L':
730 get_w_d: /* Get the w field if any */
731 while( isadigit(curr_char) ){
732 if(src_text_len < MAX_SRC_TEXT)
733 src_text_buf[src_text_len++] = curr_char;
734 bi_advance();
735 }
736 /* Include any dot followed by number (e.g. F10.5)
737 */
738 if( curr_char == '.' ) {
739 do {
740 if(src_text_len < MAX_SRC_TEXT)
741 src_text_buf[src_text_len++] = curr_char;
742 bi_advance();
743 } while( isadigit(curr_char) );
744 }
745 /* w.d can sometimes be followed by Ee */
746 if( Ee_allowed && (c=makeupper(curr_char)) == 'E' ) {
747 if(src_text_len < MAX_SRC_TEXT)
748 src_text_buf[src_text_len++] = c;
749 bi_advance();
750 while( isadigit(curr_char) ){
751 if(src_text_len < MAX_SRC_TEXT)
752 src_text_buf[src_text_len++] = curr_char;
753 bi_advance();
754 }
755 }
756 break;
757
758 default:
759 if(f77_format_extensions || f90_format_extensions) {
760 nonstandard(token->line_num,token->col_num,f90_format_extensions,0);
761 msg_tail(": edit descriptor");
762 src_text_buf[src_text_len++] = '\0';
763 msg_tail(src_text_buf);
764 }
765 goto get_w_d;
766 }/*end switch*/
767
768 token->tclass = tok_edit_descriptor;
769 token->value.string = NULL;
770 token->src_text = new_src_text(src_text_buf,src_text_len);
771
772 #ifdef DEBUG_FORLEX
773 if(debug_lexer)
774 (void)fprintf(list_fd,"\nEdit descriptor:\t%s",token->src_text);
775 #endif
776 }/*end else if isaletter*/
777
778 /* Apostrophe or quote mark means a string. */
779 else if( isaquote(curr_char) ) {
780 get_string(token);
781 }
782 /* Otherwise it is mere punctuation. Handle
783 it here ourself to avoid complications. */
784 else {
785 src_text_buf[src_text_len++] = curr_char;
786 get_simple_punctuation(token);
787 }
788 }
789
790 PRIVATE void
791 #if HAVE_STDC
get_hollerith(Token * token,int n)792 get_hollerith(Token *token, int n) /* Gets string of form nHaaaa */
793 #else /* K&R style */
794 get_hollerith(token,n) /* Gets string of form nHaaaa */
795 Token *token;
796 int n;
797 #endif /* HAVE_STDC */
798 {
799 int i;
800 LINENO_t last_line_num;
801 COLNO_t last_col_num;
802
803 /* strsize = length of only the string being defined
804 fullsize = length of whole hollerith const, which includes
805 length spec already stored in src_text_buf plus the
806 H plus the text plus final nul. */
807 int strsize=n,
808 leadin=src_text_len+1,
809 fullsize=leadin+strsize+1;
810 char *s;
811
812 initial_flag = FALSE;
813
814 s = new_src_text_alloc(fullsize);
815
816 for(i=0; i<src_text_len; i++) /* Copy the leadin already saved */
817 s[i] = src_text_buf[i];
818 s[i++] = 'H'; /* store the 'H' */
819
820 if(n==1)
821 inside_hollerith=FALSE;/* turn off flag ahead of next_char */
822 advance();/* Gobble the 'H' */
823
824 last_col_num = col_num;
825 last_line_num = line_num;
826
827 for(i=0; i<n; i++) {
828 while(curr_char == EOL) {
829 /* Treat short line as if extended with blanks */
830 COLNO_t col;
831 for(col=last_col_num; i<n && col<(COLNO_t)max_stmt_col; i++,col++) {
832 s[leadin+i] = ' ';
833 }
834 last_col_num = col_num;
835 advance();
836 }
837 if(i==n) break;
838
839 if(curr_char == EOS || curr_char == EOF) {
840 COLNO_t col;
841 for(col=last_col_num; i<n && col<(COLNO_t)max_stmt_col; i++,col++) {
842 if(i < strsize)
843 s[leadin+i] = ' ';
844 }
845 if(i < n) { /* If it did not fill up */
846 syntax_error((LINENO_t)last_line_num,(COLNO_t)last_col_num,
847 "Hollerith constant ends prematurely");
848 strsize=i;
849 }
850 break;
851 }
852 else {
853 s[leadin+i] = curr_char;
854 last_col_num = col_num;
855 last_line_num = line_num;
856 if(i==n-2)/* turn flag off ahead of next_char*/
857 inside_hollerith = FALSE;
858 advance();
859 }
860 }
861
862 if(strsize > 0)
863 s[leadin+strsize] = '\0';
864
865 inside_hollerith = FALSE;
866 token->tclass = tok_hollerith;
867 token->value.string = s + leadin;
868 token->size = n;
869 token->src_text = s;
870 #ifdef DEBUG_FORLEX
871 if(debug_lexer)
872 (void)fprintf(list_fd,"\nHollerith:\t\t%s (from %s)",
873 token->value.string,
874 token->src_text);
875 #endif
876
877 } /* get_hollerith */
878
879
880
881 PRIVATE void
882 #if HAVE_STDC
get_illegal_token(Token * token)883 get_illegal_token(Token *token) /* Handle an illegal input situation */
884 #else /* K&R style */
885 get_illegal_token(token) /* Handle an illegal input situation */
886 Token *token;
887 #endif /* HAVE_STDC */
888 {
889 token->tclass = tok_illegal;
890 token->src_text = new_src_text("",0);
891 #ifdef DEBUG_FORLEX
892 if(debug_lexer)
893 (void)fprintf(list_fd,"\nILLEGAL TOKEN");
894 #endif
895
896 } /* get_illegal_token */
897
898
899
900 /* Read a label from label field. */
901 PRIVATE void
902 #if HAVE_STDC
get_label(Token * token)903 get_label(Token *token)
904 #else /* K&R style */
905 get_label(token)
906 Token *token;
907 #endif /* HAVE_STDC */
908 {
909 int value=0;
910 int space_seen=FALSE, has_embedded_space=FALSE;
911 if( !free_form ) {
912 while( isadigit(curr_char) && col_num < 6 ) {
913 if(space_seen)
914 has_embedded_space = TRUE;
915 value = value*10 + BCD(curr_char);
916 src_text_buf[src_text_len++] = curr_char;
917 advance();
918 while(curr_char==' ' && col_num < 6) {
919 space_seen = TRUE;
920 advance();
921 }
922 }
923 if((pretty_extra_space || (free_form && f90_freeform_space))
924 && has_embedded_space) {
925 space_violation(token->line_num,token->col_num,
926 "label has embedded space");
927 }
928 }
929 else { /* free form */
930 int numdigits=0;
931 while( isadigit(curr_char) ) {
932 value = value*10 + BCD(curr_char);
933 if(src_text_len < MAX_SRC_TEXT)
934 src_text_buf[src_text_len++] = curr_char;
935 ++numdigits;
936 advance();
937 }
938 /* label can have only up to 5 digits */
939 if( numdigits > 5 && misc_warn) {
940 syntax_error(token->line_num,token->col_num,
941 "statement label exceeds 5 digits");
942 }
943 }
944 token->tclass = tok_label;
945 token->value.integer = value;
946 token->src_text = new_src_text(src_text_buf,src_text_len);
947 #ifdef DEBUG_FORLEX
948 if(debug_lexer)
949 (void)fprintf(list_fd,"\nLabel:\t\t\t%d (from %s)",
950 value,
951 token->src_text);
952 #endif
953
954 } /* get_label */
955
956
957 PRIVATE void
958 #if HAVE_STDC
get_letter(Token * token)959 get_letter(Token *token) /* Gets letter in IMPLICIT list */
960 #else /* K&R style */
961 get_letter(token) /* Gets letter in IMPLICIT list */
962 Token *token;
963 #endif /* HAVE_STDC */
964 {
965 token->tclass = tok_letter;
966 token->tsubclass = src_text_buf[src_text_len++] = makeupper(curr_char);
967 token->src_text = new_src_text(src_text_buf,src_text_len);
968
969 #ifdef DEBUG_FORLEX
970 if(debug_lexer)
971 (void)fprintf(list_fd,"\nLetter:\t\t\t%s",token->src_text);
972 #endif
973
974 advance();
975
976 } /* get_letter */
977
978
979 /* get_number reads a number and determines data type: integer,
980 * real, or double precision.
981 */
982 /* This belongs in ftnchek.h, perhaps. Defines number of significant
983 figures that are reasonable for a single-precision real constant.
984 Works out to 9 for wordsize=4, 21 for wordsize=8. These allow
985 for a couple of extra digits for rounding. Used in -trunc warning. */
986 #define REAL_SIGFIGS (local_wordsize==0? 8: (local_wordsize-1)*3)
987
988 PRIVATE int getting_complex_const=FALSE;
989
990 PRIVATE void
991 #if HAVE_STDC
get_number(Token * token)992 get_number(Token *token)
993 #else /* K&R style */
994 get_number(token)
995 Token *token;
996 #endif /* HAVE_STDC */
997 {
998 DBLVAL dvalue,leftside,rightside,pwr_of_ten;
999 int exponent,datatype,c;
1000 #ifdef DEBUG_FORLEX
1001 int expsign;
1002 #endif
1003 int numdigits, /* Count of digits in integer, significant or not */
1004 sigfigs; /* Count of significant digits */
1005
1006 /* For freeform warnings, this gets set when we
1007 arrive here via a leading '.', otherwise is false.
1008 */
1009 int space_seen_lately = closeup_saw_whitespace;
1010 int has_embedded_space = FALSE;
1011
1012 initial_flag = FALSE;
1013
1014 leftside = (DBLVAL)0;
1015 numdigits = sigfigs = 0;
1016 datatype = tok_integer_const;
1017 while(isadigit(curr_char)) {
1018 if(space_seen_lately)
1019 has_embedded_space = TRUE;
1020 leftside = leftside*(DBLVAL)10 + (DBLVAL)BCD(curr_char);
1021 ++numdigits;
1022 /* Do not count leading zeroes as significant */
1023 if(sigfigs > 0 || curr_char != '0')
1024 ++sigfigs;
1025 if( !integer_context && makeupper(next_char) == 'H' )
1026 inside_hollerith = TRUE;/* get ready for hollerith*/
1027
1028 if(src_text_len < MAX_SRC_TEXT)
1029 src_text_buf[src_text_len++] = curr_char;
1030 /* Embedded space is worth preserving since
1031 it is often used in long numbers. Any
1032 amount of blanks + tabs -> 1 blank.
1033 Exception: integer_context says upcoming
1034 item is a label or datatype length spec. */
1035 if(! integer_context &&
1036 (next_char == ' ' || next_char == '\t')) {
1037 if(src_text_len < MAX_SRC_TEXT)
1038 src_text_buf[src_text_len++] = ' ';
1039 }
1040
1041 bi_advance();
1042 space_seen_lately = iswhitespace(prev_char);
1043 }
1044
1045 /* If context specifies integer expected, skip to end.
1046 Otherwise scan on ahead for more. */
1047 if( integer_context) {
1048 if(numdigits == 0) {
1049 lex_error("integer expected");
1050 advance(); /* gobble something to avoid infinite loop */
1051 }
1052 }
1053 else {/* not integer_context */
1054 if( makeupper(curr_char) == 'H' ){ /* nnH means hollerith */
1055 if(leftside == (DBLVAL)0) {
1056 lex_error("Zero-length hollerith constant");
1057 inside_hollerith = FALSE;
1058 advance();
1059 get_illegal_token(token);
1060 }
1061 else {
1062 if(src_text_buf[src_text_len-1] == ' ')
1063 --src_text_len;
1064 get_hollerith(token, (int)leftside);
1065 }
1066 return;
1067 }
1068
1069 rightside = (DBLVAL)0;
1070 pwr_of_ten = (DBLVAL)1;
1071 closeup(); /* Pull in the lookahead character */
1072
1073 if( curr_char == '.' &&
1074 /* don't be fooled by 1.eq.N or
1075 I.eq.1.and. etc */
1076 !looking_at_relop() ) {
1077 datatype = tok_real_const;
1078 if( space_seen_lately )
1079 has_embedded_space = TRUE;
1080 if(numdigits > 0) /* if dot is initial it is already stored */
1081 if(src_text_len < MAX_SRC_TEXT)
1082 src_text_buf[src_text_len++] = curr_char;
1083 bi_advance();
1084 space_seen_lately = closeup_saw_whitespace || iswhitespace(prev_char);
1085 closeup_saw_whitespace = FALSE;
1086
1087 while(isadigit(curr_char)) {
1088 if( space_seen_lately )
1089 has_embedded_space = TRUE;
1090 rightside = rightside*(DBLVAL)10 + (DBLVAL)BCD(curr_char);
1091 ++numdigits; /* not used past here, but maintain it anyway */
1092 if(sigfigs > 0 || curr_char != '0')
1093 ++sigfigs;
1094 pwr_of_ten /= (DBLVAL)10;
1095
1096 if(src_text_len < MAX_SRC_TEXT)
1097 src_text_buf[src_text_len++] = curr_char;
1098 if(next_char == ' ' || next_char == '\t')
1099 if(src_text_len < MAX_SRC_TEXT)
1100 src_text_buf[src_text_len++] = ' ';
1101
1102 bi_advance();
1103 space_seen_lately = iswhitespace(prev_char);
1104 }
1105 }
1106 #ifdef DEBUG_FORLEX
1107 if(debug_lexer)
1108 dvalue = leftside + rightside*pwr_of_ten;
1109 else
1110 #endif
1111 dvalue = (DBLVAL)0;
1112
1113 exponent = 0;
1114 #ifdef DEBUG_FORLEX
1115 expsign = 1;
1116 #endif
1117 /* Integer followed by E or D gives a real/d.p constant.
1118 We also accept Q for quad (real*16) constants. */
1119
1120 space_seen_lately = space_seen_lately || closeup_saw_whitespace;
1121
1122 if( ( (c = makeupper(curr_char)) == 'E' || c == 'D' || c == 'Q') )
1123 {
1124 datatype = ((c == 'E')? tok_real_const:
1125 ((c == 'D')? tok_dp_const:
1126 tok_quad_const));
1127 if( space_seen_lately )
1128 has_embedded_space = TRUE;
1129 if(src_text_len < MAX_SRC_TEXT)
1130 src_text_buf[src_text_len++] = c;
1131 bi_advance();
1132 space_seen_lately = iswhitespace(prev_char);
1133 if(curr_char == '+') {
1134 #ifdef DEBUG_FORLEX
1135 expsign = 1;
1136 #endif
1137 if(src_text_len < MAX_SRC_TEXT)
1138 src_text_buf[src_text_len++] = curr_char;
1139 bi_advance();
1140 space_seen_lately = space_seen_lately || iswhitespace(prev_char);
1141 }
1142 else if(curr_char == '-') {
1143 #ifdef DEBUG_FORLEX
1144 expsign = -1;
1145 #endif
1146 if( iswhitespace(prev_char) )
1147 has_embedded_space = TRUE;
1148 if(src_text_len < MAX_SRC_TEXT)
1149 src_text_buf[src_text_len++] = curr_char;
1150 bi_advance();
1151 space_seen_lately = space_seen_lately || iswhitespace(prev_char);
1152 }
1153 if(!isadigit(curr_char)) {
1154 lex_error("Badly formed real constant");
1155 }
1156 else while(isadigit(curr_char)) {
1157 if( space_seen_lately )
1158 has_embedded_space = TRUE;
1159 exponent = exponent*10 + (curr_char-'0');
1160 if(src_text_len < MAX_SRC_TEXT)
1161 src_text_buf[src_text_len++] = curr_char;
1162 bi_advance();
1163 space_seen_lately = iswhitespace(prev_char);
1164 }
1165
1166 /* Compute real value only if debugging. If it exceeds max magnitude,
1167 computing it may cause crash. At this time, value of real const
1168 is not used for anything. */
1169 #ifdef DEBUG_FORLEX
1170 if(debug_lexer)
1171 dvalue *= pow(10.0, (double)(exponent*expsign));
1172 else
1173 #endif
1174 dvalue = (DBLVAL)0;
1175
1176 }
1177 }/* end if(!integer_context) */
1178
1179 if(src_text_buf[src_text_len-1] == ' ') /* remove any trailing blank */
1180 --src_text_len;
1181
1182 token->tclass = datatype;
1183 /* If this is part of complex const,
1184 do not store src_text but arrange
1185 so debugging works. */
1186 if(!getting_complex_const) {
1187 token->src_text = new_src_text(src_text_buf,src_text_len);
1188 }
1189 #ifdef DEBUG_FORLEX
1190 else {
1191 src_text_buf[src_text_len] = '\0';
1192 token->src_text = src_text_buf;
1193 }
1194 #endif
1195
1196 if( free_form && (pretty_extra_space || f90_freeform_space)
1197 && has_embedded_space ) {
1198 space_violation(token->line_num,token->col_num,
1199 "Numeric constant has embedded space");
1200 }
1201
1202 switch(datatype) {
1203 case tok_integer_const:
1204 token->value.integer = (long)leftside;
1205 #ifdef DEBUG_FORLEX
1206 if(debug_lexer)
1207 (void)fprintf(list_fd,"\nInteger const:\t\t%ld (from %s)",
1208 token->value.integer,
1209 token->src_text);
1210 #endif
1211 break;
1212 case tok_real_const:
1213 /* store single as double lest it overflow */
1214 token->value.dbl = dvalue;
1215 if(trunc_sigfigs && sigfigs >= REAL_SIGFIGS) {
1216 warning(token->line_num,token->col_num,
1217 "Single-precision real constant has more digits than are stored");
1218 }
1219 #ifdef DEBUG_FORLEX
1220 if(debug_lexer)
1221 (void)fprintf(list_fd,"\nReal const:\t\t%g (from %s)",
1222 (double)token->value.dbl,
1223 token->src_text);
1224 #endif
1225 break;
1226 case tok_dp_const:
1227 token->value.dbl = dvalue;
1228 #ifdef DEBUG_FORLEX
1229 if(debug_lexer)
1230 (void)fprintf(list_fd,"\nDouble const:\t\t%g (from %s)",
1231 (double)token->value.dbl,
1232 token->src_text);
1233 #endif
1234 break;
1235 case tok_quad_const:
1236 /* store quad as double in case host doesn't do quad */
1237 token->value.dbl = dvalue;
1238 #ifdef DEBUG_FORLEX
1239 if(debug_lexer)
1240 (void)fprintf(list_fd,"\nQuad const:\t\t%g (from %s)",
1241 (double)token->value.dbl,
1242 token->src_text);
1243 #endif
1244 break;
1245 }
1246
1247 } /* get_number */
1248
1249 /* get_complex_constant reads an entity of the form (num,num)
1250 where num is any [signed] numeric constant. It will only be
1251 called when looking_at() has guaranteed that there is one there.
1252 The token receives the real part as a number. The imaginary part
1253 is not stored. Whitespace is allowed between ( and num, around
1254 the comma, and between num and ) but not within num. */
1255
1256 PRIVATE void
1257 #if HAVE_STDC
get_complex_const(Token * token)1258 get_complex_const(Token *token)
1259 #else /* K&R style */
1260 get_complex_const(token)
1261 Token *token;
1262 #endif /* HAVE_STDC */
1263 {
1264 Token imag_part; /* temporary to hold imag part */
1265 #ifdef DEBUG_FORLEX
1266 double sign=(DBLVAL)1;
1267 #endif
1268 int dble_size=FALSE; /* flag to set if parts are D floats */
1269 int imag_dble_size; /* if imaginary part D float */
1270 LINENO_t comma_line_num;
1271 COLNO_t comma_col_num;
1272 getting_complex_const = TRUE;
1273 initial_flag = FALSE;
1274
1275
1276
1277 bi_advance(); /* skip over the initial paren (already stored) */
1278
1279
1280 if(curr_char == '+' || curr_char == '-') {
1281 #ifdef DEBUG_FORLEX
1282 if(curr_char == '-') sign = (DBLVAL)(-1);
1283 #endif
1284 if(src_text_len < MAX_SRC_TEXT)
1285 src_text_buf[src_text_len++] = curr_char;
1286
1287 bi_advance();
1288 }
1289
1290 #ifdef DEBUG_FORLEX
1291 if(debug_lexer){
1292 (void)fprintf(list_fd,"\nComplex const:(");
1293 if(sign < 0.0) (void)fprintf(list_fd," -");
1294 }
1295 #endif
1296 closeup_saw_whitespace = FALSE;
1297 get_number(token);
1298 switch((short)token->tclass) {
1299 case tok_integer_const:
1300 #ifdef DEBUG_FORLEX
1301 if(debug_lexer)
1302 token->value.dbl = sign*(double)token->value.integer;
1303 else
1304 #endif
1305 token->value.dbl = (DBLVAL)0;
1306 break;
1307 case tok_dp_const:
1308 dble_size=TRUE;
1309 /*FALLTHRU*/
1310 case tok_real_const:
1311 #ifdef DEBUG_FORLEX
1312 if(debug_lexer)
1313 token->value.dbl = sign*token->value.dbl;
1314 else
1315 #endif
1316 token->value.dbl = (DBLVAL)0;
1317 break;
1318 }
1319
1320 while(iswhitespace(curr_char))
1321 advance();
1322
1323
1324 comma_line_num = line_num;
1325 comma_col_num = col_num;
1326
1327 if(src_text_len < MAX_SRC_TEXT)
1328 src_text_buf[src_text_len++] = curr_char;
1329 if(next_char == ' ' || next_char == '\t') /* preserve space after , */
1330 if(src_text_len < MAX_SRC_TEXT)
1331 src_text_buf[src_text_len++] = ' ';
1332
1333 bi_advance(); /* skip over the comma */
1334
1335 if(curr_char == '+' || curr_char == '-') {
1336 #ifdef DEBUG_FORLEX
1337 if(curr_char == '-') sign = (DBLVAL)(-1);
1338 #endif
1339 if(src_text_len < MAX_SRC_TEXT)
1340 src_text_buf[src_text_len++] = curr_char;
1341
1342 bi_advance();
1343 }
1344 #ifdef DEBUG_FORLEX
1345 if(debug_lexer){
1346 (void)fprintf(list_fd,"\n,");
1347 if(sign < 0.0) (void)fprintf(list_fd," -");
1348 }
1349 #endif
1350 closeup_saw_whitespace = FALSE;
1351 /* Initialize imag_part token fields. */
1352 zero_struct(&imag_part,sizeof(imag_part));
1353 imag_part.line_num = line_num;
1354 imag_part.col_num = col_num;
1355 get_number(&imag_part);
1356 imag_dble_size = (imag_part.tclass == tok_dp_const);
1357
1358 if(dble_size != imag_dble_size) {
1359 warning(comma_line_num,comma_col_num,
1360 "different precision in real and imaginary parts");
1361 }
1362 else if(f77_double_complex) {
1363 if(dble_size)
1364 warning(token->line_num,token->col_num,
1365 "nonstandard double precision complex constant");
1366 }
1367
1368 dble_size = (dble_size || imag_dble_size);
1369
1370 while(iswhitespace(curr_char))
1371 advance();
1372
1373
1374 if(src_text_len < MAX_SRC_TEXT)
1375 src_text_buf[src_text_len++] = curr_char;
1376
1377 advance(); /* skip over final paren */
1378
1379 if(dble_size)
1380 token->tclass = tok_dcomplex_const;
1381 else
1382 token->tclass = tok_complex_const;
1383
1384 token->src_text = new_src_text(src_text_buf,src_text_len);
1385
1386 #ifdef DEBUG_FORLEX
1387 if(debug_lexer) {
1388 (void)fprintf(list_fd,"\n\t\t\tsource text=%s",
1389 token->src_text);
1390 (void)fprintf(list_fd,"\n)");
1391 }
1392 #endif
1393
1394 getting_complex_const = FALSE;
1395 }
1396
1397 #ifdef ALLOW_TYPELESS_CONSTANTS
1398 /* Routine to get constants of the forms:
1399 B'nnnn' (f90std) 'nnnn'B (nonf90) -- binary
1400 O'nnnn' (f90std) 'nnnn'O (nonf90) -- octal
1401 Z'nnnn' (f90std) X'nnnn' 'nnnn'X 'nnnn'Z (nonf90) -- hex
1402 No check of whether digits are less than base.
1403 Warning is issued here instead of in parser since constant
1404 looks like a normal integer by the time the parser sees it.
1405 */
1406 void
get_binary_const(Token * token,int c,int space_seen_lately)1407 get_binary_const(Token *token, int c, int space_seen_lately)
1408 /* c is base character: madeupper'ed by caller */
1409 {
1410 long value=0;
1411 int base,digit;
1412 int badly_formed=FALSE;
1413 int i,j; /* indices in src_text_buf for repacking */
1414
1415 if(c == 'O') base = 8;
1416 else if(c == 'X' || c == 'Z') base = 16;
1417 else if(c == 'B') base = 2;
1418 else {
1419 syntax_error(token->line_num,token->col_num,
1420 "Unknown base for typeless constant -- octal assumed");
1421 base = 8;
1422 }
1423 /* F90 allows initial B, O, Z but not X */
1424 if( c == 'X' && f90_typeless_constants ) {
1425 nonstandard(token->line_num,token->col_num,f90_typeless_constants,0);
1426 }
1427
1428 /* Advance i to starting digit */
1429 i = 0;
1430 while( ! isaquote(src_text_buf[i]) ) {
1431 ++i;
1432 }
1433 j = ++i; /* Input = Output to start */
1434
1435 /* Scan the string, moving chars down
1436 to change multi spaces to single
1437 blanks, and converting digits. */
1438 while( ! isaquote(src_text_buf[i]) ) {
1439 digit=src_text_buf[i++];
1440 if( ishex(digit) ){
1441 value = value*base + HEX(digit);
1442 src_text_buf[j++] = digit;
1443 }
1444 else { /* Anything else should be space */
1445 if( isspace(digit) ) {
1446 if( free_form )
1447 space_seen_lately = TRUE; /* blanks not OK in free form */
1448 src_text_buf[j++] = ' ';
1449 while( isspace(src_text_buf[i]) ) {
1450 ++i;
1451 }
1452 }
1453 else {
1454 badly_formed = TRUE;
1455 }
1456 }
1457 }
1458 if( badly_formed ) {
1459 syntax_error(token->line_num,token->col_num,
1460 "badly formed typeless constant");
1461 }
1462 else if((pretty_extra_space || (free_form && f90_freeform_space))
1463 && space_seen_lately) {
1464 space_violation(token->line_num,token->col_num,
1465 "typeless constant has embedded space");
1466 }
1467
1468 while(i < src_text_len)
1469 src_text_buf[j++] = src_text_buf[i++]; /* Copy the rest over */
1470
1471 src_text_len = j;
1472
1473 token->tclass = tok_integer_const;
1474 token->value.integer = value;
1475 token->src_text = new_src_text(src_text_buf,src_text_len);
1476
1477 if(f77_typeless_constants) {
1478 nonstandard(token->line_num,token->col_num,0,0);
1479 }
1480
1481 #ifdef DEBUG_FORLEX
1482 if(debug_lexer)
1483 (void)fprintf(list_fd,"\nInteger const:\t\t%ld (from %s)",
1484 token->value.integer,
1485 token->src_text);
1486 #endif
1487
1488 }/*get_binary_const*/
1489
1490 #endif/*ALLOW_TYPELESS_CONSTANTS*/
1491
1492
1493 PRIVATE void
1494 #if HAVE_STDC
get_punctuation(Token * token)1495 get_punctuation(Token *token)
1496 #else /* K&R style */
1497 get_punctuation(token)
1498 Token *token;
1499 #endif /* HAVE_STDC */
1500 {
1501 int multichar, /* Flags To catch spaces inside multi-char token */
1502 space_seen_lately;
1503 extern int in_attrbased_typedecl; /* shared with fortran.y */
1504 multichar = FALSE;
1505
1506 src_text_buf[src_text_len++] = curr_char;
1507
1508 /* If lexing attr-based type decl, turn off the flag when
1509 the double colon is reached...
1510 */
1511 if( in_attrbased_typedecl && curr_char == ':' ) {
1512 in_attrbased_typedecl = FALSE;
1513 }
1514 /* ...whereas turn initial_flag back on if a comma is found. */
1515 if( in_attrbased_typedecl && curr_char == ',' ) {
1516 initial_flag = TRUE;
1517 }
1518
1519 if( !in_attrbased_typedecl )
1520 initial_flag = FALSE;
1521
1522 space_seen_lately = iswhitespace(next_char);
1523
1524 closeup();
1525
1526 if(curr_char == '*' && next_char == '*') {
1527 token->tclass = tok_power;
1528 multichar = TRUE;
1529 advance();
1530 src_text_buf[src_text_len++] = curr_char;
1531 }
1532 else if(curr_char == '/' && next_char == '/' ) {
1533 /* If this is COMMON / / list, then embedded space is OK */
1534 if( prev_token_class == tok_COMMON ) {
1535 space_seen_lately = FALSE;
1536 }
1537 /* Otherwise it is concatenation operator */
1538 else {
1539 extern int in_assignment_stmt;
1540 /* for obscure rule check */
1541 if(in_assignment_stmt)
1542 make_true(IN_ASSIGN,token->TOK_flags);
1543 }
1544 token->tclass = tok_concat;
1545 multichar = TRUE;
1546 advance();
1547 src_text_buf[src_text_len++] = curr_char;
1548 }
1549 /* recognize F90 pointer assignment => */
1550 else if(curr_char == '=' && next_char == '>') {
1551 token->tclass = tok_pointer_assignment;
1552 multichar = TRUE;
1553 advance();
1554 src_text_buf[src_text_len++] = curr_char;
1555 }
1556 /* recognize F90 rel-ops here */
1557 else if( f90_relop(token,&multichar) ) {
1558 token->tclass = tok_relop;
1559 if(f77_relops) {
1560 nonstandard(token->line_num,token->col_num,0,0);
1561 msg_tail("for relational operator");
1562 }
1563 }
1564 /* paren can be the start of complex constant if everything
1565 is just right. Maybe more tests needed here. */
1566 else if(complex_const_allowed && curr_char == '(' &&
1567 ( (prev_token_class<256 && ispunct(prev_token_class))
1568 || prev_token_class == tok_relop
1569 || prev_token_class == tok_power )
1570 && looking_at_cplx()) {
1571 get_complex_const(token);
1572 return;
1573 }
1574 else {
1575 /* Provide special left parenthesis to avoid s/r
1576 conflict in grammar.
1577 */
1578 if( need_special_lparen ) {
1579 /* ASSERT ( curr_char == '(' ) */
1580 token->tclass = tok_lparen;
1581 need_special_lparen = FALSE;
1582 }
1583 else {
1584 token->tclass = curr_char;
1585 }
1586 }
1587
1588 token->src_text = new_src_text(src_text_buf,src_text_len);
1589
1590 if((pretty_extra_space || (free_form && f90_freeform_space))
1591 && multichar && space_seen_lately) {
1592 space_violation(token->line_num,token->col_num,
1593 "multi-character operator has embedded space");
1594 }
1595
1596 advance();
1597
1598 #ifdef DEBUG_FORLEX
1599 if(debug_lexer) {
1600 if(token->tclass == EOS)
1601 (void)fprintf(list_fd,"\n\t\t\tEOS");
1602 else {
1603 (void)fprintf(list_fd,"\nPunctuation:\t\t");
1604 if(token->tclass == tok_lparen)
1605 (void)fprintf(list_fd,"special ");
1606 (void)fprintf(list_fd,"%s",token->src_text);
1607 }
1608 }
1609 #endif
1610 } /* get_punctuation */
1611
1612
1613 PRIVATE void
1614 #if HAVE_STDC
get_simple_punctuation(Token * token)1615 get_simple_punctuation(Token *token)
1616 #else /* K&R style */
1617 get_simple_punctuation(token)
1618 Token *token;
1619 #endif /* HAVE_STDC */
1620 {
1621 /* Like get_punctuation but lacks special cases. Just
1622 gets the punctuation character. Text is already in
1623 src_text_buf. */
1624
1625 token->tclass = curr_char;
1626 token->src_text = new_src_text(src_text_buf,src_text_len);
1627 advance();
1628 #ifdef DEBUG_FORLEX
1629 if(debug_lexer) {
1630 if(token->tclass == EOS)
1631 (void)fprintf(list_fd,"\n\t\t\tEOS");
1632 else
1633 (void)fprintf(list_fd,"\nPunctuation:\t\t%s",token->src_text);
1634 }
1635 #endif
1636 } /* get_simple_punctuation */
1637
1638 PRIVATE int
1639 #if HAVE_STDC
f90_relop(Token * token,int * multichar)1640 f90_relop(Token *token, int *multichar)
1641 #else /* K&R style */
1642 f90_relop(token, multichar)
1643 Token *token;
1644 int *multichar;
1645 #endif /* HAVE_STDC */
1646 {
1647 *multichar = FALSE;
1648 if( curr_char == '>' ) {
1649 if( next_char == '=' ) {
1650 token->tsubclass = relop_GE;
1651 token->src_text = ">=";
1652 goto twochar_relop;
1653 }
1654 else {
1655 token->tsubclass = relop_GT;
1656 token->value.string = ">";
1657 return TRUE;
1658 }
1659 }
1660
1661 if( curr_char == '<' ) {
1662 if( next_char == '=' ) {
1663 token->tsubclass = relop_LE;
1664 token->value.string = "<=";
1665 goto twochar_relop;
1666 }
1667 else {
1668 token->tsubclass = relop_LT;
1669 token->value.string = "<";
1670 return TRUE;
1671 }
1672 }
1673
1674 if( curr_char == '=' && next_char == '=' ) {
1675 token->tsubclass = relop_EQ;
1676 token->value.string = "==";
1677 goto twochar_relop;
1678 }
1679
1680 if( curr_char == '/' && next_char == '=' ) {
1681 token->tsubclass = relop_NE;
1682 token->value.string = "/=";
1683 goto twochar_relop;
1684 }
1685
1686 return FALSE;
1687
1688 /* Two-character relops: need to gobble 2nd char */
1689 twochar_relop:
1690 *multichar = TRUE;
1691 advance();
1692 src_text_buf[src_text_len++] = curr_char;
1693 return TRUE;
1694 }
1695
1696 void
1697 #if HAVE_STDC
get_string(Token * token)1698 get_string(Token *token) /* Gets string of form 'aaaa' */
1699 #else /* K&R style */
1700 get_string(token) /* Gets string of form 'aaaa' */
1701 Token *token;
1702 #endif /* HAVE_STDC */
1703 {
1704 int len;
1705 COLNO_t last_col_num;
1706 int has_backslash = FALSE; /* for portability check */
1707
1708 quote_char = curr_char; /* remember the delimiter */
1709 initial_flag = FALSE;
1710 inside_string = TRUE;
1711 last_col_num=col_num;
1712 src_text_buf[src_text_len++] = curr_char; /* store leading quote */
1713 advance(); /* Gobble leading quote */
1714 len = 0;
1715 for(;;) {
1716 while(curr_char == EOL) {
1717 /* Fixed form: treat short line as if extended with
1718 blanks to 72 columns. Free form: line ends at EOL */
1719 if( ! free_form ) {
1720 COLNO_t col;
1721 for(col=last_col_num; col<max_stmt_col; col++) {
1722
1723 if(src_text_len < MAX_SRC_TEXT)
1724 src_text_buf[src_text_len++] = ' ';
1725
1726 ++len;
1727 }
1728 }
1729 last_col_num=col_num;
1730 advance();
1731 }
1732 if(curr_char == EOS || curr_char == EOF) {
1733 lex_error("Closing quote missing from string");
1734 break;
1735 }
1736 if(curr_char == quote_char) {
1737 inside_string = FALSE;/* assume so for now */
1738
1739 /* If LEX_RAWSTRINGS defined, stores doubled quotes and final quote.
1740 Otherwise initial quote is stored and doubled quotes are reduced to one. */
1741 #ifdef LEX_RAWSTRINGS
1742 /* Store the quote */
1743 if(src_text_len < MAX_SRC_TEXT)
1744 src_text_buf[src_text_len++] = curr_char;
1745 #endif
1746
1747 last_col_num=col_num;
1748 advance();
1749
1750 if(curr_char == quote_char){/* '' becomes ' in string */
1751 inside_string = TRUE; /* not a closing quote */
1752
1753 if(src_text_len < MAX_SRC_TEXT)
1754 src_text_buf[src_text_len++] = curr_char;
1755
1756 ++len;
1757 last_col_num=col_num;
1758 advance();
1759 }
1760 else {
1761 break; /* It was a closing quote after all */
1762 }
1763 }
1764 else { /* ordinary character within quotes */
1765 int value=curr_char;
1766
1767 if(curr_char == '\\') {
1768 if(!has_backslash) {/* only warn once per string */
1769 if(port_backslash)
1770 nonportable(line_num,col_num,
1771 "backslash treated incompatibly by some compilers");
1772 }
1773 has_backslash = TRUE;
1774
1775 #ifdef ALLOW_UNIX_BACKSLASH /* This has problems: undigesting
1776 a string gets complicated. */
1777 if(source_unix_backslash) {
1778 if(f77_unix_backslash || f90_unix_backslash) {
1779 nonstandard(line_num,col_num,f90_unix_backslash,0);
1780 msg_tail(": backslash escape sequence");
1781 }
1782 #ifdef LEX_RAWSTRINGS
1783 /* Store the backslash */
1784 if(src_text_len < MAX_SRC_TEXT)
1785 src_text_buf[src_text_len++] = curr_char;
1786 #endif
1787 inside_string = FALSE;/* so inline_comment works */
1788 advance(); /* gobble the backslash */
1789 inside_string = TRUE;
1790 #ifdef LEX_RAWSTRINGS
1791 value = curr_char;
1792 #else /* !LEX_RAWSTRINGS*/
1793 if(isadigit(curr_char)) { /* \octal digits */
1794 value = BCD(curr_char);
1795 while(isadigit(next_char)) {
1796 advance();
1797 value = value*8 + BCD(curr_char);
1798 }
1799 }
1800 else if(curr_char == 'x') {
1801 advance(); /* gobble the 'x' */
1802 value = HEX(curr_char);
1803 while(ishex(next_char)) {
1804 advance();
1805 value = value*16 + HEX(curr_char);
1806 }
1807 }/* end if octal or hex */
1808 else switch(curr_char) {
1809 #if __STDC__ + 0
1810 case 'a': value = '\a'; break; /* alarm */
1811 #else
1812 case 'a': value = '\007'; break; /* alarm */
1813 #endif
1814 case 'b': value = '\b'; break; /* backspace */
1815 case 'f': value = '\f'; break; /* formfeed */
1816 case 'n': value = '\n'; break; /* newline */
1817 case 'r': value = '\r'; break; /* carr return */
1818 case 't': value = '\t'; break; /* h tab */
1819 case 'v': value = '\v'; break; /* v tab */
1820 case EOS: value = '\n'; break; /* a no-no */
1821 /* All others: \c --> c */
1822 default: value = curr_char; break;
1823 }
1824 #endif /* !LEX_RAWSTRINGS*/
1825 }/* end if source_unix_backslash */
1826 #endif /*ALLOW_UNIX_BACKSLASH*/
1827
1828 }/* end if curr_char == backslash */
1829
1830 if(src_text_len < MAX_SRC_TEXT)
1831 src_text_buf[src_text_len++] = value;
1832
1833 ++len;
1834 last_col_num=col_num;
1835 advance();
1836 }
1837 }
1838
1839 #ifdef ALLOW_TYPELESS_CONSTANTS
1840 /* Watch for const like 'nnn'X */
1841 if(!inside_format) {
1842 int space_seen_lately = iswhitespace(curr_char);
1843 while(iswhitespace(curr_char))
1844 advance();
1845 if(isaletter(curr_char)) {
1846 int c=makeupper(curr_char);
1847 #ifndef LEX_RAWSTRINGS
1848 if(src_text_len < MAX_SRC_TEXT)
1849 src_text_buf[src_text_len++] = quote_char;
1850 #endif
1851 if(src_text_len < MAX_SRC_TEXT)
1852 src_text_buf[src_text_len++] = c;
1853 advance(); /* Gobble the base character */
1854
1855 /* F90 does not allow forms 'ddd'[BOZ].
1856 Suppress message here if letter is not in [BOZ]
1857 since that gets a warning in get_binary_const
1858 */
1859 if( f90_typeless_constants && (c=='Z' || c=='O' || c=='B') ) {
1860 nonstandard(token->line_num,token->col_num,f90_typeless_constants,0);
1861 }
1862
1863 get_binary_const(token,c,space_seen_lately);
1864 return;
1865 }
1866 }
1867 #endif /*ALLOW_TYPELESS_CONSTANTS*/
1868
1869 if(len == 0) {
1870 warning(line_num,col_num,
1871 "Zero-length string not allowed\n");
1872 len = 1;
1873 }
1874
1875 if(quote_char != '\'') { /* Warn if quote is used instead of apost */
1876 if(f77_quotemarks) {
1877 nonstandard(token->line_num,token->col_num,0,0);
1878 msg_tail(": character string should be delimited by apostrophes");
1879 }
1880 }
1881
1882 inside_string = FALSE;
1883
1884 token->tclass = tok_string;
1885 token->size = len;
1886 token->src_text = new_src_text(src_text_buf,src_text_len);
1887 #ifdef LEX_RAWSTRINGS
1888 token->value.string = token->src_text; /* Includes the initial quote */
1889 #else
1890 token->value.string = token->src_text+1; /* Skips the initial quote */
1891 #endif
1892 /* Under -port warn if char size > 255 */
1893 if(port_long_string) {
1894 if(len > 255)
1895 nonportable(line_num,col_num,
1896 "character constant length exceeds 255");
1897 }
1898
1899 #ifdef DEBUG_FORLEX
1900 if(debug_lexer
1901 && src_text_buf[0] == quote_char) { /* skip if doing X'nnnn' */
1902 (void)fprintf(list_fd,"\nString:\t\t\t%s",token->value.string);
1903 (void)fprintf(list_fd,"\n\t\t(from\t%s)",token->src_text);
1904 }
1905 #endif
1906
1907 } /* get_string */
1908
1909
1910 /* This routine is called when -pretty=extra-space or
1911 missing-space are in effect, or when in free form
1912 mode. It figures out the right kind of warning to issue.
1913 */
space_violation(LINENO_t lineno,COLNO_t colno,const char * s)1914 void space_violation( LINENO_t lineno, COLNO_t colno, const char *s )
1915 {
1916 if(free_form && f90_freeform_space) {
1917 syntax_error(lineno,colno,s);
1918 }
1919 else {
1920 ugly_code(lineno,colno,s);
1921 }
1922 }
1923 /* End of Forlex module */
1924