1 /* Copyright (C) 2002-2016 Free Software Foundation, Inc.
2    Contributed by Andy Vaught
3    F2003 I/O support contributed by Jerry DeLisle
4 
5 This file is part of the GNU Fortran runtime library (libgfortran).
6 
7 Libgfortran is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11 
12 Libgfortran is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 GNU General Public License for more details.
16 
17 Under Section 7 of GPL version 3, you are granted additional
18 permissions described in the GCC Runtime Library Exception, version
19 3.1, as published by the Free Software Foundation.
20 
21 You should have received a copy of the GNU General Public License and
22 a copy of the GCC Runtime Library Exception along with this program;
23 see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
24 <http://www.gnu.org/licenses/>.  */
25 
26 
27 /* format.c-- parse a FORMAT string into a binary format suitable for
28  * interpretation during I/O statements */
29 
30 #include "io.h"
31 #include "format.h"
32 #include <ctype.h>
33 #include <string.h>
34 #include <stdlib.h>
35 
36 
37 static const fnode colon_node = { FMT_COLON, 0, NULL, NULL, {{ 0, 0, 0 }}, 0,
38 				  NULL };
39 
40 /* Error messages. */
41 
42 static const char posint_required[] = "Positive width required in format",
43   period_required[] = "Period required in format",
44   nonneg_required[] = "Nonnegative width required in format",
45   unexpected_element[] = "Unexpected element '%c' in format\n",
46   unexpected_end[] = "Unexpected end of format string",
47   bad_string[] = "Unterminated character constant in format",
48   bad_hollerith[] = "Hollerith constant extends past the end of the format",
49   reversion_error[] = "Exhausted data descriptors in format",
50   zero_width[] = "Zero width in format descriptor";
51 
52 /* The following routines support caching format data from parsed format strings
53    into a hash table.  This avoids repeatedly parsing duplicate format strings
54    or format strings in I/O statements that are repeated in loops.  */
55 
56 
57 /* Traverse the table and free all data.  */
58 
59 void
free_format_hash_table(gfc_unit * u)60 free_format_hash_table (gfc_unit *u)
61 {
62   size_t i;
63 
64   /* free_format_data handles any NULL pointers.  */
65   for (i = 0; i < FORMAT_HASH_SIZE; i++)
66     {
67       if (u->format_hash_table[i].hashed_fmt != NULL)
68 	{
69 	  free_format_data (u->format_hash_table[i].hashed_fmt);
70 	  free (u->format_hash_table[i].key);
71 	}
72       u->format_hash_table[i].key = NULL;
73       u->format_hash_table[i].key_len = 0;
74       u->format_hash_table[i].hashed_fmt = NULL;
75     }
76 }
77 
78 /* Traverse the format_data structure and reset the fnode counters.  */
79 
80 static void
reset_node(fnode * fn)81 reset_node (fnode *fn)
82 {
83   fnode *f;
84 
85   fn->count = 0;
86   fn->current = NULL;
87 
88   if (fn->format != FMT_LPAREN)
89     return;
90 
91   for (f = fn->u.child; f; f = f->next)
92     {
93       if (f->format == FMT_RPAREN)
94 	break;
95       reset_node (f);
96     }
97 }
98 
99 static void
reset_fnode_counters(st_parameter_dt * dtp)100 reset_fnode_counters (st_parameter_dt *dtp)
101 {
102   fnode *f;
103   format_data *fmt;
104 
105   fmt = dtp->u.p.fmt;
106 
107   /* Clear this pointer at the head so things start at the right place.  */
108   fmt->array.array[0].current = NULL;
109 
110   for (f = fmt->array.array[0].u.child; f; f = f->next)
111     reset_node (f);
112 }
113 
114 
115 /* A simple hashing function to generate an index into the hash table.  */
116 
117 static uint32_t
format_hash(st_parameter_dt * dtp)118 format_hash (st_parameter_dt *dtp)
119 {
120   char *key;
121   gfc_charlen_type key_len;
122   uint32_t hash = 0;
123   gfc_charlen_type i;
124 
125   /* Hash the format string. Super simple, but what the heck!  */
126   key = dtp->format;
127   key_len = dtp->format_len;
128   for (i = 0; i < key_len; i++)
129     hash ^= key[i];
130   hash &= (FORMAT_HASH_SIZE - 1);
131   return hash;
132 }
133 
134 
135 static void
save_parsed_format(st_parameter_dt * dtp)136 save_parsed_format (st_parameter_dt *dtp)
137 {
138   uint32_t hash;
139   gfc_unit *u;
140 
141   hash = format_hash (dtp);
142   u = dtp->u.p.current_unit;
143 
144   /* Index into the hash table.  We are simply replacing whatever is there
145      relying on probability.  */
146   if (u->format_hash_table[hash].hashed_fmt != NULL)
147     free_format_data (u->format_hash_table[hash].hashed_fmt);
148   u->format_hash_table[hash].hashed_fmt = NULL;
149 
150   free (u->format_hash_table[hash].key);
151   u->format_hash_table[hash].key = dtp->format;
152 
153   u->format_hash_table[hash].key_len = dtp->format_len;
154   u->format_hash_table[hash].hashed_fmt = dtp->u.p.fmt;
155 }
156 
157 
158 static format_data *
find_parsed_format(st_parameter_dt * dtp)159 find_parsed_format (st_parameter_dt *dtp)
160 {
161   uint32_t hash;
162   gfc_unit *u;
163 
164   hash = format_hash (dtp);
165   u = dtp->u.p.current_unit;
166 
167   if (u->format_hash_table[hash].key != NULL)
168     {
169       /* See if it matches.  */
170       if (u->format_hash_table[hash].key_len == dtp->format_len)
171 	{
172 	  /* So far so good.  */
173 	  if (strncmp (u->format_hash_table[hash].key,
174 	      dtp->format, dtp->format_len) == 0)
175 	    return u->format_hash_table[hash].hashed_fmt;
176 	}
177     }
178   return NULL;
179 }
180 
181 
182 /* next_char()-- Return the next character in the format string.
183  * Returns -1 when the string is done.  If the literal flag is set,
184  * spaces are significant, otherwise they are not. */
185 
186 static int
next_char(format_data * fmt,int literal)187 next_char (format_data *fmt, int literal)
188 {
189   int c;
190 
191   do
192     {
193       if (fmt->format_string_len == 0)
194 	return -1;
195 
196       fmt->format_string_len--;
197       c = toupper (*fmt->format_string++);
198       fmt->error_element = c;
199     }
200   while ((c == ' ' || c == '\t') && !literal);
201 
202   return c;
203 }
204 
205 
206 /* unget_char()-- Back up one character position. */
207 
208 #define unget_char(fmt) \
209   { fmt->format_string--; fmt->format_string_len++; }
210 
211 
212 /* get_fnode()-- Allocate a new format node, inserting it into the
213  * current singly linked list.  These are initially allocated from the
214  * static buffer. */
215 
216 static fnode *
get_fnode(format_data * fmt,fnode ** head,fnode ** tail,format_token t)217 get_fnode (format_data *fmt, fnode **head, fnode **tail, format_token t)
218 {
219   fnode *f;
220 
221   if (fmt->avail == &fmt->last->array[FARRAY_SIZE])
222     {
223       fmt->last->next = xmalloc (sizeof (fnode_array));
224       fmt->last = fmt->last->next;
225       fmt->last->next = NULL;
226       fmt->avail = &fmt->last->array[0];
227     }
228   f = fmt->avail++;
229   memset (f, '\0', sizeof (fnode));
230 
231   if (*head == NULL)
232     *head = *tail = f;
233   else
234     {
235       (*tail)->next = f;
236       *tail = f;
237     }
238 
239   f->format = t;
240   f->repeat = -1;
241   f->source = fmt->format_string;
242   return f;
243 }
244 
245 
246 /* free_format()-- Free allocated format string.  */
247 void
free_format(st_parameter_dt * dtp)248 free_format (st_parameter_dt *dtp)
249 {
250   if ((dtp->common.flags & IOPARM_DT_HAS_FORMAT) && dtp->format)
251     {
252       free (dtp->format);
253       dtp->format = NULL;
254     }
255 }
256 
257 
258 /* free_format_data()-- Free all allocated format data.  */
259 
260 void
free_format_data(format_data * fmt)261 free_format_data (format_data *fmt)
262 {
263   fnode_array *fa, *fa_next;
264 
265 
266   if (fmt == NULL)
267     return;
268 
269   for (fa = fmt->array.next; fa; fa = fa_next)
270     {
271       fa_next = fa->next;
272       free (fa);
273     }
274 
275   free (fmt);
276   fmt = NULL;
277 }
278 
279 
280 /* format_lex()-- Simple lexical analyzer for getting the next token
281  * in a FORMAT string.  We support a one-level token pushback in the
282  * fmt->saved_token variable. */
283 
284 static format_token
format_lex(format_data * fmt)285 format_lex (format_data *fmt)
286 {
287   format_token token;
288   int negative_flag;
289   int c;
290   char delim;
291 
292   if (fmt->saved_token != FMT_NONE)
293     {
294       token = fmt->saved_token;
295       fmt->saved_token = FMT_NONE;
296       return token;
297     }
298 
299   negative_flag = 0;
300   c = next_char (fmt, 0);
301 
302   switch (c)
303     {
304     case '*':
305        token = FMT_STAR;
306        break;
307 
308     case '(':
309       token = FMT_LPAREN;
310       break;
311 
312     case ')':
313       token = FMT_RPAREN;
314       break;
315 
316     case '-':
317       negative_flag = 1;
318       /* Fall Through */
319 
320     case '+':
321       c = next_char (fmt, 0);
322       if (!isdigit (c))
323 	{
324 	  token = FMT_UNKNOWN;
325 	  break;
326 	}
327 
328       fmt->value = c - '0';
329 
330       for (;;)
331 	{
332 	  c = next_char (fmt, 0);
333 	  if (!isdigit (c))
334 	    break;
335 
336 	  fmt->value = 10 * fmt->value + c - '0';
337 	}
338 
339       unget_char (fmt);
340 
341       if (negative_flag)
342 	fmt->value = -fmt->value;
343       token = FMT_SIGNED_INT;
344       break;
345 
346     case '0':
347     case '1':
348     case '2':
349     case '3':
350     case '4':
351     case '5':
352     case '6':
353     case '7':
354     case '8':
355     case '9':
356       fmt->value = c - '0';
357 
358       for (;;)
359 	{
360 	  c = next_char (fmt, 0);
361 	  if (!isdigit (c))
362 	    break;
363 
364 	  fmt->value = 10 * fmt->value + c - '0';
365 	}
366 
367       unget_char (fmt);
368       token = (fmt->value == 0) ? FMT_ZERO : FMT_POSINT;
369       break;
370 
371     case '.':
372       token = FMT_PERIOD;
373       break;
374 
375     case ',':
376       token = FMT_COMMA;
377       break;
378 
379     case ':':
380       token = FMT_COLON;
381       break;
382 
383     case '/':
384       token = FMT_SLASH;
385       break;
386 
387     case '$':
388       token = FMT_DOLLAR;
389       break;
390 
391     case 'T':
392       switch (next_char (fmt, 0))
393 	{
394 	case 'L':
395 	  token = FMT_TL;
396 	  break;
397 	case 'R':
398 	  token = FMT_TR;
399 	  break;
400 	default:
401 	  token = FMT_T;
402 	  unget_char (fmt);
403 	  break;
404 	}
405 
406       break;
407 
408     case 'X':
409       token = FMT_X;
410       break;
411 
412     case 'S':
413       switch (next_char (fmt, 0))
414 	{
415 	case 'S':
416 	  token = FMT_SS;
417 	  break;
418 	case 'P':
419 	  token = FMT_SP;
420 	  break;
421 	default:
422 	  token = FMT_S;
423 	  unget_char (fmt);
424 	  break;
425 	}
426 
427       break;
428 
429     case 'B':
430       switch (next_char (fmt, 0))
431 	{
432 	case 'N':
433 	  token = FMT_BN;
434 	  break;
435 	case 'Z':
436 	  token = FMT_BZ;
437 	  break;
438 	default:
439 	  token = FMT_B;
440 	  unget_char (fmt);
441 	  break;
442 	}
443 
444       break;
445 
446     case '\'':
447     case '"':
448       delim = c;
449 
450       fmt->string = fmt->format_string;
451       fmt->value = 0;		/* This is the length of the string */
452 
453       for (;;)
454 	{
455 	  c = next_char (fmt, 1);
456 	  if (c == -1)
457 	    {
458 	      token = FMT_BADSTRING;
459 	      fmt->error = bad_string;
460 	      break;
461 	    }
462 
463 	  if (c == delim)
464 	    {
465 	      c = next_char (fmt, 1);
466 
467 	      if (c == -1)
468 		{
469 		  token = FMT_BADSTRING;
470 		  fmt->error = bad_string;
471 		  break;
472 		}
473 
474 	      if (c != delim)
475 		{
476 		  unget_char (fmt);
477 		  token = FMT_STRING;
478 		  break;
479 		}
480 	    }
481 
482 	  fmt->value++;
483 	}
484 
485       break;
486 
487     case 'P':
488       token = FMT_P;
489       break;
490 
491     case 'I':
492       token = FMT_I;
493       break;
494 
495     case 'O':
496       token = FMT_O;
497       break;
498 
499     case 'Z':
500       token = FMT_Z;
501       break;
502 
503     case 'F':
504       token = FMT_F;
505       break;
506 
507     case 'E':
508       switch (next_char (fmt, 0))
509 	{
510 	case 'N':
511 	  token = FMT_EN;
512 	  break;
513 	case 'S':
514 	  token = FMT_ES;
515 	  break;
516 	default:
517 	  token = FMT_E;
518 	  unget_char (fmt);
519 	  break;
520 	}
521       break;
522 
523     case 'G':
524       token = FMT_G;
525       break;
526 
527     case 'H':
528       token = FMT_H;
529       break;
530 
531     case 'L':
532       token = FMT_L;
533       break;
534 
535     case 'A':
536       token = FMT_A;
537       break;
538 
539     case 'D':
540       switch (next_char (fmt, 0))
541 	{
542 	case 'P':
543 	  token = FMT_DP;
544 	  break;
545 	case 'C':
546 	  token = FMT_DC;
547 	  break;
548 	default:
549 	  token = FMT_D;
550 	  unget_char (fmt);
551 	  break;
552 	}
553       break;
554 
555     case 'R':
556       switch (next_char (fmt, 0))
557 	{
558 	case 'C':
559 	  token = FMT_RC;
560 	  break;
561 	case 'D':
562 	  token = FMT_RD;
563 	  break;
564 	case 'N':
565 	  token = FMT_RN;
566 	  break;
567 	case 'P':
568 	  token = FMT_RP;
569 	  break;
570 	case 'U':
571 	  token = FMT_RU;
572 	  break;
573 	case 'Z':
574 	  token = FMT_RZ;
575 	  break;
576 	default:
577 	  unget_char (fmt);
578 	  token = FMT_UNKNOWN;
579 	  break;
580 	}
581       break;
582 
583     case -1:
584       token = FMT_END;
585       break;
586 
587     default:
588       token = FMT_UNKNOWN;
589       break;
590     }
591 
592   return token;
593 }
594 
595 
596 /* parse_format_list()-- Parse a format list.  Assumes that a left
597  * paren has already been seen.  Returns a list representing the
598  * parenthesis node which contains the rest of the list. */
599 
600 static fnode *
parse_format_list(st_parameter_dt * dtp,bool * seen_dd)601 parse_format_list (st_parameter_dt *dtp, bool *seen_dd)
602 {
603   fnode *head, *tail;
604   format_token t, u, t2;
605   int repeat;
606   format_data *fmt = dtp->u.p.fmt;
607   bool seen_data_desc = false;
608 
609   head = tail = NULL;
610 
611   /* Get the next format item */
612  format_item:
613   t = format_lex (fmt);
614  format_item_1:
615   switch (t)
616     {
617     case FMT_STAR:
618       t = format_lex (fmt);
619       if (t != FMT_LPAREN)
620 	{
621 	  fmt->error = "Left parenthesis required after '*'";
622 	  goto finished;
623 	}
624       get_fnode (fmt, &head, &tail, FMT_LPAREN);
625       tail->repeat = -2;  /* Signifies unlimited format.  */
626       tail->u.child = parse_format_list (dtp, &seen_data_desc);
627       *seen_dd = seen_data_desc;
628       if (fmt->error != NULL)
629 	goto finished;
630       if (!seen_data_desc)
631 	{
632 	  fmt->error = "'*' requires at least one associated data descriptor";
633 	  goto finished;
634 	}
635       goto between_desc;
636 
637     case FMT_POSINT:
638       repeat = fmt->value;
639 
640       t = format_lex (fmt);
641       switch (t)
642 	{
643 	case FMT_LPAREN:
644 	  get_fnode (fmt, &head, &tail, FMT_LPAREN);
645 	  tail->repeat = repeat;
646 	  tail->u.child = parse_format_list (dtp, &seen_data_desc);
647 	  *seen_dd = seen_data_desc;
648 	  if (fmt->error != NULL)
649 	    goto finished;
650 
651 	  goto between_desc;
652 
653 	case FMT_SLASH:
654 	  get_fnode (fmt, &head, &tail, FMT_SLASH);
655 	  tail->repeat = repeat;
656 	  goto optional_comma;
657 
658 	case FMT_X:
659 	  get_fnode (fmt, &head, &tail, FMT_X);
660 	  tail->repeat = 1;
661 	  tail->u.k = fmt->value;
662 	  goto between_desc;
663 
664 	case FMT_P:
665 	  goto p_descriptor;
666 
667 	default:
668 	  goto data_desc;
669 	}
670 
671     case FMT_LPAREN:
672       get_fnode (fmt, &head, &tail, FMT_LPAREN);
673       tail->repeat = 1;
674       tail->u.child = parse_format_list (dtp, &seen_data_desc);
675       *seen_dd = seen_data_desc;
676       if (fmt->error != NULL)
677 	goto finished;
678 
679       goto between_desc;
680 
681     case FMT_SIGNED_INT:	/* Signed integer can only precede a P format.  */
682     case FMT_ZERO:		/* Same for zero.  */
683       t = format_lex (fmt);
684       if (t != FMT_P)
685 	{
686 	  fmt->error = "Expected P edit descriptor in format";
687 	  goto finished;
688 	}
689 
690     p_descriptor:
691       get_fnode (fmt, &head, &tail, FMT_P);
692       tail->u.k = fmt->value;
693       tail->repeat = 1;
694 
695       t = format_lex (fmt);
696       if (t == FMT_F || t == FMT_EN || t == FMT_ES || t == FMT_D
697 	  || t == FMT_G || t == FMT_E)
698 	{
699 	  repeat = 1;
700 	  goto data_desc;
701 	}
702 
703       if (t != FMT_COMMA && t != FMT_RPAREN && t != FMT_SLASH
704 	  && t != FMT_POSINT)
705 	{
706 	  fmt->error = "Comma required after P descriptor";
707 	  goto finished;
708 	}
709 
710       fmt->saved_token = t;
711       goto optional_comma;
712 
713     case FMT_P:		/* P and X require a prior number */
714       fmt->error = "P descriptor requires leading scale factor";
715       goto finished;
716 
717     case FMT_X:
718 /*
719    EXTENSION!
720 
721    If we would be pedantic in the library, we would have to reject
722    an X descriptor without an integer prefix:
723 
724       fmt->error = "X descriptor requires leading space count";
725       goto finished;
726 
727    However, this is an extension supported by many Fortran compilers,
728    including Cray, HP, AIX, and IRIX.  Therefore, we allow it in the
729    runtime library, and make the front end reject it if the compiler
730    is in pedantic mode.  The interpretation of 'X' is '1X'.
731 */
732       get_fnode (fmt, &head, &tail, FMT_X);
733       tail->repeat = 1;
734       tail->u.k = 1;
735       goto between_desc;
736 
737     case FMT_STRING:
738       get_fnode (fmt, &head, &tail, FMT_STRING);
739       tail->u.string.p = fmt->string;
740       tail->u.string.length = fmt->value;
741       tail->repeat = 1;
742       goto optional_comma;
743 
744     case FMT_RC:
745     case FMT_RD:
746     case FMT_RN:
747     case FMT_RP:
748     case FMT_RU:
749     case FMT_RZ:
750       notify_std (&dtp->common, GFC_STD_F2003, "Fortran 2003: Round "
751 		  "descriptor not allowed");
752       get_fnode (fmt, &head, &tail, t);
753       tail->repeat = 1;
754       goto between_desc;
755 
756     case FMT_DC:
757     case FMT_DP:
758       notify_std (&dtp->common, GFC_STD_F2003, "Fortran 2003: DC or DP "
759 		  "descriptor not allowed");
760     /* Fall through.  */
761     case FMT_S:
762     case FMT_SS:
763     case FMT_SP:
764     case FMT_BN:
765     case FMT_BZ:
766       get_fnode (fmt, &head, &tail, t);
767       tail->repeat = 1;
768       goto between_desc;
769 
770     case FMT_COLON:
771       get_fnode (fmt, &head, &tail, FMT_COLON);
772       tail->repeat = 1;
773       goto optional_comma;
774 
775     case FMT_SLASH:
776       get_fnode (fmt, &head, &tail, FMT_SLASH);
777       tail->repeat = 1;
778       tail->u.r = 1;
779       goto optional_comma;
780 
781     case FMT_DOLLAR:
782       get_fnode (fmt, &head, &tail, FMT_DOLLAR);
783       tail->repeat = 1;
784       notify_std (&dtp->common, GFC_STD_GNU, "Extension: $ descriptor");
785       goto between_desc;
786 
787     case FMT_T:
788     case FMT_TL:
789     case FMT_TR:
790       t2 = format_lex (fmt);
791       if (t2 != FMT_POSINT)
792 	{
793 	  fmt->error = posint_required;
794 	  goto finished;
795 	}
796       get_fnode (fmt, &head, &tail, t);
797       tail->u.n = fmt->value;
798       tail->repeat = 1;
799       goto between_desc;
800 
801     case FMT_I:
802     case FMT_B:
803     case FMT_O:
804     case FMT_Z:
805     case FMT_E:
806     case FMT_EN:
807     case FMT_ES:
808     case FMT_D:
809     case FMT_L:
810     case FMT_A:
811     case FMT_F:
812     case FMT_G:
813       repeat = 1;
814       *seen_dd = true;
815       goto data_desc;
816 
817     case FMT_H:
818       get_fnode (fmt, &head, &tail, FMT_STRING);
819       if (fmt->format_string_len < 1)
820 	{
821 	  fmt->error = bad_hollerith;
822 	  goto finished;
823 	}
824 
825       tail->u.string.p = fmt->format_string;
826       tail->u.string.length = 1;
827       tail->repeat = 1;
828 
829       fmt->format_string++;
830       fmt->format_string_len--;
831 
832       goto between_desc;
833 
834     case FMT_END:
835       fmt->error = unexpected_end;
836       goto finished;
837 
838     case FMT_BADSTRING:
839       goto finished;
840 
841     case FMT_RPAREN:
842       goto finished;
843 
844     default:
845       fmt->error = unexpected_element;
846       goto finished;
847     }
848 
849   /* In this state, t must currently be a data descriptor.  Deal with
850      things that can/must follow the descriptor */
851  data_desc:
852   switch (t)
853     {
854     case FMT_L:
855       *seen_dd = true;
856       t = format_lex (fmt);
857       if (t != FMT_POSINT)
858 	{
859 	  if (notification_std(GFC_STD_GNU) == NOTIFICATION_ERROR)
860 	    {
861 	      fmt->error = posint_required;
862 	      goto finished;
863 	    }
864 	  else
865 	    {
866 	      fmt->saved_token = t;
867 	      fmt->value = 1;	/* Default width */
868 	      notify_std (&dtp->common, GFC_STD_GNU, posint_required);
869 	    }
870 	}
871 
872       get_fnode (fmt, &head, &tail, FMT_L);
873       tail->u.n = fmt->value;
874       tail->repeat = repeat;
875       break;
876 
877     case FMT_A:
878       *seen_dd = true;
879       t = format_lex (fmt);
880       if (t == FMT_ZERO)
881 	{
882 	  fmt->error = zero_width;
883 	  goto finished;
884 	}
885 
886       if (t != FMT_POSINT)
887 	{
888 	  fmt->saved_token = t;
889 	  fmt->value = -1;		/* Width not present */
890 	}
891 
892       get_fnode (fmt, &head, &tail, FMT_A);
893       tail->repeat = repeat;
894       tail->u.n = fmt->value;
895       break;
896 
897     case FMT_D:
898     case FMT_E:
899     case FMT_F:
900     case FMT_G:
901     case FMT_EN:
902     case FMT_ES:
903       *seen_dd = true;
904       get_fnode (fmt, &head, &tail, t);
905       tail->repeat = repeat;
906 
907       u = format_lex (fmt);
908       if (t == FMT_G && u == FMT_ZERO)
909 	{
910 	  *seen_dd = true;
911 	  if (notification_std (GFC_STD_F2008) == NOTIFICATION_ERROR
912 	      || dtp->u.p.mode == READING)
913 	    {
914 	      fmt->error = zero_width;
915 	      goto finished;
916 	    }
917 	  tail->u.real.w = 0;
918 	  u = format_lex (fmt);
919 	  if (u != FMT_PERIOD)
920 	    {
921 	      fmt->saved_token = u;
922 	      break;
923 	    }
924 
925 	  u = format_lex (fmt);
926 	  if (u != FMT_POSINT)
927 	    {
928 	      fmt->error = posint_required;
929 	      goto finished;
930 	    }
931 	  tail->u.real.d = fmt->value;
932 	  break;
933 	}
934       if (t == FMT_F && dtp->u.p.mode == WRITING)
935 	{
936 	  *seen_dd = true;
937 	  if (u != FMT_POSINT && u != FMT_ZERO)
938 	    {
939 	      fmt->error = nonneg_required;
940 	      goto finished;
941 	    }
942 	}
943       else if (u != FMT_POSINT)
944 	{
945 	  fmt->error = posint_required;
946 	  goto finished;
947 	}
948 
949       tail->u.real.w = fmt->value;
950       t2 = t;
951       t = format_lex (fmt);
952       if (t != FMT_PERIOD)
953 	{
954 	  /* We treat a missing decimal descriptor as 0.  Note: This is only
955 	     allowed if -std=legacy, otherwise an error occurs.  */
956 	  if (compile_options.warn_std != 0)
957 	    {
958 	      fmt->error = period_required;
959 	      goto finished;
960 	    }
961 	  fmt->saved_token = t;
962 	  tail->u.real.d = 0;
963 	  tail->u.real.e = -1;
964 	  break;
965 	}
966 
967       t = format_lex (fmt);
968       if (t != FMT_ZERO && t != FMT_POSINT)
969 	{
970 	  fmt->error = nonneg_required;
971 	  goto finished;
972 	}
973 
974       tail->u.real.d = fmt->value;
975       tail->u.real.e = -1;
976 
977       if (t2 == FMT_D || t2 == FMT_F)
978 	{
979 	  *seen_dd = true;
980 	  break;
981 	}
982 
983       /* Look for optional exponent */
984       t = format_lex (fmt);
985       if (t != FMT_E)
986 	fmt->saved_token = t;
987       else
988 	{
989 	  t = format_lex (fmt);
990 	  if (t != FMT_POSINT)
991 	    {
992 	      fmt->error = "Positive exponent width required in format";
993 	      goto finished;
994 	    }
995 
996 	  tail->u.real.e = fmt->value;
997 	}
998 
999       break;
1000 
1001     case FMT_H:
1002       if (repeat > fmt->format_string_len)
1003 	{
1004 	  fmt->error = bad_hollerith;
1005 	  goto finished;
1006 	}
1007 
1008       get_fnode (fmt, &head, &tail, FMT_STRING);
1009       tail->u.string.p = fmt->format_string;
1010       tail->u.string.length = repeat;
1011       tail->repeat = 1;
1012 
1013       fmt->format_string += fmt->value;
1014       fmt->format_string_len -= repeat;
1015 
1016       break;
1017 
1018     case FMT_I:
1019     case FMT_B:
1020     case FMT_O:
1021     case FMT_Z:
1022       *seen_dd = true;
1023       get_fnode (fmt, &head, &tail, t);
1024       tail->repeat = repeat;
1025 
1026       t = format_lex (fmt);
1027 
1028       if (dtp->u.p.mode == READING)
1029 	{
1030 	  if (t != FMT_POSINT)
1031 	    {
1032 	      fmt->error = posint_required;
1033 	      goto finished;
1034 	    }
1035 	}
1036       else
1037 	{
1038 	  if (t != FMT_ZERO && t != FMT_POSINT)
1039 	    {
1040 	      fmt->error = nonneg_required;
1041 	      goto finished;
1042 	    }
1043 	}
1044 
1045       tail->u.integer.w = fmt->value;
1046       tail->u.integer.m = -1;
1047 
1048       t = format_lex (fmt);
1049       if (t != FMT_PERIOD)
1050 	{
1051 	  fmt->saved_token = t;
1052 	}
1053       else
1054 	{
1055 	  t = format_lex (fmt);
1056 	  if (t != FMT_ZERO && t != FMT_POSINT)
1057 	    {
1058 	      fmt->error = nonneg_required;
1059 	      goto finished;
1060 	    }
1061 
1062 	  tail->u.integer.m = fmt->value;
1063 	}
1064 
1065       if (tail->u.integer.w != 0 && tail->u.integer.m > tail->u.integer.w)
1066 	{
1067 	  fmt->error = "Minimum digits exceeds field width";
1068 	  goto finished;
1069 	}
1070 
1071       break;
1072 
1073     default:
1074       fmt->error = unexpected_element;
1075       goto finished;
1076     }
1077 
1078   /* Between a descriptor and what comes next */
1079  between_desc:
1080   t = format_lex (fmt);
1081   switch (t)
1082     {
1083     case FMT_COMMA:
1084       goto format_item;
1085 
1086     case FMT_RPAREN:
1087       goto finished;
1088 
1089     case FMT_SLASH:
1090     case FMT_COLON:
1091       get_fnode (fmt, &head, &tail, t);
1092       tail->repeat = 1;
1093       goto optional_comma;
1094 
1095     case FMT_END:
1096       fmt->error = unexpected_end;
1097       goto finished;
1098 
1099     default:
1100       /* Assume a missing comma, this is a GNU extension */
1101       goto format_item_1;
1102     }
1103 
1104   /* Optional comma is a weird between state where we've just finished
1105      reading a colon, slash or P descriptor. */
1106  optional_comma:
1107   t = format_lex (fmt);
1108   switch (t)
1109     {
1110     case FMT_COMMA:
1111       break;
1112 
1113     case FMT_RPAREN:
1114       goto finished;
1115 
1116     default:			/* Assume that we have another format item */
1117       fmt->saved_token = t;
1118       break;
1119     }
1120 
1121   goto format_item;
1122 
1123  finished:
1124 
1125   return head;
1126 }
1127 
1128 
1129 /* format_error()-- Generate an error message for a format statement.
1130  * If the node that gives the location of the error is NULL, the error
1131  * is assumed to happen at parse time, and the current location of the
1132  * parser is shown.
1133  *
1134  * We generate a message showing where the problem is.  We take extra
1135  * care to print only the relevant part of the format if it is longer
1136  * than a standard 80 column display. */
1137 
1138 void
format_error(st_parameter_dt * dtp,const fnode * f,const char * message)1139 format_error (st_parameter_dt *dtp, const fnode *f, const char *message)
1140 {
1141   int width, i, offset;
1142 #define BUFLEN 300
1143   char *p, buffer[BUFLEN];
1144   format_data *fmt = dtp->u.p.fmt;
1145 
1146   if (f != NULL)
1147     p = f->source;
1148   else                /* This should not happen.  */
1149     p = dtp->format;
1150 
1151   if (message == unexpected_element)
1152     snprintf (buffer, BUFLEN, message, fmt->error_element);
1153   else
1154     snprintf (buffer, BUFLEN, "%s\n", message);
1155 
1156   /* Get the offset into the format string where the error occurred.  */
1157   offset = dtp->format_len - (fmt->reversion_ok ?
1158 			      (int) strlen(p) : fmt->format_string_len);
1159 
1160   width = dtp->format_len;
1161 
1162   if (width > 80)
1163     width = 80;
1164 
1165   /* Show the format */
1166 
1167   p = strchr (buffer, '\0');
1168 
1169   if (dtp->format)
1170     memcpy (p, dtp->format, width);
1171 
1172   p += width;
1173   *p++ = '\n';
1174 
1175   /* Show where the problem is */
1176 
1177   for (i = 1; i < offset; i++)
1178     *p++ = ' ';
1179 
1180   *p++ = '^';
1181   *p = '\0';
1182 
1183   generate_error (&dtp->common, LIBERROR_FORMAT, buffer);
1184 }
1185 
1186 
1187 /* revert()-- Do reversion of the format.  Control reverts to the left
1188  * parenthesis that matches the rightmost right parenthesis.  From our
1189  * tree structure, we are looking for the rightmost parenthesis node
1190  * at the second level, the first level always being a single
1191  * parenthesis node.  If this node doesn't exit, we use the top
1192  * level. */
1193 
1194 static void
revert(st_parameter_dt * dtp)1195 revert (st_parameter_dt *dtp)
1196 {
1197   fnode *f, *r;
1198   format_data *fmt = dtp->u.p.fmt;
1199 
1200   dtp->u.p.reversion_flag = 1;
1201 
1202   r = NULL;
1203 
1204   for (f = fmt->array.array[0].u.child; f; f = f->next)
1205     if (f->format == FMT_LPAREN)
1206       r = f;
1207 
1208   /* If r is NULL because no node was found, the whole tree will be used */
1209 
1210   fmt->array.array[0].current = r;
1211   fmt->array.array[0].count = 0;
1212 }
1213 
1214 /* parse_format()-- Parse a format string.  */
1215 
1216 void
parse_format(st_parameter_dt * dtp)1217 parse_format (st_parameter_dt *dtp)
1218 {
1219   format_data *fmt;
1220   bool format_cache_ok, seen_data_desc = false;
1221 
1222   /* Don't cache for internal units and set an arbitrary limit on the size of
1223      format strings we will cache.  (Avoids memory issues.)  */
1224   format_cache_ok = !is_internal_unit (dtp);
1225 
1226   /* Lookup format string to see if it has already been parsed.  */
1227   if (format_cache_ok)
1228     {
1229       dtp->u.p.fmt = find_parsed_format (dtp);
1230 
1231       if (dtp->u.p.fmt != NULL)
1232 	{
1233 	  dtp->u.p.fmt->reversion_ok = 0;
1234 	  dtp->u.p.fmt->saved_token = FMT_NONE;
1235 	  dtp->u.p.fmt->saved_format = NULL;
1236 	  reset_fnode_counters (dtp);
1237 	  return;
1238 	}
1239     }
1240 
1241   /* Not found so proceed as follows.  */
1242 
1243   char *fmt_string = fc_strdup_notrim (dtp->format, dtp->format_len);
1244   dtp->format = fmt_string;
1245 
1246   dtp->u.p.fmt = fmt = xmalloc (sizeof (format_data));
1247   fmt->format_string = dtp->format;
1248   fmt->format_string_len = dtp->format_len;
1249 
1250   fmt->string = NULL;
1251   fmt->saved_token = FMT_NONE;
1252   fmt->error = NULL;
1253   fmt->value = 0;
1254 
1255   /* Initialize variables used during traversal of the tree.  */
1256 
1257   fmt->reversion_ok = 0;
1258   fmt->saved_format = NULL;
1259 
1260   /* Allocate the first format node as the root of the tree.  */
1261 
1262   fmt->last = &fmt->array;
1263   fmt->last->next = NULL;
1264   fmt->avail = &fmt->array.array[0];
1265 
1266   memset (fmt->avail, 0, sizeof (*fmt->avail));
1267   fmt->avail->format = FMT_LPAREN;
1268   fmt->avail->repeat = 1;
1269   fmt->avail++;
1270 
1271   if (format_lex (fmt) == FMT_LPAREN)
1272     fmt->array.array[0].u.child = parse_format_list (dtp, &seen_data_desc);
1273   else
1274     fmt->error = "Missing initial left parenthesis in format";
1275 
1276   if (format_cache_ok)
1277     save_parsed_format (dtp);
1278   else
1279     dtp->u.p.format_not_saved = 1;
1280 
1281   if (fmt->error)
1282     format_error (dtp, NULL, fmt->error);
1283 }
1284 
1285 
1286 /* next_format0()-- Get the next format node without worrying about
1287  * reversion.  Returns NULL when we hit the end of the list.
1288  * Parenthesis nodes are incremented after the list has been
1289  * exhausted, other nodes are incremented before they are returned. */
1290 
1291 static const fnode *
next_format0(fnode * f)1292 next_format0 (fnode * f)
1293 {
1294   const fnode *r;
1295 
1296   if (f == NULL)
1297     return NULL;
1298 
1299   if (f->format != FMT_LPAREN)
1300     {
1301       f->count++;
1302       if (f->count <= f->repeat)
1303 	return f;
1304 
1305       f->count = 0;
1306       return NULL;
1307     }
1308 
1309   /* Deal with a parenthesis node with unlimited format.  */
1310 
1311   if (f->repeat == -2)  /* -2 signifies unlimited.  */
1312   for (;;)
1313     {
1314       if (f->current == NULL)
1315 	f->current = f->u.child;
1316 
1317       for (; f->current != NULL; f->current = f->current->next)
1318 	{
1319 	  r = next_format0 (f->current);
1320 	  if (r != NULL)
1321 	    return r;
1322 	}
1323     }
1324 
1325   /* Deal with a parenthesis node with specific repeat count.  */
1326   for (; f->count < f->repeat; f->count++)
1327     {
1328       if (f->current == NULL)
1329 	f->current = f->u.child;
1330 
1331       for (; f->current != NULL; f->current = f->current->next)
1332 	{
1333 	  r = next_format0 (f->current);
1334 	  if (r != NULL)
1335 	    return r;
1336 	}
1337     }
1338 
1339   f->count = 0;
1340   return NULL;
1341 }
1342 
1343 
1344 /* next_format()-- Return the next format node.  If the format list
1345  * ends up being exhausted, we do reversion.  Reversion is only
1346  * allowed if we've seen a data descriptor since the
1347  * initialization or the last reversion.  We return NULL if there
1348  * are no more data descriptors to return (which is an error
1349  * condition). */
1350 
1351 const fnode *
next_format(st_parameter_dt * dtp)1352 next_format (st_parameter_dt *dtp)
1353 {
1354   format_token t;
1355   const fnode *f;
1356   format_data *fmt = dtp->u.p.fmt;
1357 
1358   if (fmt->saved_format != NULL)
1359     {				/* Deal with a pushed-back format node */
1360       f = fmt->saved_format;
1361       fmt->saved_format = NULL;
1362       goto done;
1363     }
1364 
1365   f = next_format0 (&fmt->array.array[0]);
1366   if (f == NULL)
1367     {
1368       if (!fmt->reversion_ok)
1369 	return NULL;
1370 
1371       fmt->reversion_ok = 0;
1372       revert (dtp);
1373 
1374       f = next_format0 (&fmt->array.array[0]);
1375       if (f == NULL)
1376 	{
1377 	  format_error (dtp, NULL, reversion_error);
1378 	  return NULL;
1379 	}
1380 
1381       /* Push the first reverted token and return a colon node in case
1382        * there are no more data items. */
1383 
1384       fmt->saved_format = f;
1385       return &colon_node;
1386     }
1387 
1388   /* If this is a data edit descriptor, then reversion has become OK. */
1389  done:
1390   t = f->format;
1391 
1392   if (!fmt->reversion_ok &&
1393       (t == FMT_I || t == FMT_B || t == FMT_O || t == FMT_Z || t == FMT_F ||
1394        t == FMT_E || t == FMT_EN || t == FMT_ES || t == FMT_G || t == FMT_L ||
1395        t == FMT_A || t == FMT_D))
1396     fmt->reversion_ok = 1;
1397   return f;
1398 }
1399 
1400 
1401 /* unget_format()-- Push the given format back so that it will be
1402  * returned on the next call to next_format() without affecting
1403  * counts.  This is necessary when we've encountered a data
1404  * descriptor, but don't know what the data item is yet.  The format
1405  * node is pushed back, and we return control to the main program,
1406  * which calls the library back with the data item (or not). */
1407 
1408 void
unget_format(st_parameter_dt * dtp,const fnode * f)1409 unget_format (st_parameter_dt *dtp, const fnode *f)
1410 {
1411   dtp->u.p.fmt->saved_format = f;
1412 }
1413 
1414