1 /* Check calls to formatted I/O functions (-Wformat).
2    Copyright (C) 1992-2021 Free Software Foundation, Inc.
3 
4 This file is part of GCC.
5 
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
9 version.
10 
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3.  If not see
18 <http://www.gnu.org/licenses/>.  */
19 
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "tm.h"
24 #include "c-target.h"
25 #include "c-common.h"
26 #include "alloc-pool.h"
27 #include "stringpool.h"
28 #include "c-objc.h"
29 #include "intl.h"
30 #include "langhooks.h"
31 #include "c-format.h"
32 #include "diagnostic.h"
33 #include "substring-locations.h"
34 #include "selftest.h"
35 #include "selftest-diagnostic.h"
36 #include "builtins.h"
37 #include "attribs.h"
38 #include "gcc-rich-location.h"
39 
40 /* Handle attributes associated with format checking.  */
41 
/* The built-in kinds of format string.  The enumerators must be kept in
   the same order as the entries of format_types; the one exception is
   format_type_error, which stands for an unrecognized format type.
   Target-specific format types have no enumerator of their own.  */
enum format_type
{
  printf_format_type,
  asm_fprintf_format_type,
  gcc_diag_format_type,
  gcc_tdiag_format_type,
  gcc_cdiag_format_type,
  gcc_cxxdiag_format_type,
  gcc_gfc_format_type,
  gcc_dump_printf_format_type,
  gcc_objc_string_format_type,
  format_type_error = -1
};
52 
53 struct function_format_info
54 {
55   enum format_type format_type;		/* type of format (printf, scanf, etc.) */
56   /* IS_RAW is relevant only for GCC diagnostic format functions.
57      It is set for "raw" formatting functions like pp_printf that
58      are not intended to produce complete diagnostics according to
59      GCC guidelines, and clear for others like error and warning
60      whose format string is checked for proper quoting and spelling.  */
61   bool is_raw;
62   unsigned HOST_WIDE_INT format_num;	/* number of format argument */
63   unsigned HOST_WIDE_INT first_arg_num;	/* number of first arg (zero for varargs) */
64 };
65 
66 /* Initialized in init_dynamic_diag_info.  */
67 static GTY(()) tree local_tree_type_node;
68 static GTY(()) tree local_event_ptr_node;
69 static GTY(()) tree local_gimple_ptr_node;
70 static GTY(()) tree local_cgraph_node_ptr_node;
71 static GTY(()) tree locus;
72 
73 static bool decode_format_attr (const_tree, tree, tree, function_format_info *,
74 				bool);
75 static format_type decode_format_type (const char *, bool * = NULL);
76 
77 static bool check_format_string (const_tree argument,
78 				 unsigned HOST_WIDE_INT format_num,
79 				 int flags, bool *no_add_attrs,
80 				 int expected_format_type);
81 static tree get_constant (const_tree fntype, const_tree atname, tree expr,
82 			  int argno, unsigned HOST_WIDE_INT *value,
83 			  int flags, bool validated_p);
84 static const char *convert_format_name_to_system_name (const char *attr_name);
85 
86 static int first_target_format_type;
87 static const char *format_name (int format_num);
88 static int format_flags (int format_num);
89 
90 /* Emit a warning as per format_warning_va, but construct the substring_loc
91    for the character at offset (CHAR_IDX - 1) within a string constant
92    FORMAT_STRING_CST at FMT_STRING_LOC.  */
93 
94 ATTRIBUTE_GCC_DIAG (5,6)
95 static bool
format_warning_at_char(location_t fmt_string_loc,tree format_string_cst,int char_idx,int opt,const char * gmsgid,...)96 format_warning_at_char (location_t fmt_string_loc, tree format_string_cst,
97 			int char_idx, int opt, const char *gmsgid, ...)
98 {
99   va_list ap;
100   va_start (ap, gmsgid);
101   tree string_type = TREE_TYPE (format_string_cst);
102 
103   /* The callers are of the form:
104        format_warning (format_string_loc, format_string_cst,
105 		       format_chars - orig_format_chars,
106       where format_chars has already been incremented, so that
107       CHAR_IDX is one character beyond where the warning should
108       be emitted.  Fix it.  */
109   char_idx -= 1;
110 
111   substring_loc fmt_loc (fmt_string_loc, string_type, char_idx, char_idx,
112 			 char_idx);
113   format_string_diagnostic_t diag (fmt_loc, NULL, UNKNOWN_LOCATION, NULL,
114 				   NULL);
115   bool warned = diag.emit_warning_va (opt, gmsgid, &ap);
116   va_end (ap);
117 
118   return warned;
119 }
120 
121 
122 /* Emit a warning as per format_warning_va, but construct the substring_loc
123    for the substring at offset (POS1, POS2 - 1) within a string constant
124    FORMAT_STRING_CST at FMT_STRING_LOC.  */
125 
126 ATTRIBUTE_GCC_DIAG (6,7)
127 static bool
format_warning_substr(location_t fmt_string_loc,tree format_string_cst,int pos1,int pos2,int opt,const char * gmsgid,...)128 format_warning_substr (location_t fmt_string_loc, tree format_string_cst,
129 		       int pos1, int pos2, int opt, const char *gmsgid, ...)
130 {
131   va_list ap;
132   va_start (ap, gmsgid);
133   tree string_type = TREE_TYPE (format_string_cst);
134 
135   pos2 -= 1;
136 
137   substring_loc fmt_loc (fmt_string_loc, string_type, pos1, pos1, pos2);
138   format_string_diagnostic_t diag (fmt_loc, NULL, UNKNOWN_LOCATION, NULL,
139 				   NULL);
140   bool warned = diag.emit_warning_va (opt, gmsgid, &ap);
141   va_end (ap);
142 
143   return warned;
144 }
145 
146 
147 /* Check that we have a pointer to a string suitable for use as a format.
148    The default is to check for a char type.
149    For objective-c dialects, this is extended to include references to string
150    objects validated by objc_string_ref_type_p ().
151    Targets may also provide a string object type that can be used within c and
152    c++ and shared with their respective objective-c dialects. In this case the
153    reference to a format string is checked for validity via a hook.
154 
155    The function returns true if strref points to any string type valid for the
156    language dialect and target.  */
157 
158 bool
valid_format_string_type_p(tree strref)159 valid_format_string_type_p (tree strref)
160 {
161   return (strref != NULL
162 	  && TREE_CODE (strref) == POINTER_TYPE
163 	  && (TYPE_MAIN_VARIANT (TREE_TYPE (strref)) == char_type_node
164 	      || objc_string_ref_type_p (strref)
165 	      || (*targetcm.string_object_ref_type_p) ((const_tree) strref)));
166 }
167 
168 /* Handle a "format_arg" attribute; arguments as in
169    struct attribute_spec.handler.  */
170 tree
handle_format_arg_attribute(tree * node,tree atname,tree args,int flags,bool * no_add_attrs)171 handle_format_arg_attribute (tree *node, tree atname,
172 			     tree args, int flags, bool *no_add_attrs)
173 {
174   tree type = *node;
175   /* Note that TREE_VALUE (args) is changed in place below.  */
176   tree *format_num_expr = &TREE_VALUE (args);
177   unsigned HOST_WIDE_INT format_num = 0;
178 
179   if (tree val = get_constant (type, atname, *format_num_expr, 0, &format_num,
180 			       0, false))
181     *format_num_expr = val;
182   else
183     {
184       *no_add_attrs = true;
185       return NULL_TREE;
186     }
187 
188   if (prototype_p (type))
189     {
190       /* The format arg can be any string reference valid for the language and
191 	target.  We cannot be more specific in this case.  */
192       if (!check_format_string (type, format_num, flags, no_add_attrs, -1))
193 	return NULL_TREE;
194     }
195 
196   if (!valid_format_string_type_p (TREE_TYPE (type)))
197     {
198       if (!(flags & (int) ATTR_FLAG_BUILT_IN))
199 	error ("function does not return string type");
200       *no_add_attrs = true;
201       return NULL_TREE;
202     }
203 
204   return NULL_TREE;
205 }
206 
207 /* Verify that the format_num argument is actually a string reference suitable,
208    for the language dialect and target (in case the format attribute is in
209    error).  When we know the specific reference type expected, this is also
210    checked.  */
211 static bool
check_format_string(const_tree fntype,unsigned HOST_WIDE_INT format_num,int flags,bool * no_add_attrs,int expected_format_type)212 check_format_string (const_tree fntype, unsigned HOST_WIDE_INT format_num,
213 		     int flags, bool *no_add_attrs, int expected_format_type)
214 {
215   unsigned HOST_WIDE_INT i;
216   bool is_objc_sref, is_target_sref, is_char_ref;
217   tree ref;
218   int fmt_flags;
219   function_args_iterator iter;
220 
221   i = 1;
222   FOREACH_FUNCTION_ARGS (fntype, ref, iter)
223     {
224       if (i == format_num)
225 	break;
226       i++;
227     }
228 
229   if (!ref
230       || !valid_format_string_type_p (ref))
231     {
232       if (!(flags & (int) ATTR_FLAG_BUILT_IN))
233 	error ("format string argument is not a string type");
234       *no_add_attrs = true;
235       return false;
236     }
237 
238   /* We only know that we want a suitable string reference.  */
239   if (expected_format_type < 0)
240     return true;
241 
242   /* Now check that the arg matches the expected type.  */
243   is_char_ref =
244     (TYPE_MAIN_VARIANT (TREE_TYPE (ref)) == char_type_node);
245 
246   fmt_flags = format_flags (expected_format_type);
247   is_objc_sref = is_target_sref = false;
248   if (!is_char_ref)
249     is_objc_sref = objc_string_ref_type_p (ref);
250 
251   if (!(fmt_flags & FMT_FLAG_PARSE_ARG_CONVERT_EXTERNAL))
252     {
253       if (is_char_ref)
254 	return true; /* OK, we expected a char and found one.  */
255       else
256 	{
257 	  /* We expected a char but found an extended string type.  */
258 	  if (is_objc_sref)
259 	    error ("found a %qs reference but the format argument should"
260 		   " be a string", format_name (gcc_objc_string_format_type));
261 	  else
262 	    error ("found a %qT but the format argument should be a string",
263 		   ref);
264 	  *no_add_attrs = true;
265 	  return false;
266 	}
267     }
268 
269   /* We expect a string object type as the format arg.  */
270   if (is_char_ref)
271     {
272       error ("format argument should be a %qs reference but"
273 	     " a string was found", format_name (expected_format_type));
274       *no_add_attrs = true;
275       return false;
276     }
277 
278   /* We will assert that objective-c will support either its own string type
279      or the target-supplied variant.  */
280   if (!is_objc_sref)
281     is_target_sref = (*targetcm.string_object_ref_type_p) ((const_tree) ref);
282 
283   if (expected_format_type == (int) gcc_objc_string_format_type
284       && (is_objc_sref || is_target_sref))
285     return true;
286 
287   /* We will allow a target string ref to match only itself.  */
288   if (first_target_format_type
289       && expected_format_type >= first_target_format_type
290       && is_target_sref)
291     return true;
292   else
293     {
294       error ("format argument should be a %qs reference",
295 	      format_name (expected_format_type));
296       *no_add_attrs = true;
297       return false;
298     }
299 
300   gcc_unreachable ();
301 }
302 
303 /* Under the control of FLAGS, verify EXPR is a valid constant that
304    refers to a positional argument ARGNO having a string type (char*
305    or, for targets like Darwin, a pointer to struct CFString) to
306    a function type FNTYPE declared with attribute ATNAME.
307    If valid, store the constant's integer value in *VALUE and return
308    the value.
309    If VALIDATED_P is true assert the validation is successful.
310    Returns the converted constant value on success, null otherwise.  */
311 
312 static tree
get_constant(const_tree fntype,const_tree atname,tree expr,int argno,unsigned HOST_WIDE_INT * value,int flags,bool validated_p)313 get_constant (const_tree fntype, const_tree atname, tree expr, int argno,
314 	      unsigned HOST_WIDE_INT *value, int flags, bool validated_p)
315 {
316   /* Require the referenced argument to have a string type.  For targets
317      like Darwin, also accept pointers to struct CFString.  */
318   if (tree val = positional_argument (fntype, atname, expr, STRING_CST,
319 				      argno, flags))
320     {
321       *value = TREE_INT_CST_LOW (val);
322       return val;
323     }
324 
325   gcc_assert (!validated_p);
326   return NULL_TREE;
327 }
328 
329 /* Decode the arguments to a "format" attribute into a
330    function_format_info structure.  It is already known that the list
331    is of the right length.  If VALIDATED_P is true, then these
332    attributes have already been validated and must not be erroneous;
333    if false, it will give an error message.  Returns true if the
334    attributes are successfully decoded, false otherwise.  */
335 
336 static bool
decode_format_attr(const_tree fntype,tree atname,tree args,function_format_info * info,bool validated_p)337 decode_format_attr (const_tree fntype, tree atname, tree args,
338 		    function_format_info *info, bool validated_p)
339 {
340   tree format_type_id = TREE_VALUE (args);
341   /* Note that TREE_VALUE (args) is changed in place below.  Ditto
342      for the value of the next element on the list.  */
343   tree *format_num_expr = &TREE_VALUE (TREE_CHAIN (args));
344   tree *first_arg_num_expr = &TREE_VALUE (TREE_CHAIN (TREE_CHAIN (args)));
345 
346   if (TREE_CODE (format_type_id) != IDENTIFIER_NODE)
347     {
348       gcc_assert (!validated_p);
349       error ("unrecognized format specifier");
350       return false;
351     }
352   else
353     {
354       const char *p = IDENTIFIER_POINTER (format_type_id);
355 
356       info->format_type = decode_format_type (p, &info->is_raw);
357 
358       if (!c_dialect_objc ()
359 	   && info->format_type == gcc_objc_string_format_type)
360 	{
361 	  gcc_assert (!validated_p);
362 	  warning (OPT_Wformat_, "%qE is only allowed in Objective-C dialects",
363 		   format_type_id);
364 	  info->format_type = format_type_error;
365 	  return false;
366 	}
367 
368       if (info->format_type == format_type_error)
369 	{
370 	  gcc_assert (!validated_p);
371 	  warning (OPT_Wformat_, "%qE is an unrecognized format function type",
372 		   format_type_id);
373 	  return false;
374 	}
375     }
376 
377   if (tree val = get_constant (fntype, atname, *format_num_expr,
378 			       2, &info->format_num, 0, validated_p))
379     *format_num_expr = val;
380   else
381     return false;
382 
383   if (tree val = get_constant (fntype, atname, *first_arg_num_expr,
384 			       3, &info->first_arg_num,
385 			       (POSARG_ZERO | POSARG_ELLIPSIS), validated_p))
386     *first_arg_num_expr = val;
387   else
388     return false;
389 
390   if (info->first_arg_num != 0 && info->first_arg_num <= info->format_num)
391     {
392       gcc_assert (!validated_p);
393       error ("format string argument follows the arguments to be formatted");
394       return false;
395     }
396 
397   return true;
398 }
399 
400 /* Check a call to a format function against a parameter list.  */
401 
/* The C standard version that C++ is treated as equivalent to, or as
   inheriting from, for the purpose of the format features supported.  */
#define CPLUSPLUS_STD_VER (cxx_dialect < cxx11 ? STD_C94 : STD_C99)

/* The C standard version that formats are checked against when
   pedantic.  */
#define C_STD_VER \
  ((int) (c_dialect_cxx () \
	  ? CPLUSPLUS_STD_VER \
	  : (flag_isoc2x \
	     ? STD_C2X \
	     : (flag_isoc99 \
		? STD_C99 \
		: (flag_isoc94 ? STD_C94 : STD_C89)))))

/* The name to give to the standard version being warned about when
   pedantic.  FEATURE_VER is the version in which the feature warned
   about appeared, which is higher than C_STD_VER.  */
#define C_STD_NAME(FEATURE_VER) \
  (c_dialect_cxx () \
   ? (cxx_dialect < cxx11 ? "ISO C++98" : "ISO C++11") \
   : ((FEATURE_VER) == STD_EXT \
      ? "ISO C" \
      : ((FEATURE_VER) == STD_C2X ? "ISO C17" : "ISO C90")))

/* Adjust the C standard version VER, which may be STD_C9L, to account
   for -Wno-long-long.  Any other version is returned unchanged.  */
#define ADJ_STD(VER) \
  ((int) ((VER) == STD_C9L \
	  ? (warn_long_long ? STD_C99 : STD_C89) \
	  : (VER)))
429 
/* The kinds of specifier that can appear in a format and that require
   an argument.  */
enum format_specifier_kind
{
  CF_KIND_FORMAT,
  CF_KIND_FIELD_WIDTH,
  CF_KIND_FIELD_PRECISION
};
437 
438 static const char *kind_descriptions[] = {
439   N_("format"),
440   N_("field width specifier"),
441   N_("field precision specifier")
442 };
443 
444 /* Structure describing details of a type expected in format checking,
445    and the type to check against it.  */
446 struct format_wanted_type
447 {
448   /* The type wanted.  */
449   tree wanted_type;
450   /* The name of this type to use in diagnostics.  */
451   const char *wanted_type_name;
452   /* Should be type checked just for scalar width identity.  */
453   int scalar_identity_flag;
454   /* The level of indirection through pointers at which this type occurs.  */
455   int pointer_count;
456   /* Whether, when pointer_count is 1, to allow any character type when
457      pedantic, rather than just the character or void type specified.  */
458   int char_lenient_flag;
459   /* Whether the argument, dereferenced once, is written into and so the
460      argument must not be a pointer to a const-qualified type.  */
461   int writing_in_flag;
462   /* Whether the argument, dereferenced once, is read from and so
463      must not be a NULL pointer.  */
464   int reading_from_flag;
465   /* The kind of specifier that this type is used for.  */
466   enum format_specifier_kind kind;
467   /* The starting character of the specifier.  This never includes the
468      initial percent sign.  */
469   const char *format_start;
470   /* The length of the specifier.  */
471   int format_length;
472   /* The actual parameter to check against the wanted type.  */
473   tree param;
474   /* The argument number of that parameter.  */
475   int arg_num;
476   /* The offset location of this argument with respect to the format
477      string location.  */
478   unsigned int offset_loc;
479   /* The next type to check for this format conversion, or NULL if none.  */
480   struct format_wanted_type *next;
481 };
482 
/* Shorthand for the three format_length_info initializers of an unused
   length modifier slot: no name, FMT_LEN_none and STD_C89.  */
#define NO_FMT NULL, FMT_LEN_none, STD_C89
485 
486 static const format_length_info printf_length_specs[] =
487 {
488   { "h", FMT_LEN_h, STD_C89, "hh", FMT_LEN_hh, STD_C99, 0 },
489   { "l", FMT_LEN_l, STD_C89, "ll", FMT_LEN_ll, STD_C9L, 0 },
490   { "q", FMT_LEN_ll, STD_EXT, NO_FMT, 0 },
491   { "L", FMT_LEN_L, STD_C89, NO_FMT, 0 },
492   { "z", FMT_LEN_z, STD_C99, NO_FMT, 0 },
493   { "Z", FMT_LEN_z, STD_EXT, NO_FMT, 0 },
494   { "t", FMT_LEN_t, STD_C99, NO_FMT, 0 },
495   { "j", FMT_LEN_j, STD_C99, NO_FMT, 0 },
496   { "H", FMT_LEN_H, STD_EXT, NO_FMT, 0 },
497   { "D", FMT_LEN_D, STD_EXT, "DD", FMT_LEN_DD, STD_EXT, 0 },
498   { NO_FMT, NO_FMT, 0 }
499 };
500 
501 /* Length specifiers valid for asm_fprintf.  */
502 static const format_length_info asm_fprintf_length_specs[] =
503 {
504   { "l", FMT_LEN_l, STD_C89, "ll", FMT_LEN_ll, STD_C89, 0 },
505   { "w", FMT_LEN_w, STD_C89, NO_FMT, 0 },
506   { NO_FMT, NO_FMT, 0 }
507 };
508 
509 /* Length specifiers valid for GCC diagnostics.  */
510 static const format_length_info gcc_diag_length_specs[] =
511 {
512   { "l", FMT_LEN_l, STD_C89, "ll", FMT_LEN_ll, STD_C89, 0 },
513   { "w", FMT_LEN_w, STD_C89, NO_FMT, 0 },
514   { NO_FMT, NO_FMT, 0 }
515 };
516 
517 /* The custom diagnostics all accept the same length specifiers.  */
518 #define gcc_tdiag_length_specs gcc_diag_length_specs
519 #define gcc_cdiag_length_specs gcc_diag_length_specs
520 #define gcc_cxxdiag_length_specs gcc_diag_length_specs
521 #define gcc_dump_printf_length_specs gcc_diag_length_specs
522 
523 /* This differs from printf_length_specs only in that "Z" is not accepted.  */
524 static const format_length_info scanf_length_specs[] =
525 {
526   { "h", FMT_LEN_h, STD_C89, "hh", FMT_LEN_hh, STD_C99, 0 },
527   { "l", FMT_LEN_l, STD_C89, "ll", FMT_LEN_ll, STD_C9L, 0 },
528   { "q", FMT_LEN_ll, STD_EXT, NO_FMT, 0 },
529   { "L", FMT_LEN_L, STD_C89, NO_FMT, 0 },
530   { "z", FMT_LEN_z, STD_C99, NO_FMT, 0 },
531   { "t", FMT_LEN_t, STD_C99, NO_FMT, 0 },
532   { "j", FMT_LEN_j, STD_C99, NO_FMT, 0 },
533   { "H", FMT_LEN_H, STD_EXT, NO_FMT, 0 },
534   { "D", FMT_LEN_D, STD_EXT, "DD", FMT_LEN_DD, STD_EXT, 0 },
535   { NO_FMT, NO_FMT, 0 }
536 };
537 
538 
539 /* All tables for strfmon use STD_C89 everywhere, since -pedantic warnings
540    make no sense for a format type not part of any C standard version.  */
541 static const format_length_info strfmon_length_specs[] =
542 {
543   /* A GNU extension.  */
544   { "L", FMT_LEN_L, STD_C89, NO_FMT, 0 },
545   { NO_FMT, NO_FMT, 0 }
546 };
547 
548 
549 /* For now, the Fortran front-end routines only use l as length modifier.  */
550 static const format_length_info gcc_gfc_length_specs[] =
551 {
552   { "l", FMT_LEN_l, STD_C89, NO_FMT, 0 },
553   { NO_FMT, NO_FMT, 0 }
554 };
555 
556 
557 static const format_flag_spec printf_flag_specs[] =
558 {
559   { ' ',  0, 0, 0, N_("' ' flag"),        N_("the ' ' printf flag"),              STD_C89 },
560   { '+',  0, 0, 0, N_("'+' flag"),        N_("the '+' printf flag"),              STD_C89 },
561   { '#',  0, 0, 0, N_("'#' flag"),        N_("the '#' printf flag"),              STD_C89 },
562   { '0',  0, 0, 0, N_("'0' flag"),        N_("the '0' printf flag"),              STD_C89 },
563   { '-',  0, 0, 0, N_("'-' flag"),        N_("the '-' printf flag"),              STD_C89 },
564   { '\'', 0, 0, 0, N_("''' flag"),        N_("the ''' printf flag"),              STD_EXT },
565   { 'I',  0, 0, 0, N_("'I' flag"),        N_("the 'I' printf flag"),              STD_EXT },
566   { 'w',  0, 0, 0, N_("field width"),     N_("field width in printf format"),     STD_C89 },
567   { 'p',  0, 0, 0, N_("precision"),       N_("precision in printf format"),       STD_C89 },
568   { 'L',  0, 0, 0, N_("length modifier"), N_("length modifier in printf format"), STD_C89 },
569   { 0, 0, 0, 0, NULL, NULL, STD_C89 }
570 };
571 
572 
573 static const format_flag_pair printf_flag_pairs[] =
574 {
575   { ' ', '+', 1, 0   },
576   { '0', '-', 1, 0   },
577   { '0', 'p', 1, 'i' },
578   { 0, 0, 0, 0 }
579 };
580 
581 static const format_flag_spec asm_fprintf_flag_specs[] =
582 {
583   { ' ',  0, 0, 0, N_("' ' flag"),        N_("the ' ' printf flag"),              STD_C89 },
584   { '+',  0, 0, 0, N_("'+' flag"),        N_("the '+' printf flag"),              STD_C89 },
585   { '#',  0, 0, 0, N_("'#' flag"),        N_("the '#' printf flag"),              STD_C89 },
586   { '0',  0, 0, 0, N_("'0' flag"),        N_("the '0' printf flag"),              STD_C89 },
587   { '-',  0, 0, 0, N_("'-' flag"),        N_("the '-' printf flag"),              STD_C89 },
588   { 'w',  0, 0, 0, N_("field width"),     N_("field width in printf format"),     STD_C89 },
589   { 'p',  0, 0, 0, N_("precision"),       N_("precision in printf format"),       STD_C89 },
590   { 'L',  0, 0, 0, N_("length modifier"), N_("length modifier in printf format"), STD_C89 },
591   { 0, 0, 0, 0, NULL, NULL, STD_C89 }
592 };
593 
594 static const format_flag_pair asm_fprintf_flag_pairs[] =
595 {
596   { ' ', '+', 1, 0   },
597   { '0', '-', 1, 0   },
598   { '0', 'p', 1, 'i' },
599   { 0, 0, 0, 0 }
600 };
601 
602 static const format_flag_pair gcc_diag_flag_pairs[] =
603 {
604   { 0, 0, 0, 0 }
605 };
606 
607 #define gcc_tdiag_flag_pairs gcc_diag_flag_pairs
608 #define gcc_cdiag_flag_pairs gcc_diag_flag_pairs
609 #define gcc_cxxdiag_flag_pairs gcc_diag_flag_pairs
610 #define gcc_gfc_flag_pairs gcc_diag_flag_pairs
611 #define gcc_dump_printf_flag_pairs gcc_diag_flag_pairs
612 
613 static const format_flag_spec gcc_diag_flag_specs[] =
614 {
615   { '+',  0, 0, 0, N_("'+' flag"),        N_("the '+' printf flag"),              STD_C89 },
616   { '#',  0, 0, 0, N_("'#' flag"),        N_("the '#' printf flag"),              STD_C89 },
617   { 'q',  0, 0, 1, N_("'q' flag"),        N_("the 'q' diagnostic flag"),          STD_C89 },
618   { 'p',  0, 0, 0, N_("precision"),       N_("precision in printf format"),       STD_C89 },
619   { 'L',  0, 0, 0, N_("length modifier"), N_("length modifier in printf format"), STD_C89 },
620   { 0, 0, 0, 0, NULL, NULL, STD_C89 }
621 };
622 
623 #define gcc_tdiag_flag_specs gcc_diag_flag_specs
624 #define gcc_cdiag_flag_specs gcc_diag_flag_specs
625 #define gcc_cxxdiag_flag_specs gcc_diag_flag_specs
626 #define gcc_gfc_flag_specs gcc_diag_flag_specs
627 #define gcc_dump_printf_flag_specs gcc_diag_flag_specs
628 
629 static const format_flag_spec scanf_flag_specs[] =
630 {
631   { '*',  0, 0, 0, N_("assignment suppression"), N_("the assignment suppression scanf feature"), STD_C89 },
632   { 'a',  0, 0, 0, N_("'a' flag"),               N_("the 'a' scanf flag"),                       STD_EXT },
633   { 'm',  0, 0, 0, N_("'m' flag"),               N_("the 'm' scanf flag"),                       STD_EXT },
634   { 'w',  0, 0, 0, N_("field width"),            N_("field width in scanf format"),              STD_C89 },
635   { 'L',  0, 0, 0, N_("length modifier"),        N_("length modifier in scanf format"),          STD_C89 },
636   { '\'', 0, 0, 0, N_("''' flag"),               N_("the ''' scanf flag"),                       STD_EXT },
637   { 'I',  0, 0, 0, N_("'I' flag"),               N_("the 'I' scanf flag"),                       STD_EXT },
638   { 0, 0, 0, 0, NULL, NULL, STD_C89 }
639 };
640 
641 
642 static const format_flag_pair scanf_flag_pairs[] =
643 {
644   { '*', 'L', 0, 0 },
645   { 'a', 'm', 0, 0 },
646   { 0, 0, 0, 0 }
647 };
648 
649 
650 static const format_flag_spec strftime_flag_specs[] =
651 {
652   { '_', 0,   0, 0, N_("'_' flag"),     N_("the '_' strftime flag"),          STD_EXT },
653   { '-', 0,   0, 0, N_("'-' flag"),     N_("the '-' strftime flag"),          STD_EXT },
654   { '0', 0,   0, 0, N_("'0' flag"),     N_("the '0' strftime flag"),          STD_EXT },
655   { '^', 0,   0, 0, N_("'^' flag"),     N_("the '^' strftime flag"),          STD_EXT },
656   { '#', 0,   0, 0, N_("'#' flag"),     N_("the '#' strftime flag"),          STD_EXT },
657   { 'w', 0,   0, 0, N_("field width"),  N_("field width in strftime format"), STD_EXT },
658   { 'E', 0,   0, 0, N_("'E' modifier"), N_("the 'E' strftime modifier"),      STD_C99 },
659   { 'O', 0,   0, 0, N_("'O' modifier"), N_("the 'O' strftime modifier"),      STD_C99 },
660   { 'O', 'o', 0, 0, NULL,               N_("the 'O' modifier"),               STD_EXT },
661   { 'O', 'p', 0, 0, NULL,               N_("the 'O' modifier"),               STD_C2X },
662   { 0, 0, 0, 0, NULL, NULL, STD_C89 }
663 };
664 
665 
666 static const format_flag_pair strftime_flag_pairs[] =
667 {
668   { 'E', 'O', 0, 0 },
669   { '_', '-', 0, 0 },
670   { '_', '0', 0, 0 },
671   { '-', '0', 0, 0 },
672   { '^', '#', 0, 0 },
673   { 0, 0, 0, 0 }
674 };
675 
676 
677 static const format_flag_spec strfmon_flag_specs[] =
678 {
679   { '=',  0, 1, 0, N_("fill character"),  N_("fill character in strfmon format"),  STD_C89 },
680   { '^',  0, 0, 0, N_("'^' flag"),        N_("the '^' strfmon flag"),              STD_C89 },
681   { '+',  0, 0, 0, N_("'+' flag"),        N_("the '+' strfmon flag"),              STD_C89 },
682   { '(',  0, 0, 0, N_("'(' flag"),        N_("the '(' strfmon flag"),              STD_C89 },
683   { '!',  0, 0, 0, N_("'!' flag"),        N_("the '!' strfmon flag"),              STD_C89 },
684   { '-',  0, 0, 0, N_("'-' flag"),        N_("the '-' strfmon flag"),              STD_C89 },
685   { 'w',  0, 0, 0, N_("field width"),     N_("field width in strfmon format"),     STD_C89 },
686   { '#',  0, 0, 0, N_("left precision"),  N_("left precision in strfmon format"),  STD_C89 },
687   { 'p',  0, 0, 0, N_("right precision"), N_("right precision in strfmon format"), STD_C89 },
688   { 'L',  0, 0, 0, N_("length modifier"), N_("length modifier in strfmon format"), STD_C89 },
689   { 0, 0, 0, 0, NULL, NULL, STD_C89 }
690 };
691 
692 static const format_flag_pair strfmon_flag_pairs[] =
693 {
694   { '+', '(', 0, 0 },
695   { 0, 0, 0, 0 }
696 };
697 
698 
699 static const format_char_info print_char_table[] =
700 {
701   /* C89 conversion specifiers.  */
702   { "di",  0, STD_C89, { T89_I,   T99_SC,  T89_S,   T89_L,   T9L_LL,  TEX_LL,  T99_SST, T99_PD,  T99_IM,  BADLEN,  BADLEN,  BADLEN  }, "-wp0 +'I",  "i",  NULL },
703   { "oxX", 0, STD_C89, { T89_UI,  T99_UC,  T89_US,  T89_UL,  T9L_ULL, TEX_ULL, T99_ST,  T99_UPD, T99_UIM, BADLEN,  BADLEN,  BADLEN }, "-wp0#",     "i",  NULL },
704   { "u",   0, STD_C89, { T89_UI,  T99_UC,  T89_US,  T89_UL,  T9L_ULL, TEX_ULL, T99_ST,  T99_UPD, T99_UIM, BADLEN,  BADLEN,  BADLEN }, "-wp0'I",    "i",  NULL },
705   { "fgG", 0, STD_C89, { T89_D,   BADLEN,  BADLEN,  T99_D,   BADLEN,  T89_LD,  BADLEN,  BADLEN,  BADLEN,  TEX_D32, TEX_D64, TEX_D128 }, "-wp0 +#'I", "",   NULL },
706   { "eE",  0, STD_C89, { T89_D,   BADLEN,  BADLEN,  T99_D,   BADLEN,  T89_LD,  BADLEN,  BADLEN,  BADLEN,  TEX_D32, TEX_D64, TEX_D128 }, "-wp0 +#I",  "",   NULL },
707   { "c",   0, STD_C89, { T89_I,   BADLEN,  BADLEN,  T94_WI,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN }, "-w",        "",   NULL },
708   { "s",   1, STD_C89, { T89_C,   BADLEN,  BADLEN,  T94_W,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN }, "-wp",       "cR", NULL },
709   { "p",   1, STD_C89, { T89_V,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN }, "-w",        "c",  NULL },
710   { "n",   1, STD_C89, { T89_I,   T99_SC,  T89_S,   T89_L,   T9L_LL,  BADLEN,  T99_SST, T99_PD,  T99_IM,  BADLEN,  BADLEN,  BADLEN }, "",          "W",  NULL },
711   /* C99 conversion specifiers.  */
712   { "F",   0, STD_C99, { T99_D,   BADLEN,  BADLEN,  T99_D,   BADLEN,  T99_LD,  BADLEN,  BADLEN,  BADLEN,  TEX_D32, TEX_D64, TEX_D128 }, "-wp0 +#'I", "",   NULL },
713   { "aA",  0, STD_C99, { T99_D,   BADLEN,  BADLEN,  T99_D,   BADLEN,  T99_LD,  BADLEN,  BADLEN,  BADLEN,  TEX_D32, TEX_D64,  TEX_D128 }, "-wp0 +#",   "",   NULL },
714   /* X/Open conversion specifiers.  */
715   { "C",   0, STD_EXT, { TEX_WI,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN }, "-w",        "",   NULL },
716   { "S",   1, STD_EXT, { TEX_W,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN }, "-wp",       "R",  NULL },
717   /* GNU conversion specifiers.  */
718   { "m",   0, STD_EXT, { T89_V,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN }, "-wp",       "",   NULL },
719   { NULL,  0, STD_C89, NOLENGTHS, NULL, NULL, NULL }
720 };
721 
/* Conversion specifiers accepted by the "asm_fprintf" format kind; this
   table is the one named by the "asm_fprintf" entry of format_types_orig.
   The first group mirrors a subset of the C89 printf conversions, the
   second group lists the asm_fprintf-only conversions.  All of the latter
   except "r" are declared with NOARGUMENTS; "r" lists T89_I as its
   argument type.  The list ends at the entry whose format-characters
   field is NULL.  */
static const format_char_info asm_fprintf_char_table[] =
{
  /* C89 conversion specifiers.  */
  { "di",  0, STD_C89, { T89_I,   BADLEN,  BADLEN,  T89_L,   T9L_LL,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "-wp0 +",  "i", NULL },
  { "oxX", 0, STD_C89, { T89_UI,  BADLEN,  BADLEN,  T89_UL,  T9L_ULL, BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "-wp0#",   "i", NULL },
  { "u",   0, STD_C89, { T89_UI,  BADLEN,  BADLEN,  T89_UL,  T9L_ULL, BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "-wp0",    "i", NULL },
  { "c",   0, STD_C89, { T89_I,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "-w",       "", NULL },
  { "s",   1, STD_C89, { T89_C,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "-wp",    "cR", NULL },

  /* asm_fprintf conversion specifiers.  */
  { "O",   0, STD_C89, NOARGUMENTS, "",      "",   NULL },
  { "R",   0, STD_C89, NOARGUMENTS, "",      "",   NULL },
  { "I",   0, STD_C89, NOARGUMENTS, "",      "",   NULL },
  { "L",   0, STD_C89, NOARGUMENTS, "",      "",   NULL },
  { "U",   0, STD_C89, NOARGUMENTS, "",      "",   NULL },
  { "r",   0, STD_C89, { T89_I,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "",  "", NULL },
  { "z",   0, STD_C89, NOARGUMENTS, "",      "",   NULL },
  { "@",   0, STD_C89, NOARGUMENTS, "",      "",   NULL },
  { NULL,  0, STD_C89, NOLENGTHS, NULL, NULL, NULL }
};
742 
743 /* GCC-specific format_char_info arrays.  */
744 
/* The conversion specifiers implemented within pp_format, and thus supported
   by all pretty_printer instances within GCC.

   The macro expands to a run of format_char_info initializers with no
   trailing comma and no terminating NULL entry: each table that uses it
   writes "PP_FORMAT_CHAR_TABLE," and then supplies its own extra entries
   and terminator.  The last entry ("Z") is the only one with a non-NULL
   final field; it points at the first element of gcc_diag_char_table.  */

#define PP_FORMAT_CHAR_TABLE \
  { "di",  0, STD_C89, { T89_I,   BADLEN,  BADLEN,  T89_L,   T9L_LL,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "q",  "",   NULL }, \
  { "ox",  0, STD_C89, { T89_UI,  BADLEN,  BADLEN,  T89_UL,  T9L_ULL, BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "q",  "",   NULL }, \
  { "u",   0, STD_C89, { T89_UI,  BADLEN,  BADLEN,  T89_UL,  T9L_ULL, BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "q",  "",   NULL }, \
  { "c",   0, STD_C89, { T89_I,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "q",  "",   NULL }, \
  { "s",   1, STD_C89, { T89_C,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "pq", "cR", NULL }, \
  { "p",   1, STD_C89, { T89_V,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "q",  "c",  NULL }, \
  { "r",   1, STD_C89, { T89_C,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "",    "//cR",   NULL }, \
  { "@",   1, STD_C89, { T_EVENT_PTR,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "", "\"",   NULL }, \
  { "<",   0, STD_C89, NOARGUMENTS, "",      "<",   NULL }, \
  { ">",   0, STD_C89, NOARGUMENTS, "",      ">",   NULL }, \
  { "'" ,  0, STD_C89, NOARGUMENTS, "",      "",    NULL }, \
  { "{",   1, STD_C89, { T89_C,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "",   "cR", NULL }, \
  { "}",   0, STD_C89, NOARGUMENTS, "",      "",    NULL }, \
  { "R",   0, STD_C89, NOARGUMENTS, "",     "\\",   NULL }, \
  { "m",   0, STD_C89, NOARGUMENTS, "q",     "",   NULL }, \
  { "Z",   1, STD_C89, { T89_I,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "",    "", &gcc_diag_char_table[0] }
765 
/* Conversion specifiers for the "gcc_diag" format kind (see the "gcc_diag"
   entry of format_types_orig): exactly the pp_format set, with no
   additions.  Element 0 of this table is also the target of the pointer
   stored in the "Z" entry of PP_FORMAT_CHAR_TABLE.  */
static const format_char_info gcc_diag_char_table[] =
{
  /* The conversion specifiers implemented within pp_format.  */
  PP_FORMAT_CHAR_TABLE,

  /* Terminator: the entry with a NULL format-characters field.  */
  { NULL,  0, STD_C89, NOLENGTHS, NULL, NULL, NULL }
};
773 
/* Conversion specifiers for the "gcc_tdiag" format kind (see the
   "gcc_tdiag" entry of format_types_orig): the pp_format set followed by
   the tree- and gimple-printing conversions.  Note that the type lists of
   the extra entries below spell out nine elements, fewer than the twelve
   written in most other tables of this file.  */
static const format_char_info gcc_tdiag_char_table[] =
{
  /* The conversion specifiers implemented within pp_format.  */
  PP_FORMAT_CHAR_TABLE,

  /* Custom conversion specifiers implemented by default_tree_printer.  */

  /* These will require a "tree" at runtime.  */
  { "DFTV", 1, STD_C89, { T89_T,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "q+", "'",   NULL },
  { "E", 1, STD_C89, { T89_T,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "q+", "",   NULL },
  { "K", 1, STD_C89, { T89_T,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "", "\"",   NULL },

  /* G requires a "gimple*" argument at runtime.  */
  { "G", 1, STD_C89, { T89_G,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "", "\"",   NULL },

  /* Terminator: the entry with a NULL format-characters field.  */
  { NULL,  0, STD_C89, NOLENGTHS, NULL, NULL, NULL }
};
791 
/* Conversion specifiers for the "gcc_cdiag" format kind (see the
   "gcc_cdiag" entry of format_types_orig): the pp_format set, the same
   tree/gimple conversions as gcc_tdiag_char_table, and one extra "v"
   conversion that lists T89_I as its argument type.  */
static const format_char_info gcc_cdiag_char_table[] =
{
  /* The conversion specifiers implemented within pp_format.  */
  PP_FORMAT_CHAR_TABLE,

  /* Custom conversion specifiers implemented by c_tree_printer.  */

  /* These will require a "tree" at runtime.  */
  { "DFTV", 1, STD_C89, { T89_T,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "q+", "'",   NULL },
  { "E",   1, STD_C89, { T89_T,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "q+", "",   NULL },
  { "K",   1, STD_C89, { T89_T,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "", "\"",   NULL },

  /* G requires a "gimple*" argument at runtime.  */
  { "G",   1, STD_C89, { T89_G,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "", "\"",   NULL },

  /* v takes an 'int' argument and accepts the 'q' and '#' flags.  */
  { "v",   0, STD_C89, { T89_I,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "q#",  "",   NULL },

  /* Terminator: the entry with a NULL format-characters field.  */
  { NULL,  0, STD_C89, NOLENGTHS, NULL, NULL, NULL }
};
811 
/* Conversion specifiers for the "gcc_cxxdiag" format kind (see the
   "gcc_cxxdiag" entry of format_types_orig): the pp_format set followed
   by the conversions handled by the C++ front end's printer.  */
static const format_char_info gcc_cxxdiag_char_table[] =
{
  /* The conversion specifiers implemented within pp_format.  */
  PP_FORMAT_CHAR_TABLE,

  /* Custom conversion specifiers implemented by cp_printer.  */

  /* These will require a "tree" at runtime.  */
  { "ADFHISTVX",1,STD_C89,{ T89_T,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "q+#",   "'",   NULL },
  { "E", 1,STD_C89,{ T89_T,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "q+#",   "",   NULL },
  { "K", 1, STD_C89,{ T89_T,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "",   "\"",   NULL },

  /* G requires a "gimple*" argument at runtime.  */
  { "G", 1, STD_C89,{ T89_G,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "",   "\"",   NULL },

  /* These accept either an 'int' or an 'enum tree_code' (which is handled as an 'int'.)  */
  { "CLOPQ",0,STD_C89, { T89_I,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "q",  "",   NULL },

  /* Terminator: the entry with a NULL format-characters field.  */
  { NULL,  0, STD_C89, NOLENGTHS, NULL, NULL, NULL }
};
832 
/* Conversion specifiers for the "gcc_gfc" format kind (see the "gcc_gfc"
   entry of format_types_orig).  Unlike the other GCC diagnostic tables
   this one does not start from PP_FORMAT_CHAR_TABLE; it lists its own,
   smaller set of C89 conversions followed by the gfc-only ones.  */
static const format_char_info gcc_gfc_char_table[] =
{
  /* C89 conversion specifiers.  */
  { "di",  0, STD_C89, { T89_I,   BADLEN,  BADLEN,  T89_L,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "q", "", NULL },
  { "u",   0, STD_C89, { T89_UI,  BADLEN,  BADLEN,  T89_UL,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "q", "", NULL },
  { "c",   0, STD_C89, { T89_I,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "q", "", NULL },
  { "s",   1, STD_C89, { T89_C,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "q", "cR", NULL },

  /* gfc conversion specifiers.  */

  /* C consumes no argument.  */
  { "C",   0, STD_C89, NOARGUMENTS, "",      "",   NULL },

  /* This will require a "locus" at runtime.  */
  { "L",   0, STD_C89, { T89_V,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "", "R", NULL },

  /* These will require nothing.  */
  { "<>",0, STD_C89, NOARGUMENTS, "",      "",   NULL },
  { NULL,  0, STD_C89, NOLENGTHS, NULL, NULL, NULL }
};
852 
/* Conversion specifiers for the "gcc_dump_printf" format kind (see the
   "gcc_dump_printf" entry of format_types_orig): the pp_format set
   followed by the conversions specific to dump output.  */
static const format_char_info gcc_dump_printf_char_table[] =
{
  /* The conversion specifiers implemented within pp_format.  */
  PP_FORMAT_CHAR_TABLE,

  /* Custom conversion specifiers implemented by dump_pretty_printer.  */

  /* E and G require a "gimple *" argument at runtime.  */
  { "EG",   1, STD_C89, { T89_G,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "", "\"",   NULL },

  /* C requires a "cgraph_node *" argument at runtime.  */
  { "C",   1, STD_C89, { T_CGRAPH_NODE,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "", "\"",   NULL },

  /* T requires a "tree" at runtime.  */
  { "T",   1, STD_C89, { T89_T,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "", "\"",   NULL },

  /* %f requires a "double"; it doesn't support modifiers.  */
  { "f",   0, STD_C89, { T89_D,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "", "\"",   NULL },

  /* Terminator: the entry with a NULL format-characters field.  */
  { NULL,  0, STD_C89, NOLENGTHS, NULL, NULL, NULL }
};
874 
/* Conversion specifiers for the "gnu_scanf" format kind (see the
   "gnu_scanf" entry of format_types_orig), grouped by the standard that
   introduced them.  The second field is 1 for every conversion except
   "p", where it is 2 (presumably the pointer depth of the expected
   argument -- confirm against the format_char_info declaration).  */
static const format_char_info scan_char_table[] =
{
  /* C89 conversion specifiers.  */
  { "di",    1, STD_C89, { T89_I,   T99_SC,  T89_S,   T89_L,   T9L_LL,  TEX_LL,  T99_SST, T99_PD,  T99_IM,  BADLEN,  BADLEN,  BADLEN }, "*w'I", "W",   NULL },
  { "u",     1, STD_C89, { T89_UI,  T99_UC,  T89_US,  T89_UL,  T9L_ULL, TEX_ULL, T99_ST,  T99_UPD, T99_UIM, BADLEN,  BADLEN,  BADLEN }, "*w'I", "W",   NULL },
  { "oxX",   1, STD_C89, { T89_UI,  T99_UC,  T89_US,  T89_UL,  T9L_ULL, TEX_ULL, T99_ST,  T99_UPD, T99_UIM, BADLEN,  BADLEN,  BADLEN }, "*w",   "W",   NULL },
  { "efgEG", 1, STD_C89, { T89_F,   BADLEN,  BADLEN,  T89_D,   BADLEN,  T89_LD,  BADLEN,  BADLEN,  BADLEN,  TEX_D32, TEX_D64, TEX_D128 }, "*w'",  "W",   NULL },
  { "c",     1, STD_C89, { T89_C,   BADLEN,  BADLEN,  T94_W,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN }, "*mw",   "cW",  NULL },
  { "s",     1, STD_C89, { T89_C,   BADLEN,  BADLEN,  T94_W,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN }, "*amw",  "cW",  NULL },
  { "[",     1, STD_C89, { T89_C,   BADLEN,  BADLEN,  T94_W,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN }, "*amw",  "cW[", NULL },
  { "p",     2, STD_C89, { T89_V,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN }, "*w",   "W",   NULL },
  { "n",     1, STD_C89, { T89_I,   T99_SC,  T89_S,   T89_L,   T9L_LL,  BADLEN,  T99_SST, T99_PD,  T99_IM,  BADLEN,  BADLEN,  BADLEN }, "",     "W",   NULL },
  /* C99 conversion specifiers.  */
  { "F",   1, STD_C99, { T99_F,   BADLEN,  BADLEN,  T99_D,   BADLEN,  T99_LD,  BADLEN,  BADLEN,  BADLEN,  TEX_D32, TEX_D64, TEX_D128 }, "*w'",  "W",   NULL },
  { "aA",   1, STD_C99, { T99_F,   BADLEN,  BADLEN,  T99_D,   BADLEN,  T99_LD,  BADLEN,  BADLEN,  BADLEN,  TEX_D32,  TEX_D64,  TEX_D128 }, "*w'",  "W",   NULL },
  /* X/Open conversion specifiers.  */
  { "C",     1, STD_EXT, { TEX_W,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN }, "*mw",   "W",   NULL },
  { "S",     1, STD_EXT, { TEX_W,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN }, "*amw",  "W",   NULL },
  { NULL, 0, STD_C89, NOLENGTHS, NULL, NULL, NULL }
};
895 
/* Conversion specifiers for the "gnu_strftime" format kind (see the
   "gnu_strftime" entry of format_types_orig), grouped by the standard
   that introduced them.  Every entry uses NOLENGTHS in place of a
   per-length type list.  The two string fields carry the accepted flag
   characters and further per-conversion flag characters; their exact
   meaning is defined with format_char_info, outside this part of the
   file.  */
static const format_char_info time_char_table[] =
{
  /* C89 conversion specifiers.  */
  { "AZa",		0, STD_C89, NOLENGTHS, "^#",     "",   NULL },
  { "Bb",		0, STD_C89, NOLENGTHS, "O^#",    "p",  NULL },
  { "cx",		0, STD_C89, NOLENGTHS, "E",      "3",  NULL },
  { "HIMSUWdmw",	0, STD_C89, NOLENGTHS, "-_0Ow",  "",   NULL },
  { "j",		0, STD_C89, NOLENGTHS, "-_0Ow",  "o",  NULL },
  { "p",		0, STD_C89, NOLENGTHS, "#",      "",   NULL },
  { "X",		0, STD_C89, NOLENGTHS, "E",      "",   NULL },
  { "y",		0, STD_C89, NOLENGTHS, "EO-_0w", "4",  NULL },
  { "Y",		0, STD_C89, NOLENGTHS, "-_0EOw", "o",  NULL },
  { "%",		0, STD_C89, NOLENGTHS, "",       "",   NULL },
  /* C99 conversion specifiers.  */
  { "C",		0, STD_C99, NOLENGTHS, "-_0EOw", "o",  NULL },
  { "D",		0, STD_C99, NOLENGTHS, "",       "2",  NULL },
  { "eVu",		0, STD_C99, NOLENGTHS, "-_0Ow",  "",   NULL },
  { "FRTnrt",		0, STD_C99, NOLENGTHS, "",       "",   NULL },
  { "g",		0, STD_C99, NOLENGTHS, "O-_0w",  "2o", NULL },
  { "G",		0, STD_C99, NOLENGTHS, "-_0Ow",  "o",  NULL },
  { "h",		0, STD_C99, NOLENGTHS, "^#",     "",   NULL },
  { "z",		0, STD_C99, NOLENGTHS, "O",      "o",  NULL },
  /* GNU conversion specifiers.  */
  { "kls",		0, STD_EXT, NOLENGTHS, "-_0Ow",  "",   NULL },
  { "P",		0, STD_EXT, NOLENGTHS, "",       "",   NULL },
  { NULL,		0, STD_C89, NOLENGTHS, NULL, NULL, NULL }
};
923 
/* Conversion specifiers for the "gnu_strfmon" format kind (see the
   "gnu_strfmon" entry of format_types_orig).  Only the "i" and "n"
   conversions exist; both list T89_D as the argument type with no length
   modifier and T89_LD in the sixth length slot.  */
static const format_char_info monetary_char_table[] =
{
  { "in", 0, STD_C89, { T89_D, BADLEN, BADLEN, BADLEN, BADLEN, T89_LD, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN }, "=^+(!-w#p", "", NULL },
  { NULL, 0, STD_C89, NOLENGTHS, NULL, NULL, NULL }
};
929 
/* This must be in the same order as enum format_type.

   The built-in format kinds.  Each entry starts with the name that
   decode_format_type compares attribute arguments against, followed by
   the length-specifier table, the conversion-specifier table, two
   strings of flag/modifier characters, the flag spec and flag pair
   tables, the FMT_FLAG_* behaviour bits (read through the "flags" field
   elsewhere in this file), six characters and two type pointers.  The
   positional meaning of the trailing fields is defined by
   format_kind_info, which is declared outside this part of the file.

   Entries are addressed by index, so an entry's position is significant:
   the index returned by decode_format_type is cast straight to
   enum format_type.  */
static const format_kind_info format_types_orig[] =
{
  { "gnu_printf",   printf_length_specs,  print_char_table, " +#0-'I", NULL,
    printf_flag_specs, printf_flag_pairs,
    FMT_FLAG_ARG_CONVERT|FMT_FLAG_DOLLAR_MULTIPLE|FMT_FLAG_USE_DOLLAR|FMT_FLAG_EMPTY_PREC_OK,
    'w', 0, 'p', 0, 'L', 0,
    &integer_type_node, &integer_type_node
  },
  { "asm_fprintf",   asm_fprintf_length_specs,  asm_fprintf_char_table, " +#0-", NULL,
    asm_fprintf_flag_specs, asm_fprintf_flag_pairs,
    FMT_FLAG_ARG_CONVERT|FMT_FLAG_EMPTY_PREC_OK,
    'w', 0, 'p', 0, 'L', 0,
    NULL, NULL
  },
  { "gcc_diag",   gcc_diag_length_specs,  gcc_diag_char_table, "q+#", NULL,
    gcc_diag_flag_specs, gcc_diag_flag_pairs,
    FMT_FLAG_ARG_CONVERT,
    0, 0, 'p', 0, 'L', 0,
    NULL, &integer_type_node
  },
  { "gcc_tdiag",   gcc_tdiag_length_specs,  gcc_tdiag_char_table, "q+#", NULL,
    gcc_tdiag_flag_specs, gcc_tdiag_flag_pairs,
    FMT_FLAG_ARG_CONVERT,
    0, 0, 'p', 0, 'L', 0,
    NULL, &integer_type_node
  },
  { "gcc_cdiag",   gcc_cdiag_length_specs,  gcc_cdiag_char_table, "q+#", NULL,
    gcc_cdiag_flag_specs, gcc_cdiag_flag_pairs,
    FMT_FLAG_ARG_CONVERT,
    0, 0, 'p', 0, 'L', 0,
    NULL, &integer_type_node
  },
  { "gcc_cxxdiag",   gcc_cxxdiag_length_specs,  gcc_cxxdiag_char_table, "q+#", NULL,
    gcc_cxxdiag_flag_specs, gcc_cxxdiag_flag_pairs,
    FMT_FLAG_ARG_CONVERT,
    0, 0, 'p', 0, 'L', 0,
    NULL, &integer_type_node
  },
  { "gcc_gfc", gcc_gfc_length_specs, gcc_gfc_char_table, "q+#", NULL,
    gcc_gfc_flag_specs, gcc_gfc_flag_pairs,
    FMT_FLAG_ARG_CONVERT,
    0, 0, 0, 0, 0, 0,
    NULL, NULL
  },
  { "gcc_dump_printf",   gcc_dump_printf_length_specs,
    gcc_dump_printf_char_table, "q+#", NULL,
    gcc_dump_printf_flag_specs, gcc_dump_printf_flag_pairs,
    FMT_FLAG_ARG_CONVERT,
    0, 0, 'p', 0, 'L', 0,
    NULL, &integer_type_node
  },
  /* NSString carries no tables of its own; its flags include
     FMT_FLAG_PARSE_ARG_CONVERT_EXTERNAL.  */
  { "NSString",   NULL,  NULL, NULL, NULL,
    NULL, NULL,
    FMT_FLAG_ARG_CONVERT|FMT_FLAG_PARSE_ARG_CONVERT_EXTERNAL, 0, 0, 0, 0, 0, 0,
    NULL, NULL
  },
  { "gnu_scanf",    scanf_length_specs,   scan_char_table,  "*'I", NULL,
    scanf_flag_specs, scanf_flag_pairs,
    FMT_FLAG_ARG_CONVERT|FMT_FLAG_SCANF_A_KLUDGE|FMT_FLAG_USE_DOLLAR|FMT_FLAG_ZERO_WIDTH_BAD|FMT_FLAG_DOLLAR_GAP_POINTER_OK,
    'w', 0, 0, '*', 'L', 'm',
    NULL, NULL
  },
  /* strftime is the only kind here without FMT_FLAG_ARG_CONVERT.  */
  { "gnu_strftime", NULL,                 time_char_table,  "_-0^#", "EO",
    strftime_flag_specs, strftime_flag_pairs,
    FMT_FLAG_FANCY_PERCENT_OK, 'w', 0, 0, 0, 0, 0,
    NULL, NULL
  },
  { "gnu_strfmon",  strfmon_length_specs, monetary_char_table, "=^+(!-", NULL,
    strfmon_flag_specs, strfmon_flag_pairs,
    FMT_FLAG_ARG_CONVERT, 'w', '#', 'p', 0, 'L', 0,
    NULL, NULL
  }
};
1004 
/* This layer of indirection allows GCC to reassign format_types with
   new data if necessary, while still allowing the original data to be
   const.  All lookups in this file go through format_types rather than
   through format_types_orig directly.  */
static const format_kind_info *format_types = format_types_orig;
/* We can modify this one.  We also add target-specific format types
   to the end of the array.  */
static format_kind_info *dynamic_format_types;

/* Number of entries reachable through format_types.  It starts out as the
   size of format_types_orig and is the upper bound used by format_name,
   format_flags and decode_format_type when indexing format_types.  */
static int n_format_types = ARRAY_SIZE (format_types_orig);
1014 
/* Structure detailing the results of checking a format function call
   where the format expression may be a conditional expression with
   many leaves resulting from nested conditional expressions.  Each
   counter tallies the leaves that fell into one category.  */
struct format_check_results
{
  /* Number of leaves of the format argument that could not be checked
     as they were not string literals.  */
  int number_non_literal;
  /* Number of leaves of the format argument that were null pointers or
     string literals, but had extra format arguments.  */
  int number_extra_args;
  /* Location associated with number_extra_args (presumably where an
     extra argument was seen; the code that sets it is not in this part
     of the file).  */
  location_t extra_arg_loc;
  /* Number of leaves of the format argument that were null pointers or
     string literals, but had extra format arguments and used $ operand
     numbers.  */
  int number_dollar_extra_args;
  /* Number of leaves of the format argument that were wide string
     literals.  */
  int number_wide;
  /* Number of leaves of the format argument that are not array of "char".  */
  int number_non_char;
  /* Number of leaves of the format argument that were empty strings.  */
  int number_empty;
  /* Number of leaves of the format argument that were unterminated
     strings.  */
  int number_unterminated;
  /* Number of leaves of the format argument that were not counted above.  */
  int number_other;
  /* Location of the format string.  */
  location_t format_string_loc;
};
1046 
/* Bundle of the state needed while checking one format argument.  The
   member types match the parameters of check_format_info and
   check_format_info_main; check_format_arg receives a "void *" which is
   presumably a pointer to one of these (its definition is outside this
   part of the file).  */
struct format_check_context
{
  /* Where the per-leaf tallies are accumulated.  */
  format_check_results *res;
  /* The decoded format attribute being checked against.  */
  function_format_info *info;
  /* The call's arguments as a TREE_LIST.  */
  tree params;
  /* Locations that accompany the arguments, as passed to
     check_function_format.  */
  vec<location_t> *arglocs;
};
1054 
1055 /* Return the format name (as specified in the original table) for the format
1056    type indicated by format_num.  */
1057 static const char *
format_name(int format_num)1058 format_name (int format_num)
1059 {
1060   if (format_num >= 0 && format_num < n_format_types)
1061     return format_types[format_num].name;
1062   gcc_unreachable ();
1063 }
1064 
1065 /* Return the format flags (as specified in the original table) for the format
1066    type indicated by format_num.  */
1067 static int
format_flags(int format_num)1068 format_flags (int format_num)
1069 {
1070   if (format_num >= 0 && format_num < n_format_types)
1071     return format_types[format_num].flags;
1072   gcc_unreachable ();
1073 }
1074 
/* Forward declarations for the checking machinery defined later in the
   file.  */

/* Entry points for checking one call against one decoded format
   attribute.  */
static void check_format_info (function_format_info *, tree,
			       vec<location_t> *);
static void check_format_arg (void *, tree, unsigned HOST_WIDE_INT);
static void check_format_info_main (format_check_results *,
				    function_format_info *, const char *,
				    location_t, tree,
				    int, tree,
				    unsigned HOST_WIDE_INT,
				    object_allocator<format_wanted_type> &,
				    vec<location_t> *);

/* Support for "%n$" operand numbers.  */
static void init_dollar_format_checking (int, tree);
static int maybe_read_dollar_number (const char **, int,
				     tree, tree *, const format_kind_info *);
static bool avoid_dollar_number (const char *);
static void finish_dollar_format_checking (format_check_results *, int);

/* Flag table lookup.  */
static const format_flag_spec *get_flag_spec (const format_flag_spec *,
					      int, const char *);

/* Argument type checking and the diagnostic for a type mismatch.  */
static void check_format_types (const substring_loc &fmt_loc,
				format_wanted_type *,
				const format_kind_info *fki,
				int offset_to_type_start,
				char conversion_char,
				vec<location_t> *arglocs);
static void format_type_warning (const substring_loc &fmt_loc,
				 location_t param_loc,
				 format_wanted_type *, tree,
				 tree,
				 const format_kind_info *fki,
				 int offset_to_type_start,
				 char conversion_char);
1108 
1109 /* Decode a format type from a string, returning the type, or
1110    format_type_error if not valid, in which case the caller should
1111    print an error message.  On success, when IS_RAW is non-null, set
1112    *IS_RAW when the format type corresponds to a GCC "raw" diagnostic
1113    formatting function and clear it otherwise.  */
1114 static format_type
decode_format_type(const char * s,bool * is_raw)1115 decode_format_type (const char *s, bool *is_raw /* = NULL */)
1116 {
1117   bool is_raw_buf;
1118 
1119   if (!is_raw)
1120     is_raw = &is_raw_buf;
1121 
1122   *is_raw = false;
1123 
1124   s = convert_format_name_to_system_name (s);
1125 
1126   size_t slen = strlen (s);
1127   for (int i = 0; i < n_format_types; i++)
1128     {
1129       /* Check for a match with no underscores.  */
1130       if (!strcmp (s, format_types[i].name))
1131 	return static_cast<format_type> (i);
1132 
1133       /* Check for leading and trailing underscores.  */
1134       size_t alen = strlen (format_types[i].name);
1135       if (slen == alen + 4 && s[0] == '_' && s[1] == '_'
1136 	  && s[slen - 1] == '_' && s[slen - 2] == '_'
1137 	  && !strncmp (s + 2, format_types[i].name, alen))
1138 	return static_cast<format_type>(i);
1139 
1140       /* Check for the "_raw" suffix and no leading underscores.  */
1141       if (slen == alen + 4
1142 	  && !strncmp (s, format_types[i].name, alen)
1143 	  && !strcmp (s + alen, "_raw"))
1144 	{
1145 	  *is_raw = true;
1146 	  return static_cast<format_type>(i);
1147 	}
1148 
1149       /* Check for the "_raw__" suffix and leading underscores.  */
1150       if (slen == alen + 8 && s[0] == '_' && s[1] == '_'
1151 	  && !strncmp (s + 2, format_types[i].name, alen)
1152 	  && !strcmp (s + 2 + alen, "_raw__"))
1153 	{
1154 	  *is_raw = true;
1155 	  return static_cast<format_type>(i);
1156 	}
1157     }
1158 
1159   return format_type_error;
1160 }
1161 
1162 
1163 /* Check the argument list of a call to printf, scanf, etc.
1164    ATTRS are the attributes on the function type.  There are NARGS argument
1165    values in the array ARGARRAY.
1166    Also, if -Wsuggest-attribute=format,
1167    warn for calls to vprintf or vscanf in functions with no such format
1168    attribute themselves.  */
1169 
1170 void
check_function_format(const_tree fntype,tree attrs,int nargs,tree * argarray,vec<location_t> * arglocs)1171 check_function_format (const_tree fntype, tree attrs, int nargs,
1172 		       tree *argarray, vec<location_t> *arglocs)
1173 {
1174   tree a;
1175 
1176   tree atname = get_identifier ("format");
1177 
1178   /* See if this function has any format attributes.  */
1179   for (a = attrs; a; a = TREE_CHAIN (a))
1180     {
1181       if (is_attribute_p ("format", get_attribute_name (a)))
1182 	{
1183 	  /* Yup; check it.  */
1184 	  function_format_info info;
1185 	  decode_format_attr (fntype, atname, TREE_VALUE (a), &info,
1186 			      /*validated=*/true);
1187 	  if (warn_format)
1188 	    {
1189 	      /* FIXME: Rewrite all the internal functions in this file
1190 		 to use the ARGARRAY directly instead of constructing this
1191 		 temporary list.  */
1192 	      tree params = NULL_TREE;
1193 	      int i;
1194 	      for (i = nargs - 1; i >= 0; i--)
1195 		params = tree_cons (NULL_TREE, argarray[i], params);
1196 	      check_format_info (&info, params, arglocs);
1197 	    }
1198 
1199 	  /* Attempt to detect whether the current function might benefit
1200 	     from the format attribute if the called function is decorated
1201 	     with it.  Avoid using calls with string literal formats for
1202 	     guidance since those are unlikely to be viable candidates.  */
1203 	  if (warn_suggest_attribute_format
1204 	      && current_function_decl != NULL_TREE
1205 	      && info.first_arg_num == 0
1206 	      && (format_types[info.format_type].flags
1207 		  & (int) FMT_FLAG_ARG_CONVERT)
1208 	      /* c_strlen will fail for a function parameter but succeed
1209 		 for a literal or constant array.  */
1210 	      && !c_strlen (argarray[info.format_num - 1], 1))
1211 	    {
1212 	      tree c;
1213 	      for (c = TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl));
1214 		   c;
1215 		   c = TREE_CHAIN (c))
1216 		if (is_attribute_p ("format", TREE_PURPOSE (c))
1217 		    && (decode_format_type (IDENTIFIER_POINTER
1218 					    (TREE_VALUE (TREE_VALUE (c))))
1219 			== info.format_type))
1220 		  break;
1221 	      if (c == NULL_TREE)
1222 		{
1223 		  /* Check if the current function has a parameter to which
1224 		     the format attribute could be attached; if not, it
1225 		     can't be a candidate for a format attribute, despite
1226 		     the vprintf-like or vscanf-like call.  */
1227 		  tree args;
1228 		  for (args = DECL_ARGUMENTS (current_function_decl);
1229 		       args != 0;
1230 		       args = DECL_CHAIN (args))
1231 		    {
1232 		      if (TREE_CODE (TREE_TYPE (args)) == POINTER_TYPE
1233 			  && (TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (args)))
1234 			      == char_type_node))
1235 			break;
1236 		    }
1237 		  if (args != 0)
1238 		    warning (OPT_Wsuggest_attribute_format, "function %qD "
1239 			     "might be a candidate for %qs format attribute",
1240 			     current_function_decl,
1241 			     format_types[info.format_type].name);
1242 		}
1243 	    }
1244 	}
1245     }
1246 }
1247 
1248 
/* Variables used by the checking of $ operand number formats.  They are
   reset by init_dollar_format_checking and updated by
   maybe_read_dollar_number.  */

/* One byte per operand: 0 while the operand has not been referenced, 1
   once it has, and 2 once a repeated use has been diagnosed.  */
static char *dollar_arguments_used = NULL;
/* One byte per operand: nonzero if the operand's type is a POINTER_TYPE.
   Only filled in when the first argument number is positive.  */
static char *dollar_arguments_pointer_p = NULL;
/* Number of elements currently allocated in the two arrays above.  */
static int dollar_arguments_alloc = 0;
/* Number of operands in the parameter list, or 0 when the first argument
   number is not positive (vprintf-style).  */
static int dollar_arguments_count;
/* The FIRST_ARG_NUM given to init_dollar_format_checking; 0 means a
   vprintf-style function.  */
static int dollar_first_arg_num;
/* Highest operand number referenced so far in the current format.  */
static int dollar_max_arg_used;
/* Nonzero once the pedantic "%n$ not supported" warning has been issued
   for the current format, so that it is given only once.  */
static int dollar_format_warned;
1257 
1258 /* Initialize the checking for a format string that may contain $
1259    parameter number specifications; we will need to keep track of whether
1260    each parameter has been used.  FIRST_ARG_NUM is the number of the first
1261    argument that is a parameter to the format, or 0 for a vprintf-style
1262    function; PARAMS is the list of arguments starting at this argument.  */
1263 
1264 static void
init_dollar_format_checking(int first_arg_num,tree params)1265 init_dollar_format_checking (int first_arg_num, tree params)
1266 {
1267   tree oparams = params;
1268 
1269   dollar_first_arg_num = first_arg_num;
1270   dollar_arguments_count = 0;
1271   dollar_max_arg_used = 0;
1272   dollar_format_warned = 0;
1273   if (first_arg_num > 0)
1274     {
1275       while (params)
1276 	{
1277 	  dollar_arguments_count++;
1278 	  params = TREE_CHAIN (params);
1279 	}
1280     }
1281   if (dollar_arguments_alloc < dollar_arguments_count)
1282     {
1283       free (dollar_arguments_used);
1284       free (dollar_arguments_pointer_p);
1285       dollar_arguments_alloc = dollar_arguments_count;
1286       dollar_arguments_used = XNEWVEC (char, dollar_arguments_alloc);
1287       dollar_arguments_pointer_p = XNEWVEC (char, dollar_arguments_alloc);
1288     }
1289   if (dollar_arguments_alloc)
1290     {
1291       memset (dollar_arguments_used, 0, dollar_arguments_alloc);
1292       if (first_arg_num > 0)
1293 	{
1294 	  int i = 0;
1295 	  params = oparams;
1296 	  while (params)
1297 	    {
1298 	      dollar_arguments_pointer_p[i] = (TREE_CODE (TREE_TYPE (TREE_VALUE (params)))
1299 					       == POINTER_TYPE);
1300 	      params = TREE_CHAIN (params);
1301 	      i++;
1302 	    }
1303 	}
1304     }
1305 }
1306 
1307 
1308 /* Look for a decimal number followed by a $ in *FORMAT.  If DOLLAR_NEEDED
1309    is set, it is an error if one is not found; otherwise, it is OK.  If
1310    such a number is found, check whether it is within range and mark that
1311    numbered operand as being used for later checking.  Returns the operand
1312    number if found and within range, zero if no such number was found and
1313    this is OK, or -1 on error.  PARAMS points to the first operand of the
1314    format; PARAM_PTR is made to point to the parameter referred to.  If
1315    a $ format is found, *FORMAT is updated to point just after it.  */
1316 
1317 static int
maybe_read_dollar_number(const char ** format,int dollar_needed,tree params,tree * param_ptr,const format_kind_info * fki)1318 maybe_read_dollar_number (const char **format,
1319 			  int dollar_needed, tree params, tree *param_ptr,
1320 			  const format_kind_info *fki)
1321 {
1322   int argnum;
1323   int overflow_flag;
1324   const char *fcp = *format;
1325   if (!ISDIGIT (*fcp))
1326     {
1327       if (dollar_needed)
1328 	{
1329 	  warning (OPT_Wformat_, "missing $ operand number in format");
1330 	  return -1;
1331 	}
1332       else
1333 	return 0;
1334     }
1335   argnum = 0;
1336   overflow_flag = 0;
1337   while (ISDIGIT (*fcp))
1338     {
1339       HOST_WIDE_INT nargnum
1340 	= HOST_WIDE_INT_UC (10) * argnum + (*fcp - '0');
1341       if ((int) nargnum != nargnum)
1342 	overflow_flag = 1;
1343       argnum = nargnum;
1344       fcp++;
1345     }
1346   if (*fcp != '$')
1347     {
1348       if (dollar_needed)
1349 	{
1350 	  warning (OPT_Wformat_, "missing $ operand number in format");
1351 	  return -1;
1352 	}
1353       else
1354 	return 0;
1355     }
1356   *format = fcp + 1;
1357   if (pedantic && !dollar_format_warned)
1358     {
1359       warning (OPT_Wformat_, "%s does not support %%n$ operand number formats",
1360 	       C_STD_NAME (STD_EXT));
1361       dollar_format_warned = 1;
1362     }
1363   if (overflow_flag || argnum == 0
1364       || (dollar_first_arg_num && argnum > dollar_arguments_count))
1365     {
1366       warning (OPT_Wformat_, "operand number out of range in format");
1367       return -1;
1368     }
1369   if (argnum > dollar_max_arg_used)
1370     dollar_max_arg_used = argnum;
1371   /* For vprintf-style functions we may need to allocate more memory to
1372      track which arguments are used.  */
1373   while (dollar_arguments_alloc < dollar_max_arg_used)
1374     {
1375       int nalloc;
1376       nalloc = 2 * dollar_arguments_alloc + 16;
1377       dollar_arguments_used = XRESIZEVEC (char, dollar_arguments_used,
1378 					  nalloc);
1379       dollar_arguments_pointer_p = XRESIZEVEC (char, dollar_arguments_pointer_p,
1380 					       nalloc);
1381       memset (dollar_arguments_used + dollar_arguments_alloc, 0,
1382 	      nalloc - dollar_arguments_alloc);
1383       dollar_arguments_alloc = nalloc;
1384     }
1385   if (!(fki->flags & (int) FMT_FLAG_DOLLAR_MULTIPLE)
1386       && dollar_arguments_used[argnum - 1] == 1)
1387     {
1388       dollar_arguments_used[argnum - 1] = 2;
1389       warning (OPT_Wformat_, "format argument %d used more than once in %s format",
1390 	       argnum, fki->name);
1391     }
1392   else
1393     dollar_arguments_used[argnum - 1] = 1;
1394   if (dollar_first_arg_num)
1395     {
1396       int i;
1397       *param_ptr = params;
1398       for (i = 1; i < argnum && *param_ptr != 0; i++)
1399 	*param_ptr = TREE_CHAIN (*param_ptr);
1400 
1401       /* This case shouldn't be caught here.  */
1402       gcc_assert (*param_ptr);
1403     }
1404   else
1405     *param_ptr = 0;
1406   return argnum;
1407 }
1408 
1409 /* Ensure that FORMAT does not start with a decimal number followed by
1410    a $; give a diagnostic and return true if it does, false otherwise.  */
1411 
1412 static bool
avoid_dollar_number(const char * format)1413 avoid_dollar_number (const char *format)
1414 {
1415   if (!ISDIGIT (*format))
1416     return false;
1417   while (ISDIGIT (*format))
1418     format++;
1419   if (*format == '$')
1420     {
1421       warning (OPT_Wformat_,
1422 	       "%<$%>operand number used after format without operand number");
1423       return true;
1424     }
1425   return false;
1426 }
1427 
1428 
1429 /* Finish the checking for a format string that used $ operand number formats
1430    instead of non-$ formats.  We check for unused operands before used ones
1431    (a serious error, since the implementation of the format function
1432    can't know what types to pass to va_arg to find the later arguments).
1433    and for unused operands at the end of the format (if we know how many
1434    arguments the format had, so not for vprintf).  If there were operand
1435    numbers out of range on a non-vprintf-style format, we won't have reached
1436    here.  If POINTER_GAP_OK, unused arguments are OK if all arguments are
1437    pointers.  */
1438 
1439 static void
finish_dollar_format_checking(format_check_results * res,int pointer_gap_ok)1440 finish_dollar_format_checking (format_check_results *res, int pointer_gap_ok)
1441 {
1442   int i;
1443   bool found_pointer_gap = false;
1444   for (i = 0; i < dollar_max_arg_used; i++)
1445     {
1446       if (!dollar_arguments_used[i])
1447 	{
1448 	  if (pointer_gap_ok && (dollar_first_arg_num == 0
1449 				 || dollar_arguments_pointer_p[i]))
1450 	    found_pointer_gap = true;
1451 	  else
1452 	    warning_at (res->format_string_loc, OPT_Wformat_,
1453 			"format argument %d unused before used argument %d "
1454 			"in %<$%>-style format",
1455 			i + 1, dollar_max_arg_used);
1456 	}
1457     }
1458   if (found_pointer_gap
1459       || (dollar_first_arg_num
1460 	  && dollar_max_arg_used < dollar_arguments_count))
1461     {
1462       res->number_other--;
1463       res->number_dollar_extra_args++;
1464     }
1465 }
1466 
1467 
1468 /* Retrieve the specification for a format flag.  SPEC contains the
1469    specifications for format flags for the applicable kind of format.
1470    FLAG is the flag in question.  If PREDICATES is NULL, the basic
1471    spec for that flag must be retrieved and must exist.  If
1472    PREDICATES is not NULL, it is a string listing possible predicates
1473    for the spec entry; if an entry predicated on any of these is
1474    found, it is returned, otherwise NULL is returned.  */
1475 
1476 static const format_flag_spec *
get_flag_spec(const format_flag_spec * spec,int flag,const char * predicates)1477 get_flag_spec (const format_flag_spec *spec, int flag, const char *predicates)
1478 {
1479   int i;
1480   for (i = 0; spec[i].flag_char != 0; i++)
1481     {
1482       if (spec[i].flag_char != flag)
1483 	continue;
1484       if (predicates != NULL)
1485 	{
1486 	  if (spec[i].predicate != 0
1487 	      && strchr (predicates, spec[i].predicate) != 0)
1488 	    return &spec[i];
1489 	}
1490       else if (spec[i].predicate == 0)
1491 	return &spec[i];
1492     }
1493   gcc_assert (predicates);
1494   return NULL;
1495 }
1496 
1497 
1498 /* Check the argument list of a call to printf, scanf, etc.
1499    INFO points to the function_format_info structure.
1500    PARAMS is the list of argument values.  */
1501 
1502 static void
check_format_info(function_format_info * info,tree params,vec<location_t> * arglocs)1503 check_format_info (function_format_info *info, tree params,
1504 		   vec<location_t> *arglocs)
1505 {
1506   format_check_context format_ctx;
1507   unsigned HOST_WIDE_INT arg_num;
1508   tree format_tree;
1509   format_check_results res;
1510   /* Skip to format argument.  If the argument isn't available, there's
1511      no work for us to do; prototype checking will catch the problem.  */
1512   for (arg_num = 1; ; ++arg_num)
1513     {
1514       if (params == 0)
1515 	return;
1516       if (arg_num == info->format_num)
1517 	break;
1518       params = TREE_CHAIN (params);
1519     }
1520   format_tree = TREE_VALUE (params);
1521   params = TREE_CHAIN (params);
1522   if (format_tree == 0)
1523     return;
1524 
1525   res.number_non_literal = 0;
1526   res.number_extra_args = 0;
1527   res.extra_arg_loc = UNKNOWN_LOCATION;
1528   res.number_dollar_extra_args = 0;
1529   res.number_wide = 0;
1530   res.number_non_char = 0;
1531   res.number_empty = 0;
1532   res.number_unterminated = 0;
1533   res.number_other = 0;
1534   res.format_string_loc = input_location;
1535 
1536   format_ctx.res = &res;
1537   format_ctx.info = info;
1538   format_ctx.params = params;
1539   format_ctx.arglocs = arglocs;
1540 
1541   check_function_arguments_recurse (check_format_arg, &format_ctx,
1542 				    format_tree, arg_num);
1543 
1544   location_t loc = format_ctx.res->format_string_loc;
1545 
1546   if (res.number_non_literal > 0)
1547     {
1548       /* Functions taking a va_list normally pass a non-literal format
1549 	 string.  These functions typically are declared with
1550 	 first_arg_num == 0, so avoid warning in those cases.  */
1551       if (!(format_types[info->format_type].flags & (int) FMT_FLAG_ARG_CONVERT))
1552 	{
1553 	  /* For strftime-like formats, warn for not checking the format
1554 	     string; but there are no arguments to check.  */
1555 	  warning_at (loc, OPT_Wformat_nonliteral,
1556 		      "format not a string literal, format string not checked");
1557 	}
1558       else if (info->first_arg_num != 0)
1559 	{
1560 	  /* If there are no arguments for the format at all, we may have
1561 	     printf (foo) which is likely to be a security hole.  */
1562 	  while (arg_num + 1 < info->first_arg_num)
1563 	    {
1564 	      if (params == 0)
1565 		break;
1566 	      params = TREE_CHAIN (params);
1567 	      ++arg_num;
1568 	    }
1569 	  if (params == 0 && warn_format_security)
1570 	    warning_at (loc, OPT_Wformat_security,
1571 			"format not a string literal and no format arguments");
1572 	  else if (params == 0 && warn_format_nonliteral)
1573 	    warning_at (loc, OPT_Wformat_nonliteral,
1574 			"format not a string literal and no format arguments");
1575 	  else
1576 	    warning_at (loc, OPT_Wformat_nonliteral,
1577 			"format not a string literal, argument types not checked");
1578 	}
1579     }
1580 
1581   /* If there were extra arguments to the format, normally warn.  However,
1582      the standard does say extra arguments are ignored, so in the specific
1583      case where we have multiple leaves (conditional expressions or
1584      ngettext) allow extra arguments if at least one leaf didn't have extra
1585      arguments, but was otherwise OK (either non-literal or checked OK).
1586      If the format is an empty string, this should be counted similarly to the
1587      case of extra format arguments.  */
1588   if (res.number_extra_args > 0 && res.number_non_literal == 0
1589       && res.number_other == 0)
1590     {
1591       if (res.extra_arg_loc == UNKNOWN_LOCATION)
1592 	res.extra_arg_loc = loc;
1593       warning_at (res.extra_arg_loc, OPT_Wformat_extra_args,
1594 		  "too many arguments for format");
1595     }
1596   if (res.number_dollar_extra_args > 0 && res.number_non_literal == 0
1597       && res.number_other == 0)
1598     warning_at (loc, OPT_Wformat_extra_args,
1599 		"unused arguments in %<$%>-style format");
1600   if (res.number_empty > 0 && res.number_non_literal == 0
1601       && res.number_other == 0)
1602     warning_at (loc, OPT_Wformat_zero_length, "zero-length %s format string",
1603 	     format_types[info->format_type].name);
1604 
1605   if (res.number_wide > 0)
1606     warning_at (loc, OPT_Wformat_, "format is a wide character string");
1607 
1608   if (res.number_non_char > 0)
1609     warning_at (loc, OPT_Wformat_,
1610 		"format string is not an array of type %qs", "char");
1611 
1612   if (res.number_unterminated > 0)
1613     warning_at (loc, OPT_Wformat_, "unterminated format string");
1614 }
1615 
/* Callback from check_function_arguments_recurse to check a
   format string.  FORMAT_TREE is the format parameter.  ARG_NUM
   is the number of the format argument.  CTX points to a
   format_check_context.

   The outcome is recorded in the counters of CTX->res:
   number_non_literal when FORMAT_TREE cannot be reduced to a string
   constant at a usable constant offset, number_wide or number_non_char
   when the string's element type is not char, number_unterminated when
   the string does not end in a NUL, number_empty when it is empty, and
   number_other or number_extra_args for a null format.  For a usable
   string number_other is incremented and the characters are handed to
   check_format_info_main.  A few paths return without touching any
   counter (running out of PARAMS while skipping to the first argument
   to check, and formats parsed by an external helper).  */

static void
check_format_arg (void *ctx, tree format_tree,
		  unsigned HOST_WIDE_INT arg_num)
{
  format_check_context *format_ctx = (format_check_context *) ctx;
  format_check_results *res = format_ctx->res;
  function_format_info *info = format_ctx->info;
  tree params = format_ctx->params;
  vec<location_t> *arglocs = format_ctx->arglocs;

  int format_length;
  HOST_WIDE_INT offset;
  const char *format_chars;
  /* ARRAY_SIZE stays 0 unless the format turns out to be an array
     variable with a string constant initializer (see below).  */
  tree array_size = 0;
  tree array_init;

  /* Taken before FORMAT_TREE is folded and stripped below.  */
  location_t fmt_param_loc = EXPR_LOC_OR_LOC (format_tree, input_location);

  /* Pull out a constant value if the front end didn't, and handle location
     wrappers.  */
  format_tree = fold_for_warn (format_tree);
  STRIP_NOPS (format_tree);

  /* A null format: there is nothing to parse, only argument counting.  */
  if (integer_zerop (format_tree))
    {
      /* Skip to first argument to check, so we can see if this format
	 has any arguments (it shouldn't).  */
      while (arg_num + 1 < info->first_arg_num)
	{
	  if (params == 0)
	    return;
	  params = TREE_CHAIN (params);
	  ++arg_num;
	}

      if (params == 0)
	res->number_other++;
      else
	{
	  /* Remember the location of the first extra argument only.  */
	  if (res->number_extra_args == 0)
	    res->extra_arg_loc = EXPR_LOC_OR_LOC (TREE_VALUE (params),
						  input_location);
	  res->number_extra_args++;
	}
      return;
    }

  /* Accept "base + constant"; OFFSET accumulates the constant part.  Any
     other form of pointer arithmetic counts as a non-literal format.  */
  offset = 0;
  if (TREE_CODE (format_tree) == POINTER_PLUS_EXPR)
    {
      tree arg0, arg1;

      arg0 = TREE_OPERAND (format_tree, 0);
      arg1 = TREE_OPERAND (format_tree, 1);
      STRIP_NOPS (arg0);
      STRIP_NOPS (arg1);
      if (TREE_CODE (arg1) == INTEGER_CST)
	format_tree = arg0;
      else
	{
	  res->number_non_literal++;
	  return;
	}
      /* POINTER_PLUS_EXPR offsets are to be interpreted signed.  */
      if (!cst_and_fits_in_hwi (arg1))
	{
	  res->number_non_literal++;
	  return;
	}
      offset = int_cst_value (arg1);
    }
  /* From here on only the address of some object is acceptable.  */
  if (TREE_CODE (format_tree) != ADDR_EXPR)
    {
      res->number_non_literal++;
      return;
    }
  res->format_string_loc = EXPR_LOC_OR_LOC (format_tree, input_location);
  format_tree = TREE_OPERAND (format_tree, 0);
  if (format_types[info->format_type].flags
      & (int) FMT_FLAG_PARSE_ARG_CONVERT_EXTERNAL)
    {
      bool objc_str = (info->format_type == gcc_objc_string_format_type);
      /* We cannot examine this string here - but we can check that it is
	 a valid type.  */
      if (TREE_CODE (format_tree) != CONST_DECL
	  || !((objc_str && objc_string_ref_type_p (TREE_TYPE (format_tree)))
		|| (*targetcm.string_object_ref_type_p)
				     ((const_tree) TREE_TYPE (format_tree))))
	{
	  res->number_non_literal++;
	  return;
	}
      /* Skip to first argument to check.  */
      while (arg_num + 1 < info->first_arg_num)
	{
	  if (params == 0)
	    return;
	  params = TREE_CHAIN (params);
	  ++arg_num;
	}
      /* So, we have a valid literal string object and one or more params.
	 We need to use an external helper to parse the string into format
	 info.  For Objective-C variants we provide the resource within the
	 objc tree, for target variants, via a hook.  */
      if (objc_str)
	objc_check_format_arg (format_tree, params);
      else if (targetcm.check_string_object_format_arg)
	(*targetcm.check_string_object_format_arg) (format_tree, params);
      /* Else we can't handle it and retire quietly.  */
      return;
    }
  /* Fold a constant array index into OFFSET.  The assignment inside the
     condition updates OFFSET even when the sum turns out negative; in
     that case the ARRAY_REF is left in place and the check that follows
     rejects the format.  */
  if (TREE_CODE (format_tree) == ARRAY_REF
      && tree_fits_shwi_p (TREE_OPERAND (format_tree, 1))
      && (offset += tree_to_shwi (TREE_OPERAND (format_tree, 1))) >= 0)
    format_tree = TREE_OPERAND (format_tree, 0);
  if (offset < 0)
    {
      res->number_non_literal++;
      return;
    }
  /* An array variable whose constant value is a string is treated like
     the string itself, but remember the array's size.  */
  if (VAR_P (format_tree)
      && TREE_CODE (TREE_TYPE (format_tree)) == ARRAY_TYPE
      && (array_init = decl_constant_value (format_tree)) != format_tree
      && TREE_CODE (array_init) == STRING_CST)
    {
      /* Extract the string constant initializer.  Note that this may include
	 a trailing NUL character that is not in the array (e.g.
	 const char a[3] = "foo";).  */
      array_size = DECL_SIZE_UNIT (format_tree);
      format_tree = array_init;
    }
  if (TREE_CODE (format_tree) != STRING_CST)
    {
      res->number_non_literal++;
      return;
    }
  /* Only strings of plain char can be parsed; classify the others.  */
  tree underlying_type
    = TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (format_tree)));
  if (underlying_type != char_type_node)
    {
      if (underlying_type == char16_type_node
	  || underlying_type == char32_type_node
	  || underlying_type == wchar_type_node)
	res->number_wide++;
      else
	res->number_non_char++;
      return;
    }
  format_chars = TREE_STRING_POINTER (format_tree);
  format_length = TREE_STRING_LENGTH (format_tree);
  if (array_size != 0)
    {
      /* Variable length arrays can't be initialized.  */
      gcc_assert (TREE_CODE (array_size) == INTEGER_CST);

      /* Clamp the length to the array size when that size is positive
	 and representable as an int.  */
      if (tree_fits_shwi_p (array_size))
	{
	  HOST_WIDE_INT array_size_value = tree_to_shwi (array_size);
	  if (array_size_value > 0
	      && array_size_value == (int) array_size_value
	      && format_length > array_size_value)
	    format_length = array_size_value;
	}
    }
  /* Apply the constant offset collected above; one at or past the end
     of the string leaves nothing to check.  */
  if (offset)
    {
      if (offset >= format_length)
	{
	  res->number_non_literal++;
	  return;
	}
      format_chars += offset;
      format_length -= offset;
    }
  /* The last character must be a NUL; the decrement removes it from
     FORMAT_LENGTH, which from here on counts only the characters
     before it.  */
  if (format_length < 1 || format_chars[--format_length] != 0)
    {
      res->number_unterminated++;
      return;
    }
  if (format_length == 0)
    {
      res->number_empty++;
      return;
    }

  /* Skip to first argument to check.  */
  while (arg_num + 1 < info->first_arg_num)
    {
      if (params == 0)
	return;
      params = TREE_CHAIN (params);
      ++arg_num;
    }
  /* Provisionally increment res->number_other; check_format_info_main
     will decrement it if it finds there are extra arguments, but this way
     need not adjust it for every return.  */
  res->number_other++;
  object_allocator <format_wanted_type> fwt_pool ("format_wanted_type pool");
  check_format_info_main (res, info, format_chars, fmt_param_loc, format_tree,
			  format_length, params, arg_num, fwt_pool, arglocs);
}
1822 
/* Support class for argument_parser and check_format_info_main.
   Tracks any flag characters that have been applied to the
   current argument.  The characters are kept as a NUL-terminated
   string in a fixed-size buffer.  */

class flag_chars_t
{
 public:
  /* Start with no flags recorded.  */
  flag_chars_t ();
  /* Return true if CH is among the recorded flags.  */
  bool has_char_p (char ch) const;
  /* Append CH to the recorded flags; no duplicate or capacity check
     is made.  */
  void add_char (char ch);
  /* Diagnose recorded flags that are not valid for the conversion
     described by FCI and drop them from the set.  */
  void validate (const format_kind_info *fki,
		 const format_char_info *fci,
		 const format_flag_spec *flag_specs,
		 const char * const format_chars,
		 tree format_string_cst,
		 location_t format_string_loc,
		 const char * const orig_format_chars,
		 char format_char,
		 bool quoted);
  /* Return 1 if an assignment-allocation flag for FKI was recorded,
     0 otherwise.  */
  int get_alloc_flag (const format_kind_info *fki);
  /* Return 1 if FKI's assignment-suppression character was recorded,
     0 otherwise.  */
  int assignment_suppression_p (const format_kind_info *fki);

 private:
  /* The flags seen so far, NUL-terminated.  */
  char m_flag_chars[256];
};
1848 
1849 /* Support struct for argument_parser and check_format_info_main.
1850    Encapsulates any length modifier applied to the current argument.  */
1851 
1852 class length_modifier
1853 {
1854 public:
length_modifier()1855   length_modifier ()
1856   : chars (NULL), val (FMT_LEN_none), std (STD_C89),
1857     scalar_identity_flag (0)
1858   {
1859   }
1860 
length_modifier(const char * chars_,enum format_lengths val_,enum format_std_version std_,int scalar_identity_flag_)1861   length_modifier (const char *chars_,
1862 		   enum format_lengths val_,
1863 		   enum format_std_version std_,
1864 		   int scalar_identity_flag_)
1865   : chars (chars_), val (val_), std (std_),
1866     scalar_identity_flag (scalar_identity_flag_)
1867   {
1868   }
1869 
1870   const char *chars;
1871   enum format_lengths val;
1872   enum format_std_version std;
1873   int scalar_identity_flag;
1874 };
1875 
/* Parsing one argument within a format string.

   The read_* members advance the shared FORMAT_CHARS cursor over one
   component of a directive each; those returning bool yield false when
   parsing of the format should not continue.  */

class argument_parser
{
 public:
  argument_parser (function_format_info *info, const char *&format_chars,
		   tree format_string_cst,
		   const char * const orig_format_chars,
		   location_t format_string_loc, flag_chars_t &flag_chars,
		   int &has_operand_number, tree first_fillin_param,
		   object_allocator <format_wanted_type> &fwt_pool_,
		   vec<location_t> *arglocs);

  /* Handle a leading "n$" operand number, if the format kind uses them.  */
  bool read_any_dollar ();

  /* Collect flag characters, diagnosing repeats.  */
  bool read_format_flags ();

  /* Read a field width: digits, '*' or '*m$'.  */
  bool
  read_any_format_width (tree &params,
			 unsigned HOST_WIDE_INT &arg_num);

  /* Read a '#'-introduced left precision (digits only).  */
  void
  read_any_format_left_precision ();

  /* Read a '.'-introduced precision: digits, '*' or '*m$'.  */
  bool
  read_any_format_precision (tree &params,
			     unsigned HOST_WIDE_INT &arg_num);

  void handle_alloc_chars ();

  length_modifier read_any_length_modifier ();

  void read_any_other_modifier ();

  const format_char_info *find_format_char_info (char format_char);

  void
  validate_flag_pairs (const format_char_info *fci,
		       char format_char);

  void
  give_y2k_warnings (const format_char_info *fci,
		     char format_char);

  void parse_any_scan_set (const format_char_info *fci);

  bool handle_conversions (const format_char_info *fci,
			   const length_modifier &len_modifier,
			   tree &wanted_type,
			   const char *&wanted_type_name,
			   unsigned HOST_WIDE_INT &arg_num,
			   tree &params,
			   char format_char);

  bool
  check_argument_type (const format_char_info *fci,
		       const length_modifier &len_modifier,
		       tree &wanted_type,
		       const char *&wanted_type_name,
		       const bool suppressed,
		       unsigned HOST_WIDE_INT &arg_num,
		       tree &params,
		       const int alloc_flag,
		       const char * const format_start,
		       const char * const type_start,
		       location_t fmt_param_loc,
		       char conversion_char);

 private:
  /* NOTE: the constructor initializes FKI from INFO and FLAG_SPECS from
     FKI, so these three members must stay declared in this order.  */
  const function_format_info *const info;
  const format_kind_info * const fki;
  const format_flag_spec * const flag_specs;
  /* Value of the FORMAT_CHARS cursor when this parser was constructed.  */
  const char *start_of_this_format;
  /* Cursor into the format string, shared with the caller.  */
  const char *&format_chars;
  const tree format_string_cst;
  /* Start of the format string; offsets for diagnostics are computed
     as FORMAT_CHARS - ORIG_FORMAT_CHARS.  */
  const char * const orig_format_chars;
  const location_t format_string_loc;
  object_allocator <format_wanted_type> &fwt_pool;
  flag_chars_t &flag_chars;
  /* Set by read_any_dollar when the directive starts with "n$".  */
  int main_arg_num;
  tree main_arg_params;
  /* Shared with the caller; compared against 0 and 1 and against <= 0
     by the read_* members.  */
  int &has_operand_number;
  const tree first_fillin_param;
  /* Filled in by read_any_format_width for a '*' width.  */
  format_wanted_type width_wanted_type;
  format_wanted_type precision_wanted_type;
 public:
  format_wanted_type main_wanted_type;
 private:
  /* Head and tail of the chain of wanted types, linked through their
     NEXT members.  */
  format_wanted_type *first_wanted_type;
  format_wanted_type *last_wanted_type;
  vec<location_t> *arglocs;
};
1968 
1969 /* flag_chars_t's constructor.  */
1970 
flag_chars_t()1971 flag_chars_t::flag_chars_t ()
1972 {
1973   m_flag_chars[0] = 0;
1974 }
1975 
1976 /* Has CH been seen as a flag within the current argument?  */
1977 
1978 bool
has_char_p(char ch)1979 flag_chars_t::has_char_p (char ch) const
1980 {
1981   return strchr (m_flag_chars, ch) != 0;
1982 }
1983 
1984 /* Add CH to the flags seen within the current argument.  */
1985 
1986 void
add_char(char ch)1987 flag_chars_t::add_char (char ch)
1988 {
1989   int i = strlen (m_flag_chars);
1990   m_flag_chars[i++] = ch;
1991   m_flag_chars[i] = 0;
1992 }
1993 
1994 /* Validate the individual flags used, removing any that are invalid.  */
1995 
1996 void
validate(const format_kind_info * fki,const format_char_info * fci,const format_flag_spec * flag_specs,const char * const format_chars,tree format_string_cst,location_t format_string_loc,const char * const orig_format_chars,char format_char,bool quoted)1997 flag_chars_t::validate (const format_kind_info *fki,
1998 			const format_char_info *fci,
1999 			const format_flag_spec *flag_specs,
2000 			const char * const format_chars,
2001 			tree format_string_cst,
2002 			location_t format_string_loc,
2003 			const char * const orig_format_chars,
2004 			char format_char,
2005 			bool quoted)
2006 {
2007   int i;
2008   int d = 0;
2009   bool quotflag = false;
2010 
2011   for (i = 0; m_flag_chars[i] != 0; i++)
2012     {
2013       const format_flag_spec *s = get_flag_spec (flag_specs,
2014 						 m_flag_chars[i], NULL);
2015       m_flag_chars[i - d] = m_flag_chars[i];
2016       if (m_flag_chars[i] == fki->length_code_char)
2017 	continue;
2018 
2019       /* Remember if a quoting flag is seen.  */
2020       quotflag |= s->quoting;
2021 
2022       if (strchr (fci->flag_chars, m_flag_chars[i]) == 0)
2023 	{
2024 	  format_warning_at_char (format_string_loc, format_string_cst,
2025 				  format_chars - orig_format_chars,
2026 				  OPT_Wformat_,
2027 				  "%s used with %<%%%c%> %s format",
2028 				  _(s->name), format_char, fki->name);
2029 	  d++;
2030 	  continue;
2031 	}
2032       if (pedantic)
2033 	{
2034 	  const format_flag_spec *t;
2035 	  if (ADJ_STD (s->std) > C_STD_VER)
2036 	    warning_at (format_string_loc, OPT_Wformat_,
2037 			"%s does not support %s",
2038 			C_STD_NAME (s->std), _(s->long_name));
2039 	  t = get_flag_spec (flag_specs, m_flag_chars[i], fci->flags2);
2040 	  if (t != NULL && ADJ_STD (t->std) > ADJ_STD (s->std))
2041 	    {
2042 	      const char *long_name = (t->long_name != NULL
2043 				       ? t->long_name
2044 				       : s->long_name);
2045 	      if (ADJ_STD (t->std) > C_STD_VER)
2046 		warning_at (format_string_loc, OPT_Wformat_,
2047 			    "%s does not support %s with"
2048 			    " the %<%%%c%> %s format",
2049 			    C_STD_NAME (t->std), _(long_name),
2050 			    format_char, fki->name);
2051 	    }
2052 	}
2053 
2054       /* Detect quoting directives used within a quoted sequence, such
2055 	 as GCC's "%<...%qE".  */
2056       if (quoted && s->quoting)
2057 	{
2058 	  format_warning_at_char (format_string_loc, format_string_cst,
2059 				  format_chars - orig_format_chars - 1,
2060 				  OPT_Wformat_,
2061 				  "%s used within a quoted sequence",
2062 				  _(s->name));
2063 	}
2064     }
2065   m_flag_chars[i - d] = 0;
2066 
2067   if (!quoted
2068       && !quotflag
2069       && strchr (fci->flags2, '\''))
2070     {
2071       format_warning_at_char (format_string_loc, format_string_cst,
2072 			      format_chars - orig_format_chars,
2073 			      OPT_Wformat_,
2074 			      "%qc conversion used unquoted",
2075 			      format_char);
2076     }
2077 }
2078 
2079 /* Determine if an assignment-allocation has been set, requiring
2080    an extra char ** for writing back a dynamically-allocated char *.
2081    This is for handling the optional 'm' character in scanf.  */
2082 
2083 int
get_alloc_flag(const format_kind_info * fki)2084 flag_chars_t::get_alloc_flag (const format_kind_info *fki)
2085 {
2086   if ((fki->flags & (int) FMT_FLAG_SCANF_A_KLUDGE)
2087       && has_char_p ('a'))
2088     return 1;
2089   if (fki->alloc_char && has_char_p (fki->alloc_char))
2090     return 1;
2091   return 0;
2092 }
2093 
2094 /* Determine if an assignment-suppression character was seen.
2095    ('*' in scanf, for discarding the converted input).  */
2096 
2097 int
assignment_suppression_p(const format_kind_info * fki)2098 flag_chars_t::assignment_suppression_p (const format_kind_info *fki)
2099 {
2100   if (fki->suppression_char
2101       && has_char_p (fki->suppression_char))
2102     return 1;
2103   return 0;
2104 }
2105 
/* Constructor for argument_parser.  Initialize for parsing one
   argument within a format string.

   FORMAT_CHARS_, FLAG_CHARS_, HAS_OPERAND_NUMBER_ and FWT_POOL_ are
   bound by reference, so the parser reads and updates the caller's
   objects.  The position FORMAT_CHARS_ has on entry is saved as the
   start of this format.  No argument has been selected yet:
   main_arg_num is 0, main_arg_params is NULL and the chain of wanted
   types is empty.  */

argument_parser::
argument_parser (function_format_info *info_, const char *&format_chars_,
		 tree format_string_cst_,
		 const char * const orig_format_chars_,
		 location_t format_string_loc_,
		 flag_chars_t &flag_chars_,
		 int &has_operand_number_,
		 tree first_fillin_param_,
		 object_allocator <format_wanted_type> &fwt_pool_,
		 vec<location_t> *arglocs_)
: info (info_),
  /* The next two initializers read the members set just before them.  */
  fki (&format_types[info->format_type]),
  flag_specs (fki->flag_specs),
  start_of_this_format (format_chars_),
  format_chars (format_chars_),
  format_string_cst (format_string_cst_),
  orig_format_chars (orig_format_chars_),
  format_string_loc (format_string_loc_),
  fwt_pool (fwt_pool_),
  flag_chars (flag_chars_),
  main_arg_num (0),
  main_arg_params (NULL),
  has_operand_number (has_operand_number_),
  first_fillin_param (first_fillin_param_),
  first_wanted_type (NULL),
  last_wanted_type (NULL),
  arglocs (arglocs_)
{
}
2138 
2139 /* Handle dollars at the start of format arguments, setting up main_arg_params
2140    and main_arg_num.
2141 
2142    Return true if format parsing is to continue, false otherwise.  */
2143 
2144 bool
read_any_dollar()2145 argument_parser::read_any_dollar ()
2146 {
2147   if ((fki->flags & (int) FMT_FLAG_USE_DOLLAR) && has_operand_number != 0)
2148     {
2149       /* Possibly read a $ operand number at the start of the format.
2150 	 If one was previously used, one is required here.  If one
2151 	 is not used here, we can't immediately conclude this is a
2152 	 format without them, since it could be printf %m or scanf %*.  */
2153       int opnum;
2154       opnum = maybe_read_dollar_number (&format_chars, 0,
2155 					first_fillin_param,
2156 					&main_arg_params, fki);
2157       if (opnum == -1)
2158 	return false;
2159       else if (opnum > 0)
2160 	{
2161 	  has_operand_number = 1;
2162 	  main_arg_num = opnum + info->first_arg_num - 1;
2163 	}
2164     }
2165   else if (fki->flags & FMT_FLAG_USE_DOLLAR)
2166     {
2167       if (avoid_dollar_number (format_chars))
2168 	return false;
2169     }
2170   return true;
2171 }
2172 
2173 /* Read any format flags, but do not yet validate them beyond removing
2174    duplicates, since in general validation depends on the rest of
2175    the format.
2176 
2177    Return true if format parsing is to continue, false otherwise.  */
2178 
2179 bool
read_format_flags()2180 argument_parser::read_format_flags ()
2181 {
2182   while (*format_chars != 0
2183 	 && strchr (fki->flag_chars, *format_chars) != 0)
2184     {
2185       const format_flag_spec *s = get_flag_spec (flag_specs,
2186 						 *format_chars, NULL);
2187       if (flag_chars.has_char_p (*format_chars))
2188 	{
2189 	  format_warning_at_char (format_string_loc, format_string_cst,
2190 				  format_chars + 1 - orig_format_chars,
2191 				  OPT_Wformat_,
2192 				  "repeated %s in format", _(s->name));
2193 	}
2194       else
2195 	flag_chars.add_char (*format_chars);
2196 
2197       if (s->skip_next_char)
2198 	{
2199 	  ++format_chars;
2200 	  if (*format_chars == 0)
2201 	    {
2202 	      warning_at (format_string_loc, OPT_Wformat_,
2203 			  "missing fill character at end of strfmon format");
2204 	      return false;
2205 	    }
2206 	}
2207       ++format_chars;
2208     }
2209 
2210   return true;
2211 }
2212 
2213 /* Read any format width, possibly * or *m$.
2214 
2215    Return true if format parsing is to continue, false otherwise.  */
2216 
2217 bool
2218 argument_parser::
read_any_format_width(tree & params,unsigned HOST_WIDE_INT & arg_num)2219 read_any_format_width (tree &params,
2220 		       unsigned HOST_WIDE_INT &arg_num)
2221 {
2222   if (!fki->width_char)
2223     return true;
2224 
2225   if (fki->width_type != NULL && *format_chars == '*')
2226     {
2227       flag_chars.add_char (fki->width_char);
2228       /* "...a field width...may be indicated by an asterisk.
2229 	 In this case, an int argument supplies the field width..."  */
2230       ++format_chars;
2231       if (has_operand_number != 0)
2232 	{
2233 	  int opnum;
2234 	  opnum = maybe_read_dollar_number (&format_chars,
2235 					    has_operand_number == 1,
2236 					    first_fillin_param,
2237 					    &params, fki);
2238 	  if (opnum == -1)
2239 	    return false;
2240 	  else if (opnum > 0)
2241 	    {
2242 	      has_operand_number = 1;
2243 	      arg_num = opnum + info->first_arg_num - 1;
2244 	    }
2245 	  else
2246 	    has_operand_number = 0;
2247 	}
2248       else
2249 	{
2250 	  if (avoid_dollar_number (format_chars))
2251 	    return false;
2252 	}
2253       if (info->first_arg_num != 0)
2254 	{
2255 	  tree cur_param;
2256 	  if (params == 0)
2257 	    cur_param = NULL;
2258 	  else
2259 	    {
2260 	      cur_param = TREE_VALUE (params);
2261 	      if (has_operand_number <= 0)
2262 		{
2263 		  params = TREE_CHAIN (params);
2264 		  ++arg_num;
2265 		}
2266 	    }
2267 	  width_wanted_type.wanted_type = *fki->width_type;
2268 	  width_wanted_type.wanted_type_name = NULL;
2269 	  width_wanted_type.pointer_count = 0;
2270 	  width_wanted_type.char_lenient_flag = 0;
2271 	  width_wanted_type.scalar_identity_flag = 0;
2272 	  width_wanted_type.writing_in_flag = 0;
2273 	  width_wanted_type.reading_from_flag = 0;
2274 	  width_wanted_type.kind = CF_KIND_FIELD_WIDTH;
2275 	  width_wanted_type.format_start = format_chars - 1;
2276 	  width_wanted_type.format_length = 1;
2277 	  width_wanted_type.param = cur_param;
2278 	  width_wanted_type.arg_num = arg_num;
2279 	  width_wanted_type.offset_loc =
2280 	    format_chars - orig_format_chars;
2281 	  width_wanted_type.next = NULL;
2282 	  if (last_wanted_type != 0)
2283 	    last_wanted_type->next = &width_wanted_type;
2284 	  if (first_wanted_type == 0)
2285 	    first_wanted_type = &width_wanted_type;
2286 	  last_wanted_type = &width_wanted_type;
2287 	}
2288     }
2289   else
2290     {
2291       /* Possibly read a numeric width.  If the width is zero,
2292 	 we complain if appropriate.  */
2293       int non_zero_width_char = FALSE;
2294       int found_width = FALSE;
2295       while (ISDIGIT (*format_chars))
2296 	{
2297 	  found_width = TRUE;
2298 	  if (*format_chars != '0')
2299 	    non_zero_width_char = TRUE;
2300 	  ++format_chars;
2301 	}
2302       if (found_width && !non_zero_width_char &&
2303 	  (fki->flags & (int) FMT_FLAG_ZERO_WIDTH_BAD))
2304 	warning_at (format_string_loc, OPT_Wformat_,
2305 		    "zero width in %s format", fki->name);
2306       if (found_width)
2307 	flag_chars.add_char (fki->width_char);
2308     }
2309 
2310   return true;
2311 }
2312 
2313 /* Read any format left precision (must be a number, not *).  */
2314 void
read_any_format_left_precision()2315 argument_parser::read_any_format_left_precision ()
2316 {
2317   if (fki->left_precision_char == 0)
2318     return;
2319   if (*format_chars != '#')
2320     return;
2321 
2322   ++format_chars;
2323   flag_chars.add_char (fki->left_precision_char);
2324   if (!ISDIGIT (*format_chars))
2325     format_warning_at_char (format_string_loc, format_string_cst,
2326 			    format_chars - orig_format_chars,
2327 			    OPT_Wformat_,
2328 			    "empty left precision in %s format", fki->name);
2329   while (ISDIGIT (*format_chars))
2330     ++format_chars;
2331 }
2332 
2333 /* Read any format precision, possibly * or *m$.
2334 
2335    Return true if format parsing is to continue, false otherwise.  */
2336 
2337 bool
2338 argument_parser::
read_any_format_precision(tree & params,unsigned HOST_WIDE_INT & arg_num)2339 read_any_format_precision (tree &params,
2340 			   unsigned HOST_WIDE_INT &arg_num)
2341 {
2342   if (fki->precision_char == 0)
2343     return true;
2344   if (*format_chars != '.')
2345     return true;
2346 
2347   ++format_chars;
2348   flag_chars.add_char (fki->precision_char);
2349   if (fki->precision_type != NULL && *format_chars == '*')
2350     {
2351       /* "...a...precision...may be indicated by an asterisk.
2352 	 In this case, an int argument supplies the...precision."  */
2353       ++format_chars;
2354       if (has_operand_number != 0)
2355 	{
2356 	  int opnum;
2357 	  opnum = maybe_read_dollar_number (&format_chars,
2358 					    has_operand_number == 1,
2359 					    first_fillin_param,
2360 					    &params, fki);
2361 	  if (opnum == -1)
2362 	    return false;
2363 	  else if (opnum > 0)
2364 	    {
2365 	      has_operand_number = 1;
2366 	      arg_num = opnum + info->first_arg_num - 1;
2367 	    }
2368 	  else
2369 	    has_operand_number = 0;
2370 	}
2371       else
2372 	{
2373 	  if (avoid_dollar_number (format_chars))
2374 	    return false;
2375 	}
2376       if (info->first_arg_num != 0)
2377 	{
2378 	  tree cur_param;
2379 	  if (params == 0)
2380 	    cur_param = NULL;
2381 	  else
2382 	    {
2383 	      cur_param = TREE_VALUE (params);
2384 	      if (has_operand_number <= 0)
2385 		{
2386 		  params = TREE_CHAIN (params);
2387 		  ++arg_num;
2388 		}
2389 	    }
2390 	  precision_wanted_type.wanted_type = *fki->precision_type;
2391 	  precision_wanted_type.wanted_type_name = NULL;
2392 	  precision_wanted_type.pointer_count = 0;
2393 	  precision_wanted_type.char_lenient_flag = 0;
2394 	  precision_wanted_type.scalar_identity_flag = 0;
2395 	  precision_wanted_type.writing_in_flag = 0;
2396 	  precision_wanted_type.reading_from_flag = 0;
2397 	  precision_wanted_type.kind = CF_KIND_FIELD_PRECISION;
2398 	  precision_wanted_type.param = cur_param;
2399 	  precision_wanted_type.format_start = format_chars - 2;
2400 	  precision_wanted_type.format_length = 2;
2401 	  precision_wanted_type.arg_num = arg_num;
2402 	  precision_wanted_type.offset_loc =
2403 	    format_chars - orig_format_chars;
2404 	  precision_wanted_type.next = NULL;
2405 	  if (last_wanted_type != 0)
2406 	    last_wanted_type->next = &precision_wanted_type;
2407 	  if (first_wanted_type == 0)
2408 	    first_wanted_type = &precision_wanted_type;
2409 	  last_wanted_type = &precision_wanted_type;
2410 	}
2411     }
2412   else
2413     {
2414       if (!(fki->flags & (int) FMT_FLAG_EMPTY_PREC_OK)
2415 	  && !ISDIGIT (*format_chars))
2416 	format_warning_at_char (format_string_loc, format_string_cst,
2417 				format_chars - orig_format_chars,
2418 				OPT_Wformat_,
2419 				"empty precision in %s format", fki->name);
2420       while (ISDIGIT (*format_chars))
2421 	++format_chars;
2422     }
2423 
2424   return true;
2425 }
2426 
2427 /* Parse any assignment-allocation flags, which request an extra
2428    char ** for writing back a dynamically-allocated char *.
2429    This is for handling the optional 'm' character in scanf,
2430    and, before C99, 'a' (for compatibility with a non-standard
2431    GNU libc extension).  */
2432 
2433 void
handle_alloc_chars()2434 argument_parser::handle_alloc_chars ()
2435 {
2436   if (fki->alloc_char && fki->alloc_char == *format_chars)
2437     {
2438       flag_chars.add_char (fki->alloc_char);
2439       format_chars++;
2440     }
2441 
2442   /* Handle the scanf allocation kludge.  */
2443   if (fki->flags & (int) FMT_FLAG_SCANF_A_KLUDGE)
2444     {
2445       if (*format_chars == 'a' && !flag_isoc99)
2446 	{
2447 	  if (format_chars[1] == 's' || format_chars[1] == 'S'
2448 	      || format_chars[1] == '[')
2449 	    {
2450 	      /* 'a' is used as a flag.  */
2451 	      flag_chars.add_char ('a');
2452 	      format_chars++;
2453 	    }
2454 	}
2455     }
2456 }
2457 
2458 /* Look for length modifiers within the current format argument,
2459    returning a length_modifier instance describing it (or the
2460    default if one is not found).
2461 
2462    Issue warnings about non-standard modifiers.  */
2463 
2464 length_modifier
read_any_length_modifier()2465 argument_parser::read_any_length_modifier ()
2466 {
2467   length_modifier result;
2468 
2469   const format_length_info *fli = fki->length_char_specs;
2470   if (!fli)
2471     return result;
2472 
2473   while (fli->name != 0
2474 	 && strncmp (fli->name, format_chars, strlen (fli->name)))
2475     fli++;
2476   if (fli->name != 0)
2477     {
2478       format_chars += strlen (fli->name);
2479       if (fli->double_name != 0 && fli->name[0] == *format_chars)
2480 	{
2481 	  format_chars++;
2482 	  result = length_modifier (fli->double_name, fli->double_index,
2483 				    fli->double_std, 0);
2484 	}
2485       else
2486 	{
2487 	  result = length_modifier (fli->name, fli->index, fli->std,
2488 				    fli->scalar_identity_flag);
2489 	}
2490       flag_chars.add_char (fki->length_code_char);
2491     }
2492   if (pedantic)
2493     {
2494       /* Warn if the length modifier is non-standard.  */
2495       if (ADJ_STD (result.std) > C_STD_VER)
2496 	warning_at (format_string_loc, OPT_Wformat_,
2497 		    "%s does not support the %qs %s length modifier",
2498 		    C_STD_NAME (result.std), result.chars,
2499 		    fki->name);
2500     }
2501 
2502   return result;
2503 }
2504 
2505 /* Read any other modifier (strftime E/O).  */
2506 
2507 void
read_any_other_modifier()2508 argument_parser::read_any_other_modifier ()
2509 {
2510   if (fki->modifier_chars == NULL)
2511     return;
2512 
2513   while (*format_chars != 0
2514 	 && strchr (fki->modifier_chars, *format_chars) != 0)
2515     {
2516       if (flag_chars.has_char_p (*format_chars))
2517 	{
2518 	  const format_flag_spec *s = get_flag_spec (flag_specs,
2519 						     *format_chars, NULL);
2520 	  format_warning_at_char (format_string_loc, format_string_cst,
2521 				  format_chars - orig_format_chars,
2522 				  OPT_Wformat_,
2523 				  "repeated %s in format", _(s->name));
2524 	}
2525       else
2526 	flag_chars.add_char (*format_chars);
2527       ++format_chars;
2528     }
2529 }
2530 
2531 /* Return the format_char_info corresponding to FORMAT_CHAR,
2532    potentially issuing a warning if the format char is
2533    not supported in the C standard version we are checking
2534    against.
2535 
2536    Issue a warning and return NULL if it is not found.
2537 
2538    Issue warnings about non-standard modifiers.  */
2539 
2540 const format_char_info *
find_format_char_info(char format_char)2541 argument_parser::find_format_char_info (char format_char)
2542 {
2543   const format_char_info *fci = fki->conversion_specs;
2544 
2545   while (fci->format_chars != 0
2546 	 && strchr (fci->format_chars, format_char) == 0)
2547     ++fci;
2548   if (fci->format_chars == 0)
2549     {
2550       format_warning_at_char (format_string_loc, format_string_cst,
2551 			      format_chars - orig_format_chars,
2552 			      OPT_Wformat_,
2553 			      "unknown conversion type character"
2554 			      " %qc in format",
2555 			      format_char);
2556       return NULL;
2557     }
2558 
2559   if (pedantic)
2560     {
2561       if (ADJ_STD (fci->std) > C_STD_VER)
2562 	format_warning_at_char (format_string_loc, format_string_cst,
2563 				format_chars - orig_format_chars,
2564 				OPT_Wformat_,
2565 				"%s does not support the %<%%%c%> %s format",
2566 				C_STD_NAME (fci->std), format_char, fki->name);
2567     }
2568 
2569   return fci;
2570 }
2571 
2572 /* Validate the pairs of flags used.
2573    Issue warnings about incompatible combinations of flags.  */
2574 
2575 void
validate_flag_pairs(const format_char_info * fci,char format_char)2576 argument_parser::validate_flag_pairs (const format_char_info *fci,
2577 				      char format_char)
2578 {
2579   const format_flag_pair * const bad_flag_pairs = fki->bad_flag_pairs;
2580 
2581   for (int i = 0; bad_flag_pairs[i].flag_char1 != 0; i++)
2582     {
2583       const format_flag_spec *s, *t;
2584       if (!flag_chars.has_char_p (bad_flag_pairs[i].flag_char1))
2585 	continue;
2586       if (!flag_chars.has_char_p (bad_flag_pairs[i].flag_char2))
2587 	continue;
2588       if (bad_flag_pairs[i].predicate != 0
2589 	  && strchr (fci->flags2, bad_flag_pairs[i].predicate) == 0)
2590 	continue;
2591       s = get_flag_spec (flag_specs, bad_flag_pairs[i].flag_char1, NULL);
2592       t = get_flag_spec (flag_specs, bad_flag_pairs[i].flag_char2, NULL);
2593       if (bad_flag_pairs[i].ignored)
2594 	{
2595 	  if (bad_flag_pairs[i].predicate != 0)
2596 	    warning_at (format_string_loc, OPT_Wformat_,
2597 			"%s ignored with %s and %<%%%c%> %s format",
2598 			_(s->name), _(t->name), format_char,
2599 			fki->name);
2600 	  else
2601 	    warning_at (format_string_loc, OPT_Wformat_,
2602 			"%s ignored with %s in %s format",
2603 			_(s->name), _(t->name), fki->name);
2604 	}
2605       else
2606 	{
2607 	  if (bad_flag_pairs[i].predicate != 0)
2608 	    warning_at (format_string_loc, OPT_Wformat_,
2609 			"use of %s and %s together with %<%%%c%> %s format",
2610 			_(s->name), _(t->name), format_char,
2611 			fki->name);
2612 	  else
2613 	    warning_at (format_string_loc, OPT_Wformat_,
2614 			"use of %s and %s together in %s format",
2615 			_(s->name), _(t->name), fki->name);
2616 	}
2617     }
2618 }
2619 
2620 /* Give Y2K warnings.  */
2621 
2622 void
give_y2k_warnings(const format_char_info * fci,char format_char)2623 argument_parser::give_y2k_warnings (const format_char_info *fci,
2624 				    char format_char)
2625 {
2626   if (!warn_format_y2k)
2627     return;
2628 
2629   int y2k_level = 0;
2630   if (strchr (fci->flags2, '4') != 0)
2631     if (flag_chars.has_char_p ('E'))
2632       y2k_level = 3;
2633     else
2634       y2k_level = 2;
2635   else if (strchr (fci->flags2, '3') != 0)
2636     y2k_level = 3;
2637   else if (strchr (fci->flags2, '2') != 0)
2638     y2k_level = 2;
2639   if (y2k_level == 3)
2640     warning_at (format_string_loc, OPT_Wformat_y2k,
2641 		"%<%%%c%> yields only last 2 digits of "
2642 		"year in some locales", format_char);
2643   else if (y2k_level == 2)
2644     warning_at (format_string_loc, OPT_Wformat_y2k,
2645 		"%<%%%c%> yields only last 2 digits of year",
2646 		format_char);
2647 }
2648 
2649 /* Parse any "scan sets" enclosed in square brackets, e.g.
2650    for scanf-style calls.  */
2651 
2652 void
parse_any_scan_set(const format_char_info * fci)2653 argument_parser::parse_any_scan_set (const format_char_info *fci)
2654 {
2655   if (strchr (fci->flags2, '[') == NULL)
2656     return;
2657 
2658   /* Skip over scan set, in case it happens to have '%' in it.  */
2659   if (*format_chars == '^')
2660     ++format_chars;
2661   /* Find closing bracket; if one is hit immediately, then
2662      it's part of the scan set rather than a terminator.  */
2663   if (*format_chars == ']')
2664     ++format_chars;
2665   while (*format_chars && *format_chars != ']')
2666     ++format_chars;
2667   if (*format_chars != ']')
2668     /* The end of the format string was reached.  */
2669     format_warning_at_char (format_string_loc, format_string_cst,
2670 			    format_chars - orig_format_chars,
2671 			    OPT_Wformat_,
2672 			    "no closing %<]%> for %<%%[%> format");
2673 }
2674 
2675 /* Return true if this argument is to be continued to be parsed,
2676    false to skip to next argument.  */
2677 
2678 bool
handle_conversions(const format_char_info * fci,const length_modifier & len_modifier,tree & wanted_type,const char * & wanted_type_name,unsigned HOST_WIDE_INT & arg_num,tree & params,char format_char)2679 argument_parser::handle_conversions (const format_char_info *fci,
2680 				     const length_modifier &len_modifier,
2681 				     tree &wanted_type,
2682 				     const char *&wanted_type_name,
2683 				     unsigned HOST_WIDE_INT &arg_num,
2684 				     tree &params,
2685 				     char format_char)
2686 {
2687   enum format_std_version wanted_type_std;
2688 
2689   if (!(fki->flags & (int) FMT_FLAG_ARG_CONVERT))
2690     return true;
2691 
2692   wanted_type = (fci->types[len_modifier.val].type
2693 		 ? *fci->types[len_modifier.val].type : 0);
2694   wanted_type_name = fci->types[len_modifier.val].name;
2695   wanted_type_std = fci->types[len_modifier.val].std;
2696   if (wanted_type == 0)
2697     {
2698       format_warning_at_char (format_string_loc, format_string_cst,
2699 			      format_chars - orig_format_chars,
2700 			      OPT_Wformat_,
2701 			      "use of %qs length modifier with %qc type"
2702 			      " character has either no effect"
2703 			      " or undefined behavior",
2704 			      len_modifier.chars, format_char);
2705       /* Heuristic: skip one argument when an invalid length/type
2706 	 combination is encountered.  */
2707       arg_num++;
2708       if (params != 0)
2709 	params = TREE_CHAIN (params);
2710       return false;
2711     }
2712   else if (pedantic
2713 	   /* Warn if non-standard, provided it is more non-standard
2714 	      than the length and type characters that may already
2715 	      have been warned for.  */
2716 	   && ADJ_STD (wanted_type_std) > ADJ_STD (len_modifier.std)
2717 	   && ADJ_STD (wanted_type_std) > ADJ_STD (fci->std))
2718     {
2719       if (ADJ_STD (wanted_type_std) > C_STD_VER)
2720 	format_warning_at_char (format_string_loc, format_string_cst,
2721 				format_chars - orig_format_chars,
2722 				OPT_Wformat_,
2723 				"%s does not support the %<%%%s%c%> %s format",
2724 				C_STD_NAME (wanted_type_std),
2725 				len_modifier.chars,
2726 				format_char, fki->name);
2727     }
2728 
2729   return true;
2730 }
2731 
/* Check type of argument against desired type.

   FCI describes the conversion (and, through its chain, any further
   arguments the conversion takes), LEN_MODIFIER its length modifier,
   and WANTED_TYPE / WANTED_TYPE_NAME the type wanted for the first
   argument.  SUPPRESSED is true if assignment is suppressed.  ARG_NUM
   and PARAMS track the current argument and are advanced past the
   arguments consumed.  ALLOC_FLAG is added to the pointer count of each
   wanted type and, if nonzero, marks it as written into.  FORMAT_START
   and TYPE_START point into the format string; FMT_PARAM_LOC and
   CONVERSION_CHAR are passed on for diagnostics.

   Return true if format parsing is to continue, false otherwise.  */

bool
argument_parser::
check_argument_type (const format_char_info *fci,
		     const length_modifier &len_modifier,
		     tree &wanted_type,
		     const char *&wanted_type_name,
		     const bool suppressed,
		     unsigned HOST_WIDE_INT &arg_num,
		     tree &params,
		     const int alloc_flag,
		     const char * const format_start,
		     const char * const type_start,
		     location_t fmt_param_loc,
		     char conversion_char)
{
  /* NOTE(review): a zero first_arg_num presumably means there are no
     arguments to check against the format -- confirm.  */
  if (info->first_arg_num == 0)
    return true;

  if ((fci->pointer_count == 0 && wanted_type == void_type_node)
      || suppressed)
    {
      /* No argument is consumed here; only complain if an operand
	 number was nevertheless given.  */
      if (main_arg_num != 0)
	{
	  if (suppressed)
	    warning_at (format_string_loc, OPT_Wformat_,
			"operand number specified with "
			"suppressed assignment");
	  else
	    warning_at (format_string_loc, OPT_Wformat_,
			"operand number specified for format "
			"taking no argument");
	}
    }
  else
    {
      format_wanted_type *wanted_type_ptr;

      if (main_arg_num != 0)
	{
	  /* An explicit operand number selects the argument.  */
	  arg_num = main_arg_num;
	  params = main_arg_params;
	}
      else
	{
	  ++arg_num;
	  if (has_operand_number > 0)
	    {
	      warning_at (format_string_loc, OPT_Wformat_,
			  "missing $ operand number in format");
	      return false;
	    }
	  else
	    has_operand_number = 0;
	}

      /* The first wanted type lives in main_wanted_type; one more is
	 allocated from fwt_pool for each further entry on FCI's
	 chain.  */
      wanted_type_ptr = &main_wanted_type;
      while (fci)
	{
	  tree cur_param;
	  if (params == 0)
	    cur_param = NULL;
	  else
	    {
	      cur_param = TREE_VALUE (params);
	      params = TREE_CHAIN (params);
	    }

	  wanted_type_ptr->wanted_type = wanted_type;
	  wanted_type_ptr->wanted_type_name = wanted_type_name;
	  wanted_type_ptr->pointer_count = fci->pointer_count + alloc_flag;
	  wanted_type_ptr->char_lenient_flag = 0;
	  if (strchr (fci->flags2, 'c') != 0)
	    wanted_type_ptr->char_lenient_flag = 1;
	  wanted_type_ptr->scalar_identity_flag = 0;
	  if (len_modifier.scalar_identity_flag)
	    wanted_type_ptr->scalar_identity_flag = 1;
	  wanted_type_ptr->writing_in_flag = 0;
	  wanted_type_ptr->reading_from_flag = 0;
	  if (alloc_flag)
	    wanted_type_ptr->writing_in_flag = 1;
	  else
	    {
	      /* 'W' and 'R' in flags2 mark the argument as written
		 into and read from, respectively.  */
	      if (strchr (fci->flags2, 'W') != 0)
		wanted_type_ptr->writing_in_flag = 1;
	      if (strchr (fci->flags2, 'R') != 0)
		wanted_type_ptr->reading_from_flag = 1;
	    }
	  wanted_type_ptr->kind = CF_KIND_FORMAT;
	  wanted_type_ptr->param = cur_param;
	  wanted_type_ptr->arg_num = arg_num;
	  wanted_type_ptr->format_start = format_start;
	  wanted_type_ptr->format_length = format_chars - format_start;
	  wanted_type_ptr->offset_loc = format_chars - orig_format_chars;
	  wanted_type_ptr->next = NULL;
	  /* Append to the list of wanted types.  */
	  if (last_wanted_type != 0)
	    last_wanted_type->next = wanted_type_ptr;
	  if (first_wanted_type == 0)
	    first_wanted_type = wanted_type_ptr;
	  last_wanted_type = wanted_type_ptr;

	  fci = fci->chain;
	  if (fci)
	    {
	      /* Set up for the next argument of this conversion.  */
	      wanted_type_ptr = fwt_pool.allocate ();
	      arg_num++;
	      wanted_type = *fci->types[len_modifier.val].type;
	      wanted_type_name = fci->types[len_modifier.val].name;
	    }
	}
    }

  /* Check whatever wanted types are on the list.  */
  if (first_wanted_type != 0)
    {
      ptrdiff_t offset_to_format_start = (start_of_this_format - 1) - orig_format_chars;
      ptrdiff_t offset_to_format_end = (format_chars - 1) - orig_format_chars;
      /* By default, use the end of the range for the caret location.  */
      substring_loc fmt_loc (fmt_param_loc, TREE_TYPE (format_string_cst),
			     offset_to_format_end,
			     offset_to_format_start, offset_to_format_end);
      ptrdiff_t offset_to_type_start = type_start - orig_format_chars;
      check_format_types (fmt_loc, first_wanted_type, fki,
			  offset_to_type_start,
			  conversion_char, arglocs);
    }

  return true;
}
2863 
2864 /* Describes "paired tokens" within the format string that are
2865    expected to be balanced.  */
2866 
2867 class baltoks_t
2868 {
2869 public:
baltoks_t()2870   baltoks_t (): singlequote (), doublequote () { }
2871 
2872   typedef auto_vec<const char *> balanced_tokens_t;
2873   /* Vectors of pointers to opening brackets ('['), curly brackets ('{'),
2874      quoting directives (like GCC "%<"), parentheses, and angle brackets
2875      ('<').  Used to detect unbalanced tokens.  */
2876   balanced_tokens_t brackets;
2877   balanced_tokens_t curly;
2878   balanced_tokens_t quotdirs;
2879   balanced_tokens_t parens;
2880   balanced_tokens_t pointy;
2881   /* Pointer to the last opening quote.  */
2882   const char *singlequote;
2883   const char *doublequote;
2884 };
2885 
/* Describes a keyword, operator, or other name that is looked for in
   the plain text of a format string.  */

struct token_t
{
  const char *name;   /* Keyword/operator name.  */
  unsigned char len;  /* Its length, not counting the terminating NUL.  */
  const char *alt;    /* Alternate spelling, or null if there is none.  */
};
2894 
2895 /* Helper for initializing global token_t arrays below.  */
2896 #define NAME(name) { name, sizeof name - 1, NULL }
2897 
2898 /* C/C++ operators that are expected to be quoted within the format
2899    string.  */
2900 
2901 static const token_t c_opers[] =
2902   {
2903    NAME ("!="), NAME ("%="),  NAME ("&&"),  NAME ("&="), NAME ("*="),
2904    NAME ("++"), NAME ("+="),  NAME ("--"),  NAME ("-="), NAME ("->"),
2905    NAME ("/="), NAME ("<<"),  NAME ("<<="), NAME ("<="), NAME ("=="),
2906    NAME (">="), NAME (">>="), NAME (">>"),  NAME ("?:"),  NAME ("^="),
2907    NAME ("|="), NAME ("||")
2908   };
2909 
2910 static const token_t cxx_opers[] =
2911   {
2912    NAME ("->*"), NAME (".*"),  NAME ("::"),  NAME ("<=>")
2913   };
2914 
2915 /* Common C/C++ keywords that are expected to be quoted within the format
2916    string.  Keywords like auto, inline, or volatile are exccluded because
2917    they are sometimes used in common terms like /auto variables/, /inline
2918    function/, or /volatile access/ where they should not be quoted.  */
2919 
2920 static const token_t c_keywords[] =
2921   {
2922 #undef NAME
2923 #define NAME(name, alt)  { name, sizeof name - 1, alt }
2924 
2925    NAME ("alignas", NULL),
2926    NAME ("alignof", NULL),
2927    NAME ("asm", NULL),
2928    NAME ("bool", NULL),
2929    NAME ("char", NULL),
2930    NAME ("const %", NULL),
2931    NAME ("const-qualified", "%<const%>-qualified"),
2932    NAME ("float", NULL),
2933    NAME ("ifunc", NULL),
2934    NAME ("int", NULL),
2935    NAME ("long double", NULL),
2936    NAME ("long int", NULL),
2937    NAME ("long long", NULL),
2938    NAME ("malloc", NULL),
2939    NAME ("noclone", NULL),
2940    NAME ("noinline", NULL),
2941    NAME ("nonnull", NULL),
2942    NAME ("noreturn", NULL),
2943    NAME ("nothrow", NULL),
2944    NAME ("offsetof", NULL),
2945    NAME ("readonly", "read-only"),
2946    NAME ("readwrite", "read-write"),
2947    NAME ("restrict %", NULL),
2948    NAME ("restrict-qualified", "%<restrict%>-qualified"),
2949    NAME ("short int", NULL),
2950    NAME ("signed char", NULL),
2951    NAME ("signed int", NULL),
2952    NAME ("signed long", NULL),
2953    NAME ("signed short", NULL),
2954    NAME ("sizeof", NULL),
2955    NAME ("typeof", NULL),
2956    NAME ("unsigned char", NULL),
2957    NAME ("unsigned int", NULL),
2958    NAME ("unsigned long", NULL),
2959    NAME ("unsigned short", NULL),
2960    NAME ("volatile %", NULL),
2961    NAME ("volatile-qualified", "%<volatile%>-qualified"),
2962    NAME ("weakref", NULL),
2963   };
2964 
/* C++-only names that are expected to be quoted within the format
   string.  None of them has an alternate spelling.  */
static const token_t cxx_keywords[] =
  {
   /* C++ only keywords and operators.  */
   NAME ("catch", NULL),
   /* NOTE(review): "constexpr if" presumably has to stay ahead of
      "constexpr" so that a first-match search finds the longer form
      -- confirm against the code that searches this table.  */
   NAME ("constexpr if", NULL),
   NAME ("constexpr", NULL),
   NAME ("constinit", NULL),
   NAME ("consteval", NULL),
   NAME ("decltype", NULL),
   NAME ("nullptr", NULL),
   NAME ("operator delete", NULL),
   NAME ("operator new", NULL),
   NAME ("typeid", NULL),
   NAME ("typeinfo", NULL)
  };
2980 
/* Blacklisted words such as misspellings that should be avoided in favor
   of the specified alternatives.  Each entry has the same three fields
   as token_t: the word, its length, and the preferred replacement.  */
static const struct
{
  const char *name;   /* Bad word.  */
  unsigned char len;  /* Its length.  */
  const char *alt;    /* Preferred alternative.  */
} badwords[] =
  {
   NAME ("arg", "argument"),
   NAME ("bitfield", "bit-field"),
   NAME ("builtin function", "built-in function"),
   NAME ("can not", "cannot"),
   NAME ("commandline option", "command-line option"),
   NAME ("commandline", "command line"),
   NAME ("command line option", "command-line option"),
   NAME ("decl", "declaration"),
   NAME ("enumeral", "enumerated"),
   NAME ("floating point", "floating-point"),
   NAME ("nonstatic", "non-static"),
   NAME ("non-zero", "nonzero"),
   NAME ("reg", "register"),
   NAME ("stmt", "statement"),
  };
3005 
3006 /* Common contractions that should be avoided in favor of the specified
3007    alternatives.  */
3008 
3009 static const struct
3010 {
3011   const char *name;   /* Contraction.  */
3012   unsigned char len;  /* Its length.  */
3013   const char *alt;    /* Preferred alternative.  */
3014 } contrs[] =
3015   {
3016    NAME ("can't", "cannot"),
3017    NAME ("didn't", "did not"),
3018    /* These are commonly abused.  Avoid diagnosing them for now.
3019       NAME ("isn't", "is not"),
3020       NAME ("don't", "is not"),
3021    */
3022    NAME ("mustn't", "must not"),
3023    NAME ("needn't", "need not"),
3024    NAME ("should't", "should not"),
3025    NAME ("that's", "that is"),
3026    NAME ("there's", "there is"),
3027    NAME ("they're", "they are"),
3028    NAME ("what's", "what is"),
3029    NAME ("won't", "will not")
3030   };
3031 
/* Check for unquoted TOKENS.  NTOKS is the number of elements in
   TOKENS.  FORMAT_STRING_LOC is the location of
   the format string, FORMAT_STRING_CST the format string itself (as
   a tree), ORIG_FORMAT_CHARS and FORMAT_CHARS are pointers to
   the beginning of the format string and the character currently
   being processed, and BALTOKS describes paired "tokens" within
   the format string that are expected to be balanced.
   Besides TOKENS, also diagnoses unquoted __attribute__, names of
   built-ins, identifiers containing underscores, and the misspellings
   in badwords.
   Returns a pointer to the last processed character or null when
   nothing was done.  */

static const char*
check_tokens (const token_t *tokens, unsigned ntoks,
	      location_t format_string_loc, tree format_string_cst,
	      const char *orig_format_chars, const char *format_chars,
	      baltoks_t &baltoks)
{
  /* For brevity.  */
  const int opt = OPT_Wformat_diag;
  /* Zero-based starting position of a problem sequence.  */
  int fmtchrpos = format_chars - orig_format_chars;

  /* For identifier-like "words," set to the word length.  */
  unsigned wlen = 0;
  /* Set for an operator, clear for an identifier/word.  */
  bool is_oper = false;
  /* Set if the word contains at least one underscore.  */
  bool underscore = false;

  if (format_chars[0] == '_' || ISALPHA (format_chars[0]))
    {
      while (format_chars[wlen] == '_' || ISALNUM (format_chars[wlen]))
	{
	  underscore |= format_chars[wlen] == '_';
	  ++wlen;
	}
    }
  else
    is_oper = true;

  /* Look for the first token that is a prefix of the text at
     FORMAT_CHARS and is at least as long as the word found above.  */
  for (unsigned i = 0; i != ntoks; ++i)
    {
      unsigned toklen = tokens[i].len;

      if (toklen < wlen
	  || strncmp (format_chars, tokens[i].name, toklen))
	continue;

      if (toklen == 2
	  && format_chars - orig_format_chars > 0
	  && (TOUPPER (format_chars[-1]) == 'C'
	      || TOUPPER (format_chars[-1]) == 'G'))
	return format_chars + toklen - 1;   /* Reference to C++ or G++.  */

      if (ISPUNCT (format_chars[toklen - 1]))
	{
	  if (format_chars[toklen - 1] == format_chars[toklen])
	    return NULL;   /* Operator followed by another punctuator.  */
	}
      else if (ISALNUM (format_chars[toklen]))
	return NULL;   /* Keyword prefix for a longer word.  */

      if (toklen == 2
	  && format_chars[0] == '-'
	  && format_chars[1] == '-'
	  && ISALNUM (format_chars[2]))
	return NULL;   /* Probably option like --help.  */

      /* Allow this ugly warning for the time being.  */
      if (toklen == 2
	  && format_chars - orig_format_chars > 6
	  && !strncmp (format_chars - 7, " count >= width of ", 19))
	return format_chars + 10;

      /* The token is a type if it ends in an alphabetic character
	 and contains a space.  */
      bool is_type = (ISALPHA (tokens[i].name[toklen - 1])
		      && strchr (tokens[i].name, ' '));

      /* Backtrack to the last alphabetic character (for tokens whose
	 names end in '%').  */
      if (!is_oper)
	while (!ISALPHA (tokens[i].name[toklen - 1]))
	  --toklen;

      /* Suggest the alternate spelling only if the warning was
	 actually issued.  */
      if (format_warning_substr (format_string_loc, format_string_cst,
				 fmtchrpos, fmtchrpos + toklen, opt,
				 (is_type
				  ? G_("unquoted type name %<%.*s%> in format")
				  : (is_oper
				     ? G_("unquoted operator %<%.*s%> in format")
				     : G_("unquoted keyword %<%.*s%> in format"))),
				 toklen, format_chars)
	  && tokens[i].alt)
	inform (format_string_loc, "use %qs instead", tokens[i].alt);

      return format_chars + toklen - 1;
    }

  /* Diagnose unquoted __attribute__.  Consider any parenthesized
     argument to the attribute to avoid redundant warnings for
     the double parentheses that might follow.  */
  if (!strncmp (format_chars, "__attribute", sizeof "__attribute" - 1))
    {
      /* Skip any trailing underscores, as in "__attribute__".  */
      unsigned nchars = sizeof "__attribute" - 1;
      while ('_' == format_chars[nchars])
	++nchars;

      /* Skip spaces up to the next non-space character, if any.  */
      for (int i = nchars; format_chars[i]; ++i)
	if (' ' != format_chars[i])
	  {
	    nchars = i;
	    break;
	  }

      if (format_chars[nchars] == '(')
	{
	  baltoks.parens.safe_push (format_chars + nchars);

	  ++nchars;
	  /* Set if a second opening parenthesis follows the first.  */
	  bool close = false;
	  if (format_chars[nchars] == '(')
	    {
	      baltoks.parens.safe_push (format_chars + nchars);
	      close = true;
	      ++nchars;
	    }
	  /* Advance past the first closing parenthesis.  */
	  for (int i = nchars; format_chars[i]; ++i)
	    if (')' == format_chars[i])
	      {
		if (baltoks.parens.length () > 0)
		  baltoks.parens.pop ();
		nchars = i + 1;
		break;
	      }

	  if (close && format_chars[nchars] == ')')
	    {
	      if (baltoks.parens.length () > 0)
		baltoks.parens.pop ();
	      ++nchars;
	    }
	}

      format_warning_substr (format_string_loc, format_string_cst,
			     fmtchrpos, fmtchrpos + nchars, opt,
			      "unquoted attribute in format");
      return format_chars + nchars - 1;
    }

  /* Diagnose unquoted built-ins.  */
  if (format_chars[0] == '_'
      && format_chars[1] == '_'
      && (!strncmp (format_chars + 2, "atomic", sizeof "atomic" - 1)
	  || !strncmp (format_chars + 2, "builtin", sizeof "builtin" - 1)
	  || !strncmp (format_chars + 2, "sync", sizeof "sync" - 1)))
    {
      format_warning_substr (format_string_loc, format_string_cst,
			     fmtchrpos, fmtchrpos + wlen, opt,
			     "unquoted name of built-in function %<%.*s%> "
			     "in format",
			     wlen, format_chars);
      return format_chars + wlen - 1;
    }

  /* Diagnose unquoted substrings of alphanumeric characters containing
     underscores.  They most likely refer to identifiers and should be
     quoted.  */
  if (underscore)
    format_warning_substr (format_string_loc, format_string_cst,
			   format_chars - orig_format_chars,
			   format_chars + wlen - orig_format_chars,
			   opt,
			   "unquoted identifier or keyword %<%.*s%> in format",
			   wlen, format_chars);
  else
    {
      /* Diagnose some common misspellings.  */
      for (unsigned i = 0; i != sizeof badwords / sizeof *badwords; ++i)
	{
	  /* NOTE(review): strspn yields the number of leading ' ' or
	     '-' characters, which is zero for every current badwords
	     entry, so the WLEN >= BADWLEN test below is always true.
	     strcspn (length of the first word) may have been intended
	     -- confirm.  */
	  unsigned badwlen = strspn (badwords[i].name, " -");
	  if (wlen >= badwlen
	      && (wlen <= badwords[i].len
		  || (wlen == badwords[i].len + 1U
		      && TOUPPER (format_chars[wlen - 1]) == 'S'))
	      && !strncasecmp (format_chars, badwords[i].name, badwords[i].len))
	    {
	      /* Handle singular as well as plural forms of all bad words
		 even though the latter don't necessarily make sense for
		 all of the former (like "can nots").  */
	      badwlen = badwords[i].len;
	      const char *plural = "";
	      if (TOUPPER (format_chars[badwlen]) == 'S')
		{
		  ++badwlen;
		  plural = "s";
		}

	      /* NOTE(review): the quoted term includes a plural 's'
		 (BADWLEN) but the reported range and the returned
		 pointer use the singular length -- confirm this is
		 intended.  */
	      format_warning_substr (format_string_loc, format_string_cst,
				     fmtchrpos, fmtchrpos + badwords[i].len,
				     opt,
				     "misspelled term %<%.*s%> in format; "
				     "use %<%s%s%> instead",
				     badwlen, format_chars,
				     badwords[i].alt, plural);

	      return format_chars + badwords[i].len - 1;
	    }
	}

      /* Skip C++/G++.  */
      if (!strncasecmp (format_chars, "c++", 3)
	  || !strncasecmp (format_chars, "g++", 3))
	return format_chars + 2;
    }

  /* Skip the rest of any word found above; otherwise report that
     nothing was done.  */
  return wlen ? format_chars + wlen - 1 : NULL;
}
3246 
/* Check plain text in a format string of a GCC diagnostic function
   for common quoting, punctuation, and spelling mistakes, and issue
   -Wformat-diag warnings if they are found.  FORMAT_STRING_LOC is
   the location of the format string, FORMAT_STRING_CST the format
   string itself (as a tree), ORIG_FORMAT_CHARS and FORMAT_CHARS are
   pointers to the beginning of the format string and the character
   currently being processed, and BALTOKS describes paired "tokens"
   within the format string that are expected to be balanced.

   The checks are tried in a fixed order and the first one that
   applies determines the result: a directive ('%'), text inside
   an open quoting directive, whitespace, control characters, the C
   and C++ operator and keyword tables (via check_tokens), option
   names, punctuation, and finally any remaining non-graph characters.
   As a side effect, brackets, braces, parentheses, angle brackets,
   and quote characters found in the text update BALTOKS.

   Returns a pointer to the last processed character.  When
   FORMAT_CHARS points at a '%' it is returned unchanged.  */

static const char*
check_plain (location_t format_string_loc, tree format_string_cst,
	     const char *orig_format_chars, const char *format_chars,
	     baltoks_t &baltoks)
{
  /* For brevity.  */
  const int opt = OPT_Wformat_diag;
  /* Zero-based starting position of a problem sequence.  */
  int fmtchrpos = format_chars - orig_format_chars;

  /* A directive.  Only the two patterns below are diagnosed here and
     FORMAT_CHARS is returned unchanged in either case.  */
  if (*format_chars == '%')
    {
      /* Diagnose %<%s%> and suggest using %qs instead.  */
      if (!strncmp (format_chars, "%<%s%>", 6))
	format_warning_substr (format_string_loc, format_string_cst,
			       fmtchrpos, fmtchrpos + 6, opt,
			       "quoted %qs directive in format; "
			       "use %qs instead", "%s", "%qs");
      /* Diagnose "can%'t" (matched case-insensitively).  FORMAT_CHARS
	 points at the '%', three characters into the word, hence
	 the requirement for at least three preceding characters.  */
      else if (format_chars - orig_format_chars > 2
	       && !strncasecmp (format_chars - 3, "can%'t", 6))
	format_warning_substr (format_string_loc,
			       format_string_cst,
			       fmtchrpos - 3, fmtchrpos + 3, opt,
			       "contraction %<%.*s%> in format; "
			       "use %qs instead",
			       6, format_chars - 3, "cannot");

      return format_chars;
    }

  if (baltoks.quotdirs.length ())
    {
      /* Skip over all plain text within a quoting directive until
	 the next directive.  */
      while (*format_chars && '%' != *format_chars)
	++format_chars;

      /* FORMAT_CHARS now points either at the next '%' or at
	 the terminating NUL.  */
      return format_chars;
    }

  /* The length of the problem sequence.  */
  int nchars = 0;

  /* Diagnose any whitespace characters other than <space>, and for
     <space>s only trailing ones and runs of two or more (leading
     <space>s are accepted).  Do this before diagnosing control
     characters because whitespace is a subset of controls.  */
  const char *other_than_space = NULL;
  while (ISSPACE (format_chars[nchars]))
    {
      /* Remember the first whitespace character that is not a space.  */
      if (format_chars[nchars] != ' ' && !other_than_space)
	other_than_space = format_chars + nchars;
      ++nchars;
    }

  if (nchars)
    {
      /* This is the most common problem: go the extra mile to describe
	 the problem in as much helpful detail as possible.  */
      if (other_than_space)
	{
	  format_warning_substr (format_string_loc, format_string_cst,
				 fmtchrpos, fmtchrpos + nchars, opt,
				 "unquoted whitespace character %qc in format",
				 *other_than_space);
	  return format_chars + nchars - 1;
	}

      if (fmtchrpos == 0)
	/* Accept strings of leading spaces with no warning.  */
	return format_chars + nchars - 1;

      if (!format_chars[nchars])
	{
	  format_warning_substr (format_string_loc, format_string_cst,
				 fmtchrpos, fmtchrpos + nchars, opt,
				 "spurious trailing space in format");
	  return format_chars + nchars - 1;
	}

      if (nchars > 1)
	{
	  if (nchars == 2
	      && orig_format_chars < format_chars
	      && format_chars[-1] == '.'
	      && format_chars[0] == ' '
	      && format_chars[1] == ' ')
	    {
	      /* A period followed by two spaces.  This is accepted
		 except for the capitalization mismatch diagnosed
		 below.  */
	      if (ISUPPER (*orig_format_chars))
		{
		  /* If the part before the period is a capitalized
		     sentence check to make sure that what follows
		     is also capitalized.  */
		  if (ISLOWER (format_chars[2]))
		    format_warning_substr (format_string_loc, format_string_cst,
					   fmtchrpos, fmtchrpos + nchars, opt,
					   "inconsistent capitalization in "
					   "format");
		}
	    }
	  else
	    format_warning_substr (format_string_loc, format_string_cst,
				   fmtchrpos, fmtchrpos + nchars, opt,
				   "unquoted sequence of %i consecutive "
				   "space characters in format", nchars);
	  return format_chars + nchars - 1;
	}

      /* A single space in the middle of the string is fine: step
	 over it and examine the character that follows.  */
      format_chars += nchars;
      nchars = 0;
    }

  /* Recompute the position in case a space was skipped above.  */
  fmtchrpos = format_chars - orig_format_chars;

  /* Diagnose any unquoted control characters other than the terminating
     NUL.  */
  while (format_chars[nchars] && ISCNTRL (format_chars[nchars]))
    ++nchars;

  if (nchars > 1)
    {
      format_warning_substr (format_string_loc, format_string_cst,
			     fmtchrpos, fmtchrpos + nchars, opt,
			     "unquoted control characters in format");
      return format_chars + nchars - 1;
    }
  if (nchars)
    {
      format_warning_substr (format_string_loc, format_string_cst,
			     fmtchrpos, fmtchrpos + nchars, opt,
			     "unquoted control character %qc in format",
			     *format_chars);
      return format_chars + nchars - 1;
    }

  /* Look up text starting with a punctuation character in the C
     operator table, and in the C++ table as well when compiling C++
     (otherwise the latter is searched with a zero element count).
     A non-null result from check_tokens ends the check.  */
  if (ISPUNCT (format_chars[0]))
    {
      size_t nelts = sizeof c_opers / sizeof *c_opers;
      if (const char *ret = check_tokens (c_opers, nelts,
					  format_string_loc, format_string_cst,
					  orig_format_chars, format_chars,
					  baltoks))
	return ret;

      nelts = c_dialect_cxx () ? sizeof cxx_opers / sizeof *cxx_opers : 0;
      if (const char *ret = check_tokens (cxx_opers, nelts,
					  format_string_loc, format_string_cst,
					  orig_format_chars, format_chars,
					  baltoks))
	return ret;
    }

  /* Likewise look up text starting with a letter in the keyword
     tables.  */
  if (ISALPHA (format_chars[0]))
    {
      size_t nelts = sizeof c_keywords / sizeof *c_keywords;
      if (const char *ret = check_tokens (c_keywords, nelts,
					  format_string_loc, format_string_cst,
					  orig_format_chars, format_chars,
					  baltoks))
	return ret;

      nelts = c_dialect_cxx () ? sizeof cxx_keywords / sizeof *cxx_keywords : 0;
      if (const char *ret = check_tokens (cxx_keywords, nelts,
					  format_string_loc, format_string_cst,
					  orig_format_chars, format_chars,
					  baltoks))
	return ret;
    }

  nchars = 0;

  /* Diagnose unquoted options: a '-' or "--" followed by a letter,
     either at the very beginning of the string or right after
     a space.  */
  if  ((format_chars == orig_format_chars
	|| format_chars[-1] == ' ')
       && format_chars[0] == '-'
       && ((format_chars[1] == '-'
	    && ISALPHA (format_chars[2]))
	   || ISALPHA (format_chars[1])))
    {
      /* The option name extends over alphanumeric characters,
	 underscores, dashes, and plus signs.  */
      nchars = 1;
      while (ISALNUM (format_chars[nchars])
	     || '_' == format_chars[nchars]
	     || '-' == format_chars[nchars]
	     || '+' == format_chars[nchars])
	++nchars;

      format_warning_substr (format_string_loc, format_string_cst,
			     fmtchrpos, fmtchrpos + nchars, opt,
			     "unquoted option name %<%.*s%> in format",
			     nchars, format_chars);
      return format_chars + nchars - 1;
    }

  /* Diagnose leading, trailing, and two or more consecutive punctuation
     characters.  While scanning the sequence (which ends at a '%'),
     push each opening bracket, brace, parenthesis, or angle bracket
     onto the corresponding BALTOKS stack and pop the stack for each
     closing one.  A closing token with nothing left to match stops
     the scan and is reported as unbalanced.  */
  const char *unbalanced = NULL;
  while ('%' != format_chars[nchars]
	 && ISPUNCT (format_chars[nchars])
	 && !unbalanced)
    {
      switch (format_chars[nchars])
	{
	case '[':
	  baltoks.brackets.safe_push (format_chars + nchars);
	  break;
	case '{':
	  baltoks.curly.safe_push (format_chars + nchars);
	  break;
	case '(':
	  baltoks.parens.safe_push (format_chars + nchars);
	  break;
	case '<':
	  baltoks.pointy.safe_push (format_chars + nchars);
	  break;

	case ']':
	  if (baltoks.brackets.length () > 0)
	    baltoks.brackets.pop ();
	  else
	    unbalanced = format_chars + nchars;
	  break;
	case '}':
	  if (baltoks.curly.length () > 0)
	    baltoks.curly.pop ();
	  else
	    unbalanced = format_chars + nchars;
	  break;
	case ')':
	  if (baltoks.parens.length () > 0)
	    baltoks.parens.pop ();
	  else
	    unbalanced = format_chars + nchars;
	  break;
	case '>':
	  if (baltoks.pointy.length () > 0)
	    baltoks.pointy.pop ();
	  else
	    unbalanced = format_chars + nchars;
	  break;
	}

      ++nchars;
    }

  if (unbalanced)
    {
      format_warning_substr (format_string_loc, format_string_cst,
			     fmtchrpos, fmtchrpos + nchars, opt,
			     "unbalanced punctuation character %qc in format",
			     *unbalanced);
      return format_chars + nchars - 1;
    }

  /* NCHARS is now the length of the punctuation sequence, if any.  */
  if (nchars)
    {
      /* Consider any identifier that follows the pound ('#') sign
	 a preprocessing directive.  */
      if (nchars == 1
	  && format_chars[0] == '#'
	  && ISALPHA (format_chars[1]))
	{
	  while (ISALNUM (format_chars[nchars])
		 || format_chars[nchars] == '_')
	    ++nchars;

	  format_warning_substr (format_string_loc, format_string_cst,
				 fmtchrpos, fmtchrpos + nchars, opt,
				 "unquoted preprocessing directive %<%.*s%> "
				 "in format", nchars, format_chars);
	  return format_chars + nchars - 1;
	}

      /* Diagnose a bare single quote with a letter on either side,
	 i.e., an apostrophe within a word.  */
      if (nchars == 1
	  && format_chars[0] == '\''
	  && format_chars - orig_format_chars
	  && ISALPHA (format_chars[-1])
	  && ISALPHA (format_chars[1]))
	{
	  /* Diagnose a subset of contractions that are best avoided.  */
	  for (unsigned i = 0; i != sizeof contrs / sizeof *contrs; ++i)
	    {
	      /* OFF is the offset of the apostrophe within the
		 contraction; use it to line up the table entry with
		 the text around FORMAT_CHARS.  */
	      const char *apos = strchr (contrs[i].name, '\'');
	      gcc_assert (apos != NULL);
	      int off = apos - contrs[i].name;

	      if (format_chars - orig_format_chars >= off
		  && !strncmp (format_chars - off,
			       contrs[i].name, contrs[i].len))
		{
		  format_warning_substr (format_string_loc,
					 format_string_cst,
					 fmtchrpos, fmtchrpos + nchars, opt,
					 "contraction %<%.*s%> in format; "
					 "use %qs instead",
					 contrs[i].len, contrs[i].name,
					 contrs[i].alt);
		  return format_chars + nchars - 1;
		}
	    }

	  /* Not one of the known contractions: issue the generic
	     warning, followed by a note if the warning was emitted.  */
	  if (format_warning_substr (format_string_loc, format_string_cst,
				     fmtchrpos, fmtchrpos + nchars, opt,
				     "bare apostrophe %<'%> in format"))
	    inform (format_string_loc,
		    "if avoiding the apostrophe is not feasible, enclose "
		    "it in a pair of %qs and %qs directives instead",
		    "%<", "%>");
	  return format_chars + nchars - 1;
	}

      /* Diagnose a backtick (grave accent).  */
      if (nchars == 1
	  && format_chars[0] == '`')
	{
	  if (format_warning_substr (format_string_loc, format_string_cst,
				     fmtchrpos, fmtchrpos + nchars, opt,
				     "grave accent %<`%> in format"))
	    inform (format_string_loc,
		    "use the apostrophe directive %qs instead", "%'");
	  return format_chars + nchars - 1;
	}

      /* Diagnose a punctuation character after a space.  The reported
	 range starts at the preceding space (FMTCHRPOS - 1).  */
      if (nchars == 1
	  && format_chars - orig_format_chars
	  && format_chars[-1] == ' '
	  && strspn (format_chars, "!?:;.,") == 1)
	{
	  format_warning_substr (format_string_loc, format_string_cst,
				 fmtchrpos - 1, fmtchrpos, opt,
				 "space followed by punctuation character "
				 "%<%c%>", format_chars[0]);
	  return format_chars;
	}

      if (nchars == 1)
	{
	  /* Diagnose "%s" enclosed in double quotes.  The opening
	     quote is still recorded in BALTOKS below.  */
	  if (!strncmp (format_chars, "\"%s\"", 4))
	    {
	      if (format_warning_substr (format_string_loc, format_string_cst,
					 fmtchrpos, fmtchrpos + 4, opt,
					 "quoted %qs directive in format",
					 "%s"))
		inform (format_string_loc, "if using %qs is not feasible, "
			"use %qs instead", "%qs", "\"%-s\"");
	    }

	  /* Toggle the record of an open quote: remember the position
	     of an opening quote character and clear it on the next
	     one of the same kind.  */
	  if (format_chars[0] == '"')
	    {
	      baltoks.doublequote = baltoks.doublequote ? NULL : format_chars;
	      return format_chars + nchars - 1;
	    }
	  if (format_chars[0] == '\'')
	    {
	      baltoks.singlequote = baltoks.singlequote ? NULL : format_chars;
	      return format_chars + nchars - 1;
	    }
	}

      /* Punctuation at the very beginning of the format string.  */
      if (fmtchrpos == 0)
	{
	  if (nchars == 1
	      && format_chars[0] == '(')
	    ;   /* Text beginning in an open parenthesis.  */
	  else if (nchars == 3
	      && !strncmp (format_chars, "...", 3)
	      && format_chars[3])
	    ;   /* Text beginning in an ellipsis.  */
	  else
	    {
	      format_warning_substr (format_string_loc, format_string_cst,
				     fmtchrpos, fmtchrpos + nchars, opt,
				     "spurious leading punctuation sequence "
				     "%<%.*s%> in format",
				     nchars, format_chars);
	      return format_chars + nchars - 1;
	    }
	}
      /* Punctuation at the very end of the format string.  */
      else if (!format_chars[nchars])
	{
	  if (nchars == 1
	      && (format_chars[nchars - 1] == ':'
		  || format_chars[nchars - 1] == ')'))
	    ;   /* Text ending in a colon or a closing parenthesis.  */
	  else if (nchars == 1
		   && ((ISUPPER (*orig_format_chars)
			&& format_chars[nchars - 1] == '.')
		       || strspn (format_chars + nchars - 1, "?])") == 1))
		  ;   /* Capitalized sentence terminated by a single period,
			 or text ending in a question mark, closing bracket,
			 or parenthesis.  */
	  else if (nchars == 2
		   && format_chars[0] == '?'
		   && format_chars[1] == ')')
	    ;   /* A question mark ending a parenthetical note ("?)").  */
	  else if (nchars == 2
		   && format_chars[0] == ')'
		   && (format_chars[1] == '?'
		       || format_chars[1] == ';'
		       || format_chars[1] == ':'
		       || (ISUPPER (*orig_format_chars)
			   && format_chars[1] == '.')))
	    ;   /* Closing parenthetical note followed by a question mark,
		   semicolon, or colon at the end of the string, or by
		   a period at the end of a capitalized sentence.  */
	  else if (nchars == 3
		   && format_chars - orig_format_chars > 0
		   && !strncmp (format_chars, "...", 3))
	    ;   /* Text ending in the ellipsis.  */
	  else
	    format_warning_substr (format_string_loc, format_string_cst,
				   fmtchrpos, fmtchrpos + nchars, opt,
				   "spurious trailing punctuation sequence "
				   "%<%.*s%> in format",
				   nchars, format_chars);

	  return format_chars + nchars - 1;
	}
      /* Punctuation in the middle of the format string.  */
      else if (nchars == 2
	       && format_chars[0] == ')'
	       && (format_chars[1] == ':'
		   || format_chars[1] == ';'
		   || format_chars[1] == ',')
	       && format_chars[2] == ' ')
	;   /* Closing parenthetical note followed by a colon, semicolon
	       or a comma followed by a space in the middle of the string.  */
      else if (nchars > 1)
	format_warning_substr (format_string_loc, format_string_cst,
			       fmtchrpos, fmtchrpos + nchars, opt,
			       "unquoted sequence of %i consecutive "
			       "punctuation characters %q.*s in format",
			       nchars, nchars, format_chars);
      return format_chars + nchars - 1;
    }

  nchars = 0;

  /* Finally, diagnose any unquoted non-graph, non-punctuation characters
     other than the terminating NUL.  */
  while (format_chars[nchars]
	 && '%' != format_chars[nchars]
	 && !ISPUNCT (format_chars[nchars])
	 && !ISGRAPH (format_chars[nchars]))
    ++nchars;

  if (nchars > 1)
    {
      format_warning_substr (format_string_loc, format_string_cst,
			     fmtchrpos, fmtchrpos + nchars, opt,
			     "unquoted non-graph characters in format");
      return format_chars + nchars - 1;
    }
  if (nchars)
    {
      format_warning_substr (format_string_loc, format_string_cst,
			     fmtchrpos, fmtchrpos + nchars, opt,
			     "unquoted non-graph character %qc in format",
			     *format_chars);
      return format_chars + nchars - 1;
    }

  /* Nothing to diagnose at this character.  */
  return format_chars;
}
3722 
3723 /* Diagnose unbalanced tokens described by BALTOKS in format string
3724    ORIG_FORMAT_CHARS and the corresponding FORMAT_STRING_CST.  */
3725 
3726 static void
maybe_diag_unbalanced_tokens(location_t format_string_loc,const char * orig_format_chars,tree format_string_cst,baltoks_t & baltoks)3727 maybe_diag_unbalanced_tokens (location_t format_string_loc,
3728 			      const char *orig_format_chars,
3729 			      tree format_string_cst,
3730 			      baltoks_t &baltoks)
3731 {
3732   const char *unbalanced = NULL;
3733 
3734   if (baltoks.brackets.length ())
3735     unbalanced = baltoks.brackets.pop ();
3736   else if (baltoks.curly.length ())
3737     unbalanced = baltoks.curly.pop ();
3738   else if (baltoks.parens.length ())
3739     unbalanced = baltoks.parens.pop ();
3740   else if (baltoks.pointy.length ())
3741     unbalanced = baltoks.pointy.pop ();
3742 
3743   if (unbalanced)
3744     format_warning_at_char (format_string_loc, format_string_cst,
3745 			    unbalanced - orig_format_chars + 1,
3746 			    OPT_Wformat_diag,
3747 			    "unbalanced punctuation character %<%c%> in format",
3748 			    *unbalanced);
3749 
3750   if (baltoks.quotdirs.length ())
3751     format_warning_at_char (format_string_loc, format_string_cst,
3752 			    baltoks.quotdirs.pop () - orig_format_chars,
3753 			    OPT_Wformat_,
3754 			    "unterminated quoting directive");
3755 
3756   const char *quote
3757     = baltoks.singlequote ? baltoks.singlequote : baltoks.doublequote;
3758 
3759   if (quote)
3760     format_warning_at_char (format_string_loc, format_string_cst,
3761   			    quote - orig_format_chars + 1,
3762 			    OPT_Wformat_diag,
3763   			    "unterminated quote character %<%c%> in format",
3764   			    *quote);
3765 }
3766 
/* Do the main part of checking a call to a format function.  FORMAT_CHARS
   is the NUL-terminated format string (which at this point may contain
   internal NUL characters); FORMAT_LENGTH is its length (excluding the
   terminating NUL character).  ARG_NUM is one less than the number of
   the first format argument to check; PARAMS points to that format
   argument in the list of arguments.

   RES supplies the location of the format string and has its
   NUMBER_OTHER and NUMBER_EXTRA_ARGS counts adjusted when arguments
   remain after the last directive.  INFO describes the kind of format
   function being checked.  FORMAT_STRING_CST is the format string as
   a tree.  FMT_PARAM_LOC is passed on to the argument type check.
   FWT_POOL and ARGLOCS are passed on to the argument parser.

   The function returns early, without the end-of-string checks, when
   one of the parsing steps that return false fails.  */

static void
check_format_info_main (format_check_results *res,
			function_format_info *info, const char *format_chars,
			location_t fmt_param_loc, tree format_string_cst,
			int format_length, tree params,
			unsigned HOST_WIDE_INT arg_num,
			object_allocator <format_wanted_type> &fwt_pool,
			vec<location_t> *arglocs)
{
  const char * const orig_format_chars = format_chars;
  const tree first_fillin_param = params;

  const format_kind_info * const fki = &format_types[info->format_type];
  const format_flag_spec * const flag_specs = fki->flag_specs;
  const location_t format_string_loc = res->format_string_loc;

  /* -1 if no conversions taking an operand have been found; 0 if one has
     and it didn't use $; 1 if $ formats are in use.  */
  int has_operand_number = -1;

  /* Vectors of pointers to opening quoting directives (like GCC "%<"),
     opening braces, brackets, and parentheses.  Used to detect unbalanced
     tokens.  */
  baltoks_t baltoks;

  /* Pointers to the most recent color directives (like GCC's "%r" or
     "%R").  A starting color directive must be terminated before the
     end of the format string.  A terminating directive makes no sense
     without a prior starting directive.  */
  const char *color_begin = NULL;
  const char *color_end = NULL;

  init_dollar_format_checking (info->first_arg_num, first_fillin_param);

  /* In GCC diagnostic functions check plain directives (substrings within
     the format string that don't start with %) for quoting and punctuation
     problems.  */
  bool ck_plain = (!info->is_raw
		   && (info->format_type == gcc_diag_format_type
		       || info->format_type == gcc_tdiag_format_type
		       || info->format_type == gcc_cdiag_format_type
		       || info->format_type == gcc_cxxdiag_format_type));

  while (*format_chars != 0)
    {
      /* check_plain returns a pointer to the last character it
	 processed; the test below then steps past that character.  */
      if (ck_plain)
	format_chars = check_plain (format_string_loc,
				    format_string_cst,
				    orig_format_chars, format_chars,
				    baltoks);

      /* Skip one character of plain text.  For a '%' step past it
	 to the body of the directive.  */
      if (*format_chars == 0 || *format_chars++ != '%')
	continue;

      if (*format_chars == 0)
	{
	  format_warning_at_char (format_string_loc, format_string_cst,
				  format_chars - orig_format_chars,
				  OPT_Wformat_,
				  "spurious trailing %<%%%> in format");
	  continue;
	}
      /* "%%": skip the second percent sign as well.  */
      if (*format_chars == '%')
	{
	  ++format_chars;
	  continue;
	}

      /* ARGUMENT_PARSER ctor takes FORMAT_CHARS by reference and calls
	 to ARG_PARSER members may modify the variable.  */
      flag_chars_t flag_chars;
      argument_parser arg_parser (info, format_chars, format_string_cst,
				  orig_format_chars, format_string_loc,
				  flag_chars, has_operand_number,
				  first_fillin_param, fwt_pool, arglocs);

      if (!arg_parser.read_any_dollar ())
	return;

      if (!arg_parser.read_format_flags ())
	return;

      /* Read any format width, possibly * or *m$.  */
      if (!arg_parser.read_any_format_width (params, arg_num))
	return;

      /* Read any format left precision (must be a number, not *).  */
      arg_parser.read_any_format_left_precision ();

      /* Read any format precision, possibly * or *m$.  */
      if (!arg_parser.read_any_format_precision (params, arg_num))
	return;

      /* Position in the format string after the flags, width, and
	 precision; passed to the argument type check below.  */
      const char *format_start = format_chars;

      arg_parser.handle_alloc_chars ();

      /* The rest of the conversion specification is the length modifier
	 (if any), and the conversion specifier, so this is where the
	 type information starts.  If we need to issue a suggestion
	 about a type mismatch, then we should preserve everything up
	 to here.  */
      const char *type_start = format_chars;

      /* Read any length modifier, if this kind of format has them.  */
      const length_modifier len_modifier
	= arg_parser.read_any_length_modifier ();

      /* Read any modifier (strftime E/O).  */
      arg_parser.read_any_other_modifier ();

      /* The conversion specifier.  Diagnose its absence: either the
	 end of the string or, for formats without the
	 FMT_FLAG_FANCY_PERCENT_OK flag, a percent sign.  */
      char format_char = *format_chars;
      if (format_char == 0
	  || (!(fki->flags & (int) FMT_FLAG_FANCY_PERCENT_OK)
	      && format_char == '%'))
	{
	  format_warning_at_char (format_string_loc, format_string_cst,
			     format_chars - orig_format_chars,
			     OPT_Wformat_,
			     "conversion lacks type at end of format");
	  continue;
	}
      format_chars++;

      /* No entry for this conversion character (presumably already
	 diagnosed by find_format_char_info -- TODO confirm): move on
	 to the rest of the string.  */
      const format_char_info * const fci
	= arg_parser.find_format_char_info (format_char);
      if (!fci)
	continue;

      flag_chars.validate (fki, fci, flag_specs, format_chars,
			   format_string_cst,
			   format_string_loc, orig_format_chars, format_char,
			   baltoks.quotdirs.length () > 0);

      const int alloc_flag = flag_chars.get_alloc_flag (fki);
      const bool suppressed = flag_chars.assignment_suppression_p (fki);

      /* Diagnose nested or unmatched quoting directives such as GCC's
	 "%<...%<" and "%>...%>".  A '<' in FLAGS2 marks a directive
	 that begins quoting and a '>' one that ends it; a directive
	 with both (or neither) leaves the stack of open quoting
	 directives alone.  */
      bool quot_begin_p = strchr (fci->flags2, '<');
      bool quot_end_p = strchr (fci->flags2, '>');

      if (quot_begin_p && !quot_end_p)
	{
	  if (baltoks.quotdirs.length ())
	    format_warning_at_char (format_string_loc, format_string_cst,
				    format_chars - orig_format_chars,
				    OPT_Wformat_,
				    "nested quoting directive");
	  baltoks.quotdirs.safe_push (format_chars);
	}
      else if (!quot_begin_p && quot_end_p)
	{
	  if (baltoks.quotdirs.length ())
	    baltoks.quotdirs.pop ();
	  else
	    format_warning_at_char (format_string_loc, format_string_cst,
				    format_chars - orig_format_chars,
				    OPT_Wformat_,
				    "unmatched quoting directive");
	}

      /* Track color directives: a '/' in FLAGS2 marks one that starts
	 a color and a '\\' one that resets it.  */
      bool color_begin_p = strchr (fci->flags2, '/');
      if (color_begin_p)
	{
	  color_begin = format_chars;
	  color_end = NULL;
	}
      else if (strchr (fci->flags2, '\\'))
	{
	  /* NOTE(review): "occurence" in the message below is
	     a misspelling of "occurrence".  It is left unchanged here
	     because it is user-visible text that tests and
	     translations may depend on.  */
	  if (color_end)
	    format_warning_at_char (format_string_loc, format_string_cst,
				    format_chars - orig_format_chars,
				    OPT_Wformat_,
				    "%qc directive redundant after prior "
				    "occurence of the same", format_char);
	  else if (!color_begin)
	    format_warning_at_char (format_string_loc, format_string_cst,
				    format_chars - orig_format_chars,
				    OPT_Wformat_,
				    "unmatched color reset directive");
	  color_end = format_chars;
	}

      /* Diagnose directives that shouldn't appear in a quoted sequence.
	 (They are denoted by a double quote in FLAGS2.)  */
      if (baltoks.quotdirs.length ())
	{
	  if (strchr (fci->flags2, '"'))
	    format_warning_at_char (format_string_loc, format_string_cst,
				    format_chars - orig_format_chars,
				    OPT_Wformat_,
				    "%qc conversion used within a quoted "
				    "sequence",
				    format_char);
	}

      /* Validate the pairs of flags used.  */
      arg_parser.validate_flag_pairs (fci, format_char);

      arg_parser.give_y2k_warnings (fci, format_char);

      arg_parser.parse_any_scan_set (fci);

      tree wanted_type = NULL;
      const char *wanted_type_name = NULL;

      if (!arg_parser.handle_conversions (fci, len_modifier,
					  wanted_type, wanted_type_name,
					  arg_num,
					  params,
					  format_char))
	continue;

      arg_parser.main_wanted_type.next = NULL;

      /* Finally. . .check type of argument against desired type!  */
      if (!arg_parser.check_argument_type (fci, len_modifier,
					   wanted_type, wanted_type_name,
					   suppressed,
					   arg_num, params,
					   alloc_flag,
					   format_start, type_start,
					   fmt_param_loc,
					   format_char))
	return;
    }

  /* The loop stops at the first NUL.  If that is not at FORMAT_LENGTH
     the NUL is embedded in the format string rather than being its
     terminator.  */
  if (format_chars - orig_format_chars != format_length)
    format_warning_at_char (format_string_loc, format_string_cst,
			    format_chars + 1 - orig_format_chars,
			    OPT_Wformat_contains_nul,
			    "embedded %<\\0%> in format");
  /* PARAMS is presumably advanced by the parser calls above as they
     consume arguments (TODO confirm), so a non-null value here means
     arguments are left over.  Unless $ operand numbers are in use,
     move this format from the NUMBER_OTHER count to
     NUMBER_EXTRA_ARGS.  */
  if (info->first_arg_num != 0 && params != 0
      && has_operand_number <= 0)
    {
      res->number_other--;
      res->number_extra_args++;
    }
  if (has_operand_number > 0)
    finish_dollar_format_checking (res, fki->flags & (int) FMT_FLAG_DOLLAR_GAP_POINTER_OK);

  maybe_diag_unbalanced_tokens (format_string_loc, orig_format_chars,
				format_string_cst, baltoks);

  /* A color was started by the most recent color directive but never
     reset.  */
  if (color_begin && !color_end)
    format_warning_at_char (format_string_loc, format_string_cst,
			    color_begin - orig_format_chars,
			    OPT_Wformat_, "unterminated color directive");
}
4024 
/* Check the argument types from a single format conversion (possibly
   including width and precision arguments).

   FMT_LOC is the location of the format conversion.

   TYPES is a singly-linked list expressing the parts of the format
   conversion that expect argument types, and the arguments they
   correspond to.

   FKI describes the kind of format being checked; it is consulted here
   to exempt the printf family from the null-pointer read check, and is
   passed on to format_type_warning.

   OFFSET_TO_TYPE_START is the offset within the execution-charset encoded
   format string to where type information begins for the conversion
   (the length modifier and conversion specifier).

   CONVERSION_CHAR is the user-provided conversion specifier.

   ARGLOCS, if non-NULL, is indexed by (1-based argument number - 1) to
   obtain a location for an argument whose expression carries none.

   For example, given:

     sprintf (d, "before %-+*.*lld after", arg3, arg4, arg5);

   then FMT_LOC covers this range:

     sprintf (d, "before %-+*.*lld after", arg3, arg4, arg5);
                         ^^^^^^^^^

   and TYPES in this case is a three-entry singly-linked list consisting of:
   (1) the check for the field width here:
         sprintf (d, "before %-+*.*lld after", arg3, arg4, arg5);
                                ^              ^^^^
       against arg3, and
   (2) the check for the field precision here:
         sprintf (d, "before %-+*.*lld after", arg3, arg4, arg5);
                                 ^^                  ^^^^
       against arg4, and
   (3) the check for the length modifier and conversion char here:
         sprintf (d, "before %-+*.*lld after", arg3, arg4, arg5);
                                   ^^^                     ^^^^
       against arg5.

   OFFSET_TO_TYPE_START is 13, the offset to the "lld" within the
   STRING_CST:

                  0000000000111111111122
                  0123456789012345678901
     sprintf (d, "before %-+*.*lld after", arg3, arg4, arg5);
                               ^ ^
                               | ` CONVERSION_CHAR: 'd'
                               type starts here.  */

static void
check_format_types (const substring_loc &fmt_loc,
		    format_wanted_type *types, const format_kind_info *fki,
		    int offset_to_type_start,
		    char conversion_char,
		    vec<location_t> *arglocs)
{
  for (; types != 0; types = types->next)
    {
      tree cur_param;
      tree cur_type;
      tree orig_cur_type;
      tree wanted_type;
      int arg_num;
      int i;
      int char_type_flag;

      wanted_type = types->wanted_type;
      arg_num = types->arg_num;

      /* The following should not occur here.  */
      gcc_assert (wanted_type);
      gcc_assert (wanted_type != void_type_node || types->pointer_count);

      /* An argument that is not accessed through a pointer is compared
	 against the promoted form of the wanted type.  */
      if (types->pointer_count == 0)
	wanted_type = lang_hooks.types.type_promotes_to (wanted_type);

      wanted_type = TYPE_MAIN_VARIANT (wanted_type);

      /* No argument is available for this entry: warn, passing a NULL
	 argument type, and move on to the next entry.  */
      cur_param = types->param;
      if (!cur_param)
        {
	  format_type_warning (fmt_loc, UNKNOWN_LOCATION, types, wanted_type,
			       NULL, fki, offset_to_type_start,
			       conversion_char);
          continue;
        }

      cur_type = TREE_TYPE (cur_param);
      if (cur_type == error_mark_node)
	continue;
      /* Remember the type as written; warnings report this rather than
	 the dereferenced/main-variant type computed below.  */
      orig_cur_type = cur_type;
      char_type_flag = 0;

      /* Prefer the expression's own location, falling back to ARGLOCS.  */
      location_t param_loc = UNKNOWN_LOCATION;
      if (EXPR_HAS_LOCATION (cur_param))
	param_loc = EXPR_LOCATION (cur_param);
      else if (arglocs)
	{
	  /* arg_num is 1-based.  */
	  gcc_assert (types->arg_num > 0);
	  param_loc = (*arglocs)[types->arg_num - 1];
	}

      STRIP_NOPS (cur_param);

      /* Check the types of any additional pointer arguments
	 that precede the "real" argument.  */
      for (i = 0; i < types->pointer_count; ++i)
	{
	  if (TREE_CODE (cur_type) == POINTER_TYPE)
	    {
	      cur_type = TREE_TYPE (cur_type);
	      if (cur_type == error_mark_node)
		break;

	      /* Check for writing through a NULL pointer.  */
	      if (types->writing_in_flag
		  && i == 0
		  && cur_param != 0
		  && integer_zerop (cur_param))
		warning (OPT_Wformat_, "writing through null pointer "
			 "(argument %d)", arg_num);

	      /* Check for reading through a NULL pointer.  Ignore
		 printf-family of functions as they are checked for
		 null arguments by the middle-end.  */
	      if (fki->conversion_specs != print_char_table
		  && types->reading_from_flag
		  && i == 0
		  && cur_param != 0
		  && integer_zerop (cur_param))
		warning (OPT_Wformat_, "reading through null pointer "
			 "(argument %d)", arg_num);

	      /* Follow the argument expression through the indirection
		 when it is literally "&object"; otherwise the pointed-to
		 expression is unknown from here on.  */
	      if (cur_param != 0 && TREE_CODE (cur_param) == ADDR_EXPR)
		cur_param = TREE_OPERAND (cur_param, 0);
	      else
		cur_param = 0;

	      /* See if this is an attempt to write into a const type with
		 scanf or with printf "%n".  Note: the writing in happens
		 at the first indirection only, if for example
		 void * const * is passed to scanf %p; passing
		 const void ** is simply passing an incompatible type.  */
	      if (types->writing_in_flag
		  && i == 0
		  && (TYPE_READONLY (cur_type)
		      || (cur_param != 0
			  && (CONSTANT_CLASS_P (cur_param)
			      || (DECL_P (cur_param)
				  && TREE_READONLY (cur_param))))))
		warning (OPT_Wformat_, "writing into constant object "
			 "(argument %d)", arg_num);

	      /* If there are extra type qualifiers beyond the first
		 indirection, then this makes the types technically
		 incompatible.  */
	      if (i > 0
		  && pedantic
		  && (TYPE_READONLY (cur_type)
		      || TYPE_VOLATILE (cur_type)
		      || TYPE_ATOMIC (cur_type)
		      || TYPE_RESTRICT (cur_type)))
		warning (OPT_Wformat_, "extra type qualifiers in format "
			 "argument (argument %d)",
			 arg_num);

	    }
	  else
	    {
	      /* A pointer was expected at this level but the argument
		 type is not one.  */
	      format_type_warning (fmt_loc, param_loc,
				   types, wanted_type, orig_cur_type, fki,
				   offset_to_type_start, conversion_char);
	      break;
	    }
	}

      /* The loop above stopped early (error type, or a mismatch that has
	 already been reported), so there is nothing more to check.  */
      if (i < types->pointer_count)
	continue;

      cur_type = TYPE_MAIN_VARIANT (cur_type);

      /* Check whether the argument type is a character type.  This leniency
	 only applies to certain formats, flagged with 'c'.  */
      if (types->char_lenient_flag)
	char_type_flag = (cur_type == char_type_node
			  || cur_type == signed_char_type_node
			  || cur_type == unsigned_char_type_node);

      /* Check the type of the "real" argument, if there's a type we want.  */
      if (lang_hooks.types_compatible_p (wanted_type, cur_type))
	continue;
      /* If we want 'void *', allow any pointer type.
	 (Anything else would already have got a warning.)
	 With -Wpedantic, only allow pointers to void and to character
	 types.  */
      if (wanted_type == void_type_node
	  && (!pedantic || (i == 1 && char_type_flag)))
	continue;
      /* Don't warn about differences merely in signedness, unless
	 -Wpedantic.  With -Wpedantic, warn if the type is a pointer
	 target and not a character type, and for character types at
	 a second level of indirection.  */
      if (TREE_CODE (wanted_type) == INTEGER_TYPE
	  && TREE_CODE (cur_type) == INTEGER_TYPE
	  && ((!pedantic && !warn_format_signedness)
	      || (i == 0 && !warn_format_signedness)
	      || (i == 1 && char_type_flag))
	  && (TYPE_UNSIGNED (wanted_type)
	      ? wanted_type == c_common_unsigned_type (cur_type)
	      : wanted_type == c_common_signed_type (cur_type)))
	continue;
      /* Don't warn about differences merely in signedness if we know
	 that the current type is integer-promoted and its original type
	 was unsigned such as that it is in the range of WANTED_TYPE.  */
      if (TREE_CODE (wanted_type) == INTEGER_TYPE
	  && TREE_CODE (cur_type) == INTEGER_TYPE
	  && warn_format_signedness
	  && TYPE_UNSIGNED (wanted_type)
	  && cur_param != NULL_TREE
	  && TREE_CODE (cur_param) == NOP_EXPR)
	{
	  /* T is the type of the operand before the conversion.  */
	  tree t = TREE_TYPE (TREE_OPERAND (cur_param, 0));
	  if (TYPE_UNSIGNED (t)
	      && cur_type == lang_hooks.types.type_promotes_to (t))
	    continue;
	}
      /* Likewise, "signed char", "unsigned char" and "char" are
	 equivalent but the above test won't consider them equivalent.  */
      if (wanted_type == char_type_node
	  && (!pedantic || i < 2)
	  && char_type_flag)
	continue;
      /* With scalar_identity_flag set, accept any type of the same tree
	 code (or any pair of integral types) with equal precision.  */
      if (types->scalar_identity_flag
	  && (TREE_CODE (cur_type) == TREE_CODE (wanted_type)
	      || (INTEGRAL_TYPE_P (cur_type)
		  && INTEGRAL_TYPE_P (wanted_type)))
	  && TYPE_PRECISION (cur_type) == TYPE_PRECISION (wanted_type))
	continue;
      /* Now we have a type mismatch.  */
      format_type_warning (fmt_loc, param_loc, types,
			   wanted_type, orig_cur_type, fki,
			   offset_to_type_start, conversion_char);
    }
}
4269 
4270 /* Given type TYPE, attempt to dereference the type N times
4271    (e.g. from ("int ***", 2) to "int *")
4272 
4273    Return the derefenced type, with any qualifiers
4274    such as "const" stripped from the result, or
4275    NULL if unsuccessful (e.g. TYPE is not a pointer type).  */
4276 
4277 static tree
deref_n_times(tree type,int n)4278 deref_n_times (tree type, int n)
4279 {
4280   gcc_assert (type);
4281 
4282   for (int i = n; i > 0; i--)
4283     {
4284       if (TREE_CODE (type) != POINTER_TYPE)
4285 	return NULL_TREE;
4286       type = TREE_TYPE (type);
4287     }
4288   /* Strip off any "const" etc.  */
4289   return build_qualified_type (type, 0);
4290 }
4291 
4292 /* Lookup the format code for FORMAT_LEN within FLI,
4293    returning the string code for expressing it, or NULL
4294    if it is not found.  */
4295 
4296 static const char *
get_modifier_for_format_len(const format_length_info * fli,enum format_lengths format_len)4297 get_modifier_for_format_len (const format_length_info *fli,
4298 			     enum format_lengths format_len)
4299 {
4300   for (; fli->name; fli++)
4301     {
4302       if (fli->index == format_len)
4303 	return fli->name;
4304       if (fli->double_index == format_len)
4305 	return fli->double_name;
4306     }
4307   return NULL;
4308 }
4309 
#if CHECKING_P

namespace selftest {

/* Selftest for get_modifier_for_format_len: verify that looking up
   FMT_LEN_h, FMT_LEN_hh and FMT_LEN_L in printf_length_specs gives
   "h", "hh" and "L" respectively, and that FMT_LEN_none gives NULL.  */

static void
test_get_modifier_for_format_len ()
{
  ASSERT_STREQ ("h",
		get_modifier_for_format_len (printf_length_specs, FMT_LEN_h));
  ASSERT_STREQ ("hh",
		get_modifier_for_format_len (printf_length_specs, FMT_LEN_hh));
  ASSERT_STREQ ("L",
		get_modifier_for_format_len (printf_length_specs, FMT_LEN_L));
  ASSERT_EQ (NULL,
	     get_modifier_for_format_len (printf_length_specs, FMT_LEN_none));
}

} // namespace selftest

#endif /* CHECKING_P */
4330 
4331 /* Determine if SPEC_TYPE and ARG_TYPE are sufficiently similar for a
4332    format_type_detail using SPEC_TYPE to be offered as a suggestion for
4333    Wformat type errors where the argument has type ARG_TYPE.  */
4334 
4335 static bool
matching_type_p(tree spec_type,tree arg_type)4336 matching_type_p (tree spec_type, tree arg_type)
4337 {
4338   gcc_assert (spec_type);
4339   gcc_assert (arg_type);
4340 
4341   /* If any of the types requires structural equality, we can't compare
4342      their canonical types.  */
4343   if (TYPE_STRUCTURAL_EQUALITY_P (spec_type)
4344       || TYPE_STRUCTURAL_EQUALITY_P (arg_type))
4345     return false;
4346 
4347   spec_type = TYPE_CANONICAL (spec_type);
4348   arg_type = TYPE_CANONICAL (arg_type);
4349 
4350   if (TREE_CODE (spec_type) == INTEGER_TYPE
4351       && TREE_CODE (arg_type) == INTEGER_TYPE
4352       && (TYPE_UNSIGNED (spec_type)
4353 	  ? spec_type == c_common_unsigned_type (arg_type)
4354 	  : spec_type == c_common_signed_type (arg_type)))
4355     return true;
4356 
4357   return spec_type == arg_type;
4358 }
4359 
4360 /* Subroutine of get_format_for_type.
4361 
4362    Generate a string containing the length modifier and conversion specifier
4363    that should be used to format arguments of type ARG_TYPE within FKI
4364    (effectively the inverse of the checking code).
4365 
4366    If CONVERSION_CHAR is not zero (the first pass), the resulting suggestion
4367    is required to use it, for correcting bogus length modifiers.
4368    If CONVERSION_CHAR is zero (the second pass), then allow any suggestion
4369    that matches ARG_TYPE.
4370 
4371    If successful, returns a non-NULL string which should be freed
4372    by the caller.
4373    Otherwise, returns NULL.  */
4374 
4375 static char *
get_format_for_type_1(const format_kind_info * fki,tree arg_type,char conversion_char)4376 get_format_for_type_1 (const format_kind_info *fki, tree arg_type,
4377 		       char conversion_char)
4378 {
4379   gcc_assert (arg_type);
4380 
4381   const format_char_info *spec;
4382   for (spec = &fki->conversion_specs[0];
4383        spec->format_chars;
4384        spec++)
4385     {
4386       if (conversion_char)
4387 	if (!strchr (spec->format_chars, conversion_char))
4388 	  continue;
4389 
4390       tree effective_arg_type = deref_n_times (arg_type,
4391 					       spec->pointer_count);
4392       if (!effective_arg_type)
4393 	continue;
4394       for (int i = 0; i < FMT_LEN_MAX; i++)
4395 	{
4396 	  const format_type_detail *ftd = &spec->types[i];
4397 	  if (!ftd->type || *ftd->type == NULL_TREE)
4398 	    continue;
4399 	  if (matching_type_p (*ftd->type, effective_arg_type))
4400 	    {
4401 	      const char *len_modifier
4402 		= get_modifier_for_format_len (fki->length_char_specs,
4403 					       (enum format_lengths)i);
4404 	      if (!len_modifier)
4405 		len_modifier = "";
4406 
4407 	      if (conversion_char)
4408 		/* We found a match, using the given conversion char - the
4409 		   length modifier was incorrect (or absent).
4410 		   Provide a suggestion using the conversion char with the
4411 		   correct length modifier for the type.  */
4412 		return xasprintf ("%s%c", len_modifier, conversion_char);
4413 	      else
4414 		/* 2nd pass: no match was possible using the user-provided
4415 		   conversion char, but we do have a match without using it.
4416 		   Provide a suggestion using the first conversion char
4417 		   listed for the given type.  */
4418 		return xasprintf ("%s%c", len_modifier, spec->format_chars[0]);
4419 	    }
4420 	}
4421    }
4422 
4423   return NULL;
4424 }
4425 
4426 /* Generate a string containing the length modifier and conversion specifier
4427    that should be used to format arguments of type ARG_TYPE within FKI
4428    (effectively the inverse of the checking code).
4429 
4430    If successful, returns a non-NULL string which should be freed
4431    by the caller.
4432    Otherwise, returns NULL.  */
4433 
4434 static char *
get_format_for_type(const format_kind_info * fki,tree arg_type,char conversion_char)4435 get_format_for_type (const format_kind_info *fki, tree arg_type,
4436 		     char conversion_char)
4437 {
4438   gcc_assert (arg_type);
4439   gcc_assert (conversion_char);
4440 
4441   /* First pass: look for a format_char_info containing CONVERSION_CHAR
4442      If we find one, then presumably the length modifier was incorrect
4443      (or absent).  */
4444   char *result = get_format_for_type_1 (fki, arg_type, conversion_char);
4445   if (result)
4446     return result;
4447 
4448   /* Second pass: we didn't find a match for CONVERSION_CHAR, so try
4449      matching just on the type. */
4450   return get_format_for_type_1 (fki, arg_type, '\0');
4451 }
4452 
4453 /* Attempt to get a string for use as a replacement fix-it hint for the
4454    source range in FMT_LOC.
4455 
4456    Preserve all of the text within the range of FMT_LOC up to
4457    OFFSET_TO_TYPE_START, replacing the rest with an appropriate
4458    length modifier and conversion specifier for ARG_TYPE, attempting
4459    to keep the user-provided CONVERSION_CHAR if possible.
4460 
4461    For example, given a long vs long long mismatch for arg5 here:
4462 
4463     000000000111111111122222222223333333333|
4464     123456789012345678901234567890123456789` column numbers
4465                    0000000000111111111122|
4466                    0123456789012345678901` string offsets
4467                           V~~~~~~~~ : range of FMT_LOC, from cols 23-31
4468       sprintf (d, "before %-+*.*lld after", arg3, arg4, arg5);
4469                                 ^ ^
4470                                 | ` CONVERSION_CHAR: 'd'
4471                                 type starts here
4472 
4473    where OFFSET_TO_TYPE_START is 13 (the offset to the "lld" within the
4474    STRING_CST), where the user provided:
4475      %-+*.*lld
4476    the result (assuming "long" argument 5) should be:
4477      %-+*.*ld
4478 
4479    If successful, returns a non-NULL string which should be freed
4480    by the caller.
4481    Otherwise, returns NULL.  */
4482 
4483 static char *
get_corrected_substring(const substring_loc & fmt_loc,format_wanted_type * type,tree arg_type,const format_kind_info * fki,int offset_to_type_start,char conversion_char)4484 get_corrected_substring (const substring_loc &fmt_loc,
4485 			 format_wanted_type *type, tree arg_type,
4486 			 const format_kind_info *fki,
4487 			 int offset_to_type_start, char conversion_char)
4488 {
4489   /* Attempt to provide hints for argument types, but not for field widths
4490      and precisions.  */
4491   if (!arg_type)
4492     return NULL;
4493   if (type->kind != CF_KIND_FORMAT)
4494     return NULL;
4495 
4496   /* Locate the current code within the source range, rejecting
4497      any awkward cases where the format string occupies more than
4498      one line.
4499      Lookup the place where the type starts (including any length
4500      modifiers), getting it as the caret location.  */
4501   substring_loc type_loc (fmt_loc);
4502   type_loc.set_caret_index (offset_to_type_start);
4503 
4504   location_t fmt_substring_loc;
4505   const char *err = type_loc.get_location (&fmt_substring_loc);
4506   if (err)
4507     return NULL;
4508 
4509   source_range fmt_substring_range
4510     = get_range_from_loc (line_table, fmt_substring_loc);
4511 
4512   expanded_location caret
4513     = expand_location_to_spelling_point (fmt_substring_loc);
4514   expanded_location start
4515     = expand_location_to_spelling_point (fmt_substring_range.m_start);
4516   expanded_location finish
4517     = expand_location_to_spelling_point (fmt_substring_range.m_finish);
4518   if (caret.file != start.file)
4519     return NULL;
4520   if (start.file != finish.file)
4521     return NULL;
4522   if (caret.line != start.line)
4523     return NULL;
4524   if (start.line != finish.line)
4525     return NULL;
4526   if (start.column > caret.column)
4527     return NULL;
4528   if (start.column > finish.column)
4529     return NULL;
4530   if (caret.column > finish.column)
4531     return NULL;
4532 
4533   char_span line = location_get_source_line (start.file, start.line);
4534   if (!line)
4535     return NULL;
4536 
4537   /* If we got this far, then we have the line containing the
4538      existing conversion specification.
4539 
4540      Generate a trimmed copy, containing the prefix part of the conversion
4541      specification, up to the (but not including) the length modifier.
4542      In the above example, this would be "%-+*.*".  */
4543   int length_up_to_type = caret.column - start.column;
4544   char_span prefix_span = line.subspan (start.column - 1, length_up_to_type);
4545   char *prefix = prefix_span.xstrdup ();
4546 
4547   /* Now attempt to generate a suggestion for the rest of the specification
4548      (length modifier and conversion char), based on ARG_TYPE and
4549      CONVERSION_CHAR.
4550      In the above example, this would be "ld".  */
4551   char *format_for_type = get_format_for_type (fki, arg_type, conversion_char);
4552   if (!format_for_type)
4553     {
4554       free (prefix);
4555       return NULL;
4556     }
4557 
4558   /* Success.  Generate the resulting suggestion for the whole range of
4559      FMT_LOC by concatenating the two strings.
4560      In the above example, this would be "%-+*.*ld".  */
4561   char *result = concat (prefix, format_for_type, NULL);
4562   free (format_for_type);
4563   free (prefix);
4564   return result;
4565 }
4566 
4567 /* Helper class for adding zero or more trailing '*' to types.
4568 
4569    The format type and name exclude any '*' for pointers, so those
4570    must be formatted manually.  For all the types we currently have,
4571    this is adequate, but formats taking pointers to functions or
4572    arrays would require the full type to be built up in order to
4573    print it with %T.  */
4574 
4575 class indirection_suffix
4576 {
4577  public:
indirection_suffix(int pointer_count)4578   indirection_suffix (int pointer_count) : m_pointer_count (pointer_count) {}
4579 
4580   /* Determine the size of the buffer (including NUL-terminator).  */
4581 
get_buffer_size()4582   size_t get_buffer_size () const
4583   {
4584     return m_pointer_count + 2;
4585   }
4586 
4587   /* Write the '*' to DST and add a NUL-terminator.  */
4588 
fill_buffer(char * dst)4589   void fill_buffer (char *dst) const
4590   {
4591     if (m_pointer_count == 0)
4592       dst[0] = 0;
4593     else if (c_dialect_cxx ())
4594       {
4595 	memset (dst, '*', m_pointer_count);
4596 	dst[m_pointer_count] = 0;
4597       }
4598     else
4599       {
4600 	dst[0] = ' ';
4601 	memset (dst + 1, '*', m_pointer_count);
4602 	dst[m_pointer_count + 1] = 0;
4603       }
4604   }
4605 
4606  private:
4607   int m_pointer_count;
4608 };
4609 
4610 /* Subclass of range_label for labelling the range in the format string
4611    with the type in question, adding trailing '*' for pointer_count.  */
4612 
4613 class range_label_for_format_type_mismatch
4614   : public range_label_for_type_mismatch
4615 {
4616  public:
range_label_for_format_type_mismatch(tree labelled_type,tree other_type,int pointer_count)4617   range_label_for_format_type_mismatch (tree labelled_type, tree other_type,
4618 					int pointer_count)
4619   : range_label_for_type_mismatch (labelled_type, other_type),
4620     m_pointer_count (pointer_count)
4621   {
4622   }
4623 
get_text(unsigned range_idx)4624   label_text get_text (unsigned range_idx) const FINAL OVERRIDE
4625   {
4626     label_text text = range_label_for_type_mismatch::get_text (range_idx);
4627     if (text.m_buffer == NULL)
4628       return text;
4629 
4630     indirection_suffix suffix (m_pointer_count);
4631     char *p = (char *) alloca (suffix.get_buffer_size ());
4632     suffix.fill_buffer (p);
4633 
4634     char *result = concat (text.m_buffer, p, NULL);
4635     text.maybe_free ();
4636     return label_text::take (result);
4637   }
4638 
4639  private:
4640   int m_pointer_count;
4641 };
4642 
4643 /* Give a warning about a format argument of different type from that expected.
4644    The range of the diagnostic is taken from WHOLE_FMT_LOC; the caret location
4645    is based on the location of the char at TYPE->offset_loc.
4646    PARAM_LOC is the location of the relevant argument, or UNKNOWN_LOCATION
4647    if this is unavailable.
4648    WANTED_TYPE is the type the argument should have,
4649    possibly stripped of pointer dereferences.  The description (such as "field
4650    precision"), the placement in the format string, a possibly more
4651    friendly name of WANTED_TYPE, and the number of pointer dereferences
4652    are taken from TYPE.  ARG_TYPE is the type of the actual argument,
4653    or NULL if it is missing.
4654 
4655    OFFSET_TO_TYPE_START is the offset within the execution-charset encoded
4656    format string to where type information begins for the conversion
4657    (the length modifier and conversion specifier).
4658    CONVERSION_CHAR is the user-provided conversion specifier.
4659 
4660    For example, given a type mismatch for argument 5 here:
4661 
4662     00000000011111111112222222222333333333344444444445555555555|
4663     12345678901234567890123456789012345678901234567890123456789` column numbers
4664                    0000000000111111111122|
4665                    0123456789012345678901` offsets within STRING_CST
4666                           V~~~~~~~~ : range of WHOLE_FMT_LOC, from cols 23-31
4667       sprintf (d, "before %-+*.*lld after", int_expr, int_expr, long_expr);
4668                                 ^ ^                             ^~~~~~~~~
4669                                 | ` CONVERSION_CHAR: 'd'        PARAM_LOC
4670                                 type starts here
4671 
4672    OFFSET_TO_TYPE_START is 13, the offset to the "lld" within the
4673    STRING_CST.  */
4674 
4675 static void
format_type_warning(const substring_loc & whole_fmt_loc,location_t param_loc,format_wanted_type * type,tree wanted_type,tree arg_type,const format_kind_info * fki,int offset_to_type_start,char conversion_char)4676 format_type_warning (const substring_loc &whole_fmt_loc,
4677 		     location_t param_loc,
4678 		     format_wanted_type *type,
4679 		     tree wanted_type, tree arg_type,
4680 		     const format_kind_info *fki,
4681 		     int offset_to_type_start,
4682 		     char conversion_char)
4683 {
4684   enum format_specifier_kind kind = type->kind;
4685   const char *wanted_type_name = type->wanted_type_name;
4686   const char *format_start = type->format_start;
4687   int format_length = type->format_length;
4688   int pointer_count = type->pointer_count;
4689   int arg_num = type->arg_num;
4690 
4691   /* If ARG_TYPE is a typedef with a misleading name (for example,
4692      size_t but not the standard size_t expected by printf %zu), avoid
4693      printing the typedef name.  */
4694   if (wanted_type_name
4695       && arg_type
4696       && TYPE_NAME (arg_type)
4697       && TREE_CODE (TYPE_NAME (arg_type)) == TYPE_DECL
4698       && DECL_NAME (TYPE_NAME (arg_type))
4699       && !strcmp (wanted_type_name,
4700 		  lang_hooks.decl_printable_name (TYPE_NAME (arg_type), 2)))
4701     arg_type = TYPE_MAIN_VARIANT (arg_type);
4702 
4703   indirection_suffix suffix (pointer_count);
4704   char *p = (char *) alloca (suffix.get_buffer_size ());
4705   suffix.fill_buffer (p);
4706 
4707   /* WHOLE_FMT_LOC has the caret at the end of the range.
4708      Set the caret to be at the offset from TYPE.  Subtract one
4709      from the offset for the same reason as in format_warning_at_char.  */
4710   substring_loc fmt_loc (whole_fmt_loc);
4711   fmt_loc.set_caret_index (type->offset_loc - 1);
4712 
4713   range_label_for_format_type_mismatch fmt_label (wanted_type, arg_type,
4714 						  pointer_count);
4715   range_label_for_type_mismatch param_label (arg_type, wanted_type);
4716 
4717   /* Get a string for use as a replacement fix-it hint for the range in
4718      fmt_loc, or NULL.  */
4719   char *corrected_substring
4720     = get_corrected_substring (fmt_loc, type, arg_type, fki,
4721 			       offset_to_type_start, conversion_char);
4722   format_string_diagnostic_t diag (fmt_loc, &fmt_label, param_loc, &param_label,
4723 				   corrected_substring);
4724   if (wanted_type_name)
4725     {
4726       if (arg_type)
4727 	diag.emit_warning
4728 	  (OPT_Wformat_,
4729 	   "%s %<%s%.*s%> expects argument of type %<%s%s%>, "
4730 	   "but argument %d has type %qT",
4731 	   gettext (kind_descriptions[kind]),
4732 	   (kind == CF_KIND_FORMAT ? "%" : ""),
4733 	   format_length, format_start,
4734 	   wanted_type_name, p, arg_num, arg_type);
4735       else
4736 	diag.emit_warning
4737 	  (OPT_Wformat_,
4738 	   "%s %<%s%.*s%> expects a matching %<%s%s%> argument",
4739 	   gettext (kind_descriptions[kind]),
4740 	   (kind == CF_KIND_FORMAT ? "%" : ""),
4741 	   format_length, format_start, wanted_type_name, p);
4742     }
4743   else
4744     {
4745       if (arg_type)
4746 	diag.emit_warning
4747 	  (OPT_Wformat_,
4748 	   "%s %<%s%.*s%> expects argument of type %<%T%s%>, "
4749 	   "but argument %d has type %qT",
4750 	   gettext (kind_descriptions[kind]),
4751 	   (kind == CF_KIND_FORMAT ? "%" : ""),
4752 	   format_length, format_start,
4753 	   wanted_type, p, arg_num, arg_type);
4754       else
4755 	diag.emit_warning
4756 	  (OPT_Wformat_,
4757 	   "%s %<%s%.*s%> expects a matching %<%T%s%> argument",
4758 	   gettext (kind_descriptions[kind]),
4759 	   (kind == CF_KIND_FORMAT ? "%" : ""),
4760 	   format_length, format_start, wanted_type, p);
4761     }
4762 
4763   free (corrected_substring);
4764 }
4765 
4766 
4767 /* Given a format_char_info array FCI, and a character C, this function
4768    returns the index into the conversion_specs where that specifier's
4769    data is located.  The character must exist.  */
4770 static unsigned int
find_char_info_specifier_index(const format_char_info * fci,int c)4771 find_char_info_specifier_index (const format_char_info *fci, int c)
4772 {
4773   unsigned i;
4774 
4775   for (i = 0; fci->format_chars; i++, fci++)
4776     if (strchr (fci->format_chars, c))
4777       return i;
4778 
4779   /* We shouldn't be looking for a non-existent specifier.  */
4780   gcc_unreachable ();
4781 }
4782 
4783 /* Given a format_length_info array FLI, and a character C, this
4784    function returns the index into the conversion_specs where that
4785    modifier's data is located.  The character must exist.  */
4786 static unsigned int
find_length_info_modifier_index(const format_length_info * fli,int c)4787 find_length_info_modifier_index (const format_length_info *fli, int c)
4788 {
4789   unsigned i;
4790 
4791   for (i = 0; fli->name; i++, fli++)
4792     if (strchr (fli->name, c))
4793       return i;
4794 
4795   /* We shouldn't be looking for a non-existent modifier.  */
4796   gcc_unreachable ();
4797 }
4798 
4799 /* Determine the type of HOST_WIDE_INT in the code being compiled for
4800    use in GCC's __asm_fprintf__ custom format attribute.  You must
4801    have set dynamic_format_types before calling this function.  */
4802 static void
init_dynamic_asm_fprintf_info(void)4803 init_dynamic_asm_fprintf_info (void)
4804 {
4805   static tree hwi;
4806 
4807   if (!hwi)
4808     {
4809       format_length_info *new_asm_fprintf_length_specs;
4810       unsigned int i;
4811 
4812       /* Find the underlying type for HOST_WIDE_INT.  For the %w
4813 	 length modifier to work, one must have issued: "typedef
4814 	 HOST_WIDE_INT __gcc_host_wide_int__;" in one's source code
4815 	 prior to using that modifier.  */
4816       hwi = maybe_get_identifier ("__gcc_host_wide_int__");
4817       if (!hwi)
4818 	{
4819 	  error ("%<__gcc_host_wide_int__%> is not defined as a type");
4820 	  return;
4821 	}
4822       hwi = identifier_global_value (hwi);
4823       if (!hwi || TREE_CODE (hwi) != TYPE_DECL)
4824 	{
4825 	  error ("%<__gcc_host_wide_int__%> is not defined as a type");
4826 	  return;
4827 	}
4828       hwi = DECL_ORIGINAL_TYPE (hwi);
4829       gcc_assert (hwi);
4830       if (hwi != long_integer_type_node && hwi != long_long_integer_type_node)
4831 	{
4832 	  error ("%<__gcc_host_wide_int__%> is not defined as %<long%>"
4833 		 " or %<long long%>");
4834 	  return;
4835 	}
4836 
4837       /* Create a new (writable) copy of asm_fprintf_length_specs.  */
4838       new_asm_fprintf_length_specs = (format_length_info *)
4839 				     xmemdup (asm_fprintf_length_specs,
4840 					      sizeof (asm_fprintf_length_specs),
4841 					      sizeof (asm_fprintf_length_specs));
4842 
4843       /* HOST_WIDE_INT must be one of 'long' or 'long long'.  */
4844       i = find_length_info_modifier_index (new_asm_fprintf_length_specs, 'w');
4845       if (hwi == long_integer_type_node)
4846 	new_asm_fprintf_length_specs[i].index = FMT_LEN_l;
4847       else if (hwi == long_long_integer_type_node)
4848 	new_asm_fprintf_length_specs[i].index = FMT_LEN_ll;
4849       else
4850 	gcc_unreachable ();
4851 
4852       /* Assign the new data for use.  */
4853       dynamic_format_types[asm_fprintf_format_type].length_char_specs =
4854 	new_asm_fprintf_length_specs;
4855     }
4856 }
4857 
4858 /* Determine the type of a "locus" in the code being compiled for use
4859    in GCC's __gcc_gfc__ custom format attribute.  You must have set
4860    dynamic_format_types before calling this function.  */
4861 static void
init_dynamic_gfc_info(void)4862 init_dynamic_gfc_info (void)
4863 {
4864   if (!locus)
4865     {
4866       static format_char_info *gfc_fci;
4867 
4868       /* For the GCC __gcc_gfc__ custom format specifier to work, one
4869 	 must have declared 'locus' prior to using this attribute.  If
4870 	 we haven't seen this declarations then you shouldn't use the
4871 	 specifier requiring that type.  */
4872       if ((locus = maybe_get_identifier ("locus")))
4873 	{
4874 	  locus = identifier_global_value (locus);
4875 	  if (locus)
4876 	    {
4877 	      if (TREE_CODE (locus) != TYPE_DECL
4878 		  || TREE_TYPE (locus) == error_mark_node)
4879 		{
4880 		  error ("%<locus%> is not defined as a type");
4881 		  locus = 0;
4882 		}
4883 	      else
4884 		locus = TREE_TYPE (locus);
4885 	    }
4886 	}
4887 
4888       /* Assign the new data for use.  */
4889 
4890       /* Handle the __gcc_gfc__ format specifics.  */
4891       if (!gfc_fci)
4892 	dynamic_format_types[gcc_gfc_format_type].conversion_specs =
4893 	  gfc_fci = (format_char_info *)
4894 		     xmemdup (gcc_gfc_char_table,
4895 			      sizeof (gcc_gfc_char_table),
4896 			      sizeof (gcc_gfc_char_table));
4897       if (locus)
4898 	{
4899 	  const unsigned i = find_char_info_specifier_index (gfc_fci, 'L');
4900 	  gfc_fci[i].types[0].type = &locus;
4901 	  gfc_fci[i].pointer_count = 1;
4902 	}
4903     }
4904 }
4905 
4906 /* Lookup the type named NAME and return a NAME type if found.
4907    Otherwise, return void_type_node if NAME has not been used yet,
4908    or NULL_TREE if NAME is not a type (issuing an error).  */
4909 
4910 static tree
get_named_type(const char * name)4911 get_named_type (const char *name)
4912 {
4913   if (tree result = maybe_get_identifier (name))
4914     {
4915       result = identifier_global_tag (result);
4916       if (result)
4917 	{
4918 	  if (TYPE_P (result))
4919 	    ;
4920 	  else if (TREE_CODE (result) == TYPE_DECL)
4921 	    result = TREE_TYPE (result);
4922 	  else
4923 	    {
4924 	      error ("%qs is not defined as a type", name);
4925 	      result = NULL_TREE;
4926 	    }
4927 	}
4928       return result;
4929     }
4930   else
4931     return void_type_node;
4932 }
4933 
4934 /* Determine the types of "tree" and "location_t" in the code being
4935    compiled for use in GCC's diagnostic custom format attributes.  You
4936    must have set dynamic_format_types before calling this function.  */
4937 static void
init_dynamic_diag_info(void)4938 init_dynamic_diag_info (void)
4939 {
4940   /* For the GCC-diagnostics custom format specifiers to work, one
4941      must have declared 'tree' and 'location_t' prior to using those
4942      attributes.  If we haven't seen these declarations then
4943      the specifiers requiring these types shouldn't be used.
4944      However we don't force a hard ICE because we may see only one
4945      or the other type.  */
4946   if (tree loc = maybe_get_identifier ("location_t"))
4947     {
4948       loc = identifier_global_value (loc);
4949       if (loc && TREE_CODE (loc) != TYPE_DECL)
4950 	error ("%<location_t%> is not defined as a type");
4951     }
4952 
4953   /* Initialize the global tree node type local to this file.  */
4954   if (!local_tree_type_node
4955       || local_tree_type_node == void_type_node)
4956     {
4957       /* We need to grab the underlying 'union tree_node' so peek into
4958 	 an extra type level.  */
4959       if ((local_tree_type_node = maybe_get_identifier ("tree")))
4960 	{
4961 	  local_tree_type_node
4962 	    = identifier_global_value (local_tree_type_node);
4963 	  if (local_tree_type_node)
4964 	    {
4965 	      if (TREE_CODE (local_tree_type_node) != TYPE_DECL)
4966 		{
4967 		  error ("%<tree%> is not defined as a type");
4968 		  local_tree_type_node = NULL_TREE;
4969 		}
4970 	      else if (TREE_CODE (TREE_TYPE (local_tree_type_node))
4971 		       != POINTER_TYPE)
4972 		{
4973 		  error ("%<tree%> is not defined as a pointer type");
4974 		  local_tree_type_node = NULL_TREE;
4975 		}
4976 	      else
4977 		local_tree_type_node
4978 		  = TREE_TYPE (TREE_TYPE (local_tree_type_node));
4979 	    }
4980 	}
4981       else
4982 	local_tree_type_node = void_type_node;
4983     }
4984 
4985   /* Similar to the above but for gimple*.  */
4986   if (!local_gimple_ptr_node
4987       || local_gimple_ptr_node == void_type_node)
4988     local_gimple_ptr_node = get_named_type ("gimple");
4989 
4990   /* Similar to the above but for cgraph_node*.  */
4991   if (!local_cgraph_node_ptr_node
4992       || local_cgraph_node_ptr_node == void_type_node)
4993     local_cgraph_node_ptr_node = get_named_type ("cgraph_node");
4994 
4995   /* Similar to the above but for diagnostic_event_id_t*.  */
4996   if (!local_event_ptr_node
4997       || local_event_ptr_node == void_type_node)
4998     local_event_ptr_node = get_named_type ("diagnostic_event_id_t");
4999 
5000   static tree hwi;
5001 
5002   if (!hwi)
5003     {
5004       static format_length_info *diag_ls;
5005       unsigned int i;
5006 
5007       /* Find the underlying type for HOST_WIDE_INT.  For the 'w'
5008 	 length modifier to work, one must have issued: "typedef
5009 	 HOST_WIDE_INT __gcc_host_wide_int__;" in one's source code
5010 	 prior to using that modifier.  */
5011       if ((hwi = maybe_get_identifier ("__gcc_host_wide_int__")))
5012 	{
5013 	  hwi = identifier_global_value (hwi);
5014 	  if (hwi)
5015 	    {
5016 	      if (TREE_CODE (hwi) != TYPE_DECL)
5017 		{
5018 		  error ("%<__gcc_host_wide_int__%> is not defined as a type");
5019 		  hwi = 0;
5020 		}
5021 	      else
5022 		{
5023 		  hwi = DECL_ORIGINAL_TYPE (hwi);
5024 		  gcc_assert (hwi);
5025 		  if (hwi != long_integer_type_node
5026 		      && hwi != long_long_integer_type_node)
5027 		    {
5028 		      error ("%<__gcc_host_wide_int__%> is not defined"
5029 			     " as %<long%> or %<long long%>");
5030 		      hwi = 0;
5031 		    }
5032 		}
5033 	    }
5034 	}
5035 
5036       /* Assign the new data for use.  */
5037 
5038       /* All the GCC diag formats use the same length specs.  */
5039       if (!diag_ls)
5040 	dynamic_format_types[gcc_diag_format_type].length_char_specs =
5041 	  dynamic_format_types[gcc_tdiag_format_type].length_char_specs =
5042 	  dynamic_format_types[gcc_cdiag_format_type].length_char_specs =
5043 	  dynamic_format_types[gcc_cxxdiag_format_type].length_char_specs =
5044 	  dynamic_format_types[gcc_dump_printf_format_type].length_char_specs =
5045 	  diag_ls = (format_length_info *)
5046 		    xmemdup (gcc_diag_length_specs,
5047 			     sizeof (gcc_diag_length_specs),
5048 			     sizeof (gcc_diag_length_specs));
5049       if (hwi)
5050 	{
5051 	  /* HOST_WIDE_INT must be one of 'long' or 'long long'.  */
5052 	  i = find_length_info_modifier_index (diag_ls, 'w');
5053 	  if (hwi == long_integer_type_node)
5054 	    diag_ls[i].index = FMT_LEN_l;
5055 	  else if (hwi == long_long_integer_type_node)
5056 	    diag_ls[i].index = FMT_LEN_ll;
5057 	  else
5058 	    gcc_unreachable ();
5059 	}
5060     }
5061 
5062   /* It's safe to "re-initialize these to the same values.  */
5063   dynamic_format_types[gcc_diag_format_type].conversion_specs =
5064     gcc_diag_char_table;
5065   dynamic_format_types[gcc_tdiag_format_type].conversion_specs =
5066     gcc_tdiag_char_table;
5067   dynamic_format_types[gcc_cdiag_format_type].conversion_specs =
5068     gcc_cdiag_char_table;
5069   dynamic_format_types[gcc_cxxdiag_format_type].conversion_specs =
5070     gcc_cxxdiag_char_table;
5071   dynamic_format_types[gcc_dump_printf_format_type].conversion_specs =
5072     gcc_dump_printf_char_table;
5073 }
5074 
/* Optional target-provided data and hooks.  Each declaration below is
   compiled only when the target defines the corresponding macro.  */

/* Table of additional format kinds supplied by the target; it is
   appended to the built-in format types in handle_format_attribute.  */
#ifdef TARGET_FORMAT_TYPES
extern const format_kind_info TARGET_FORMAT_TYPES[];
#endif

/* Target table of format attribute name overrides, consulted by
   convert_format_name_to_system_name.  */
#ifdef TARGET_OVERRIDES_FORMAT_ATTRIBUTES
extern const target_ovr_attr TARGET_OVERRIDES_FORMAT_ATTRIBUTES[];
#endif
/* Target initialization routine, called from
   convert_format_name_to_system_name before the override tables are
   searched.  */
#ifdef TARGET_OVERRIDES_FORMAT_INIT
  extern void TARGET_OVERRIDES_FORMAT_INIT (void);
#endif
5085 
/* Attributes such as "printf" are equivalent to those such as
   "gnu_printf" unless this is overridden by a target.
   Each entry pairs a "gnu_"-prefixed name with the plain name it
   stands for.  The list ends with an all-NULL entry, which is what the
   search loop in convert_format_name_to_system_name stops at.  */
static const target_ovr_attr gnu_target_overrides_format_attributes[] =
{
  { "gnu_printf",   "printf" },
  { "gnu_scanf",    "scanf" },
  { "gnu_strftime", "strftime" },
  { "gnu_strfmon",  "strfmon" },
  { NULL,           NULL }
};
5096 
5097 /* Translate to unified attribute name. This is used in decode_format_type and
5098    decode_format_attr. In attr_name the user specified argument is passed. It
5099    returns the unified format name from TARGET_OVERRIDES_FORMAT_ATTRIBUTES
5100    or the attr_name passed to this function, if there is no matching entry.  */
5101 static const char *
convert_format_name_to_system_name(const char * attr_name)5102 convert_format_name_to_system_name (const char *attr_name)
5103 {
5104   int i;
5105 
5106   if (attr_name == NULL || *attr_name == 0
5107       || strncmp (attr_name, "gcc_", 4) == 0)
5108     return attr_name;
5109 #ifdef TARGET_OVERRIDES_FORMAT_INIT
5110   TARGET_OVERRIDES_FORMAT_INIT ();
5111 #endif
5112 
5113 #ifdef TARGET_OVERRIDES_FORMAT_ATTRIBUTES
5114   /* Check if format attribute is overridden by target.  */
5115   if (TARGET_OVERRIDES_FORMAT_ATTRIBUTES != NULL
5116       && TARGET_OVERRIDES_FORMAT_ATTRIBUTES_COUNT > 0)
5117     {
5118       for (i = 0; i < TARGET_OVERRIDES_FORMAT_ATTRIBUTES_COUNT; ++i)
5119         {
5120           if (cmp_attribs (TARGET_OVERRIDES_FORMAT_ATTRIBUTES[i].named_attr_src,
5121 			   attr_name))
5122             return attr_name;
5123           if (cmp_attribs (TARGET_OVERRIDES_FORMAT_ATTRIBUTES[i].named_attr_dst,
5124 			   attr_name))
5125             return TARGET_OVERRIDES_FORMAT_ATTRIBUTES[i].named_attr_src;
5126         }
5127     }
5128 #endif
5129   /* Otherwise default to gnu format.  */
5130   for (i = 0;
5131        gnu_target_overrides_format_attributes[i].named_attr_src != NULL;
5132        ++i)
5133     {
5134       if (cmp_attribs (gnu_target_overrides_format_attributes[i].named_attr_src,
5135 		       attr_name))
5136         return attr_name;
5137       if (cmp_attribs (gnu_target_overrides_format_attributes[i].named_attr_dst,
5138 		       attr_name))
5139         return gnu_target_overrides_format_attributes[i].named_attr_src;
5140     }
5141 
5142   return attr_name;
5143 }
5144 
5145 /* Handle a "format" attribute; arguments as in
5146    struct attribute_spec.handler.  */
5147 tree
handle_format_attribute(tree * node,tree atname,tree args,int flags,bool * no_add_attrs)5148 handle_format_attribute (tree *node, tree atname, tree args,
5149 			 int flags, bool *no_add_attrs)
5150 {
5151   const_tree type = *node;
5152   function_format_info info;
5153 
5154 #ifdef TARGET_FORMAT_TYPES
5155   /* If the target provides additional format types, we need to
5156      add them to FORMAT_TYPES at first use.  */
5157   if (TARGET_FORMAT_TYPES != NULL && !dynamic_format_types)
5158     {
5159       dynamic_format_types = XNEWVEC (format_kind_info,
5160 				      n_format_types + TARGET_N_FORMAT_TYPES);
5161       memcpy (dynamic_format_types, format_types_orig,
5162 	      sizeof (format_types_orig));
5163       memcpy (&dynamic_format_types[n_format_types], TARGET_FORMAT_TYPES,
5164 	      TARGET_N_FORMAT_TYPES * sizeof (dynamic_format_types[0]));
5165 
5166       format_types = dynamic_format_types;
5167       /* Provide a reference for the first potential external type.  */
5168       first_target_format_type = n_format_types;
5169       n_format_types += TARGET_N_FORMAT_TYPES;
5170     }
5171 #endif
5172 
5173   /* Canonicalize name of format function.  */
5174   if (TREE_CODE (TREE_VALUE (args)) == IDENTIFIER_NODE)
5175     TREE_VALUE (args) = canonicalize_attr_name (TREE_VALUE (args));
5176 
5177   if (!decode_format_attr (type, atname, args, &info, /* validated_p = */false))
5178     {
5179       *no_add_attrs = true;
5180       return NULL_TREE;
5181     }
5182 
5183   if (prototype_p (type))
5184     {
5185       if (!check_format_string (type, info.format_num, flags,
5186 				no_add_attrs, info.format_type))
5187 	return NULL_TREE;
5188 
5189       if (info.first_arg_num != 0)
5190 	{
5191 	  unsigned HOST_WIDE_INT arg_num = 1;
5192 	  function_args_iterator iter;
5193 	  tree arg_type;
5194 
5195 	  /* Verify that first_arg_num points to the last arg,
5196 	     the ...  */
5197 	  FOREACH_FUNCTION_ARGS (type, arg_type, iter)
5198 	    arg_num++;
5199 
5200 	  if (arg_num != info.first_arg_num)
5201 	    {
5202 	      if (!(flags & (int) ATTR_FLAG_BUILT_IN))
5203 		error ("argument to be formatted is not %<...%>");
5204 	      *no_add_attrs = true;
5205 	      return NULL_TREE;
5206 	    }
5207 	}
5208     }
5209 
5210   /* Check if this is a strftime variant. Just for this variant
5211      FMT_FLAG_ARG_CONVERT is not set.  */
5212   if ((format_types[info.format_type].flags & (int) FMT_FLAG_ARG_CONVERT) == 0
5213       && info.first_arg_num != 0)
5214     {
5215       error ("strftime formats cannot format arguments");
5216       *no_add_attrs = true;
5217       return NULL_TREE;
5218     }
5219 
5220   /* If this is a custom GCC-internal format type, we have to
5221      initialize certain bits at runtime.  */
5222   if (info.format_type == asm_fprintf_format_type
5223       || info.format_type == gcc_gfc_format_type
5224       || info.format_type == gcc_diag_format_type
5225       || info.format_type == gcc_tdiag_format_type
5226       || info.format_type == gcc_cdiag_format_type
5227       || info.format_type == gcc_cxxdiag_format_type
5228       || info.format_type == gcc_dump_printf_format_type)
5229     {
5230       /* Our first time through, we have to make sure that our
5231 	 format_type data is allocated dynamically and is modifiable.  */
5232       if (!dynamic_format_types)
5233 	format_types = dynamic_format_types = (format_kind_info *)
5234 	  xmemdup (format_types_orig, sizeof (format_types_orig),
5235 		   sizeof (format_types_orig));
5236 
5237       /* If this is format __asm_fprintf__, we have to initialize
5238 	 GCC's notion of HOST_WIDE_INT for checking %wd.  */
5239       if (info.format_type == asm_fprintf_format_type)
5240 	init_dynamic_asm_fprintf_info ();
5241       /* If this is format __gcc_gfc__, we have to initialize GCC's
5242 	 notion of 'locus' at runtime for %L.  */
5243       else if (info.format_type == gcc_gfc_format_type)
5244 	init_dynamic_gfc_info ();
5245       /* If this is one of the diagnostic attributes, then we have to
5246 	 initialize 'location_t' and 'tree' at runtime.  */
5247       else if (info.format_type == gcc_diag_format_type
5248 	       || info.format_type == gcc_tdiag_format_type
5249 	       || info.format_type == gcc_cdiag_format_type
5250 	       || info.format_type == gcc_cxxdiag_format_type
5251 	       || info.format_type == gcc_dump_printf_format_type)
5252 	init_dynamic_diag_info ();
5253       else
5254 	gcc_unreachable ();
5255     }
5256 
5257   return NULL_TREE;
5258 }
5259 
5260 #if CHECKING_P
5261 
5262 namespace selftest {
5263 
5264 /* Selftests of location handling.  */
5265 
5266 /* Get the format_kind_info with the given name.  */
5267 
5268 static const format_kind_info *
get_info(const char * name)5269 get_info (const char *name)
5270 {
5271   int idx = decode_format_type (name);
5272   const format_kind_info *fki = &format_types[idx];
5273   ASSERT_STREQ (fki->name, name);
5274   return fki;
5275 }
5276 
/* Verify that get_format_for_type (FKI, TYPE, CONVERSION_CHAR)
   is EXPECTED_FORMAT.  The comparison is made with ASSERT_STREQ_AT
   using LOC, the location handed in by the caller.  FKI,
   EXPECTED_FORMAT and TYPE must all be non-null.  */

static void
assert_format_for_type_streq (const location &loc, const format_kind_info *fki,
			      const char *expected_format, tree type,
			      char conversion_char)
{
  gcc_assert (fki);
  gcc_assert (expected_format);
  gcc_assert (type);

  /* The string obtained here is released with free below.  */
  char *actual_format = get_format_for_type (fki, type, conversion_char);
  ASSERT_STREQ_AT (loc, expected_format, actual_format);
  free (actual_format);
}
5293 
/* Selftests for get_format_for_type.  */

/* Convenience wrapper around assert_format_for_type_streq that passes
   SELFTEST_LOCATION as the location.  It expands to a use of a
   variable named 'fki', which must therefore be in scope wherever the
   macro is used.  */

#define ASSERT_FORMAT_FOR_TYPE_STREQ(EXPECTED_FORMAT, TYPE, CONVERSION_CHAR) \
  assert_format_for_type_streq (SELFTEST_LOCATION, (fki), (EXPECTED_FORMAT), \
				(TYPE), (CONVERSION_CHAR))
5299 
/* Selftest for get_format_for_type for "printf"-style functions.
   Each assertion gives the format text expected for an argument type
   together with the conversion character passed in.  */

static void
test_get_format_for_type_printf ()
{
  /* 'fki' is picked up by ASSERT_FORMAT_FOR_TYPE_STREQ below.  */
  const format_kind_info *fki = get_info ("gnu_printf");
  ASSERT_NE (fki, NULL);

  /* double and long double paired with integer conversion characters:
     "f" and "Lf" are expected whatever the character is.  */
  ASSERT_FORMAT_FOR_TYPE_STREQ ("f", double_type_node, 'i');
  ASSERT_FORMAT_FOR_TYPE_STREQ ("Lf", long_double_type_node, 'i');
  ASSERT_FORMAT_FOR_TYPE_STREQ ("f", double_type_node, 'o');
  ASSERT_FORMAT_FOR_TYPE_STREQ ("Lf", long_double_type_node, 'o');
  ASSERT_FORMAT_FOR_TYPE_STREQ ("f", double_type_node, 'x');
  ASSERT_FORMAT_FOR_TYPE_STREQ ("Lf", long_double_type_node, 'x');
  ASSERT_FORMAT_FOR_TYPE_STREQ ("f", double_type_node, 'X');
  ASSERT_FORMAT_FOR_TYPE_STREQ ("Lf", long_double_type_node, 'X');
  /* int: the conversion character is expected back unchanged.  */
  ASSERT_FORMAT_FOR_TYPE_STREQ ("d", integer_type_node, 'd');
  ASSERT_FORMAT_FOR_TYPE_STREQ ("i", integer_type_node, 'i');
  ASSERT_FORMAT_FOR_TYPE_STREQ ("o", integer_type_node, 'o');
  ASSERT_FORMAT_FOR_TYPE_STREQ ("x", integer_type_node, 'x');
  ASSERT_FORMAT_FOR_TYPE_STREQ ("X", integer_type_node, 'X');
  /* unsigned int: likewise.  */
  ASSERT_FORMAT_FOR_TYPE_STREQ ("d", unsigned_type_node, 'd');
  ASSERT_FORMAT_FOR_TYPE_STREQ ("i", unsigned_type_node, 'i');
  ASSERT_FORMAT_FOR_TYPE_STREQ ("o", unsigned_type_node, 'o');
  ASSERT_FORMAT_FOR_TYPE_STREQ ("x", unsigned_type_node, 'x');
  ASSERT_FORMAT_FOR_TYPE_STREQ ("X", unsigned_type_node, 'X');
  /* long and unsigned long: an "l" length modifier is expected.  */
  ASSERT_FORMAT_FOR_TYPE_STREQ ("ld", long_integer_type_node, 'd');
  ASSERT_FORMAT_FOR_TYPE_STREQ ("li", long_integer_type_node, 'i');
  ASSERT_FORMAT_FOR_TYPE_STREQ ("lx", long_integer_type_node, 'x');
  ASSERT_FORMAT_FOR_TYPE_STREQ ("lo", long_unsigned_type_node, 'o');
  ASSERT_FORMAT_FOR_TYPE_STREQ ("lx", long_unsigned_type_node, 'x');
  /* long long and unsigned long long: "ll" is expected.  */
  ASSERT_FORMAT_FOR_TYPE_STREQ ("lld", long_long_integer_type_node, 'd');
  ASSERT_FORMAT_FOR_TYPE_STREQ ("lli", long_long_integer_type_node, 'i');
  ASSERT_FORMAT_FOR_TYPE_STREQ ("llo", long_long_unsigned_type_node, 'o');
  ASSERT_FORMAT_FOR_TYPE_STREQ ("llx", long_long_unsigned_type_node, 'x');
  /* Pointer to char: "s" is expected even for an integer conversion
     character.  */
  ASSERT_FORMAT_FOR_TYPE_STREQ ("s", build_pointer_type (char_type_node), 'i');
}
5337 
/* Selftest for get_format_for_type for "scanf"-style functions.
   Every argument type is a pointer to the type being read.  */

static void
test_get_format_for_type_scanf ()
{
  /* 'fki' is picked up by ASSERT_FORMAT_FOR_TYPE_STREQ below.  */
  const format_kind_info *fki = get_info ("gnu_scanf");
  ASSERT_NE (fki, NULL);
  /* Pointers to int and unsigned int: no length modifier expected.  */
  ASSERT_FORMAT_FOR_TYPE_STREQ ("d", build_pointer_type (integer_type_node), 'd');
  ASSERT_FORMAT_FOR_TYPE_STREQ ("u", build_pointer_type (unsigned_type_node), 'u');
  /* Pointers to long and unsigned long: "l" expected.  */
  ASSERT_FORMAT_FOR_TYPE_STREQ ("ld",
				build_pointer_type (long_integer_type_node), 'd');
  ASSERT_FORMAT_FOR_TYPE_STREQ ("lu",
				build_pointer_type (long_unsigned_type_node), 'u');
  /* Pointers to long long and unsigned long long: "ll" expected.  */
  ASSERT_FORMAT_FOR_TYPE_STREQ
    ("lld", build_pointer_type (long_long_integer_type_node), 'd');
  ASSERT_FORMAT_FOR_TYPE_STREQ
    ("llu", build_pointer_type (long_long_unsigned_type_node), 'u');
  /* Pointers to float and double: "e" and "le" expected.  */
  ASSERT_FORMAT_FOR_TYPE_STREQ ("e", build_pointer_type (float_type_node), 'e');
  ASSERT_FORMAT_FOR_TYPE_STREQ ("le", build_pointer_type (double_type_node), 'e');
}
5358 
5359 #undef ASSERT_FORMAT_FOR_TYPE_STREQ
5360 
/* Exercise the type-printing label code, to give some coverage
   under "make selftest-valgrind" (in particular, to ensure that
   the label-printing machinery doesn't leak).
   A one-line source file is created, a rich location with two labelled
   ranges is built over it, and the text printed for it is compared
   with the expected output for the current dialect.  */

static void
test_type_mismatch_range_labels ()
{
  /* Create a tempfile and write some text to it.
     ....................0000000001 11111111 12 22222222
     ....................1234567890 12345678 90 12345678.  */
  const char *content = "  printf (\"msg: %i\\n\", msg);\n";
  temp_source_file tmp (SELFTEST_LOCATION, ".c", content);
  line_table_test ltt;

  linemap_add (line_table, LC_ENTER, false, tmp.get_filename (), 1);

  /* Locations for the columns used below: 17 and 18 for the format
     range, 24 and 26 for the parameter range.  */
  location_t c17 = linemap_position_for_column (line_table, 17);
  ASSERT_EQ (LOCATION_COLUMN (c17), 17);
  location_t c18 = linemap_position_for_column (line_table, 18);
  location_t c24 = linemap_position_for_column (line_table, 24);
  location_t c26 = linemap_position_for_column (line_table, 26);

  /* Don't attempt to run the tests if column data might be unavailable.  */
  if (c26 > LINE_MAP_MAX_LOCATION_WITH_COLS)
    return;

  /* Range for the format directive; it reports column 18.  */
  location_t fmt = make_location (c18, c17, c18);
  ASSERT_EQ (LOCATION_COLUMN (fmt), 18);

  /* Range for the argument; it reports column 24.  */
  location_t param = make_location (c24, c24, c26);
  ASSERT_EQ (LOCATION_COLUMN (param), 24);

  /* Labels for the two ranges; the expected output below shows the
     text they produce.  */
  range_label_for_format_type_mismatch fmt_label (char_type_node,
						  integer_type_node, 1);
  range_label_for_type_mismatch param_label (integer_type_node,
					     char_type_node);
  gcc_rich_location richloc (fmt, &fmt_label);
  richloc.add_range (param, SHOW_RANGE_WITHOUT_CARET, &param_label);

  /* Print into a test context and compare the formatted text.  */
  test_diagnostic_context dc;
  diagnostic_show_locus (&dc, &richloc, DK_ERROR);
  if (c_dialect_cxx ())
    /* "char*", without a space.  */
    ASSERT_STREQ ("   printf (\"msg: %i\\n\", msg);\n"
		  "                 ~^     ~~~\n"
		  "                  |     |\n"
		  "                  char* int\n",
		  pp_formatted_text (dc.printer));
  else
    /* "char *", with a space.  */
    ASSERT_STREQ ("   printf (\"msg: %i\\n\", msg);\n"
		  "                 ~^     ~~~\n"
		  "                  |     |\n"
		  "                  |     int\n"
		  "                  char *\n",
		  pp_formatted_text (dc.printer));
}
5418 
/* Run all of the selftests within this file, in the order in which
   they are listed here.  */

void
c_format_c_tests ()
{
  test_get_modifier_for_format_len ();
  test_get_format_for_type_printf ();
  test_get_format_for_type_scanf ();
  test_type_mismatch_range_labels ();
}
5429 
5430 } // namespace selftest
5431 
5432 #endif /* CHECKING_P */
5433 
5434 #include "gt-c-family-c-format.h"
5435