1 /* UndefinedBehaviorSanitizer, undefined behavior detector.
2    Copyright (C) 2013-2014 Free Software Foundation, Inc.
3    Contributed by Marek Polacek <polacek@redhat.com>
4 
5 This file is part of GCC.
6 
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11 
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3.  If not see
19 <http://www.gnu.org/licenses/>.  */
20 
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tree.h"
25 #include "stor-layout.h"
26 #include "stringpool.h"
27 #include "cgraph.h"
28 #include "tree-pass.h"
29 #include "tree-ssa-alias.h"
30 #include "tree-pretty-print.h"
31 #include "internal-fn.h"
32 #include "gimple-expr.h"
33 #include "gimple.h"
34 #include "gimple-iterator.h"
35 #include "gimple-ssa.h"
36 #include "gimple-walk.h"
37 #include "hashtab.h"
38 #include "output.h"
39 #include "tm_p.h"
40 #include "toplev.h"
41 #include "cfgloop.h"
42 #include "ubsan.h"
43 #include "c-family/c-common.h"
44 #include "rtl.h"
45 #include "expr.h"
46 #include "tree-ssanames.h"
47 #include "asan.h"
48 #include "gimplify-me.h"
49 #include "intl.h"
50 
51 /* Map from a tree to a VAR_DECL tree.  */
52 
53 struct GTY(()) tree_type_map {
54   struct tree_map_base type;
55   tree decl;
56 };
57 
58 #define tree_type_map_eq tree_map_base_eq
59 #define tree_type_map_marked_p tree_map_base_marked_p
60 
61 /* Hash from a tree in a tree_type_map.  */
62 
63 unsigned int
tree_type_map_hash(const void * item)64 tree_type_map_hash (const void *item)
65 {
66   return TYPE_UID (((const struct tree_type_map *)item)->type.from);
67 }
68 
69 static GTY ((if_marked ("tree_type_map_marked_p"), param_is (struct tree_type_map)))
70      htab_t decl_tree_for_type;
71 
72 /* Lookup a VAR_DECL for TYPE, and return it if we find one.  */
73 
74 static tree
decl_for_type_lookup(tree type)75 decl_for_type_lookup (tree type)
76 {
77   /* If the hash table is not initialized yet, create it now.  */
78   if (decl_tree_for_type == NULL)
79     {
80       decl_tree_for_type = htab_create_ggc (10, tree_type_map_hash,
81 					    tree_type_map_eq, 0);
82       /* That also means we don't have to bother with the lookup.  */
83       return NULL_TREE;
84     }
85 
86   struct tree_type_map *h, in;
87   in.type.from = type;
88 
89   h = (struct tree_type_map *)
90       htab_find_with_hash (decl_tree_for_type, &in, TYPE_UID (type));
91   return h ? h->decl : NULL_TREE;
92 }
93 
94 /* Insert a mapping TYPE->DECL in the VAR_DECL for type hashtable.  */
95 
96 static void
decl_for_type_insert(tree type,tree decl)97 decl_for_type_insert (tree type, tree decl)
98 {
99   struct tree_type_map *h;
100   void **slot;
101 
102   h = ggc_alloc_tree_type_map ();
103   h->type.from = type;
104   h->decl = decl;
105   slot = htab_find_slot_with_hash (decl_tree_for_type, h, TYPE_UID (type),
106                                   INSERT);
107   *(struct tree_type_map **) slot = h;
108 }
109 
110 /* Helper routine, which encodes a value in the pointer_sized_int_node.
111    Arguments with precision <= POINTER_SIZE are passed directly,
112    the rest is passed by reference.  T is a value we are to encode.
113    IN_EXPAND_P is true if this function is called during expansion.  */
114 
115 tree
ubsan_encode_value(tree t,bool in_expand_p)116 ubsan_encode_value (tree t, bool in_expand_p)
117 {
118   tree type = TREE_TYPE (t);
119   const unsigned int bitsize = GET_MODE_BITSIZE (TYPE_MODE (type));
120   if (bitsize <= POINTER_SIZE)
121     switch (TREE_CODE (type))
122       {
123       case BOOLEAN_TYPE:
124       case ENUMERAL_TYPE:
125       case INTEGER_TYPE:
126 	return fold_build1 (NOP_EXPR, pointer_sized_int_node, t);
127       case REAL_TYPE:
128 	{
129 	  tree itype = build_nonstandard_integer_type (bitsize, true);
130 	  t = fold_build1 (VIEW_CONVERT_EXPR, itype, t);
131 	  return fold_convert (pointer_sized_int_node, t);
132 	}
133       default:
134 	gcc_unreachable ();
135       }
136   else
137     {
138       if (!DECL_P (t) || !TREE_ADDRESSABLE (t))
139 	{
140 	  /* The reason for this is that we don't want to pessimize
141 	     code by making vars unnecessarily addressable.  */
142 	  tree var = create_tmp_var (type, NULL);
143 	  tree tem = build2 (MODIFY_EXPR, void_type_node, var, t);
144 	  if (in_expand_p)
145 	    {
146 	      rtx mem
147 		= assign_stack_temp_for_type (TYPE_MODE (type),
148 					      GET_MODE_SIZE (TYPE_MODE (type)),
149 					      type);
150 	      SET_DECL_RTL (var, mem);
151 	      expand_assignment (var, t, false);
152 	      return build_fold_addr_expr (var);
153 	    }
154 	  t = build_fold_addr_expr (var);
155 	  return build2 (COMPOUND_EXPR, TREE_TYPE (t), tem, t);
156 	}
157       else
158 	return build_fold_addr_expr (t);
159     }
160 }
161 
162 /* Build
163    struct __ubsan_type_descriptor
164    {
165      unsigned short __typekind;
166      unsigned short __typeinfo;
167      char __typename[];
168    }
169    type.  */
170 
171 static tree
ubsan_type_descriptor_type(void)172 ubsan_type_descriptor_type (void)
173 {
174   static const char *field_names[3]
175     = { "__typekind", "__typeinfo", "__typename" };
176   tree fields[3], ret;
177   tree itype = build_range_type (sizetype, size_zero_node, NULL_TREE);
178   tree flex_arr_type = build_array_type (char_type_node, itype);
179 
180   ret = make_node (RECORD_TYPE);
181   for (int i = 0; i < 3; i++)
182     {
183       fields[i] = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
184 			      get_identifier (field_names[i]),
185 			      (i == 2) ? flex_arr_type
186 			      : short_unsigned_type_node);
187       DECL_CONTEXT (fields[i]) = ret;
188       if (i)
189 	DECL_CHAIN (fields[i - 1]) = fields[i];
190     }
191   TYPE_FIELDS (ret) = fields[0];
192   TYPE_NAME (ret) = get_identifier ("__ubsan_type_descriptor");
193   layout_type (ret);
194   return ret;
195 }
196 
197 /* Build
198    struct __ubsan_source_location
199    {
200      const char *__filename;
201      unsigned int __line;
202      unsigned int __column;
203    }
204    type.  */
205 
206 static tree
ubsan_source_location_type(void)207 ubsan_source_location_type (void)
208 {
209   static const char *field_names[3]
210     = { "__filename", "__line", "__column" };
211   tree fields[3], ret;
212   tree const_char_type = build_qualified_type (char_type_node,
213 					       TYPE_QUAL_CONST);
214 
215   ret = make_node (RECORD_TYPE);
216   for (int i = 0; i < 3; i++)
217     {
218       fields[i] = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
219 			      get_identifier (field_names[i]),
220 			      (i == 0) ? build_pointer_type (const_char_type)
221 			      : unsigned_type_node);
222       DECL_CONTEXT (fields[i]) = ret;
223       if (i)
224 	DECL_CHAIN (fields[i - 1]) = fields[i];
225     }
226   TYPE_FIELDS (ret) = fields[0];
227   TYPE_NAME (ret) = get_identifier ("__ubsan_source_location");
228   layout_type (ret);
229   return ret;
230 }
231 
232 /* Helper routine that returns a CONSTRUCTOR of __ubsan_source_location
233    type with its fields filled from a location_t LOC.  */
234 
235 static tree
ubsan_source_location(location_t loc)236 ubsan_source_location (location_t loc)
237 {
238   expanded_location xloc;
239   tree type = ubsan_source_location_type ();
240 
241   xloc = expand_location (loc);
242   if (xloc.file == NULL)
243     xloc.file = "<unknown>";
244 
245   /* Fill in the values from LOC.  */
246   size_t len = strlen (xloc.file);
247   tree str = build_string (len + 1, xloc.file);
248   TREE_TYPE (str) = build_array_type (char_type_node,
249 				      build_index_type (size_int (len)));
250   TREE_READONLY (str) = 1;
251   TREE_STATIC (str) = 1;
252   str = build_fold_addr_expr (str);
253   tree ctor = build_constructor_va (type, 3, NULL_TREE, str, NULL_TREE,
254 				    build_int_cst (unsigned_type_node,
255 						   xloc.line), NULL_TREE,
256 				    build_int_cst (unsigned_type_node,
257 						   xloc.column));
258   TREE_CONSTANT (ctor) = 1;
259   TREE_STATIC (ctor) = 1;
260 
261   return ctor;
262 }
263 
264 /* This routine returns a magic number for TYPE.  */
265 
266 static unsigned short
get_ubsan_type_info_for_type(tree type)267 get_ubsan_type_info_for_type (tree type)
268 {
269   gcc_assert (TYPE_SIZE (type) && tree_fits_uhwi_p (TYPE_SIZE (type)));
270   int prec = exact_log2 (tree_to_uhwi (TYPE_SIZE (type)));
271   gcc_assert (prec != -1);
272   return (prec << 1) | !TYPE_UNSIGNED (type);
273 }
274 
275 /* Helper routine that returns ADDR_EXPR of a VAR_DECL of a type
276    descriptor.  It first looks into the hash table; if not found,
277    create the VAR_DECL, put it into the hash table and return the
278    ADDR_EXPR of it.  TYPE describes a particular type.  WANT_POINTER_TYPE_P
279    means whether we are interested in the pointer type and not the pointer
280    itself.  */
281 
282 tree
ubsan_type_descriptor(tree type,bool want_pointer_type_p)283 ubsan_type_descriptor (tree type, bool want_pointer_type_p)
284 {
285   /* See through any typedefs.  */
286   type = TYPE_MAIN_VARIANT (type);
287 
288   tree decl = decl_for_type_lookup (type);
289   /* It is possible that some of the earlier created DECLs were found
290      unused, in that case they weren't emitted and varpool_get_node
291      returns NULL node on them.  But now we really need them.  Thus,
292      renew them here.  */
293   if (decl != NULL_TREE && varpool_get_node (decl))
294     return build_fold_addr_expr (decl);
295 
296   tree dtype = ubsan_type_descriptor_type ();
297   tree type2 = type;
298   const char *tname = NULL;
299   char *pretty_name;
300   unsigned char deref_depth = 0;
301   unsigned short tkind, tinfo;
302 
303   /* Get the name of the type, or the name of the pointer type.  */
304   if (want_pointer_type_p)
305     {
306       gcc_assert (POINTER_TYPE_P (type));
307       type2 = TREE_TYPE (type);
308 
309       /* Remove any '*' operators from TYPE.  */
310       while (POINTER_TYPE_P (type2))
311         deref_depth++, type2 = TREE_TYPE (type2);
312 
313       if (TREE_CODE (type2) == METHOD_TYPE)
314         type2 = TYPE_METHOD_BASETYPE (type2);
315     }
316 
317   /* If an array, get its type.  */
318   type2 = strip_array_types (type2);
319 
320   if (TYPE_NAME (type2) != NULL)
321     {
322       if (TREE_CODE (TYPE_NAME (type2)) == IDENTIFIER_NODE)
323 	tname = IDENTIFIER_POINTER (TYPE_NAME (type2));
324       else if (DECL_NAME (TYPE_NAME (type2)) != NULL)
325 	tname = IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (type2)));
326     }
327 
328   if (tname == NULL)
329     /* We weren't able to determine the type name.  */
330     tname = "<unknown>";
331 
332   /* Decorate the type name with '', '*', "struct", or "union".  */
333   pretty_name = (char *) alloca (strlen (tname) + 16 + deref_depth);
334   if (want_pointer_type_p)
335     {
336       int pos = sprintf (pretty_name, "'%s%s%s%s%s%s%s",
337 			 TYPE_VOLATILE (type2) ? "volatile " : "",
338 			 TYPE_READONLY (type2) ? "const " : "",
339 			 TYPE_RESTRICT (type2) ? "restrict " : "",
340 			 TYPE_ATOMIC (type2) ? "_Atomic " : "",
341 			 TREE_CODE (type2) == RECORD_TYPE
342 			 ? "struct "
343 			 : TREE_CODE (type2) == UNION_TYPE
344 			   ? "union " : "", tname,
345 			 deref_depth == 0 ? "" : " ");
346       while (deref_depth-- > 0)
347         pretty_name[pos++] = '*';
348       pretty_name[pos++] = '\'';
349       pretty_name[pos] = '\0';
350     }
351   else
352     sprintf (pretty_name, "'%s'", tname);
353 
354   switch (TREE_CODE (type))
355     {
356     case BOOLEAN_TYPE:
357     case ENUMERAL_TYPE:
358     case INTEGER_TYPE:
359       tkind = 0x0000;
360       break;
361     case REAL_TYPE:
362       tkind = 0x0001;
363       break;
364     default:
365       tkind = 0xffff;
366       break;
367     }
368   tinfo = get_ubsan_type_info_for_type (type);
369 
370   /* Create a new VAR_DECL of type descriptor.  */
371   char tmp_name[32];
372   static unsigned int type_var_id_num;
373   ASM_GENERATE_INTERNAL_LABEL (tmp_name, "Lubsan_type", type_var_id_num++);
374   decl = build_decl (UNKNOWN_LOCATION, VAR_DECL, get_identifier (tmp_name),
375 			  dtype);
376   TREE_STATIC (decl) = 1;
377   TREE_PUBLIC (decl) = 0;
378   DECL_ARTIFICIAL (decl) = 1;
379   DECL_IGNORED_P (decl) = 1;
380   DECL_EXTERNAL (decl) = 0;
381 
382   size_t len = strlen (pretty_name);
383   tree str = build_string (len + 1, pretty_name);
384   TREE_TYPE (str) = build_array_type (char_type_node,
385 				      build_index_type (size_int (len)));
386   TREE_READONLY (str) = 1;
387   TREE_STATIC (str) = 1;
388   tree ctor = build_constructor_va (dtype, 3, NULL_TREE,
389 				    build_int_cst (short_unsigned_type_node,
390 						   tkind), NULL_TREE,
391 				    build_int_cst (short_unsigned_type_node,
392 						   tinfo), NULL_TREE, str);
393   TREE_CONSTANT (ctor) = 1;
394   TREE_STATIC (ctor) = 1;
395   DECL_INITIAL (decl) = ctor;
396   varpool_finalize_decl (decl);
397 
398   /* Save the VAR_DECL into the hash table.  */
399   decl_for_type_insert (type, decl);
400 
401   return build_fold_addr_expr (decl);
402 }
403 
404 /* Create a structure for the ubsan library.  NAME is a name of the new
405    structure.  The arguments in ... are of __ubsan_type_descriptor type
406    and there are at most two of them.  MISMATCH are data used by ubsan
407    pointer checking.  */
408 
409 tree
ubsan_create_data(const char * name,const location_t * ploc,const struct ubsan_mismatch_data * mismatch,...)410 ubsan_create_data (const char *name, const location_t *ploc,
411 		   const struct ubsan_mismatch_data *mismatch, ...)
412 {
413   va_list args;
414   tree ret, t;
415   tree fields[5];
416   vec<tree, va_gc> *saved_args = NULL;
417   size_t i = 0;
418   location_t loc = UNKNOWN_LOCATION;
419 
420   /* Firstly, create a pointer to type descriptor type.  */
421   tree td_type = ubsan_type_descriptor_type ();
422   TYPE_READONLY (td_type) = 1;
423   td_type = build_pointer_type (td_type);
424 
425   /* Create the structure type.  */
426   ret = make_node (RECORD_TYPE);
427   if (ploc != NULL)
428     {
429       loc = LOCATION_LOCUS (*ploc);
430       fields[i] = build_decl (UNKNOWN_LOCATION, FIELD_DECL, NULL_TREE,
431 			      ubsan_source_location_type ());
432       DECL_CONTEXT (fields[i]) = ret;
433       i++;
434     }
435 
436   va_start (args, mismatch);
437   for (t = va_arg (args, tree); t != NULL_TREE;
438        i++, t = va_arg (args, tree))
439     {
440       gcc_checking_assert (i < 3);
441       /* Save the tree arguments for later use.  */
442       vec_safe_push (saved_args, t);
443       fields[i] = build_decl (UNKNOWN_LOCATION, FIELD_DECL, NULL_TREE,
444 			      td_type);
445       DECL_CONTEXT (fields[i]) = ret;
446       if (i)
447 	DECL_CHAIN (fields[i - 1]) = fields[i];
448     }
449   va_end (args);
450 
451   if (mismatch != NULL)
452     {
453       /* We have to add two more decls.  */
454       fields[i] = build_decl (UNKNOWN_LOCATION, FIELD_DECL, NULL_TREE,
455 			      pointer_sized_int_node);
456       DECL_CONTEXT (fields[i]) = ret;
457       DECL_CHAIN (fields[i - 1]) = fields[i];
458       i++;
459 
460       fields[i] = build_decl (UNKNOWN_LOCATION, FIELD_DECL, NULL_TREE,
461 			      unsigned_char_type_node);
462       DECL_CONTEXT (fields[i]) = ret;
463       DECL_CHAIN (fields[i - 1]) = fields[i];
464       i++;
465     }
466 
467   TYPE_FIELDS (ret) = fields[0];
468   TYPE_NAME (ret) = get_identifier (name);
469   layout_type (ret);
470 
471   /* Now, fill in the type.  */
472   char tmp_name[32];
473   static unsigned int ubsan_var_id_num;
474   ASM_GENERATE_INTERNAL_LABEL (tmp_name, "Lubsan_data", ubsan_var_id_num++);
475   tree var = build_decl (UNKNOWN_LOCATION, VAR_DECL, get_identifier (tmp_name),
476 			 ret);
477   TREE_STATIC (var) = 1;
478   TREE_PUBLIC (var) = 0;
479   DECL_ARTIFICIAL (var) = 1;
480   DECL_IGNORED_P (var) = 1;
481   DECL_EXTERNAL (var) = 0;
482 
483   vec<constructor_elt, va_gc> *v;
484   vec_alloc (v, i);
485   tree ctor = build_constructor (ret, v);
486 
487   /* If desirable, set the __ubsan_source_location element.  */
488   if (ploc != NULL)
489     CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, ubsan_source_location (loc));
490 
491   size_t nelts = vec_safe_length (saved_args);
492   for (i = 0; i < nelts; i++)
493     {
494       t = (*saved_args)[i];
495       CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, t);
496     }
497 
498   if (mismatch != NULL)
499     {
500       /* Append the pointer data.  */
501       CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, mismatch->align);
502       CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, mismatch->ckind);
503     }
504 
505   TREE_CONSTANT (ctor) = 1;
506   TREE_STATIC (ctor) = 1;
507   DECL_INITIAL (var) = ctor;
508   varpool_finalize_decl (var);
509 
510   return var;
511 }
512 
513 /* Instrument the __builtin_unreachable call.  We just call the libubsan
514    routine instead.  */
515 
516 tree
ubsan_instrument_unreachable(location_t loc)517 ubsan_instrument_unreachable (location_t loc)
518 {
519   initialize_sanitizer_builtins ();
520   tree data = ubsan_create_data ("__ubsan_unreachable_data", &loc, NULL,
521 				 NULL_TREE);
522   tree t = builtin_decl_explicit (BUILT_IN_UBSAN_HANDLE_BUILTIN_UNREACHABLE);
523   return build_call_expr_loc (loc, t, 1, build_fold_addr_expr_loc (loc, data));
524 }
525 
526 /* Return true if T is a call to a libubsan routine.  */
527 
528 bool
is_ubsan_builtin_p(tree t)529 is_ubsan_builtin_p (tree t)
530 {
531   return TREE_CODE (t) == FUNCTION_DECL
532 	 && strncmp (IDENTIFIER_POINTER (DECL_NAME (t)),
533 		     "__builtin___ubsan_", 18) == 0;
534 }
535 
536 /* Expand UBSAN_NULL internal call.  */
537 
538 void
ubsan_expand_null_ifn(gimple_stmt_iterator gsi)539 ubsan_expand_null_ifn (gimple_stmt_iterator gsi)
540 {
541   gimple stmt = gsi_stmt (gsi);
542   location_t loc = gimple_location (stmt);
543   gcc_assert (gimple_call_num_args (stmt) == 2);
544   tree ptr = gimple_call_arg (stmt, 0);
545   tree ckind = gimple_call_arg (stmt, 1);
546 
547   basic_block cur_bb = gsi_bb (gsi);
548 
549   /* Split the original block holding the pointer dereference.  */
550   edge e = split_block (cur_bb, stmt);
551 
552   /* Get a hold on the 'condition block', the 'then block' and the
553      'else block'.  */
554   basic_block cond_bb = e->src;
555   basic_block fallthru_bb = e->dest;
556   basic_block then_bb = create_empty_bb (cond_bb);
557   if (current_loops)
558     {
559       add_bb_to_loop (then_bb, cond_bb->loop_father);
560       loops_state_set (LOOPS_NEED_FIXUP);
561     }
562 
563   /* Make an edge coming from the 'cond block' into the 'then block';
564      this edge is unlikely taken, so set up the probability accordingly.  */
565   e = make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
566   e->probability = PROB_VERY_UNLIKELY;
567 
568   /* Connect 'then block' with the 'else block'.  This is needed
569      as the ubsan routines we call in the 'then block' are not noreturn.
570      The 'then block' only has one outcoming edge.  */
571   make_single_succ_edge (then_bb, fallthru_bb, EDGE_FALLTHRU);
572 
573   /* Set up the fallthrough basic block.  */
574   e = find_edge (cond_bb, fallthru_bb);
575   e->flags = EDGE_FALSE_VALUE;
576   e->count = cond_bb->count;
577   e->probability = REG_BR_PROB_BASE - PROB_VERY_UNLIKELY;
578 
579   /* Update dominance info for the newly created then_bb; note that
580      fallthru_bb's dominance info has already been updated by
581      split_bock.  */
582   if (dom_info_available_p (CDI_DOMINATORS))
583     set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
584 
585   /* Put the ubsan builtin call into the newly created BB.  */
586   tree fn = builtin_decl_implicit (BUILT_IN_UBSAN_HANDLE_TYPE_MISMATCH);
587   const struct ubsan_mismatch_data m
588     = { build_zero_cst (pointer_sized_int_node), ckind };
589   tree data = ubsan_create_data ("__ubsan_null_data",
590 				 &loc, &m,
591 				 ubsan_type_descriptor (TREE_TYPE (ptr), true),
592 				 NULL_TREE);
593   data = build_fold_addr_expr_loc (loc, data);
594   gimple g = gimple_build_call (fn, 2, data,
595 				build_zero_cst (pointer_sized_int_node));
596   gimple_set_location (g, loc);
597   gimple_stmt_iterator gsi2 = gsi_start_bb (then_bb);
598   gsi_insert_after (&gsi2, g, GSI_NEW_STMT);
599 
600   /* Unlink the UBSAN_NULLs vops before replacing it.  */
601   unlink_stmt_vdef (stmt);
602 
603   g = gimple_build_cond (EQ_EXPR, ptr, build_int_cst (TREE_TYPE (ptr), 0),
604 			 NULL_TREE, NULL_TREE);
605   gimple_set_location (g, loc);
606 
607   /* Replace the UBSAN_NULL with a GIMPLE_COND stmt.  */
608   gsi_replace (&gsi, g, false);
609 }
610 
611 /* Instrument a member call.  We check whether 'this' is NULL.  */
612 
613 static void
instrument_member_call(gimple_stmt_iterator * iter)614 instrument_member_call (gimple_stmt_iterator *iter)
615 {
616   tree this_parm = gimple_call_arg (gsi_stmt (*iter), 0);
617   tree kind = build_int_cst (unsigned_char_type_node, UBSAN_MEMBER_CALL);
618   gimple g = gimple_build_call_internal (IFN_UBSAN_NULL, 2, this_parm, kind);
619   gimple_set_location (g, gimple_location (gsi_stmt (*iter)));
620   gsi_insert_before (iter, g, GSI_SAME_STMT);
621 }
622 
623 /* Instrument a memory reference.  T is the pointer, IS_LHS says
624    whether the pointer is on the left hand side of the assignment.  */
625 
626 static void
instrument_mem_ref(tree t,gimple_stmt_iterator * iter,bool is_lhs)627 instrument_mem_ref (tree t, gimple_stmt_iterator *iter, bool is_lhs)
628 {
629   enum ubsan_null_ckind ikind = is_lhs ? UBSAN_STORE_OF : UBSAN_LOAD_OF;
630   if (RECORD_OR_UNION_TYPE_P (TREE_TYPE (TREE_TYPE (t))))
631     ikind = UBSAN_MEMBER_ACCESS;
632   tree kind = build_int_cst (unsigned_char_type_node, ikind);
633   gimple g = gimple_build_call_internal (IFN_UBSAN_NULL, 2, t, kind);
634   gimple_set_location (g, gimple_location (gsi_stmt (*iter)));
635   gsi_insert_before (iter, g, GSI_SAME_STMT);
636 }
637 
638 /* Perform the pointer instrumentation.  */
639 
640 static void
instrument_null(gimple_stmt_iterator gsi,bool is_lhs)641 instrument_null (gimple_stmt_iterator gsi, bool is_lhs)
642 {
643   gimple stmt = gsi_stmt (gsi);
644   tree t = is_lhs ? gimple_get_lhs (stmt) : gimple_assign_rhs1 (stmt);
645   t = get_base_address (t);
646   const enum tree_code code = TREE_CODE (t);
647   if (code == MEM_REF
648       && TREE_CODE (TREE_OPERAND (t, 0)) == SSA_NAME)
649     instrument_mem_ref (TREE_OPERAND (t, 0), &gsi, is_lhs);
650   else if (code == ADDR_EXPR
651 	   && POINTER_TYPE_P (TREE_TYPE (t))
652 	   && TREE_CODE (TREE_TYPE (TREE_TYPE (t))) == METHOD_TYPE)
653     instrument_member_call (&gsi);
654 }
655 
656 /* Build an ubsan builtin call for the signed-integer-overflow
657    sanitization.  CODE says what kind of builtin are we building,
658    LOC is a location, LHSTYPE is the type of LHS, OP0 and OP1
659    are operands of the binary operation.  */
660 
661 tree
ubsan_build_overflow_builtin(tree_code code,location_t loc,tree lhstype,tree op0,tree op1)662 ubsan_build_overflow_builtin (tree_code code, location_t loc, tree lhstype,
663 			      tree op0, tree op1)
664 {
665   tree data = ubsan_create_data ("__ubsan_overflow_data", &loc, NULL,
666 				 ubsan_type_descriptor (lhstype, false),
667 				 NULL_TREE);
668   enum built_in_function fn_code;
669 
670   switch (code)
671     {
672     case PLUS_EXPR:
673       fn_code = BUILT_IN_UBSAN_HANDLE_ADD_OVERFLOW;
674       break;
675     case MINUS_EXPR:
676       fn_code = BUILT_IN_UBSAN_HANDLE_SUB_OVERFLOW;
677       break;
678     case MULT_EXPR:
679       fn_code = BUILT_IN_UBSAN_HANDLE_MUL_OVERFLOW;
680       break;
681     case NEGATE_EXPR:
682       fn_code = BUILT_IN_UBSAN_HANDLE_NEGATE_OVERFLOW;
683       break;
684     default:
685       gcc_unreachable ();
686     }
687   tree fn = builtin_decl_explicit (fn_code);
688   return build_call_expr_loc (loc, fn, 2 + (code != NEGATE_EXPR),
689 			      build_fold_addr_expr_loc (loc, data),
690 			      ubsan_encode_value (op0, true),
691 			      op1 ? ubsan_encode_value (op1, true)
692 				  : NULL_TREE);
693 }
694 
695 /* Perform the signed integer instrumentation.  GSI is the iterator
696    pointing at statement we are trying to instrument.  */
697 
698 static void
instrument_si_overflow(gimple_stmt_iterator gsi)699 instrument_si_overflow (gimple_stmt_iterator gsi)
700 {
701   gimple stmt = gsi_stmt (gsi);
702   tree_code code = gimple_assign_rhs_code (stmt);
703   tree lhs = gimple_assign_lhs (stmt);
704   tree lhstype = TREE_TYPE (lhs);
705   tree a, b;
706   gimple g;
707 
708   /* If this is not a signed operation, don't instrument anything here.
709      Also punt on bit-fields.  */
710   if (!INTEGRAL_TYPE_P (lhstype)
711       || TYPE_OVERFLOW_WRAPS (lhstype)
712       || GET_MODE_BITSIZE (TYPE_MODE (lhstype)) != TYPE_PRECISION (lhstype))
713     return;
714 
715   switch (code)
716     {
717     case MINUS_EXPR:
718     case PLUS_EXPR:
719     case MULT_EXPR:
720       /* Transform
721 	 i = u {+,-,*} 5;
722 	 into
723 	 i = UBSAN_CHECK_{ADD,SUB,MUL} (u, 5);  */
724       a = gimple_assign_rhs1 (stmt);
725       b = gimple_assign_rhs2 (stmt);
726       g = gimple_build_call_internal (code == PLUS_EXPR
727 				      ? IFN_UBSAN_CHECK_ADD
728 				      : code == MINUS_EXPR
729 				      ? IFN_UBSAN_CHECK_SUB
730 				      : IFN_UBSAN_CHECK_MUL, 2, a, b);
731       gimple_call_set_lhs (g, lhs);
732       gsi_replace (&gsi, g, false);
733       break;
734     case NEGATE_EXPR:
735       /* Represent i = -u;
736 	 as
737 	 i = UBSAN_CHECK_SUB (0, u);  */
738       a = build_int_cst (lhstype, 0);
739       b = gimple_assign_rhs1 (stmt);
740       g = gimple_build_call_internal (IFN_UBSAN_CHECK_SUB, 2, a, b);
741       gimple_call_set_lhs (g, lhs);
742       gsi_replace (&gsi, g, false);
743       break;
744     case ABS_EXPR:
745       /* Transform i = ABS_EXPR<u>;
746 	 into
747 	 _N = UBSAN_CHECK_SUB (0, u);
748 	 i = ABS_EXPR<_N>;  */
749       a = build_int_cst (lhstype, 0);
750       b = gimple_assign_rhs1 (stmt);
751       g = gimple_build_call_internal (IFN_UBSAN_CHECK_SUB, 2, a, b);
752       a = make_ssa_name (lhstype, NULL);
753       gimple_call_set_lhs (g, a);
754       gimple_set_location (g, gimple_location (stmt));
755       gsi_insert_before (&gsi, g, GSI_SAME_STMT);
756       gimple_assign_set_rhs1 (stmt, a);
757       update_stmt (stmt);
758       break;
759     default:
760       break;
761     }
762 }
763 
764 /* Instrument loads from (non-bitfield) bool and C++ enum values
765    to check if the memory value is outside of the range of the valid
766    type values.  */
767 
768 static void
instrument_bool_enum_load(gimple_stmt_iterator * gsi)769 instrument_bool_enum_load (gimple_stmt_iterator *gsi)
770 {
771   gimple stmt = gsi_stmt (*gsi);
772   tree rhs = gimple_assign_rhs1 (stmt);
773   tree type = TREE_TYPE (rhs);
774   tree minv = NULL_TREE, maxv = NULL_TREE;
775 
776   if (TREE_CODE (type) == BOOLEAN_TYPE && (flag_sanitize & SANITIZE_BOOL))
777     {
778       minv = boolean_false_node;
779       maxv = boolean_true_node;
780     }
781   else if (TREE_CODE (type) == ENUMERAL_TYPE
782 	   && (flag_sanitize & SANITIZE_ENUM)
783 	   && TREE_TYPE (type) != NULL_TREE
784 	   && TREE_CODE (TREE_TYPE (type)) == INTEGER_TYPE
785 	   && (TYPE_PRECISION (TREE_TYPE (type))
786 	       < GET_MODE_PRECISION (TYPE_MODE (type))))
787     {
788       minv = TYPE_MIN_VALUE (TREE_TYPE (type));
789       maxv = TYPE_MAX_VALUE (TREE_TYPE (type));
790     }
791   else
792     return;
793 
794   int modebitsize = GET_MODE_BITSIZE (TYPE_MODE (type));
795   HOST_WIDE_INT bitsize, bitpos;
796   tree offset;
797   enum machine_mode mode;
798   int volatilep = 0, unsignedp = 0;
799   tree base = get_inner_reference (rhs, &bitsize, &bitpos, &offset, &mode,
800 				   &unsignedp, &volatilep, false);
801   tree utype = build_nonstandard_integer_type (modebitsize, 1);
802 
803   if ((TREE_CODE (base) == VAR_DECL && DECL_HARD_REGISTER (base))
804       || (bitpos % modebitsize) != 0
805       || bitsize != modebitsize
806       || GET_MODE_BITSIZE (TYPE_MODE (utype)) != modebitsize
807       || TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
808     return;
809 
810   location_t loc = gimple_location (stmt);
811   tree ptype = build_pointer_type (TREE_TYPE (rhs));
812   tree atype = reference_alias_ptr_type (rhs);
813   gimple g = gimple_build_assign (make_ssa_name (ptype, NULL),
814 				  build_fold_addr_expr (rhs));
815   gimple_set_location (g, loc);
816   gsi_insert_before (gsi, g, GSI_SAME_STMT);
817   tree mem = build2 (MEM_REF, utype, gimple_assign_lhs (g),
818 		     build_int_cst (atype, 0));
819   tree urhs = make_ssa_name (utype, NULL);
820   g = gimple_build_assign (urhs, mem);
821   gimple_set_location (g, loc);
822   gsi_insert_before (gsi, g, GSI_SAME_STMT);
823   minv = fold_convert (utype, minv);
824   maxv = fold_convert (utype, maxv);
825   if (!integer_zerop (minv))
826     {
827       g = gimple_build_assign_with_ops (MINUS_EXPR,
828 					make_ssa_name (utype, NULL),
829 					urhs, minv);
830       gimple_set_location (g, loc);
831       gsi_insert_before (gsi, g, GSI_SAME_STMT);
832     }
833 
834   gimple_stmt_iterator gsi2 = *gsi;
835   basic_block then_bb, fallthru_bb;
836   *gsi = create_cond_insert_point (gsi, true, false, true,
837 				   &then_bb, &fallthru_bb);
838   g = gimple_build_cond (GT_EXPR, gimple_assign_lhs (g),
839 			 int_const_binop (MINUS_EXPR, maxv, minv),
840 			 NULL_TREE, NULL_TREE);
841   gimple_set_location (g, loc);
842   gsi_insert_after (gsi, g, GSI_NEW_STMT);
843 
844   gimple_assign_set_rhs_with_ops (&gsi2, NOP_EXPR, urhs, NULL_TREE);
845   update_stmt (stmt);
846 
847   tree data = ubsan_create_data ("__ubsan_invalid_value_data",
848 				 &loc, NULL,
849 				 ubsan_type_descriptor (type, false),
850 				 NULL_TREE);
851   data = build_fold_addr_expr_loc (loc, data);
852   tree fn = builtin_decl_explicit (BUILT_IN_UBSAN_HANDLE_LOAD_INVALID_VALUE);
853 
854   gsi2 = gsi_after_labels (then_bb);
855   tree val = force_gimple_operand_gsi (&gsi2, ubsan_encode_value (urhs),
856 				       true, NULL_TREE, true, GSI_SAME_STMT);
857   g = gimple_build_call (fn, 2, data, val);
858   gimple_set_location (g, loc);
859   gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
860 }
861 
862 /* Gate and execute functions for ubsan pass.  */
863 
864 static unsigned int
ubsan_pass(void)865 ubsan_pass (void)
866 {
867   basic_block bb;
868   gimple_stmt_iterator gsi;
869 
870   initialize_sanitizer_builtins ();
871 
872   FOR_EACH_BB_FN (bb, cfun)
873     {
874       for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
875 	{
876 	  gimple stmt = gsi_stmt (gsi);
877 	  if (is_gimple_debug (stmt) || gimple_clobber_p (stmt))
878 	    {
879 	      gsi_next (&gsi);
880 	      continue;
881 	    }
882 
883 	  if ((flag_sanitize & SANITIZE_SI_OVERFLOW)
884 	      && is_gimple_assign (stmt))
885 	    instrument_si_overflow (gsi);
886 
887 	  if (flag_sanitize & SANITIZE_NULL)
888 	    {
889 	      if (gimple_store_p (stmt))
890 		instrument_null (gsi, true);
891 	      if (gimple_assign_load_p (stmt))
892 		instrument_null (gsi, false);
893 	    }
894 
895 	  if (flag_sanitize & (SANITIZE_BOOL | SANITIZE_ENUM)
896 	      && gimple_assign_load_p (stmt))
897 	    instrument_bool_enum_load (&gsi);
898 
899 	  gsi_next (&gsi);
900 	}
901     }
902   return 0;
903 }
904 
905 static bool
gate_ubsan(void)906 gate_ubsan (void)
907 {
908   return flag_sanitize & (SANITIZE_NULL | SANITIZE_SI_OVERFLOW
909 			  | SANITIZE_BOOL | SANITIZE_ENUM);
910 }
911 
912 namespace {
913 
914 const pass_data pass_data_ubsan =
915 {
916   GIMPLE_PASS, /* type */
917   "ubsan", /* name */
918   OPTGROUP_NONE, /* optinfo_flags */
919   true, /* has_gate */
920   true, /* has_execute */
921   TV_TREE_UBSAN, /* tv_id */
922   ( PROP_cfg | PROP_ssa ), /* properties_required */
923   0, /* properties_provided */
924   0, /* properties_destroyed */
925   0, /* todo_flags_start */
926   TODO_update_ssa, /* todo_flags_finish */
927 };
928 
929 class pass_ubsan : public gimple_opt_pass
930 {
931 public:
pass_ubsan(gcc::context * ctxt)932   pass_ubsan (gcc::context *ctxt)
933     : gimple_opt_pass (pass_data_ubsan, ctxt)
934   {}
935 
936   /* opt_pass methods: */
gate()937   bool gate () { return gate_ubsan (); }
execute()938   unsigned int execute () { return ubsan_pass (); }
939 
940 }; // class pass_ubsan
941 
942 } // anon namespace
943 
944 gimple_opt_pass *
make_pass_ubsan(gcc::context * ctxt)945 make_pass_ubsan (gcc::context *ctxt)
946 {
947   return new pass_ubsan (ctxt);
948 }
949 
950 #include "gt-ubsan.h"
951