1 /* An experimental state machine, for tracking "taint": unsanitized uses
2    of data potentially under an attacker's control.
3 
4    Copyright (C) 2019-2021 Free Software Foundation, Inc.
5    Contributed by David Malcolm <dmalcolm@redhat.com>.
6 
7 This file is part of GCC.
8 
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 3, or (at your option)
12 any later version.
13 
14 GCC is distributed in the hope that it will be useful, but
15 WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17 General Public License for more details.
18 
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3.  If not see
21 <http://www.gnu.org/licenses/>.  */
22 
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tree.h"
27 #include "function.h"
28 #include "basic-block.h"
29 #include "gimple.h"
30 #include "options.h"
31 #include "diagnostic-path.h"
32 #include "diagnostic-metadata.h"
33 #include "function.h"
34 #include "json.h"
35 #include "analyzer/analyzer.h"
36 #include "analyzer/analyzer-logging.h"
37 #include "gimple-iterator.h"
38 #include "tristate.h"
39 #include "selftest.h"
40 #include "ordered-hash-map.h"
41 #include "cgraph.h"
42 #include "cfg.h"
43 #include "digraph.h"
44 #include "analyzer/supergraph.h"
45 #include "analyzer/call-string.h"
46 #include "analyzer/program-point.h"
47 #include "analyzer/store.h"
48 #include "analyzer/region-model.h"
49 #include "analyzer/sm.h"
50 #include "analyzer/program-state.h"
51 #include "analyzer/pending-diagnostic.h"
52 
53 #if ENABLE_ANALYZER
54 
55 namespace ana {
56 
57 namespace {
58 
/* An enum for describing tainted values: which bounds (if any) have
   been checked on the value.  */

enum bounds
{
  /* This tainted value has no upper or lower bound.  */
  BOUNDS_NONE,

  /* This tainted value has an upper bound but no lower bound.  */
  BOUNDS_UPPER,

  /* This tainted value has a lower bound but no upper bound.  */
  BOUNDS_LOWER
};
72 
/* An experimental state machine, for tracking "taint": unsanitized uses
   of data potentially under an attacker's control.  */

class taint_state_machine : public state_machine
{
public:
  taint_state_machine (logger *logger);

  /* State is tracked per-svalue; compound svalues can inherit state
     from the svalues they are built from (see
     alt_get_inherited_state).  */
  bool inherited_state_p () const FINAL OVERRIDE { return true; }

  /* Compute the taint state of a unaryop/binop svalue from the
     state(s) of its operand(s), or return NULL.  */
  state_t alt_get_inherited_state (const sm_state_map &map,
				   const svalue *sval,
				   const extrinsic_state &ext_state)
    const FINAL OVERRIDE;

  /* Mark data written by "fread" as tainted, and complain about
     tainted divisors; return true if STMT was fully handled.  */
  bool on_stmt (sm_context *sm_ctxt,
		const supernode *node,
		const gimple *stmt) const FINAL OVERRIDE;

  /* Update the taint state of LHS for a comparison: gaining
     lower/upper bounds, or stopping tracking once both are known.  */
  void on_condition (sm_context *sm_ctxt,
		     const supernode *node,
		     const gimple *stmt,
		     const svalue *lhs,
		     enum tree_code op,
		     const svalue *rhs) const FINAL OVERRIDE;

  bool can_purge_p (state_t s) const FINAL OVERRIDE;

  /* If S is a tainted state, write the missing bound(s) to *OUT and
     return true; TYPE's signedness is consulted, since unsigned types
     carry an implicit lower bound of zero.  */
  bool get_taint (state_t s, tree type, enum bounds *out) const;

  /* Return the "most tainted" of states S0 and S1.  */
  state_t combine_states (state_t s0, state_t s1) const;

  /* State for a "tainted" value: unsanitized data potentially under an
     attacker's control.  */
  state_t m_tainted;

  /* State for a "tainted" value that has a lower bound.  */
  state_t m_has_lb;

  /* State for a "tainted" value that has an upper bound.  */
  state_t m_has_ub;

  /* Stop state, for a value we don't want to track any more.  */
  state_t m_stop;
};
118 
/* Class for diagnostics relating to taint_state_machine.  */

class taint_diagnostic : public pending_diagnostic
{
public:
  taint_diagnostic (const taint_state_machine &sm, tree arg,
		    enum bounds has_bounds)
  : m_sm (sm), m_arg (arg), m_has_bounds (has_bounds)
  {}

  /* Two taint diagnostics are considered equal if they relate to the
     same tree.  */
  bool subclass_equal_p (const pending_diagnostic &base_other) const OVERRIDE
  {
    return same_tree_p (m_arg, ((const taint_diagnostic &)base_other).m_arg);
  }

  /* Describe state transitions into "tainted"/"has_lb"/"has_ub" as
     events along a diagnostic path; other transitions get no label.  */
  label_text describe_state_change (const evdesc::state_change &change)
    FINAL OVERRIDE
  {
    if (change.m_new_state == m_sm.m_tainted)
      {
	if (change.m_origin)
	  return change.formatted_print ("%qE has an unchecked value here"
					 " (from %qE)",
					 change.m_expr, change.m_origin);
	else
	  return change.formatted_print ("%qE gets an unchecked value here",
					 change.m_expr);
      }
    else if (change.m_new_state == m_sm.m_has_lb)
      return change.formatted_print ("%qE has its lower bound checked here",
				     change.m_expr);
    else if (change.m_new_state == m_sm.m_has_ub)
      return change.formatted_print ("%qE has its upper bound checked here",
				     change.m_expr);
    return label_text ();
  }
protected:
  /* The taint state machine, for comparing against its states.  */
  const taint_state_machine &m_sm;
  /* The tree the diagnostic relates to; subclasses check this against
     NULL, so it can be NULL_TREE.  */
  tree m_arg;
  /* Which bound(s) had been established for the value at the point of
     the problem.  */
  enum bounds m_has_bounds;
};
160 
161 /* Concrete taint_diagnostic subclass for reporting attacker-controlled
162    array index.  */
163 
164 class tainted_array_index : public taint_diagnostic
165 {
166 public:
tainted_array_index(const taint_state_machine & sm,tree arg,enum bounds has_bounds)167   tainted_array_index (const taint_state_machine &sm, tree arg,
168 		       enum bounds has_bounds)
169   : taint_diagnostic (sm, arg, has_bounds)
170   {}
171 
get_kind() const172   const char *get_kind () const FINAL OVERRIDE { return "tainted_array_index"; }
173 
emit(rich_location * rich_loc)174   bool emit (rich_location *rich_loc) FINAL OVERRIDE
175   {
176     diagnostic_metadata m;
177     /* CWE-129: "Improper Validation of Array Index".  */
178     m.add_cwe (129);
179     switch (m_has_bounds)
180       {
181       default:
182 	gcc_unreachable ();
183       case BOUNDS_NONE:
184 	return warning_meta (rich_loc, m, OPT_Wanalyzer_tainted_array_index,
185 			     "use of attacker-controlled value %qE"
186 			     " in array lookup without bounds checking",
187 			     m_arg);
188 	break;
189       case BOUNDS_UPPER:
190 	return warning_meta (rich_loc, m, OPT_Wanalyzer_tainted_array_index,
191 			     "use of attacker-controlled value %qE"
192 			     " in array lookup without checking for negative",
193 			     m_arg);
194 	break;
195       case BOUNDS_LOWER:
196 	return warning_meta (rich_loc, m, OPT_Wanalyzer_tainted_array_index,
197 			     "use of attacker-controlled value %qE"
198 			     " in array lookup without upper-bounds checking",
199 			     m_arg);
200 	break;
201       }
202   }
203 
describe_final_event(const evdesc::final_event & ev)204   label_text describe_final_event (const evdesc::final_event &ev) FINAL OVERRIDE
205   {
206     switch (m_has_bounds)
207       {
208       default:
209 	gcc_unreachable ();
210       case BOUNDS_NONE:
211 	return ev.formatted_print
212 	  ("use of attacker-controlled value %qE in array lookup"
213 	   " without bounds checking",
214 	   m_arg);
215       case BOUNDS_UPPER:
216 	return ev.formatted_print
217 	  ("use of attacker-controlled value %qE"
218 	   " in array lookup without checking for negative",
219 	   m_arg);
220       case BOUNDS_LOWER:
221 	return ev.formatted_print
222 	  ("use of attacker-controlled value %qE"
223 	   " in array lookup without upper-bounds checking",
224 	   m_arg);
225       }
226   }
227 };
228 
229 /* Concrete taint_diagnostic subclass for reporting attacker-controlled
230    pointer offset.  */
231 
232 class tainted_offset : public taint_diagnostic
233 {
234 public:
tainted_offset(const taint_state_machine & sm,tree arg,enum bounds has_bounds)235   tainted_offset (const taint_state_machine &sm, tree arg,
236 		       enum bounds has_bounds)
237   : taint_diagnostic (sm, arg, has_bounds)
238   {}
239 
get_kind() const240   const char *get_kind () const FINAL OVERRIDE { return "tainted_offset"; }
241 
emit(rich_location * rich_loc)242   bool emit (rich_location *rich_loc) FINAL OVERRIDE
243   {
244     diagnostic_metadata m;
245     /* CWE-823: "Use of Out-of-range Pointer Offset".  */
246     m.add_cwe (823);
247     if (m_arg)
248       switch (m_has_bounds)
249 	{
250 	default:
251 	  gcc_unreachable ();
252 	case BOUNDS_NONE:
253 	  return warning_meta (rich_loc, m, OPT_Wanalyzer_tainted_offset,
254 			       "use of attacker-controlled value %qE as offset"
255 			       " without bounds checking",
256 			       m_arg);
257 	  break;
258 	case BOUNDS_UPPER:
259 	  return warning_meta (rich_loc, m, OPT_Wanalyzer_tainted_offset,
260 			       "use of attacker-controlled value %qE as offset"
261 			       " without lower-bounds checking",
262 			       m_arg);
263 	  break;
264 	case BOUNDS_LOWER:
265 	  return warning_meta (rich_loc, m, OPT_Wanalyzer_tainted_offset,
266 			       "use of attacker-controlled value %qE as offset"
267 			       " without upper-bounds checking",
268 			       m_arg);
269 	  break;
270 	}
271     else
272       switch (m_has_bounds)
273 	{
274 	default:
275 	  gcc_unreachable ();
276 	case BOUNDS_NONE:
277 	  return warning_meta (rich_loc, m, OPT_Wanalyzer_tainted_offset,
278 			       "use of attacker-controlled value as offset"
279 			       " without bounds checking");
280 	  break;
281 	case BOUNDS_UPPER:
282 	  return warning_meta (rich_loc, m, OPT_Wanalyzer_tainted_offset,
283 			       "use of attacker-controlled value as offset"
284 			       " without lower-bounds checking");
285 	  break;
286 	case BOUNDS_LOWER:
287 	  return warning_meta (rich_loc, m, OPT_Wanalyzer_tainted_offset,
288 			       "use of attacker-controlled value as offset"
289 			       " without upper-bounds checking");
290 	  break;
291 	}
292   }
293 
describe_final_event(const evdesc::final_event & ev)294   label_text describe_final_event (const evdesc::final_event &ev) FINAL OVERRIDE
295   {
296     if (m_arg)
297       switch (m_has_bounds)
298 	{
299 	default:
300 	  gcc_unreachable ();
301 	case BOUNDS_NONE:
302 	  return ev.formatted_print ("use of attacker-controlled value %qE"
303 				     " as offset without bounds checking",
304 				     m_arg);
305 	case BOUNDS_UPPER:
306 	  return ev.formatted_print ("use of attacker-controlled value %qE"
307 				     " as offset without lower-bounds checking",
308 				     m_arg);
309 	case BOUNDS_LOWER:
310 	  return ev.formatted_print ("use of attacker-controlled value %qE"
311 				     " as offset without upper-bounds checking",
312 				     m_arg);
313 	}
314     else
315       switch (m_has_bounds)
316 	{
317 	default:
318 	  gcc_unreachable ();
319 	case BOUNDS_NONE:
320 	  return ev.formatted_print ("use of attacker-controlled value"
321 				     " as offset without bounds checking");
322 	case BOUNDS_UPPER:
323 	  return ev.formatted_print ("use of attacker-controlled value"
324 				     " as offset without lower-bounds"
325 				     " checking");
326 	case BOUNDS_LOWER:
327 	  return ev.formatted_print ("use of attacker-controlled value"
328 				     " as offset without upper-bounds"
329 				     " checking");
330 	}
331   }
332 };
333 
334 /* Concrete taint_diagnostic subclass for reporting attacker-controlled
335    size.  */
336 
337 class tainted_size : public taint_diagnostic
338 {
339 public:
tainted_size(const taint_state_machine & sm,tree arg,enum bounds has_bounds,enum access_direction dir)340   tainted_size (const taint_state_machine &sm, tree arg,
341 		enum bounds has_bounds,
342 		enum access_direction dir)
343   : taint_diagnostic (sm, arg, has_bounds),
344     m_dir (dir)
345   {}
346 
get_kind() const347   const char *get_kind () const FINAL OVERRIDE { return "tainted_size"; }
348 
emit(rich_location * rich_loc)349   bool emit (rich_location *rich_loc) FINAL OVERRIDE
350   {
351     diagnostic_metadata m;
352     m.add_cwe (129);
353     switch (m_has_bounds)
354       {
355       default:
356 	gcc_unreachable ();
357       case BOUNDS_NONE:
358 	return warning_meta (rich_loc, m, OPT_Wanalyzer_tainted_size,
359 			     "use of attacker-controlled value %qE as size"
360 			     " without bounds checking",
361 			     m_arg);
362 	break;
363       case BOUNDS_UPPER:
364 	return warning_meta (rich_loc, m, OPT_Wanalyzer_tainted_size,
365 			     "use of attacker-controlled value %qE as size"
366 			     " without lower-bounds checking",
367 			     m_arg);
368 	break;
369       case BOUNDS_LOWER:
370 	return warning_meta (rich_loc, m, OPT_Wanalyzer_tainted_size,
371 			     "use of attacker-controlled value %qE as size"
372 			     " without upper-bounds checking",
373 			     m_arg);
374 	break;
375       }
376   }
377 
describe_final_event(const evdesc::final_event & ev)378   label_text describe_final_event (const evdesc::final_event &ev) FINAL OVERRIDE
379   {
380     switch (m_has_bounds)
381       {
382       default:
383 	gcc_unreachable ();
384       case BOUNDS_NONE:
385 	return ev.formatted_print ("use of attacker-controlled value %qE"
386 				   " as size without bounds checking",
387 				   m_arg);
388       case BOUNDS_UPPER:
389 	return ev.formatted_print ("use of attacker-controlled value %qE"
390 				   " as size without lower-bounds checking",
391 				   m_arg);
392       case BOUNDS_LOWER:
393 	return ev.formatted_print ("use of attacker-controlled value %qE"
394 				   " as size without upper-bounds checking",
395 				   m_arg);
396       }
397   }
398 
399 private:
400   enum access_direction m_dir;
401 };
402 
/* Concrete taint_diagnostic subclass for reporting attacker-controlled
   divisor (so that an attacker can trigger a divide by zero).  */

class tainted_divisor : public taint_diagnostic
{
public:
  tainted_divisor (const taint_state_machine &sm, tree arg,
		   enum bounds has_bounds)
  : taint_diagnostic (sm, arg, has_bounds)
  {}

  const char *get_kind () const FINAL OVERRIDE { return "tainted_divisor"; }

  /* Emit -Wanalyzer-tainted-divisor.  Unlike the other taint
     diagnostics the message ignores m_has_bounds: a bounded tainted
     divisor could still be zero.  */
  bool emit (rich_location *rich_loc) FINAL OVERRIDE
  {
    diagnostic_metadata m;
    /* CWE-369: "Divide By Zero".  */
    m.add_cwe (369);
    if (m_arg)
      return warning_meta (rich_loc, m, OPT_Wanalyzer_tainted_divisor,
			   "use of attacker-controlled value %qE as divisor"
			   " without checking for zero",
			   m_arg);
    else
      return warning_meta (rich_loc, m, OPT_Wanalyzer_tainted_divisor,
			   "use of attacker-controlled value as divisor"
			   " without checking for zero");
  }

  /* Describe the final event in the diagnostic path, matching the
     wording of the warning itself.  */
  label_text describe_final_event (const evdesc::final_event &ev) FINAL OVERRIDE
  {
    if (m_arg)
      return ev.formatted_print
	("use of attacker-controlled value %qE as divisor"
	 " without checking for zero",
	 m_arg);
    else
      return ev.formatted_print
	("use of attacker-controlled value as divisor"
	 " without checking for zero");
  }
};
445 
446 /* Concrete taint_diagnostic subclass for reporting attacker-controlled
447    size of a dynamic allocation.  */
448 
449 class tainted_allocation_size : public taint_diagnostic
450 {
451 public:
tainted_allocation_size(const taint_state_machine & sm,tree arg,enum bounds has_bounds,enum memory_space mem_space)452   tainted_allocation_size (const taint_state_machine &sm, tree arg,
453 			   enum bounds has_bounds, enum memory_space mem_space)
454   : taint_diagnostic (sm, arg, has_bounds),
455     m_mem_space (mem_space)
456   {
457     gcc_assert (mem_space == MEMSPACE_STACK || mem_space == MEMSPACE_HEAP);
458   }
459 
get_kind() const460   const char *get_kind () const FINAL OVERRIDE
461   {
462     return "tainted_allocation_size";
463   }
464 
emit(rich_location * rich_loc)465   bool emit (rich_location *rich_loc) FINAL OVERRIDE
466   {
467     diagnostic_metadata m;
468     /* "CWE-789: Memory Allocation with Excessive Size Value".  */
469     m.add_cwe (789);
470     gcc_assert (m_mem_space == MEMSPACE_STACK || m_mem_space == MEMSPACE_HEAP);
471     // TODO: make use of m_mem_space
472     if (m_arg)
473       switch (m_has_bounds)
474 	{
475 	default:
476 	  gcc_unreachable ();
477 	case BOUNDS_NONE:
478 	  return warning_meta (rich_loc, m,
479 			       OPT_Wanalyzer_tainted_allocation_size,
480 			       "use of attacker-controlled value %qE as"
481 			       " allocation size without bounds checking",
482 			       m_arg);
483 	  break;
484 	case BOUNDS_UPPER:
485 	  return warning_meta (rich_loc, m,
486 			       OPT_Wanalyzer_tainted_allocation_size,
487 			       "use of attacker-controlled value %qE as"
488 			       " allocation size without lower-bounds checking",
489 			       m_arg);
490 	  break;
491 	case BOUNDS_LOWER:
492 	  return warning_meta (rich_loc, m,
493 			       OPT_Wanalyzer_tainted_allocation_size,
494 			       "use of attacker-controlled value %qE as"
495 			       " allocation size without upper-bounds checking",
496 			     m_arg);
497 	  break;
498 	}
499     else
500       switch (m_has_bounds)
501 	{
502 	default:
503 	  gcc_unreachable ();
504 	case BOUNDS_NONE:
505 	  return warning_meta (rich_loc, m,
506 			       OPT_Wanalyzer_tainted_allocation_size,
507 			       "use of attacker-controlled value as"
508 			       " allocation size without bounds"
509 			       " checking");
510 	  break;
511 	case BOUNDS_UPPER:
512 	  return warning_meta (rich_loc, m,
513 			       OPT_Wanalyzer_tainted_allocation_size,
514 			       "use of attacker-controlled value as"
515 			       " allocation size without lower-bounds"
516 			       " checking");
517 	  break;
518 	case BOUNDS_LOWER:
519 	  return warning_meta (rich_loc, m,
520 			       OPT_Wanalyzer_tainted_allocation_size,
521 			       "use of attacker-controlled value as"
522 			       " allocation size without upper-bounds"
523 			       " checking");
524 	  break;
525 	}
526   }
527 
describe_final_event(const evdesc::final_event & ev)528   label_text describe_final_event (const evdesc::final_event &ev) FINAL OVERRIDE
529   {
530     if (m_arg)
531       switch (m_has_bounds)
532 	{
533 	default:
534 	  gcc_unreachable ();
535 	case BOUNDS_NONE:
536 	  return ev.formatted_print
537 	    ("use of attacker-controlled value %qE as allocation size"
538 	     " without bounds checking",
539 	     m_arg);
540 	case BOUNDS_UPPER:
541 	  return ev.formatted_print
542 	    ("use of attacker-controlled value %qE as allocation size"
543 	     " without lower-bounds checking",
544 	     m_arg);
545 	case BOUNDS_LOWER:
546 	  return ev.formatted_print
547 	    ("use of attacker-controlled value %qE as allocation size"
548 	     " without upper-bounds checking",
549 	     m_arg);
550 	}
551     else
552       switch (m_has_bounds)
553 	{
554 	default:
555 	  gcc_unreachable ();
556 	case BOUNDS_NONE:
557 	  return ev.formatted_print
558 	    ("use of attacker-controlled value as allocation size"
559 	     " without bounds checking");
560 	case BOUNDS_UPPER:
561 	  return ev.formatted_print
562 	    ("use of attacker-controlled value as allocation size"
563 	     " without lower-bounds checking");
564 	case BOUNDS_LOWER:
565 	  return ev.formatted_print
566 	    ("use of attacker-controlled value as allocation size"
567 	     " without upper-bounds checking");
568 	}
569   }
570 
571 private:
572   enum memory_space m_mem_space;
573 };
574 
/* taint_state_machine's ctor.  */

taint_state_machine::taint_state_machine (logger *logger)
: state_machine ("taint", logger)
{
  /* m_start is provided by the base state_machine; only the
     taint-specific states are added here.  */
  m_tainted = add_state ("tainted");
  m_has_lb = add_state ("has_lb");
  m_has_ub = add_state ("has_ub");
  m_stop = add_state ("stop");
}
585 
586 state_machine::state_t
alt_get_inherited_state(const sm_state_map & map,const svalue * sval,const extrinsic_state & ext_state) const587 taint_state_machine::alt_get_inherited_state (const sm_state_map &map,
588 					      const svalue *sval,
589 					      const extrinsic_state &ext_state)
590   const
591 {
592   switch (sval->get_kind ())
593     {
594     default:
595       break;
596     case SK_UNARYOP:
597       {
598 	const unaryop_svalue *unaryop_sval
599 	  = as_a <const unaryop_svalue *> (sval);
600 	enum tree_code op = unaryop_sval->get_op ();
601 	const svalue *arg = unaryop_sval->get_arg ();
602 	switch (op)
603 	  {
604 	  case NOP_EXPR:
605 	    {
606 	      state_t arg_state = map.get_state (arg, ext_state);
607 	      return arg_state;
608 	    }
609 	  default:
610 	    gcc_unreachable ();
611 	    break;
612 	  }
613       }
614       break;
615     case SK_BINOP:
616       {
617 	const binop_svalue *binop_sval = as_a <const binop_svalue *> (sval);
618 	enum tree_code op = binop_sval->get_op ();
619 	const svalue *arg0 = binop_sval->get_arg0 ();
620 	const svalue *arg1 = binop_sval->get_arg1 ();
621 	switch (op)
622 	  {
623 	  default:
624 	    break;
625 	  case PLUS_EXPR:
626 	  case MINUS_EXPR:
627 	  case MULT_EXPR:
628 	  case POINTER_PLUS_EXPR:
629 	  case TRUNC_DIV_EXPR:
630 	  case TRUNC_MOD_EXPR:
631 	    {
632 	      state_t arg0_state = map.get_state (arg0, ext_state);
633 	      state_t arg1_state = map.get_state (arg1, ext_state);
634 	      return combine_states (arg0_state, arg1_state);
635 	    }
636 	    break;
637 
638 	  case EQ_EXPR:
639 	  case GE_EXPR:
640 	  case LE_EXPR:
641 	  case NE_EXPR:
642 	  case GT_EXPR:
643 	  case LT_EXPR:
644 	  case UNORDERED_EXPR:
645 	  case ORDERED_EXPR:
646 	    /* Comparisons are just booleans.  */
647 	    return m_start;
648 
649 	  case BIT_AND_EXPR:
650 	  case RSHIFT_EXPR:
651 	    return NULL;
652 	  }
653       }
654       break;
655     }
656   return NULL;
657 }
658 
659 /* Implementation of state_machine::on_stmt vfunc for taint_state_machine.  */
660 
661 bool
on_stmt(sm_context * sm_ctxt,const supernode * node,const gimple * stmt) const662 taint_state_machine::on_stmt (sm_context *sm_ctxt,
663 			       const supernode *node,
664 			       const gimple *stmt) const
665 {
666   if (const gcall *call = dyn_cast <const gcall *> (stmt))
667     if (tree callee_fndecl = sm_ctxt->get_fndecl_for_call (call))
668       {
669 	if (is_named_call_p (callee_fndecl, "fread", call, 4))
670 	  {
671 	    tree arg = gimple_call_arg (call, 0);
672 
673 	    sm_ctxt->on_transition (node, stmt, arg, m_start, m_tainted);
674 
675 	    /* Dereference an ADDR_EXPR.  */
676 	    // TODO: should the engine do this?
677 	    if (TREE_CODE (arg) == ADDR_EXPR)
678 	      sm_ctxt->on_transition (node, stmt, TREE_OPERAND (arg, 0),
679 				      m_start, m_tainted);
680 	    return true;
681 	  }
682       }
683   // TODO: ...etc; many other sources of untrusted data
684 
685   if (const gassign *assign = dyn_cast <const gassign *> (stmt))
686     {
687       enum tree_code op = gimple_assign_rhs_code (assign);
688 
689       switch (op)
690 	{
691 	default:
692 	  break;
693 	case TRUNC_DIV_EXPR:
694 	case CEIL_DIV_EXPR:
695 	case FLOOR_DIV_EXPR:
696 	case ROUND_DIV_EXPR:
697 	case TRUNC_MOD_EXPR:
698 	case CEIL_MOD_EXPR:
699 	case FLOOR_MOD_EXPR:
700 	case ROUND_MOD_EXPR:
701 	case RDIV_EXPR:
702 	case EXACT_DIV_EXPR:
703 	  {
704 	    tree divisor = gimple_assign_rhs2 (assign);;
705 	    state_t state = sm_ctxt->get_state (stmt, divisor);
706 	    enum bounds b;
707 	    if (get_taint (state, TREE_TYPE (divisor), &b))
708 	      {
709 		tree diag_divisor = sm_ctxt->get_diagnostic_tree (divisor);
710 		sm_ctxt->warn  (node, stmt, divisor,
711 				new tainted_divisor (*this, diag_divisor, b));
712 		sm_ctxt->set_next_state (stmt, divisor, m_stop);
713 	      }
714 	  }
715 	  break;
716 	}
717     }
718 
719   return false;
720 }
721 
722 /* Implementation of state_machine::on_condition vfunc for taint_state_machine.
723    Potentially transition state 'tainted' to 'has_ub' or 'has_lb',
724    and states 'has_ub' and 'has_lb' to 'stop'.  */
725 
726 void
on_condition(sm_context * sm_ctxt,const supernode * node,const gimple * stmt,const svalue * lhs,enum tree_code op,const svalue * rhs ATTRIBUTE_UNUSED) const727 taint_state_machine::on_condition (sm_context *sm_ctxt,
728 				   const supernode *node,
729 				   const gimple *stmt,
730 				   const svalue *lhs,
731 				   enum tree_code op,
732 				   const svalue *rhs ATTRIBUTE_UNUSED) const
733 {
734   if (stmt == NULL)
735     return;
736 
737   // TODO: this doesn't use the RHS; should we make it symmetric?
738 
739   // TODO
740   switch (op)
741     {
742       //case NE_EXPR:
743       //case EQ_EXPR:
744     case GE_EXPR:
745     case GT_EXPR:
746       {
747 	sm_ctxt->on_transition (node, stmt, lhs, m_tainted,
748 				m_has_lb);
749 	sm_ctxt->on_transition (node, stmt, lhs, m_has_ub,
750 				m_stop);
751       }
752       break;
753     case LE_EXPR:
754     case LT_EXPR:
755       {
756 	sm_ctxt->on_transition (node, stmt, lhs, m_tainted,
757 				m_has_ub);
758 	sm_ctxt->on_transition (node, stmt, lhs, m_has_lb,
759 				m_stop);
760       }
761       break;
762     default:
763       break;
764     }
765 }
766 
/* Implementation of state_machine::can_purge_p vfunc for
   taint_state_machine: all of this sm's states can be discarded.  */

bool
taint_state_machine::can_purge_p (state_t s ATTRIBUTE_UNUSED) const
{
  return true;
}
772 
773 /* If STATE is a tainted state, write the bounds to *OUT and return true.
774    Otherwise return false.
775    Use the signedness of TYPE to determine if "has_ub" is tainted.  */
776 
777 bool
get_taint(state_t state,tree type,enum bounds * out) const778 taint_state_machine::get_taint (state_t state, tree type,
779 				enum bounds *out) const
780 {
781   /* Unsigned types have an implicit lower bound.  */
782   bool is_unsigned = false;
783   if (type)
784     if (INTEGRAL_TYPE_P (type))
785       is_unsigned = TYPE_UNSIGNED (type);
786 
787   /* Can't use a switch as the states are non-const.  */
788   if (state == m_tainted)
789     {
790       *out = is_unsigned ? BOUNDS_LOWER : BOUNDS_NONE;
791       return true;
792     }
793   else if (state == m_has_lb)
794     {
795       *out = BOUNDS_LOWER;
796       return true;
797     }
798   else if (state == m_has_ub && !is_unsigned)
799     {
800       /* Missing lower bound.  */
801       *out = BOUNDS_UPPER;
802       return true;
803     }
804   return false;
805 }
806 
/* Find the most tainted state of S0 and S1.  */

state_machine::state_t
taint_state_machine::combine_states (state_t s0, state_t s1) const
{
  gcc_assert (s0);
  gcc_assert (s1);
  if (s0 == s1)
    return s0;
  /* "tainted" dominates every other state.  */
  if (s0 == m_tainted || s1 == m_tainted)
    return m_tainted;
  /* "stop" and "start" are dominated by any other state; note that
     the order of these tests matters (both args are checked for
     "stop" before either is checked for "start").  */
  if (s0 == m_stop)
    return s1;
  if (s1 == m_stop)
    return s0;
  if (s0 == m_start)
    return s1;
  if (s1 == m_start)
    return s0;
  /* NOTE(review): combining "has_lb" with "has_ub" reaches here and
     ICEs - confirm that combination can't occur, or handle it.  */
  gcc_unreachable ();
}
828 
829 } // anonymous namespace
830 
/* Internal interface to this file. */

/* Factory for taint_state_machine instances; the caller takes
   ownership of the returned object.  */

state_machine *
make_taint_state_machine (logger *logger)
{
  return new taint_state_machine (logger);
}
838 
839 /* Complain to CTXT if accessing REG leads could lead to arbitrary
840    memory access under an attacker's control (due to taint).  */
841 
842 void
check_region_for_taint(const region * reg,enum access_direction dir,region_model_context * ctxt) const843 region_model::check_region_for_taint (const region *reg,
844 				      enum access_direction dir,
845 				      region_model_context *ctxt) const
846 {
847   gcc_assert (reg);
848   gcc_assert (ctxt);
849 
850   LOG_SCOPE (ctxt->get_logger ());
851 
852   sm_state_map *smap;
853   const state_machine *sm;
854   unsigned sm_idx;
855   if (!ctxt->get_taint_map (&smap, &sm, &sm_idx))
856     return;
857 
858   gcc_assert (smap);
859   gcc_assert (sm);
860 
861   const taint_state_machine &taint_sm = (const taint_state_machine &)*sm;
862 
863   const extrinsic_state *ext_state = ctxt->get_ext_state ();
864   if (!ext_state)
865     return;
866 
867   const region *iter_region = reg;
868   while (iter_region)
869     {
870       switch (iter_region->get_kind ())
871 	{
872 	default:
873 	  break;
874 
875 	case RK_ELEMENT:
876 	  {
877 	    const element_region *element_reg
878 	      = (const element_region *)iter_region;
879 	    const svalue *index = element_reg->get_index ();
880 	    const state_machine::state_t
881 	      state = smap->get_state (index, *ext_state);
882 	    gcc_assert (state);
883 	    enum bounds b;
884 	    if (taint_sm.get_taint (state, index->get_type (), &b))
885 	    {
886 	      tree arg = get_representative_tree (index);
887 	      ctxt->warn (new tainted_array_index (taint_sm, arg, b));
888 	    }
889 	  }
890 	  break;
891 
892 	case RK_OFFSET:
893 	  {
894 	    const offset_region *offset_reg
895 	      = (const offset_region *)iter_region;
896 	    const svalue *offset = offset_reg->get_byte_offset ();
897 	    const state_machine::state_t
898 	      state = smap->get_state (offset, *ext_state);
899 	    gcc_assert (state);
900 	    /* Handle implicit cast to sizetype.  */
901 	    tree effective_type = offset->get_type ();
902 	    if (const svalue *cast = offset->maybe_undo_cast ())
903 	      if (cast->get_type ())
904 		effective_type = cast->get_type ();
905 	    enum bounds b;
906 	    if (taint_sm.get_taint (state, effective_type, &b))
907 	      {
908 		tree arg = get_representative_tree (offset);
909 		ctxt->warn (new tainted_offset (taint_sm, arg, b));
910 	      }
911 	  }
912 	  break;
913 
914 	case RK_CAST:
915 	  {
916 	    const cast_region *cast_reg
917 	      = as_a <const cast_region *> (iter_region);
918 	    iter_region = cast_reg->get_original_region ();
919 	    continue;
920 	  }
921 
922 	case RK_SIZED:
923 	  {
924 	    const sized_region *sized_reg
925 	      = (const sized_region *)iter_region;
926 	    const svalue *size_sval = sized_reg->get_byte_size_sval (m_mgr);
927 	    const state_machine::state_t
928 	      state = smap->get_state (size_sval, *ext_state);
929 	    gcc_assert (state);
930 	    enum bounds b;
931 	    if (taint_sm.get_taint (state, size_sval->get_type (), &b))
932 	      {
933 		tree arg = get_representative_tree (size_sval);
934 		ctxt->warn (new tainted_size (taint_sm, arg, b, dir));
935 	      }
936 	  }
937 	  break;
938 	}
939 
940       iter_region = iter_region->get_parent_region ();
941     }
942 }
943 
944 /* Complain to CTXT about a tainted allocation size if SIZE_IN_BYTES is
945    under an attacker's control (due to taint), where the allocation
946    is happening within MEM_SPACE.  */
947 
948 void
check_dynamic_size_for_taint(enum memory_space mem_space,const svalue * size_in_bytes,region_model_context * ctxt) const949 region_model::check_dynamic_size_for_taint (enum memory_space mem_space,
950 					    const svalue *size_in_bytes,
951 					    region_model_context *ctxt) const
952 {
953   gcc_assert (mem_space == MEMSPACE_STACK || mem_space == MEMSPACE_HEAP);
954   gcc_assert (size_in_bytes);
955   gcc_assert (ctxt);
956 
957   LOG_SCOPE (ctxt->get_logger ());
958 
959   sm_state_map *smap;
960   const state_machine *sm;
961   unsigned sm_idx;
962   if (!ctxt->get_taint_map (&smap, &sm, &sm_idx))
963     return;
964 
965   gcc_assert (smap);
966   gcc_assert (sm);
967 
968   const taint_state_machine &taint_sm = (const taint_state_machine &)*sm;
969 
970   const extrinsic_state *ext_state = ctxt->get_ext_state ();
971   if (!ext_state)
972     return;
973 
974   const state_machine::state_t
975     state = smap->get_state (size_in_bytes, *ext_state);
976   gcc_assert (state);
977   enum bounds b;
978   if (taint_sm.get_taint (state, size_in_bytes->get_type (), &b))
979     {
980       tree arg = get_representative_tree (size_in_bytes);
981       ctxt->warn (new tainted_allocation_size (taint_sm, arg, b, mem_space));
982     }
983 }
984 
985 } // namespace ana
986 
987 #endif /* #if ENABLE_ANALYZER */
988