1 /* An experimental state machine, for tracking "taint": unsanitized uses
2    of data potentially under an attacker's control.
3 
4    Copyright (C) 2019-2021 Free Software Foundation, Inc.
5    Contributed by David Malcolm <dmalcolm@redhat.com>.
6 
7 This file is part of GCC.
8 
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 3, or (at your option)
12 any later version.
13 
14 GCC is distributed in the hope that it will be useful, but
15 WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17 General Public License for more details.
18 
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3.  If not see
21 <http://www.gnu.org/licenses/>.  */
22 
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tree.h"
27 #include "function.h"
28 #include "basic-block.h"
29 #include "gimple.h"
30 #include "options.h"
31 #include "diagnostic-path.h"
32 #include "diagnostic-metadata.h"
33 #include "function.h"
34 #include "json.h"
35 #include "analyzer/analyzer.h"
36 #include "diagnostic-event-id.h"
37 #include "analyzer/analyzer-logging.h"
38 #include "analyzer/sm.h"
39 #include "analyzer/pending-diagnostic.h"
40 
41 #if ENABLE_ANALYZER
42 
43 namespace ana {
44 
45 namespace {
46 
47 /* An experimental state machine, for tracking "taint": unsanitized uses
48    of data potentially under an attacker's control.  */
49 
50 class taint_state_machine : public state_machine
51 {
52 public:
53   taint_state_machine (logger *logger);
54 
inherited_state_p() const55   bool inherited_state_p () const FINAL OVERRIDE { return true; }
56 
57   bool on_stmt (sm_context *sm_ctxt,
58 		const supernode *node,
59 		const gimple *stmt) const FINAL OVERRIDE;
60 
61   void on_condition (sm_context *sm_ctxt,
62 		     const supernode *node,
63 		     const gimple *stmt,
64 		     tree lhs,
65 		     enum tree_code op,
66 		     tree rhs) const FINAL OVERRIDE;
67 
68   bool can_purge_p (state_t s) const FINAL OVERRIDE;
69 
70   /* State for a "tainted" value: unsanitized data potentially under an
71      attacker's control.  */
72   state_t m_tainted;
73 
74   /* State for a "tainted" value that has a lower bound.  */
75   state_t m_has_lb;
76 
77   /* State for a "tainted" value that has an upper bound.  */
78   state_t m_has_ub;
79 
80   /* Stop state, for a value we don't want to track any more.  */
81   state_t m_stop;
82 };
83 
84 enum bounds
85 {
86   BOUNDS_NONE,
87   BOUNDS_UPPER,
88   BOUNDS_LOWER
89 };
90 
91 class tainted_array_index
92   : public pending_diagnostic_subclass<tainted_array_index>
93 {
94 public:
tainted_array_index(const taint_state_machine & sm,tree arg,enum bounds has_bounds)95   tainted_array_index (const taint_state_machine &sm, tree arg,
96 		       enum bounds has_bounds)
97   : m_sm (sm), m_arg (arg), m_has_bounds (has_bounds) {}
98 
get_kind() const99   const char *get_kind () const FINAL OVERRIDE { return "tainted_array_index"; }
100 
operator ==(const tainted_array_index & other) const101   bool operator== (const tainted_array_index &other) const
102   {
103     return same_tree_p (m_arg, other.m_arg);
104   }
105 
emit(rich_location * rich_loc)106   bool emit (rich_location *rich_loc) FINAL OVERRIDE
107   {
108     diagnostic_metadata m;
109     m.add_cwe (129);
110     switch (m_has_bounds)
111       {
112       default:
113 	gcc_unreachable ();
114       case BOUNDS_NONE:
115 	return warning_meta (rich_loc, m, OPT_Wanalyzer_tainted_array_index,
116 			     "use of tainted value %qE in array lookup"
117 			     " without bounds checking",
118 			     m_arg);
119 	break;
120       case BOUNDS_UPPER:
121 	return warning_meta (rich_loc, m, OPT_Wanalyzer_tainted_array_index,
122 			     "use of tainted value %qE in array lookup"
123 			     " without lower-bounds checking",
124 			     m_arg);
125 	break;
126       case BOUNDS_LOWER:
127 	return warning_meta (rich_loc, m, OPT_Wanalyzer_tainted_array_index,
128 			     "use of tainted value %qE in array lookup"
129 			     " without upper-bounds checking",
130 			     m_arg);
131 	break;
132       }
133   }
134 
describe_state_change(const evdesc::state_change & change)135   label_text describe_state_change (const evdesc::state_change &change)
136     FINAL OVERRIDE
137   {
138     if (change.m_new_state == m_sm.m_tainted)
139       {
140 	if (change.m_origin)
141 	  return change.formatted_print ("%qE has an unchecked value here"
142 					 " (from %qE)",
143 					 change.m_expr, change.m_origin);
144 	else
145 	  return change.formatted_print ("%qE gets an unchecked value here",
146 					 change.m_expr);
147       }
148     else if (change.m_new_state == m_sm.m_has_lb)
149       return change.formatted_print ("%qE has its lower bound checked here",
150 				     change.m_expr);
151     else if (change.m_new_state == m_sm.m_has_ub)
152       return change.formatted_print ("%qE has its upper bound checked here",
153 				     change.m_expr);
154     return label_text ();
155   }
156 
describe_final_event(const evdesc::final_event & ev)157   label_text describe_final_event (const evdesc::final_event &ev) FINAL OVERRIDE
158   {
159     switch (m_has_bounds)
160       {
161       default:
162 	gcc_unreachable ();
163       case BOUNDS_NONE:
164 	return ev.formatted_print ("use of tainted value %qE in array lookup"
165 				   " without bounds checking",
166 				   m_arg);
167       case BOUNDS_UPPER:
168 	return ev.formatted_print ("use of tainted value %qE in array lookup"
169 				   " without lower-bounds checking",
170 				   m_arg);
171       case BOUNDS_LOWER:
172 	return ev.formatted_print ("use of tainted value %qE in array lookup"
173 				   " without upper-bounds checking",
174 				   m_arg);
175       }
176   }
177 
178 private:
179   const taint_state_machine &m_sm;
180   tree m_arg;
181   enum bounds m_has_bounds;
182 };
183 
184 /* taint_state_machine's ctor.  */
185 
taint_state_machine(logger * logger)186 taint_state_machine::taint_state_machine (logger *logger)
187 : state_machine ("taint", logger)
188 {
189   m_tainted = add_state ("tainted");
190   m_has_lb = add_state ("has_lb");
191   m_has_ub = add_state ("has_ub");
192   m_stop = add_state ("stop");
193 }
194 
195 /* Implementation of state_machine::on_stmt vfunc for taint_state_machine.  */
196 
197 bool
on_stmt(sm_context * sm_ctxt,const supernode * node,const gimple * stmt) const198 taint_state_machine::on_stmt (sm_context *sm_ctxt,
199 			       const supernode *node,
200 			       const gimple *stmt) const
201 {
202   if (const gcall *call = dyn_cast <const gcall *> (stmt))
203     if (tree callee_fndecl = sm_ctxt->get_fndecl_for_call (call))
204       {
205 	if (is_named_call_p (callee_fndecl, "fread", call, 4))
206 	  {
207 	    tree arg = gimple_call_arg (call, 0);
208 
209 	    sm_ctxt->on_transition (node, stmt, arg, m_start, m_tainted);
210 
211 	    /* Dereference an ADDR_EXPR.  */
212 	    // TODO: should the engine do this?
213 	    if (TREE_CODE (arg) == ADDR_EXPR)
214 	      sm_ctxt->on_transition (node, stmt, TREE_OPERAND (arg, 0),
215 				      m_start, m_tainted);
216 	    return true;
217 	  }
218       }
219   // TODO: ...etc; many other sources of untrusted data
220 
221   if (const gassign *assign = dyn_cast <const gassign *> (stmt))
222     {
223       tree rhs1 = gimple_assign_rhs1 (assign);
224       enum tree_code op = gimple_assign_rhs_code (assign);
225 
226       /* Check array accesses.  */
227       if (op == ARRAY_REF)
228 	{
229 	  tree arg = TREE_OPERAND (rhs1, 1);
230 
231 	  /* Unsigned types have an implicit lower bound.  */
232 	  bool is_unsigned = false;
233 	  if (INTEGRAL_TYPE_P (TREE_TYPE (arg)))
234 	    is_unsigned = TYPE_UNSIGNED (TREE_TYPE (arg));
235 
236 	  state_t state = sm_ctxt->get_state (stmt, arg);
237 	  /* Can't use a switch as the states are non-const.  */
238 	  if (state == m_tainted)
239 	    {
240 	      /* Complain about missing bounds.  */
241 	      tree diag_arg = sm_ctxt->get_diagnostic_tree (arg);
242 	      pending_diagnostic *d
243 		= new tainted_array_index (*this, diag_arg,
244 					   is_unsigned
245 					   ? BOUNDS_LOWER : BOUNDS_NONE);
246 	      sm_ctxt->warn (node, stmt, arg, d);
247 	      sm_ctxt->set_next_state (stmt, arg, m_stop);
248 	    }
249 	  else if (state == m_has_lb)
250 	    {
251 	      /* Complain about missing upper bound.  */
252 	      tree diag_arg = sm_ctxt->get_diagnostic_tree (arg);
253 	      sm_ctxt->warn (node, stmt, arg,
254 			      new tainted_array_index (*this, diag_arg,
255 						       BOUNDS_LOWER));
256 	      sm_ctxt->set_next_state (stmt, arg, m_stop);
257 	    }
258 	  else if (state == m_has_ub)
259 	    {
260 	      /* Complain about missing lower bound.  */
261 	      if (!is_unsigned)
262 		{
263 		  tree diag_arg = sm_ctxt->get_diagnostic_tree (arg);
264 		  sm_ctxt->warn  (node, stmt, arg,
265 				  new tainted_array_index (*this, diag_arg,
266 							   BOUNDS_UPPER));
267 		  sm_ctxt->set_next_state (stmt, arg, m_stop);
268 		}
269 	    }
270 	}
271     }
272 
273   return false;
274 }
275 
276 /* Implementation of state_machine::on_condition vfunc for taint_state_machine.
277    Potentially transition state 'tainted' to 'has_ub' or 'has_lb',
278    and states 'has_ub' and 'has_lb' to 'stop'.  */
279 
280 void
on_condition(sm_context * sm_ctxt,const supernode * node,const gimple * stmt,tree lhs,enum tree_code op,tree rhs ATTRIBUTE_UNUSED) const281 taint_state_machine::on_condition (sm_context *sm_ctxt,
282 				   const supernode *node,
283 				   const gimple *stmt,
284 				   tree lhs,
285 				   enum tree_code op,
286 				   tree rhs ATTRIBUTE_UNUSED) const
287 {
288   if (stmt == NULL)
289     return;
290 
291   // TODO: this doesn't use the RHS; should we make it symmetric?
292 
293   // TODO
294   switch (op)
295     {
296       //case NE_EXPR:
297       //case EQ_EXPR:
298     case GE_EXPR:
299     case GT_EXPR:
300       {
301 	sm_ctxt->on_transition (node, stmt, lhs, m_tainted,
302 				m_has_lb);
303 	sm_ctxt->on_transition (node, stmt, lhs, m_has_ub,
304 				m_stop);
305       }
306       break;
307     case LE_EXPR:
308     case LT_EXPR:
309       {
310 	sm_ctxt->on_transition (node, stmt, lhs, m_tainted,
311 				m_has_ub);
312 	sm_ctxt->on_transition (node, stmt, lhs, m_has_lb,
313 				m_stop);
314       }
315       break;
316     default:
317       break;
318     }
319 }
320 
321 bool
can_purge_p(state_t s ATTRIBUTE_UNUSED) const322 taint_state_machine::can_purge_p (state_t s ATTRIBUTE_UNUSED) const
323 {
324   return true;
325 }
326 
327 } // anonymous namespace
328 
329 /* Internal interface to this file. */
330 
331 state_machine *
make_taint_state_machine(logger * logger)332 make_taint_state_machine (logger *logger)
333 {
334   return new taint_state_machine (logger);
335 }
336 
337 } // namespace ana
338 
339 #endif /* #if ENABLE_ANALYZER */
340