1 /* Proof-of-concept of a -fanalyzer plugin.
2    Detect (some) uses of CPython API outside of the Global Interpreter Lock.
3    https://docs.python.org/3/c-api/init.html#thread-state-and-the-global-interpreter-lock
4 */
5 /* { dg-options "-g" } */
6 
7 #include "gcc-plugin.h"
8 #include "config.h"
9 #include "system.h"
10 #include "coretypes.h"
11 #include "diagnostic.h"
12 #include "tree.h"
13 #include "gimple.h"
14 #include "gimple-iterator.h"
15 #include "gimple-walk.h"
16 #include "diagnostic-event-id.h"
17 #include "analyzer/analyzer.h"
18 #include "analyzer/analyzer-logging.h"
19 #include "json.h"
20 #include "analyzer/sm.h"
21 #include "analyzer/pending-diagnostic.h"
22 
/* Symbol by which a GCC plugin asserts that it is licensed under a
   GPL-compatible license (see the GCC plugin documentation).  */
int plugin_is_GPL_compatible;
24 
25 #if ENABLE_ANALYZER
26 
27 namespace ana {
28 
29 static bool
type_based_on_pyobject_p(tree type)30 type_based_on_pyobject_p (tree type)
31 {
32   /* Ideally we'd also check for "subclasses" here by iterating up the
33      first field of each struct.  */
34   if (TREE_CODE (type) != RECORD_TYPE)
35     return false;
36   tree name = TYPE_IDENTIFIER (type);
37   if (!name)
38     return false;
39   return id_equal (name, "PyObject");
40 }
41 
42 /* An experimental state machine, for tracking whether the GIL is held,
43    as global state..  */
44 
45 class gil_state_machine : public state_machine
46 {
47 public:
48   gil_state_machine (logger *logger);
49 
inherited_state_p()50   bool inherited_state_p () const FINAL OVERRIDE { return false; }
51 
52   bool on_stmt (sm_context *sm_ctxt,
53 		const supernode *node,
54 		const gimple *stmt) const FINAL OVERRIDE;
55 
56   void on_condition (sm_context *sm_ctxt,
57 		     const supernode *node,
58 		     const gimple *stmt,
59 		     tree lhs,
60 		     enum tree_code op,
61 		     tree rhs) const FINAL OVERRIDE;
62 
63   bool can_purge_p (state_t s) const FINAL OVERRIDE;
64 
65   void check_for_pyobject_usage_without_gil (sm_context *sm_ctxt,
66 					     const supernode *node,
67 					     const gimple *stmt,
68 					     tree op) const;
69 
70  private:
71   void check_for_pyobject_in_call (sm_context *sm_ctxt,
72 				   const supernode *node,
73 				   const gcall *call,
74 				   tree callee_fndecl) const;
75 
76  public:
77   /* These states are "global", rather than per-expression.  */
78 
79   /* State for when we've released the GIL.  */
80   state_t m_released_gil;
81 
82   /* Stop state.  */
83   state_t m_stop;
84 };
85 
86 /* Subclass for diagnostics involving the GIL.  */
87 
88 class gil_diagnostic : public pending_diagnostic
89 {
90 public:
fixup_location(location_t loc)91   location_t fixup_location (location_t loc) const FINAL OVERRIDE
92   {
93     /* Ideally we'd check for specific macros here, and only
94        resolve certain macros.  */
95     if (linemap_location_from_macro_expansion_p (line_table, loc))
96       loc = linemap_resolve_location (line_table, loc,
97 				      LRK_MACRO_EXPANSION_POINT, NULL);
98     return loc;
99   }
100 
describe_state_change(const evdesc::state_change & change)101   label_text describe_state_change (const evdesc::state_change &change)
102     FINAL OVERRIDE
103   {
104     if (change.is_global_p ()
105 	&& change.m_new_state == m_sm.m_released_gil)
106       return change.formatted_print ("releasing the GIL here");
107     if (change.is_global_p ()
108 	&& change.m_new_state == m_sm.get_start_state ())
109       return change.formatted_print ("acquiring the GIL here");
110     return label_text ();
111   }
112 
113  protected:
gil_diagnostic(const gil_state_machine & sm)114   gil_diagnostic (const gil_state_machine &sm) : m_sm (sm)
115   {
116   }
117 
118  private:
119   const gil_state_machine &m_sm;
120 };
121 
122 class double_save_thread : public gil_diagnostic
123 {
124  public:
double_save_thread(const gil_state_machine & sm,const gcall * call)125   double_save_thread (const gil_state_machine &sm, const gcall *call)
126   : gil_diagnostic (sm), m_call (call)
127   {}
128 
get_kind()129   const char *get_kind () const FINAL OVERRIDE
130   {
131     return "double_save_thread";
132   }
133 
subclass_equal_p(const pending_diagnostic & base_other)134   bool subclass_equal_p (const pending_diagnostic &base_other) const OVERRIDE
135   {
136     const double_save_thread &sub_other
137       = (const double_save_thread &)base_other;
138     return m_call == sub_other.m_call;
139   }
140 
emit(rich_location * rich_loc)141   bool emit (rich_location *rich_loc) FINAL OVERRIDE
142   {
143     return warning_at (rich_loc, 0,
144 		       "nested usage of %qs", "Py_BEGIN_ALLOW_THREADS");
145   }
146 
describe_final_event(const evdesc::final_event & ev)147   label_text describe_final_event (const evdesc::final_event &ev) FINAL OVERRIDE
148   {
149     return ev.formatted_print ("nested usage of %qs here",
150 			       "Py_BEGIN_ALLOW_THREADS");
151   }
152 
153  private:
154   const gcall *m_call;
155 };
156 
157 class fncall_without_gil : public gil_diagnostic
158 {
159  public:
fncall_without_gil(const gil_state_machine & sm,const gcall * call,tree callee_fndecl,unsigned arg_idx)160   fncall_without_gil (const gil_state_machine &sm, const gcall *call,
161 		      tree callee_fndecl, unsigned arg_idx)
162   : gil_diagnostic (sm), m_call (call), m_callee_fndecl (callee_fndecl),
163     m_arg_idx (arg_idx)
164   {}
165 
get_kind()166   const char *get_kind () const FINAL OVERRIDE
167   {
168     return "fncall_without_gil";
169   }
170 
subclass_equal_p(const pending_diagnostic & base_other)171   bool subclass_equal_p (const pending_diagnostic &base_other) const OVERRIDE
172   {
173     const fncall_without_gil &sub_other
174       = (const fncall_without_gil &)base_other;
175     return (m_call == sub_other.m_call
176 	    && m_callee_fndecl == sub_other.m_callee_fndecl
177 	    && m_arg_idx == sub_other.m_arg_idx);
178   }
179 
emit(rich_location * rich_loc)180   bool emit (rich_location *rich_loc) FINAL OVERRIDE
181   {
182     auto_diagnostic_group d;
183     /* There isn't a warning ID for use to use.  */
184     if (m_callee_fndecl)
185       return warning_at (rich_loc, 0,
186 			 "use of PyObject as argument %i of %qE"
187 			 " without the GIL",
188 			 m_arg_idx + 1, m_callee_fndecl);
189     else
190       return warning_at (rich_loc, 0,
191 			 "use of PyObject as argument %i of call"
192 			 " without the GIL",
193 			 m_arg_idx + 1, m_callee_fndecl);
194   }
195 
describe_final_event(const evdesc::final_event & ev)196   label_text describe_final_event (const evdesc::final_event &ev) FINAL OVERRIDE
197   {
198     if (m_callee_fndecl)
199       return ev.formatted_print ("use of PyObject as argument %i of %qE here"
200 				 " without the GIL",
201 				 m_arg_idx + 1, m_callee_fndecl);
202     else
203       return ev.formatted_print ("use of PyObject as argument %i of call here"
204 				 " without the GIL",
205 				 m_arg_idx + 1, m_callee_fndecl);
206   }
207 
208  private:
209   const gcall *m_call;
210   tree m_callee_fndecl;
211   unsigned m_arg_idx;
212 };
213 
214 class pyobject_usage_without_gil : public gil_diagnostic
215 {
216  public:
pyobject_usage_without_gil(const gil_state_machine & sm,tree expr)217   pyobject_usage_without_gil (const gil_state_machine &sm, tree expr)
218   : gil_diagnostic (sm), m_expr (expr)
219   {}
220 
get_kind()221   const char *get_kind () const FINAL OVERRIDE
222   {
223     return "pyobject_usage_without_gil";
224   }
225 
subclass_equal_p(const pending_diagnostic & base_other)226   bool subclass_equal_p (const pending_diagnostic &base_other) const OVERRIDE
227   {
228     return same_tree_p (m_expr,
229 			((const pyobject_usage_without_gil&)base_other).m_expr);
230   }
231 
emit(rich_location * rich_loc)232   bool emit (rich_location *rich_loc) FINAL OVERRIDE
233   {
234     auto_diagnostic_group d;
235     /* There isn't a warning ID for use to use.  */
236     return warning_at (rich_loc, 0,
237 		       "use of PyObject %qE without the GIL", m_expr);
238   }
239 
describe_final_event(const evdesc::final_event & ev)240   label_text describe_final_event (const evdesc::final_event &ev) FINAL OVERRIDE
241   {
242     return ev.formatted_print ("PyObject %qE used here without the GIL",
243 			       m_expr);
244   }
245 
246  private:
247   tree m_expr;
248 };
249 
250 /* gil_state_machine's ctor.  */
251 
gil_state_machine(logger * logger)252 gil_state_machine::gil_state_machine (logger *logger)
253 : state_machine ("gil", logger)
254 {
255   m_released_gil = add_state ("released_gil");
256   m_stop = add_state ("stop");
257 }
258 
259 struct cb_data
260 {
cb_datacb_data261   cb_data (const gil_state_machine &sm, sm_context *sm_ctxt,
262 	   const supernode *snode, const gimple *stmt)
263   : m_sm (sm), m_sm_ctxt (sm_ctxt), m_snode (snode), m_stmt (stmt)
264   {
265   }
266 
267   const gil_state_machine &m_sm;
268   sm_context *m_sm_ctxt;
269   const supernode *m_snode;
270   const gimple *m_stmt;
271 };
272 
273 static bool
check_for_pyobject(gimple *,tree op,tree,void * data)274 check_for_pyobject (gimple *, tree op, tree, void *data)
275 {
276   cb_data *d = (cb_data *)data;
277   d->m_sm.check_for_pyobject_usage_without_gil (d->m_sm_ctxt, d->m_snode,
278 						d->m_stmt, op);
279   return true;
280 }
281 
282 /* Assuming that the GIL has been released, complain about any
283    PyObject * arguments passed to CALL.  */
284 
285 void
check_for_pyobject_in_call(sm_context * sm_ctxt,const supernode * node,const gcall * call,tree callee_fndecl)286 gil_state_machine::check_for_pyobject_in_call (sm_context *sm_ctxt,
287 					       const supernode *node,
288 					       const gcall *call,
289 					       tree callee_fndecl) const
290 {
291   for (unsigned i = 0; i < gimple_call_num_args (call); i++)
292     {
293       tree arg = gimple_call_arg (call, i);
294       if (TREE_CODE (TREE_TYPE (arg)) != POINTER_TYPE)
295 	continue;
296       tree type = TREE_TYPE (TREE_TYPE (arg));
297       if (type_based_on_pyobject_p (type))
298 	{
299 	  sm_ctxt->warn (node, call, NULL_TREE,
300 			 new fncall_without_gil (*this, call,
301 						 callee_fndecl,
302 						 i));
303 	  sm_ctxt->set_global_state (m_stop);
304 	}
305     }
306 }
307 
308 /* Implementation of state_machine::on_stmt vfunc for gil_state_machine.  */
309 
310 bool
on_stmt(sm_context * sm_ctxt,const supernode * node,const gimple * stmt)311 gil_state_machine::on_stmt (sm_context *sm_ctxt,
312 			    const supernode *node,
313 			    const gimple *stmt) const
314 {
315   const state_t global_state = sm_ctxt->get_global_state ();
316   if (const gcall *call = dyn_cast <const gcall *> (stmt))
317     {
318       if (tree callee_fndecl = sm_ctxt->get_fndecl_for_call (call))
319 	{
320 	  if (is_named_call_p (callee_fndecl, "PyEval_SaveThread", call, 0))
321 	    {
322 	      if (0)
323 		inform (input_location, "found call to %qs",
324 			"PyEval_SaveThread");
325 	      if (global_state == m_released_gil)
326 		{
327 		  sm_ctxt->warn (node, stmt, NULL_TREE,
328 				 new double_save_thread (*this, call));
329 		  sm_ctxt->set_global_state (m_stop);
330 		}
331 	      else
332 		sm_ctxt->set_global_state (m_released_gil);
333 	      return true;
334 	    }
335 	  else if (is_named_call_p (callee_fndecl, "PyEval_RestoreThread",
336 				    call, 1))
337 	    {
338 	      if (0)
339 		inform (input_location, "found call to %qs",
340 			"PyEval_SaveThread");
341 	      if (global_state == m_released_gil)
342 		sm_ctxt->set_global_state (m_start);
343 	      return true;
344 	    }
345 	  else if (global_state == m_released_gil)
346 	    {
347 	      /* Find PyObject * args of calls to fns with unknown bodies.  */
348 	      if (!fndecl_has_gimple_body_p (callee_fndecl))
349 		check_for_pyobject_in_call (sm_ctxt, node, call, callee_fndecl);
350 	    }
351 	}
352       else if (global_state == m_released_gil)
353 	check_for_pyobject_in_call (sm_ctxt, node, call, NULL);
354     }
355   else
356     if (global_state == m_released_gil)
357       {
358 	/* Walk the stmt, finding uses of PyObject (or "subclasses").  */
359 	cb_data d (*this, sm_ctxt, node, stmt);
360 	walk_stmt_load_store_addr_ops (const_cast <gimple *> (stmt), &d,
361 				       check_for_pyobject,
362 				       check_for_pyobject,
363 				       check_for_pyobject);
364     }
365   return false;
366 }
367 
368 /* Implementation of state_machine::on_condition vfunc for
369    gil_state_machine.  */
370 
371 void
on_condition(sm_context * sm_ctxt ATTRIBUTE_UNUSED,const supernode * node ATTRIBUTE_UNUSED,const gimple * stmt ATTRIBUTE_UNUSED,tree lhs ATTRIBUTE_UNUSED,enum tree_code op ATTRIBUTE_UNUSED,tree rhs ATTRIBUTE_UNUSED)372 gil_state_machine::on_condition (sm_context *sm_ctxt ATTRIBUTE_UNUSED,
373 				 const supernode *node ATTRIBUTE_UNUSED,
374 				 const gimple *stmt ATTRIBUTE_UNUSED,
375 				 tree lhs ATTRIBUTE_UNUSED,
376 				 enum tree_code op ATTRIBUTE_UNUSED,
377 				 tree rhs ATTRIBUTE_UNUSED) const
378 {
379   // Empty
380 }
381 
382 bool
can_purge_p(state_t s ATTRIBUTE_UNUSED)383 gil_state_machine::can_purge_p (state_t s ATTRIBUTE_UNUSED) const
384 {
385   return true;
386 }
387 
388 void
check_for_pyobject_usage_without_gil(sm_context * sm_ctxt,const supernode * node,const gimple * stmt,tree op)389 gil_state_machine::check_for_pyobject_usage_without_gil (sm_context *sm_ctxt,
390 							 const supernode *node,
391 							 const gimple *stmt,
392 							 tree op) const
393 {
394   tree type = TREE_TYPE (op);
395   if (type_based_on_pyobject_p (type))
396     {
397       sm_ctxt->warn (node, stmt, NULL_TREE,
398 		     new pyobject_usage_without_gil (*this, op));
399       sm_ctxt->set_global_state (m_stop);
400     }
401 }
402 
403 /* Callback handler for the PLUGIN_ANALYZER_INIT event.  */
404 
405 static void
gil_analyzer_init_cb(void * gcc_data,void *)406 gil_analyzer_init_cb (void *gcc_data, void */*user_data*/)
407 {
408   ana::plugin_analyzer_init_iface *iface
409     = (ana::plugin_analyzer_init_iface *)gcc_data;
410   LOG_SCOPE (iface->get_logger ());
411   if (0)
412     inform (input_location, "got here: gil_analyzer_init_cb");
413   iface->register_state_machine (new gil_state_machine (iface->get_logger ()));
414 }
415 
416 } // namespace ana
417 
418 #endif /* #if ENABLE_ANALYZER */
419 
420 int
plugin_init(struct plugin_name_args * plugin_info,struct plugin_gcc_version * version)421 plugin_init (struct plugin_name_args *plugin_info,
422 	     struct plugin_gcc_version *version)
423 {
424 #if ENABLE_ANALYZER
425   const char *plugin_name = plugin_info->base_name;
426   if (0)
427     inform (input_location, "got here; %qs", plugin_name);
428   register_callback (plugin_info->base_name,
429 		     PLUGIN_ANALYZER_INIT,
430 		     ana::gil_analyzer_init_cb,
431 		     NULL); /* void *user_data */
432 #else
433   sorry_no_analyzer ();
434 #endif
435   return 0;
436 }
437