1 /* Proof-of-concept of a -fanalyzer plugin.
2    Detect (some) uses of CPython API outside of the Global Interpreter Lock.
3    https://docs.python.org/3/c-api/init.html#thread-state-and-the-global-interpreter-lock
4 */
5 /* { dg-options "-g" } */
6 
7 #include "gcc-plugin.h"
8 #include "config.h"
9 #include "system.h"
10 #include "coretypes.h"
11 #include "diagnostic.h"
12 #include "tree.h"
13 #include "gimple.h"
14 #include "gimple-iterator.h"
15 #include "gimple-walk.h"
16 #include "diagnostic-event-id.h"
17 #include "analyzer/analyzer.h"
18 #include "analyzer/analyzer-logging.h"
19 #include "json.h"
20 #include "analyzer/sm.h"
21 #include "analyzer/pending-diagnostic.h"
22 
23 int plugin_is_GPL_compatible;
24 
25 #if ENABLE_ANALYZER
26 
27 namespace ana {
28 
29 static bool
type_based_on_pyobject_p(tree type)30 type_based_on_pyobject_p (tree type)
31 {
32   /* Ideally we'd also check for "subclasses" here by iterating up the
33      first field of each struct.  */
34   if (TREE_CODE (type) != RECORD_TYPE)
35     return false;
36   tree name = TYPE_IDENTIFIER (type);
37   if (!name)
38     return false;
39   return id_equal (name, "PyObject");
40 }
41 
/* An experimental state machine, for tracking whether the GIL is held,
   as global state.  */
44 
45 class gil_state_machine : public state_machine
46 {
47 public:
48   gil_state_machine (logger *logger);
49 
inherited_state_p()50   bool inherited_state_p () const FINAL OVERRIDE { return false; }
51 
52   bool on_stmt (sm_context *sm_ctxt,
53 		const supernode *node,
54 		const gimple *stmt) const FINAL OVERRIDE;
55 
56   bool can_purge_p (state_t s) const FINAL OVERRIDE;
57 
58   void check_for_pyobject_usage_without_gil (sm_context *sm_ctxt,
59 					     const supernode *node,
60 					     const gimple *stmt,
61 					     tree op) const;
62 
63  private:
64   void check_for_pyobject_in_call (sm_context *sm_ctxt,
65 				   const supernode *node,
66 				   const gcall *call,
67 				   tree callee_fndecl) const;
68 
69  public:
70   /* These states are "global", rather than per-expression.  */
71 
72   /* State for when we've released the GIL.  */
73   state_t m_released_gil;
74 
75   /* Stop state.  */
76   state_t m_stop;
77 };
78 
79 /* Subclass for diagnostics involving the GIL.  */
80 
81 class gil_diagnostic : public pending_diagnostic
82 {
83 public:
fixup_location(location_t loc)84   location_t fixup_location (location_t loc) const FINAL OVERRIDE
85   {
86     /* Ideally we'd check for specific macros here, and only
87        resolve certain macros.  */
88     if (linemap_location_from_macro_expansion_p (line_table, loc))
89       loc = linemap_resolve_location (line_table, loc,
90 				      LRK_MACRO_EXPANSION_POINT, NULL);
91     return loc;
92   }
93 
describe_state_change(const evdesc::state_change & change)94   label_text describe_state_change (const evdesc::state_change &change)
95     FINAL OVERRIDE
96   {
97     if (change.is_global_p ()
98 	&& change.m_new_state == m_sm.m_released_gil)
99       return change.formatted_print ("releasing the GIL here");
100     if (change.is_global_p ()
101 	&& change.m_new_state == m_sm.get_start_state ())
102       return change.formatted_print ("acquiring the GIL here");
103     return label_text ();
104   }
105 
106  protected:
gil_diagnostic(const gil_state_machine & sm)107   gil_diagnostic (const gil_state_machine &sm) : m_sm (sm)
108   {
109   }
110 
111  private:
112   const gil_state_machine &m_sm;
113 };
114 
115 class double_save_thread : public gil_diagnostic
116 {
117  public:
double_save_thread(const gil_state_machine & sm,const gcall * call)118   double_save_thread (const gil_state_machine &sm, const gcall *call)
119   : gil_diagnostic (sm), m_call (call)
120   {}
121 
get_kind()122   const char *get_kind () const FINAL OVERRIDE
123   {
124     return "double_save_thread";
125   }
126 
subclass_equal_p(const pending_diagnostic & base_other)127   bool subclass_equal_p (const pending_diagnostic &base_other) const OVERRIDE
128   {
129     const double_save_thread &sub_other
130       = (const double_save_thread &)base_other;
131     return m_call == sub_other.m_call;
132   }
133 
emit(rich_location * rich_loc)134   bool emit (rich_location *rich_loc) FINAL OVERRIDE
135   {
136     return warning_at (rich_loc, 0,
137 		       "nested usage of %qs", "Py_BEGIN_ALLOW_THREADS");
138   }
139 
describe_final_event(const evdesc::final_event & ev)140   label_text describe_final_event (const evdesc::final_event &ev) FINAL OVERRIDE
141   {
142     return ev.formatted_print ("nested usage of %qs here",
143 			       "Py_BEGIN_ALLOW_THREADS");
144   }
145 
146  private:
147   const gcall *m_call;
148 };
149 
150 class fncall_without_gil : public gil_diagnostic
151 {
152  public:
fncall_without_gil(const gil_state_machine & sm,const gcall * call,tree callee_fndecl,unsigned arg_idx)153   fncall_without_gil (const gil_state_machine &sm, const gcall *call,
154 		      tree callee_fndecl, unsigned arg_idx)
155   : gil_diagnostic (sm), m_call (call), m_callee_fndecl (callee_fndecl),
156     m_arg_idx (arg_idx)
157   {}
158 
get_kind()159   const char *get_kind () const FINAL OVERRIDE
160   {
161     return "fncall_without_gil";
162   }
163 
subclass_equal_p(const pending_diagnostic & base_other)164   bool subclass_equal_p (const pending_diagnostic &base_other) const OVERRIDE
165   {
166     const fncall_without_gil &sub_other
167       = (const fncall_without_gil &)base_other;
168     return (m_call == sub_other.m_call
169 	    && m_callee_fndecl == sub_other.m_callee_fndecl
170 	    && m_arg_idx == sub_other.m_arg_idx);
171   }
172 
emit(rich_location * rich_loc)173   bool emit (rich_location *rich_loc) FINAL OVERRIDE
174   {
175     auto_diagnostic_group d;
176     /* There isn't a warning ID for use to use.  */
177     if (m_callee_fndecl)
178       return warning_at (rich_loc, 0,
179 			 "use of PyObject as argument %i of %qE"
180 			 " without the GIL",
181 			 m_arg_idx + 1, m_callee_fndecl);
182     else
183       return warning_at (rich_loc, 0,
184 			 "use of PyObject as argument %i of call"
185 			 " without the GIL",
186 			 m_arg_idx + 1, m_callee_fndecl);
187   }
188 
describe_final_event(const evdesc::final_event & ev)189   label_text describe_final_event (const evdesc::final_event &ev) FINAL OVERRIDE
190   {
191     if (m_callee_fndecl)
192       return ev.formatted_print ("use of PyObject as argument %i of %qE here"
193 				 " without the GIL",
194 				 m_arg_idx + 1, m_callee_fndecl);
195     else
196       return ev.formatted_print ("use of PyObject as argument %i of call here"
197 				 " without the GIL",
198 				 m_arg_idx + 1, m_callee_fndecl);
199   }
200 
201  private:
202   const gcall *m_call;
203   tree m_callee_fndecl;
204   unsigned m_arg_idx;
205 };
206 
207 class pyobject_usage_without_gil : public gil_diagnostic
208 {
209  public:
pyobject_usage_without_gil(const gil_state_machine & sm,tree expr)210   pyobject_usage_without_gil (const gil_state_machine &sm, tree expr)
211   : gil_diagnostic (sm), m_expr (expr)
212   {}
213 
get_kind()214   const char *get_kind () const FINAL OVERRIDE
215   {
216     return "pyobject_usage_without_gil";
217   }
218 
subclass_equal_p(const pending_diagnostic & base_other)219   bool subclass_equal_p (const pending_diagnostic &base_other) const OVERRIDE
220   {
221     return same_tree_p (m_expr,
222 			((const pyobject_usage_without_gil&)base_other).m_expr);
223   }
224 
emit(rich_location * rich_loc)225   bool emit (rich_location *rich_loc) FINAL OVERRIDE
226   {
227     auto_diagnostic_group d;
228     /* There isn't a warning ID for use to use.  */
229     return warning_at (rich_loc, 0,
230 		       "use of PyObject %qE without the GIL", m_expr);
231   }
232 
describe_final_event(const evdesc::final_event & ev)233   label_text describe_final_event (const evdesc::final_event &ev) FINAL OVERRIDE
234   {
235     return ev.formatted_print ("PyObject %qE used here without the GIL",
236 			       m_expr);
237   }
238 
239  private:
240   tree m_expr;
241 };
242 
243 /* gil_state_machine's ctor.  */
244 
gil_state_machine(logger * logger)245 gil_state_machine::gil_state_machine (logger *logger)
246 : state_machine ("gil", logger)
247 {
248   m_released_gil = add_state ("released_gil");
249   m_stop = add_state ("stop");
250 }
251 
252 struct cb_data
253 {
cb_datacb_data254   cb_data (const gil_state_machine &sm, sm_context *sm_ctxt,
255 	   const supernode *snode, const gimple *stmt)
256   : m_sm (sm), m_sm_ctxt (sm_ctxt), m_snode (snode), m_stmt (stmt)
257   {
258   }
259 
260   const gil_state_machine &m_sm;
261   sm_context *m_sm_ctxt;
262   const supernode *m_snode;
263   const gimple *m_stmt;
264 };
265 
266 static bool
check_for_pyobject(gimple *,tree op,tree,void * data)267 check_for_pyobject (gimple *, tree op, tree, void *data)
268 {
269   cb_data *d = (cb_data *)data;
270   d->m_sm.check_for_pyobject_usage_without_gil (d->m_sm_ctxt, d->m_snode,
271 						d->m_stmt, op);
272   return true;
273 }
274 
275 /* Assuming that the GIL has been released, complain about any
276    PyObject * arguments passed to CALL.  */
277 
278 void
check_for_pyobject_in_call(sm_context * sm_ctxt,const supernode * node,const gcall * call,tree callee_fndecl)279 gil_state_machine::check_for_pyobject_in_call (sm_context *sm_ctxt,
280 					       const supernode *node,
281 					       const gcall *call,
282 					       tree callee_fndecl) const
283 {
284   for (unsigned i = 0; i < gimple_call_num_args (call); i++)
285     {
286       tree arg = gimple_call_arg (call, i);
287       if (TREE_CODE (TREE_TYPE (arg)) != POINTER_TYPE)
288 	continue;
289       tree type = TREE_TYPE (TREE_TYPE (arg));
290       if (type_based_on_pyobject_p (type))
291 	{
292 	  sm_ctxt->warn (node, call, NULL_TREE,
293 			 new fncall_without_gil (*this, call,
294 						 callee_fndecl,
295 						 i));
296 	  sm_ctxt->set_global_state (m_stop);
297 	}
298     }
299 }
300 
301 /* Implementation of state_machine::on_stmt vfunc for gil_state_machine.  */
302 
303 bool
on_stmt(sm_context * sm_ctxt,const supernode * node,const gimple * stmt)304 gil_state_machine::on_stmt (sm_context *sm_ctxt,
305 			    const supernode *node,
306 			    const gimple *stmt) const
307 {
308   const state_t global_state = sm_ctxt->get_global_state ();
309   if (const gcall *call = dyn_cast <const gcall *> (stmt))
310     {
311       if (tree callee_fndecl = sm_ctxt->get_fndecl_for_call (call))
312 	{
313 	  if (is_named_call_p (callee_fndecl, "PyEval_SaveThread", call, 0))
314 	    {
315 	      if (0)
316 		inform (input_location, "found call to %qs",
317 			"PyEval_SaveThread");
318 	      if (global_state == m_released_gil)
319 		{
320 		  sm_ctxt->warn (node, stmt, NULL_TREE,
321 				 new double_save_thread (*this, call));
322 		  sm_ctxt->set_global_state (m_stop);
323 		}
324 	      else
325 		sm_ctxt->set_global_state (m_released_gil);
326 	      return true;
327 	    }
328 	  else if (is_named_call_p (callee_fndecl, "PyEval_RestoreThread",
329 				    call, 1))
330 	    {
331 	      if (0)
332 		inform (input_location, "found call to %qs",
333 			"PyEval_SaveThread");
334 	      if (global_state == m_released_gil)
335 		sm_ctxt->set_global_state (m_start);
336 	      return true;
337 	    }
338 	  else if (global_state == m_released_gil)
339 	    {
340 	      /* Find PyObject * args of calls to fns with unknown bodies.  */
341 	      if (!fndecl_has_gimple_body_p (callee_fndecl))
342 		check_for_pyobject_in_call (sm_ctxt, node, call, callee_fndecl);
343 	    }
344 	}
345       else if (global_state == m_released_gil)
346 	check_for_pyobject_in_call (sm_ctxt, node, call, NULL);
347     }
348   else
349     if (global_state == m_released_gil)
350       {
351 	/* Walk the stmt, finding uses of PyObject (or "subclasses").  */
352 	cb_data d (*this, sm_ctxt, node, stmt);
353 	walk_stmt_load_store_addr_ops (const_cast <gimple *> (stmt), &d,
354 				       check_for_pyobject,
355 				       check_for_pyobject,
356 				       check_for_pyobject);
357     }
358   return false;
359 }
360 
361 bool
can_purge_p(state_t s ATTRIBUTE_UNUSED)362 gil_state_machine::can_purge_p (state_t s ATTRIBUTE_UNUSED) const
363 {
364   return true;
365 }
366 
367 void
check_for_pyobject_usage_without_gil(sm_context * sm_ctxt,const supernode * node,const gimple * stmt,tree op)368 gil_state_machine::check_for_pyobject_usage_without_gil (sm_context *sm_ctxt,
369 							 const supernode *node,
370 							 const gimple *stmt,
371 							 tree op) const
372 {
373   tree type = TREE_TYPE (op);
374   if (type_based_on_pyobject_p (type))
375     {
376       sm_ctxt->warn (node, stmt, NULL_TREE,
377 		     new pyobject_usage_without_gil (*this, op));
378       sm_ctxt->set_global_state (m_stop);
379     }
380 }
381 
382 /* Callback handler for the PLUGIN_ANALYZER_INIT event.  */
383 
384 static void
gil_analyzer_init_cb(void * gcc_data,void *)385 gil_analyzer_init_cb (void *gcc_data, void */*user_data*/)
386 {
387   ana::plugin_analyzer_init_iface *iface
388     = (ana::plugin_analyzer_init_iface *)gcc_data;
389   LOG_SCOPE (iface->get_logger ());
390   if (0)
391     inform (input_location, "got here: gil_analyzer_init_cb");
392   iface->register_state_machine (new gil_state_machine (iface->get_logger ()));
393 }
394 
395 } // namespace ana
396 
397 #endif /* #if ENABLE_ANALYZER */
398 
399 int
plugin_init(struct plugin_name_args * plugin_info,struct plugin_gcc_version * version)400 plugin_init (struct plugin_name_args *plugin_info,
401 	     struct plugin_gcc_version *version)
402 {
403 #if ENABLE_ANALYZER
404   const char *plugin_name = plugin_info->base_name;
405   if (0)
406     inform (input_location, "got here; %qs", plugin_name);
407   register_callback (plugin_info->base_name,
408 		     PLUGIN_ANALYZER_INIT,
409 		     ana::gil_analyzer_init_cb,
410 		     NULL); /* void *user_data */
411 #else
412   sorry_no_analyzer ();
413 #endif
414   return 0;
415 }
416