/* Proof-of-concept of a -fanalyzer plugin.
   Detect (some) uses of the CPython API made without holding the
   Global Interpreter Lock (GIL).
   https://docs.python.org/3/c-api/init.html#thread-state-and-the-global-interpreter-lock
*/
5 /* { dg-options "-g" } */
6
7 #include "gcc-plugin.h"
8 #include "config.h"
9 #include "system.h"
10 #include "coretypes.h"
11 #include "diagnostic.h"
12 #include "tree.h"
13 #include "gimple.h"
14 #include "gimple-iterator.h"
15 #include "gimple-walk.h"
16 #include "diagnostic-event-id.h"
17 #include "analyzer/analyzer.h"
18 #include "analyzer/analyzer-logging.h"
19 #include "json.h"
20 #include "analyzer/sm.h"
21 #include "analyzer/pending-diagnostic.h"
22
/* Per the GCC plugin API, a plugin exports this symbol to assert that it
   is licensed under a GPL-compatible license.  Only its presence
   matters; the value is never read in this file.  */
int plugin_is_GPL_compatible;
24
25 #if ENABLE_ANALYZER
26
27 namespace ana {
28
29 static bool
type_based_on_pyobject_p(tree type)30 type_based_on_pyobject_p (tree type)
31 {
32 /* Ideally we'd also check for "subclasses" here by iterating up the
33 first field of each struct. */
34 if (TREE_CODE (type) != RECORD_TYPE)
35 return false;
36 tree name = TYPE_IDENTIFIER (type);
37 if (!name)
38 return false;
39 return id_equal (name, "PyObject");
40 }
41
42 /* An experimental state machine, for tracking whether the GIL is held,
43 as global state.. */
44
45 class gil_state_machine : public state_machine
46 {
47 public:
48 gil_state_machine (logger *logger);
49
inherited_state_p()50 bool inherited_state_p () const FINAL OVERRIDE { return false; }
51
52 bool on_stmt (sm_context *sm_ctxt,
53 const supernode *node,
54 const gimple *stmt) const FINAL OVERRIDE;
55
56 void on_condition (sm_context *sm_ctxt,
57 const supernode *node,
58 const gimple *stmt,
59 tree lhs,
60 enum tree_code op,
61 tree rhs) const FINAL OVERRIDE;
62
63 bool can_purge_p (state_t s) const FINAL OVERRIDE;
64
65 void check_for_pyobject_usage_without_gil (sm_context *sm_ctxt,
66 const supernode *node,
67 const gimple *stmt,
68 tree op) const;
69
70 private:
71 void check_for_pyobject_in_call (sm_context *sm_ctxt,
72 const supernode *node,
73 const gcall *call,
74 tree callee_fndecl) const;
75
76 public:
77 /* These states are "global", rather than per-expression. */
78
79 /* State for when we've released the GIL. */
80 state_t m_released_gil;
81
82 /* Stop state. */
83 state_t m_stop;
84 };
85
86 /* Subclass for diagnostics involving the GIL. */
87
88 class gil_diagnostic : public pending_diagnostic
89 {
90 public:
fixup_location(location_t loc)91 location_t fixup_location (location_t loc) const FINAL OVERRIDE
92 {
93 /* Ideally we'd check for specific macros here, and only
94 resolve certain macros. */
95 if (linemap_location_from_macro_expansion_p (line_table, loc))
96 loc = linemap_resolve_location (line_table, loc,
97 LRK_MACRO_EXPANSION_POINT, NULL);
98 return loc;
99 }
100
describe_state_change(const evdesc::state_change & change)101 label_text describe_state_change (const evdesc::state_change &change)
102 FINAL OVERRIDE
103 {
104 if (change.is_global_p ()
105 && change.m_new_state == m_sm.m_released_gil)
106 return change.formatted_print ("releasing the GIL here");
107 if (change.is_global_p ()
108 && change.m_new_state == m_sm.get_start_state ())
109 return change.formatted_print ("acquiring the GIL here");
110 return label_text ();
111 }
112
113 protected:
gil_diagnostic(const gil_state_machine & sm)114 gil_diagnostic (const gil_state_machine &sm) : m_sm (sm)
115 {
116 }
117
118 private:
119 const gil_state_machine &m_sm;
120 };
121
122 class double_save_thread : public gil_diagnostic
123 {
124 public:
double_save_thread(const gil_state_machine & sm,const gcall * call)125 double_save_thread (const gil_state_machine &sm, const gcall *call)
126 : gil_diagnostic (sm), m_call (call)
127 {}
128
get_kind()129 const char *get_kind () const FINAL OVERRIDE
130 {
131 return "double_save_thread";
132 }
133
subclass_equal_p(const pending_diagnostic & base_other)134 bool subclass_equal_p (const pending_diagnostic &base_other) const OVERRIDE
135 {
136 const double_save_thread &sub_other
137 = (const double_save_thread &)base_other;
138 return m_call == sub_other.m_call;
139 }
140
emit(rich_location * rich_loc)141 bool emit (rich_location *rich_loc) FINAL OVERRIDE
142 {
143 return warning_at (rich_loc, 0,
144 "nested usage of %qs", "Py_BEGIN_ALLOW_THREADS");
145 }
146
describe_final_event(const evdesc::final_event & ev)147 label_text describe_final_event (const evdesc::final_event &ev) FINAL OVERRIDE
148 {
149 return ev.formatted_print ("nested usage of %qs here",
150 "Py_BEGIN_ALLOW_THREADS");
151 }
152
153 private:
154 const gcall *m_call;
155 };
156
157 class fncall_without_gil : public gil_diagnostic
158 {
159 public:
fncall_without_gil(const gil_state_machine & sm,const gcall * call,tree callee_fndecl,unsigned arg_idx)160 fncall_without_gil (const gil_state_machine &sm, const gcall *call,
161 tree callee_fndecl, unsigned arg_idx)
162 : gil_diagnostic (sm), m_call (call), m_callee_fndecl (callee_fndecl),
163 m_arg_idx (arg_idx)
164 {}
165
get_kind()166 const char *get_kind () const FINAL OVERRIDE
167 {
168 return "fncall_without_gil";
169 }
170
subclass_equal_p(const pending_diagnostic & base_other)171 bool subclass_equal_p (const pending_diagnostic &base_other) const OVERRIDE
172 {
173 const fncall_without_gil &sub_other
174 = (const fncall_without_gil &)base_other;
175 return (m_call == sub_other.m_call
176 && m_callee_fndecl == sub_other.m_callee_fndecl
177 && m_arg_idx == sub_other.m_arg_idx);
178 }
179
emit(rich_location * rich_loc)180 bool emit (rich_location *rich_loc) FINAL OVERRIDE
181 {
182 auto_diagnostic_group d;
183 /* There isn't a warning ID for use to use. */
184 if (m_callee_fndecl)
185 return warning_at (rich_loc, 0,
186 "use of PyObject as argument %i of %qE"
187 " without the GIL",
188 m_arg_idx + 1, m_callee_fndecl);
189 else
190 return warning_at (rich_loc, 0,
191 "use of PyObject as argument %i of call"
192 " without the GIL",
193 m_arg_idx + 1, m_callee_fndecl);
194 }
195
describe_final_event(const evdesc::final_event & ev)196 label_text describe_final_event (const evdesc::final_event &ev) FINAL OVERRIDE
197 {
198 if (m_callee_fndecl)
199 return ev.formatted_print ("use of PyObject as argument %i of %qE here"
200 " without the GIL",
201 m_arg_idx + 1, m_callee_fndecl);
202 else
203 return ev.formatted_print ("use of PyObject as argument %i of call here"
204 " without the GIL",
205 m_arg_idx + 1, m_callee_fndecl);
206 }
207
208 private:
209 const gcall *m_call;
210 tree m_callee_fndecl;
211 unsigned m_arg_idx;
212 };
213
214 class pyobject_usage_without_gil : public gil_diagnostic
215 {
216 public:
pyobject_usage_without_gil(const gil_state_machine & sm,tree expr)217 pyobject_usage_without_gil (const gil_state_machine &sm, tree expr)
218 : gil_diagnostic (sm), m_expr (expr)
219 {}
220
get_kind()221 const char *get_kind () const FINAL OVERRIDE
222 {
223 return "pyobject_usage_without_gil";
224 }
225
subclass_equal_p(const pending_diagnostic & base_other)226 bool subclass_equal_p (const pending_diagnostic &base_other) const OVERRIDE
227 {
228 return same_tree_p (m_expr,
229 ((const pyobject_usage_without_gil&)base_other).m_expr);
230 }
231
emit(rich_location * rich_loc)232 bool emit (rich_location *rich_loc) FINAL OVERRIDE
233 {
234 auto_diagnostic_group d;
235 /* There isn't a warning ID for use to use. */
236 return warning_at (rich_loc, 0,
237 "use of PyObject %qE without the GIL", m_expr);
238 }
239
describe_final_event(const evdesc::final_event & ev)240 label_text describe_final_event (const evdesc::final_event &ev) FINAL OVERRIDE
241 {
242 return ev.formatted_print ("PyObject %qE used here without the GIL",
243 m_expr);
244 }
245
246 private:
247 tree m_expr;
248 };
249
250 /* gil_state_machine's ctor. */
251
gil_state_machine(logger * logger)252 gil_state_machine::gil_state_machine (logger *logger)
253 : state_machine ("gil", logger)
254 {
255 m_released_gil = add_state ("released_gil");
256 m_stop = add_state ("stop");
257 }
258
259 struct cb_data
260 {
cb_datacb_data261 cb_data (const gil_state_machine &sm, sm_context *sm_ctxt,
262 const supernode *snode, const gimple *stmt)
263 : m_sm (sm), m_sm_ctxt (sm_ctxt), m_snode (snode), m_stmt (stmt)
264 {
265 }
266
267 const gil_state_machine &m_sm;
268 sm_context *m_sm_ctxt;
269 const supernode *m_snode;
270 const gimple *m_stmt;
271 };
272
273 static bool
check_for_pyobject(gimple *,tree op,tree,void * data)274 check_for_pyobject (gimple *, tree op, tree, void *data)
275 {
276 cb_data *d = (cb_data *)data;
277 d->m_sm.check_for_pyobject_usage_without_gil (d->m_sm_ctxt, d->m_snode,
278 d->m_stmt, op);
279 return true;
280 }
281
282 /* Assuming that the GIL has been released, complain about any
283 PyObject * arguments passed to CALL. */
284
285 void
check_for_pyobject_in_call(sm_context * sm_ctxt,const supernode * node,const gcall * call,tree callee_fndecl)286 gil_state_machine::check_for_pyobject_in_call (sm_context *sm_ctxt,
287 const supernode *node,
288 const gcall *call,
289 tree callee_fndecl) const
290 {
291 for (unsigned i = 0; i < gimple_call_num_args (call); i++)
292 {
293 tree arg = gimple_call_arg (call, i);
294 if (TREE_CODE (TREE_TYPE (arg)) != POINTER_TYPE)
295 continue;
296 tree type = TREE_TYPE (TREE_TYPE (arg));
297 if (type_based_on_pyobject_p (type))
298 {
299 sm_ctxt->warn (node, call, NULL_TREE,
300 new fncall_without_gil (*this, call,
301 callee_fndecl,
302 i));
303 sm_ctxt->set_global_state (m_stop);
304 }
305 }
306 }
307
308 /* Implementation of state_machine::on_stmt vfunc for gil_state_machine. */
309
310 bool
on_stmt(sm_context * sm_ctxt,const supernode * node,const gimple * stmt)311 gil_state_machine::on_stmt (sm_context *sm_ctxt,
312 const supernode *node,
313 const gimple *stmt) const
314 {
315 const state_t global_state = sm_ctxt->get_global_state ();
316 if (const gcall *call = dyn_cast <const gcall *> (stmt))
317 {
318 if (tree callee_fndecl = sm_ctxt->get_fndecl_for_call (call))
319 {
320 if (is_named_call_p (callee_fndecl, "PyEval_SaveThread", call, 0))
321 {
322 if (0)
323 inform (input_location, "found call to %qs",
324 "PyEval_SaveThread");
325 if (global_state == m_released_gil)
326 {
327 sm_ctxt->warn (node, stmt, NULL_TREE,
328 new double_save_thread (*this, call));
329 sm_ctxt->set_global_state (m_stop);
330 }
331 else
332 sm_ctxt->set_global_state (m_released_gil);
333 return true;
334 }
335 else if (is_named_call_p (callee_fndecl, "PyEval_RestoreThread",
336 call, 1))
337 {
338 if (0)
339 inform (input_location, "found call to %qs",
340 "PyEval_SaveThread");
341 if (global_state == m_released_gil)
342 sm_ctxt->set_global_state (m_start);
343 return true;
344 }
345 else if (global_state == m_released_gil)
346 {
347 /* Find PyObject * args of calls to fns with unknown bodies. */
348 if (!fndecl_has_gimple_body_p (callee_fndecl))
349 check_for_pyobject_in_call (sm_ctxt, node, call, callee_fndecl);
350 }
351 }
352 else if (global_state == m_released_gil)
353 check_for_pyobject_in_call (sm_ctxt, node, call, NULL);
354 }
355 else
356 if (global_state == m_released_gil)
357 {
358 /* Walk the stmt, finding uses of PyObject (or "subclasses"). */
359 cb_data d (*this, sm_ctxt, node, stmt);
360 walk_stmt_load_store_addr_ops (const_cast <gimple *> (stmt), &d,
361 check_for_pyobject,
362 check_for_pyobject,
363 check_for_pyobject);
364 }
365 return false;
366 }
367
368 /* Implementation of state_machine::on_condition vfunc for
369 gil_state_machine. */
370
371 void
on_condition(sm_context * sm_ctxt ATTRIBUTE_UNUSED,const supernode * node ATTRIBUTE_UNUSED,const gimple * stmt ATTRIBUTE_UNUSED,tree lhs ATTRIBUTE_UNUSED,enum tree_code op ATTRIBUTE_UNUSED,tree rhs ATTRIBUTE_UNUSED)372 gil_state_machine::on_condition (sm_context *sm_ctxt ATTRIBUTE_UNUSED,
373 const supernode *node ATTRIBUTE_UNUSED,
374 const gimple *stmt ATTRIBUTE_UNUSED,
375 tree lhs ATTRIBUTE_UNUSED,
376 enum tree_code op ATTRIBUTE_UNUSED,
377 tree rhs ATTRIBUTE_UNUSED) const
378 {
379 // Empty
380 }
381
382 bool
can_purge_p(state_t s ATTRIBUTE_UNUSED)383 gil_state_machine::can_purge_p (state_t s ATTRIBUTE_UNUSED) const
384 {
385 return true;
386 }
387
388 void
check_for_pyobject_usage_without_gil(sm_context * sm_ctxt,const supernode * node,const gimple * stmt,tree op)389 gil_state_machine::check_for_pyobject_usage_without_gil (sm_context *sm_ctxt,
390 const supernode *node,
391 const gimple *stmt,
392 tree op) const
393 {
394 tree type = TREE_TYPE (op);
395 if (type_based_on_pyobject_p (type))
396 {
397 sm_ctxt->warn (node, stmt, NULL_TREE,
398 new pyobject_usage_without_gil (*this, op));
399 sm_ctxt->set_global_state (m_stop);
400 }
401 }
402
403 /* Callback handler for the PLUGIN_ANALYZER_INIT event. */
404
405 static void
gil_analyzer_init_cb(void * gcc_data,void *)406 gil_analyzer_init_cb (void *gcc_data, void */*user_data*/)
407 {
408 ana::plugin_analyzer_init_iface *iface
409 = (ana::plugin_analyzer_init_iface *)gcc_data;
410 LOG_SCOPE (iface->get_logger ());
411 if (0)
412 inform (input_location, "got here: gil_analyzer_init_cb");
413 iface->register_state_machine (new gil_state_machine (iface->get_logger ()));
414 }
415
416 } // namespace ana
417
418 #endif /* #if ENABLE_ANALYZER */
419
420 int
plugin_init(struct plugin_name_args * plugin_info,struct plugin_gcc_version * version)421 plugin_init (struct plugin_name_args *plugin_info,
422 struct plugin_gcc_version *version)
423 {
424 #if ENABLE_ANALYZER
425 const char *plugin_name = plugin_info->base_name;
426 if (0)
427 inform (input_location, "got here; %qs", plugin_name);
428 register_callback (plugin_info->base_name,
429 PLUGIN_ANALYZER_INIT,
430 ana::gil_analyzer_init_cb,
431 NULL); /* void *user_data */
432 #else
433 sorry_no_analyzer ();
434 #endif
435 return 0;
436 }
437