/*

  Reference Cycle Garbage Collection
  ==================================

  Neil Schemenauer <nas@arctrix.com>

  Based on a post on the python-dev list.  Ideas from Guido van Rossum,
  Eric Tiedemann, and various others.

  http://www.arctrix.com/nas/python/gc/

  The following mailing list threads provide a historical perspective on
  the design of this module.  Note that a fair amount of refinement has
  occurred since those discussions.

  http://mail.python.org/pipermail/python-dev/2000-March/002385.html
  http://mail.python.org/pipermail/python-dev/2000-March/002434.html
  http://mail.python.org/pipermail/python-dev/2000-March/002497.html

  For a high-level view of the collection process, read the collect()
  function.

*/

#include "Python.h"
#include "pycore_context.h"
#include "pycore_object.h"
#include "pycore_pymem.h"
#include "pycore_pystate.h"
#include "frameobject.h"        /* for PyFrame_ClearFreeList */
#include "pydtrace.h"
#include "pytime.h"             /* for _PyTime_GetMonotonicClock() */

/*[clinic input]
module gc
[clinic start generated code]*/
/*[clinic end generated code: output=da39a3ee5e6b4b0d input=b5c9690ecc842d79]*/

#define GC_DEBUG (0)  /* Enable more asserts */

#define GC_NEXT _PyGCHead_NEXT
#define GC_PREV _PyGCHead_PREV

// update_refs() sets this bit for all objects in the generation being
// collected.  subtract_refs() and move_unreachable() use it to tell
// whether a visited object is part of the current collection or not.
//
// move_unreachable() removes this flag from reachable objects.
// Only unreachable objects have this flag.
//
// No object in the interpreter has this flag after GC ends.
#define PREV_MASK_COLLECTING   _PyGC_PREV_MASK_COLLECTING

// The lowest bit of _gc_next is used for the UNREACHABLE flag.
//
// This flag indicates that the object is in the unreachable list built by
// move_unreachable().
//
// Although this flag is used only in move_unreachable(), move_unreachable()
// doesn't clear it, to avoid an extra iteration over the list.
// move_legacy_finalizers() removes the flag instead.
// Between the two calls, the unreachable list is not a normal list and most
// gc_list_* functions cannot be used on it.
#define NEXT_MASK_UNREACHABLE  (1)

/* Get an object's GC head */
#define AS_GC(o) ((PyGC_Head *)(o)-1)

/* Get the object given the GC head */
#define FROM_GC(g) ((PyObject *)(((PyGC_Head *)g)+1))
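
/* Editorial sketch (not used by the module): a GC-tracked allocation places
 * the PyGC_Head immediately before the PyObject, so the two macros above are
 * inverse pointer adjustments:
 *
 *     memory layout:  [ PyGC_Head | PyObject ... ]
 *                      ^g          ^op
 *
 *     PyGC_Head *g   = AS_GC(op);    // step back over the header
 *     PyObject  *op2 = FROM_GC(g);   // step forward again: op2 == op
 */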

static inline int
gc_is_collecting(PyGC_Head *g)
{
    return (g->_gc_prev & PREV_MASK_COLLECTING) != 0;
}

static inline void
gc_clear_collecting(PyGC_Head *g)
{
    g->_gc_prev &= ~PREV_MASK_COLLECTING;
}

static inline Py_ssize_t
gc_get_refs(PyGC_Head *g)
{
    return (Py_ssize_t)(g->_gc_prev >> _PyGC_PREV_SHIFT);
}

static inline void
gc_set_refs(PyGC_Head *g, Py_ssize_t refs)
{
    g->_gc_prev = (g->_gc_prev & ~_PyGC_PREV_MASK)
        | ((uintptr_t)(refs) << _PyGC_PREV_SHIFT);
}

static inline void
gc_reset_refs(PyGC_Head *g, Py_ssize_t refs)
{
    g->_gc_prev = (g->_gc_prev & _PyGC_PREV_MASK_FINALIZED)
        | PREV_MASK_COLLECTING
        | ((uintptr_t)(refs) << _PyGC_PREV_SHIFT);
}

static inline void
gc_decref(PyGC_Head *g)
{
    _PyObject_ASSERT_WITH_MSG(FROM_GC(g),
                              gc_get_refs(g) > 0,
                              "refcount is too small");
    g->_gc_prev -= 1 << _PyGC_PREV_SHIFT;
}
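
/* Editorial sketch of the _gc_prev packing the helpers above implement,
 * assuming the usual layout: the two low bits hold the FINALIZED and
 * COLLECTING flags, and the remaining bits store the refcount copy
 * (shifted by _PyGC_PREV_SHIFT):
 *
 *     _gc_prev:  | refs copy ... | COLLECTING | FINALIZED |
 *
 *     gc_reset_refs(g, 5);      // refs = 5, COLLECTING set, FINALIZED kept
 *     gc_decref(g);             // refs = 4, flags untouched
 *     assert(gc_get_refs(g) == 4);
 *     gc_clear_collecting(g);   // drops only the COLLECTING bit
 */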

/* Python string to use if an unhandled exception occurs */
static PyObject *gc_str = NULL;

/* set for debugging information */
#define DEBUG_STATS             (1<<0) /* print collection statistics */
#define DEBUG_COLLECTABLE       (1<<1) /* print collectable objects */
#define DEBUG_UNCOLLECTABLE     (1<<2) /* print uncollectable objects */
#define DEBUG_SAVEALL           (1<<5) /* save all garbage in gc.garbage */
#define DEBUG_LEAK              (DEBUG_COLLECTABLE | \
                DEBUG_UNCOLLECTABLE | \
                DEBUG_SAVEALL)

#define GEN_HEAD(state, n) (&(state)->generations[n].head)

void
_PyGC_Initialize(struct _gc_runtime_state *state)
{
    state->enabled = 1; /* automatic collection enabled? */

#define _GEN_HEAD(n) GEN_HEAD(state, n)
    struct gc_generation generations[NUM_GENERATIONS] = {
        /* PyGC_Head,                                    threshold,    count */
        {{(uintptr_t)_GEN_HEAD(0), (uintptr_t)_GEN_HEAD(0)},   700,        0},
        {{(uintptr_t)_GEN_HEAD(1), (uintptr_t)_GEN_HEAD(1)},   10,         0},
        {{(uintptr_t)_GEN_HEAD(2), (uintptr_t)_GEN_HEAD(2)},   10,         0},
    };
    for (int i = 0; i < NUM_GENERATIONS; i++) {
        state->generations[i] = generations[i];
    }
    state->generation0 = GEN_HEAD(state, 0);
    struct gc_generation permanent_generation = {
          {(uintptr_t)&state->permanent_generation.head,
           (uintptr_t)&state->permanent_generation.head}, 0, 0
    };
    state->permanent_generation = permanent_generation;
}

/*
_gc_prev values
---------------

Between collections, _gc_prev is used as the back pointer of a doubly
linked list.

The lowest two bits of _gc_prev are used for flags.
PREV_MASK_COLLECTING is used only while collecting, and is cleared before
GC ends or when _PyObject_GC_UNTRACK() is called.

During a collection, _gc_prev is temporarily used to store gc_refs, and
the gc list is singly linked until _gc_prev is restored.

gc_refs
    At the start of a collection, update_refs() copies the true refcount
    to gc_refs, for each object in the generation being collected.
    subtract_refs() then adjusts gc_refs so that it equals the number of
    times an object is referenced directly from outside the generation
    being collected.

PREV_MASK_COLLECTING
    Objects in the generation being collected are marked
    PREV_MASK_COLLECTING in update_refs().


_gc_next values
---------------

_gc_next takes these values:

0
    The object is not tracked

!= 0
    Pointer to the next object in the GC list.
    Additionally, the lowest bit is used temporarily for the
    NEXT_MASK_UNREACHABLE flag described below.

NEXT_MASK_UNREACHABLE
    move_unreachable() moves objects not reachable (whether directly or
    indirectly) from outside the generation into an "unreachable" set and
    sets this flag.

    Objects that are found to be reachable have gc_refs set to 1.
    If this flag is set on an object that turns out to be reachable, the
    object must still be in the "unreachable" set; the flag is then unset
    and the object is moved back to the "reachable" set.

    move_legacy_finalizers() removes this flag from objects in the
    "unreachable" set.
*/
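
/* Editorial sketch of the _gc_next tagging described above: while the
 * unreachable list is being built, its links carry NEXT_MASK_UNREACHABLE in
 * the low bit, so following a link means masking the bit off first:
 *
 *     uintptr_t next = gc->_gc_next;
 *     int flagged = (next & NEXT_MASK_UNREACHABLE) != 0;
 *     PyGC_Head *succ = (PyGC_Head *)(next & ~NEXT_MASK_UNREACHABLE);
 *
 * This works because PyGC_Head is pointer-aligned, leaving the low bit of
 * every real pointer free.
 */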

/*** list functions ***/

static inline void
gc_list_init(PyGC_Head *list)
{
    // The list header must not have flags,
    // so the pointer can be assigned with a simple cast.
    list->_gc_prev = (uintptr_t)list;
    list->_gc_next = (uintptr_t)list;
}

static inline int
gc_list_is_empty(PyGC_Head *list)
{
    return (list->_gc_next == (uintptr_t)list);
}

/* Append `node` to `list`. */
static inline void
gc_list_append(PyGC_Head *node, PyGC_Head *list)
{
    PyGC_Head *last = (PyGC_Head *)list->_gc_prev;

    // last <-> node
    _PyGCHead_SET_PREV(node, last);
    _PyGCHead_SET_NEXT(last, node);

    // node <-> list
    _PyGCHead_SET_NEXT(node, list);
    list->_gc_prev = (uintptr_t)node;
}

/* Remove `node` from the gc list it's currently in. */
static inline void
gc_list_remove(PyGC_Head *node)
{
    PyGC_Head *prev = GC_PREV(node);
    PyGC_Head *next = GC_NEXT(node);

    _PyGCHead_SET_NEXT(prev, next);
    _PyGCHead_SET_PREV(next, prev);

    node->_gc_next = 0; /* object is not currently tracked */
}

/* Move `node` from the gc list it's currently in (which is not explicitly
 * named here) to the end of `list`.  This is semantically the same as
 * gc_list_remove(node) followed by gc_list_append(node, list).
 */
static void
gc_list_move(PyGC_Head *node, PyGC_Head *list)
{
    /* Unlink from current list. */
    PyGC_Head *from_prev = GC_PREV(node);
    PyGC_Head *from_next = GC_NEXT(node);
    _PyGCHead_SET_NEXT(from_prev, from_next);
    _PyGCHead_SET_PREV(from_next, from_prev);

    /* Relink at end of new list. */
    // The list head must not have flags, so we can skip the macros.
    PyGC_Head *to_prev = (PyGC_Head*)list->_gc_prev;
    _PyGCHead_SET_PREV(node, to_prev);
    _PyGCHead_SET_NEXT(to_prev, node);
    list->_gc_prev = (uintptr_t)node;
    _PyGCHead_SET_NEXT(node, list);
}

/* append list `from` onto list `to`; `from` becomes an empty list */
static void
gc_list_merge(PyGC_Head *from, PyGC_Head *to)
{
    assert(from != to);
    if (!gc_list_is_empty(from)) {
        PyGC_Head *to_tail = GC_PREV(to);
        PyGC_Head *from_head = GC_NEXT(from);
        PyGC_Head *from_tail = GC_PREV(from);
        assert(from_head != from);
        assert(from_tail != from);

        _PyGCHead_SET_NEXT(to_tail, from_head);
        _PyGCHead_SET_PREV(from_head, to_tail);

        _PyGCHead_SET_NEXT(from_tail, to);
        _PyGCHead_SET_PREV(to, from_tail);
    }
    gc_list_init(from);
}

static Py_ssize_t
gc_list_size(PyGC_Head *list)
{
    PyGC_Head *gc;
    Py_ssize_t n = 0;
    for (gc = GC_NEXT(list); gc != list; gc = GC_NEXT(gc)) {
        n++;
    }
    return n;
}
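
/* Editorial sketch (standalone and hypothetical, minus the flag-bit packing):
 * the same circular doubly-linked-list idiom with plain pointers, to make the
 * invariants of gc_list_init()/gc_list_append() easy to see:
 *
 *     #include <assert.h>
 *
 *     typedef struct node { struct node *prev, *next; } node;
 *
 *     static void list_init(node *head) {
 *         head->prev = head->next = head;   // empty list: head links to itself
 *     }
 *     static void list_append(node *n, node *head) {
 *         node *last = head->prev;
 *         n->prev = last;  last->next = n;  // last <-> n
 *         n->next = head;  head->prev = n;  // n <-> head
 *     }
 *
 *     int main(void) {
 *         node head, a, b;
 *         list_init(&head);
 *         list_append(&a, &head);
 *         list_append(&b, &head);
 *         assert(head.next == &a && a.next == &b && b.next == &head);
 *         assert(head.prev == &b && b.prev == &a && a.prev == &head);
 *         return 0;
 *     }
 */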

/* Append objects in a GC list to a Python list.
 * Return 0 if all OK, < 0 if error (out of memory for list).
 */
static int
append_objects(PyObject *py_list, PyGC_Head *gc_list)
{
    PyGC_Head *gc;
    for (gc = GC_NEXT(gc_list); gc != gc_list; gc = GC_NEXT(gc)) {
        PyObject *op = FROM_GC(gc);
        if (op != py_list) {
            if (PyList_Append(py_list, op)) {
                return -1; /* exception */
            }
        }
    }
    return 0;
}

#if GC_DEBUG
// validate_list() checks list consistency.  It also serves as documentation
// of when expected_mask is supposed to be set or clear.
static void
validate_list(PyGC_Head *head, uintptr_t expected_mask)
{
    PyGC_Head *prev = head;
    PyGC_Head *gc = GC_NEXT(head);
    while (gc != head) {
        assert(GC_NEXT(gc) != NULL);
        assert(GC_PREV(gc) == prev);
        assert((gc->_gc_prev & PREV_MASK_COLLECTING) == expected_mask);
        prev = gc;
        gc = GC_NEXT(gc);
    }
    assert(prev == GC_PREV(head));
}
#else
#define validate_list(x,y) do{}while(0)
#endif

/*** end of list stuff ***/


/* Set all gc_refs = ob_refcnt.  After this, gc_refs is > 0 and
 * PREV_MASK_COLLECTING bit is set for all objects in containers.
 */
static void
update_refs(PyGC_Head *containers)
{
    PyGC_Head *gc = GC_NEXT(containers);
    for (; gc != containers; gc = GC_NEXT(gc)) {
        gc_reset_refs(gc, Py_REFCNT(FROM_GC(gc)));
        /* Python's cyclic gc should never see an incoming refcount
         * of 0:  if something decref'ed to 0, it should have been
         * deallocated immediately at that time.
         * Possible cause (if the assert triggers):  a tp_dealloc
         * routine left a gc-aware object tracked during its teardown
         * phase, and did something-- or allowed something to happen --
         * that called back into Python.  gc can trigger then, and may
         * see the still-tracked dying object.  Before this assert
         * was added, such mistakes went on to allow gc to try to
         * delete the object again.  In a debug build, that caused
         * a mysterious segfault, when _Py_ForgetReference tried
         * to remove the object from the doubly-linked list of all
         * objects a second time.  In a release build, an actual
         * double deallocation occurred, which leads to corruption
         * of the allocator's internal bookkeeping pointers.  That's
         * so serious that maybe this should be a release-build
         * check instead of an assert?
         */
        _PyObject_ASSERT(FROM_GC(gc), gc_get_refs(gc) != 0);
    }
}

/* A traversal callback for subtract_refs. */
static int
visit_decref(PyObject *op, void *parent)
{
    _PyObject_ASSERT(_PyObject_CAST(parent), !_PyObject_IsFreed(op));

    if (PyObject_IS_GC(op)) {
        PyGC_Head *gc = AS_GC(op);
        /* We're only interested in gc_refs for objects in the
         * generation being collected, which can be recognized
         * because only they have positive gc_refs.
         */
        if (gc_is_collecting(gc)) {
            gc_decref(gc);
        }
    }
    return 0;
}

/* Subtract internal references from gc_refs.  After this, gc_refs is >= 0
 * for all objects in containers, and tracked gc objects not in containers
 * are unaffected.  The ones with gc_refs > 0 are directly reachable from
 * outside containers, and so can't be collected.
 */
static void
subtract_refs(PyGC_Head *containers)
{
    traverseproc traverse;
    PyGC_Head *gc = GC_NEXT(containers);
    for (; gc != containers; gc = GC_NEXT(gc)) {
        PyObject *op = FROM_GC(gc);
        traverse = Py_TYPE(op)->tp_traverse;
        (void) traverse(FROM_GC(gc),
                       (visitproc)visit_decref,
                       op);
    }
}
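
/* Editorial worked example for update_refs() + subtract_refs(): suppose A and
 * B form a cycle (A -> B -> A) and one external variable also refers to A:
 *
 *     ob_refcnt:      A = 2 (from B and the variable),  B = 1 (from A)
 *     update_refs:    gc_refs(A) = 2, gc_refs(B) = 1
 *     subtract_refs:  traversing A decrefs B, traversing B decrefs A
 *                 ->  gc_refs(A) = 1, gc_refs(B) = 0
 *
 * A's remaining count of 1 is exactly the external reference, so A (and
 * everything reachable from it, including B) survives move_unreachable().
 */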

/* A traversal callback for move_unreachable. */
static int
visit_reachable(PyObject *op, PyGC_Head *reachable)
{
    if (!PyObject_IS_GC(op)) {
        return 0;
    }

    PyGC_Head *gc = AS_GC(op);
    const Py_ssize_t gc_refs = gc_get_refs(gc);

    // Ignore untracked objects and objects in other generations.
    if (gc->_gc_next == 0 || !gc_is_collecting(gc)) {
        return 0;
    }

    if (gc->_gc_next & NEXT_MASK_UNREACHABLE) {
        /* This had gc_refs = 0 when move_unreachable got
         * to it, but turns out it's reachable after all.
         * Move it back to move_unreachable's 'young' list,
         * and move_unreachable will eventually get to it
         * again.
         */
        // Manually unlink gc from the unreachable list because the list
        // functions don't work while _gc_next carries NEXT_MASK_UNREACHABLE.
        PyGC_Head *prev = GC_PREV(gc);
        PyGC_Head *next = (PyGC_Head*)(gc->_gc_next & ~NEXT_MASK_UNREACHABLE);
        _PyObject_ASSERT(FROM_GC(prev),
                         prev->_gc_next & NEXT_MASK_UNREACHABLE);
        _PyObject_ASSERT(FROM_GC(next),
                         next->_gc_next & NEXT_MASK_UNREACHABLE);
        prev->_gc_next = gc->_gc_next;  // copy NEXT_MASK_UNREACHABLE
        _PyGCHead_SET_PREV(next, prev);

        gc_list_append(gc, reachable);
        gc_set_refs(gc, 1);
    }
    else if (gc_refs == 0) {
        /* This is in move_unreachable's 'young' list, but
         * the traversal hasn't yet gotten to it.  All
         * we need to do is tell move_unreachable that it's
         * reachable.
         */
        gc_set_refs(gc, 1);
    }
    /* Else there's nothing to do.
     * If gc_refs > 0, it must be in move_unreachable's 'young'
     * list, and move_unreachable will eventually get to it.
     */
    else {
        _PyObject_ASSERT_WITH_MSG(op, gc_refs > 0, "refcount is too small");
    }
    return 0;
}

/* Move the unreachable objects from young to unreachable.  After this,
 * no object in young has the PREV_MASK_COLLECTING flag, and all objects
 * in unreachable have it.
 * All objects in young after this are directly or indirectly reachable
 * from outside the original young; and all objects in unreachable are
 * not.
 *
 * This function restores the _gc_prev pointer.  young and unreachable are
 * doubly linked lists after this function.
 * But _gc_next in the unreachable list has the NEXT_MASK_UNREACHABLE flag
 * set, so we cannot use the gc_list_* functions on it until the flag is
 * removed.
 */
static void
move_unreachable(PyGC_Head *young, PyGC_Head *unreachable)
{
    // previous elem in the young list, used to restore _gc_prev.
    PyGC_Head *prev = young;
    PyGC_Head *gc = GC_NEXT(young);

    /* Invariants:  all objects "to the left" of us in young are reachable
     * (directly or indirectly) from outside the young list as it was at entry.
     *
     * All other objects from the original young "to the left" of us are in
     * unreachable now, and have NEXT_MASK_UNREACHABLE.  All objects to the
     * left of us in 'young' now have been scanned, and no objects here
     * or to the right have been scanned yet.
     */

    while (gc != young) {
        if (gc_get_refs(gc)) {
            /* gc is definitely reachable from outside the
             * original 'young'.  Mark it as such, and traverse
             * its pointers to find any other objects that may
             * be directly reachable from it.  Note that the
             * call to tp_traverse may append objects to young,
             * so we have to wait until it returns to determine
             * the next object to visit.
             */
            PyObject *op = FROM_GC(gc);
            traverseproc traverse = Py_TYPE(op)->tp_traverse;
            _PyObject_ASSERT_WITH_MSG(op, gc_get_refs(gc) > 0,
                                      "refcount is too small");
            // NOTE: visit_reachable may change gc->_gc_next when
            // young->_gc_prev == gc.  Don't do gc = GC_NEXT(gc) before!
            (void) traverse(op,
                    (visitproc)visit_reachable,
                    (void *)young);
            // relink gc_prev to prev element.
            _PyGCHead_SET_PREV(gc, prev);
            // gc is not COLLECTING state after here.
            gc_clear_collecting(gc);
            prev = gc;
        }
        else {
            /* This *may* be unreachable.  To make progress,
             * assume it is.  gc isn't directly reachable from
             * any object we've already traversed, but may be
             * reachable from an object we haven't gotten to yet.
             * visit_reachable will eventually move gc back into
             * young if that's so, and we'll see it again.
             */
            // Move gc to unreachable.
            // No need for gc->next->prev = prev because the list is
            // singly linked at this point.
            prev->_gc_next = gc->_gc_next;

            // We can't use gc_list_append() here because we use
            // NEXT_MASK_UNREACHABLE here.
            PyGC_Head *last = GC_PREV(unreachable);
            // NOTE: Since all objects in the unreachable set have the
            // NEXT_MASK_UNREACHABLE flag, we set it unconditionally.
            // But this may also set the flag on the unreachable head itself;
            // move_legacy_finalizers() takes care of that.
            last->_gc_next = (NEXT_MASK_UNREACHABLE | (uintptr_t)gc);
            _PyGCHead_SET_PREV(gc, last);
            gc->_gc_next = (NEXT_MASK_UNREACHABLE | (uintptr_t)unreachable);
            unreachable->_gc_prev = (uintptr_t)gc;
        }
        gc = (PyGC_Head*)prev->_gc_next;
    }
    // young->_gc_prev must point to the last element remaining in the list.
    young->_gc_prev = (uintptr_t)prev;
}
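
/* Editorial worked example for move_unreachable(), continuing the A/B cycle
 * from the subtract_refs() sketch above (gc_refs: A = 1, B = 0):
 *
 *   1. A has gc_refs > 0, so it stays in young; traversing A calls
 *      visit_reachable(B), which bumps gc_refs(B) to 1.
 *   2. The scan then reaches B, which now has gc_refs > 0, so B stays too.
 *
 * Without the external reference (A = 0, B = 0), A is tentatively moved to
 * unreachable first, then B; no reachable object's traversal ever visits
 * them, so both stay in unreachable and end up collected.
 */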
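/* Try to untrack all currently tracked tuples */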
static void
untrack_tuples(PyGC_Head *head)
{
    PyGC_Head *next, *gc = GC_NEXT(head);
    while (gc != head) {
        PyObject *op = FROM_GC(gc);
        next = GC_NEXT(gc);
        if (PyTuple_CheckExact(op)) {
            _PyTuple_MaybeUntrack(op);
        }
        gc = next;
    }
}

/* Try to untrack all currently tracked dictionaries */
static void
untrack_dicts(PyGC_Head *head)
{
    PyGC_Head *next, *gc = GC_NEXT(head);
    while (gc != head) {
        PyObject *op = FROM_GC(gc);
        next = GC_NEXT(gc);
        if (PyDict_CheckExact(op)) {
            _PyDict_MaybeUntrack(op);
        }
        gc = next;
    }
}

/* Return true if object has a pre-PEP 442 finalization method. */
static int
has_legacy_finalizer(PyObject *op)
{
    return op->ob_type->tp_del != NULL;
}

/* Move the objects in unreachable with tp_del slots into `finalizers`.
 *
 * This function also removes the NEXT_MASK_UNREACHABLE flag
 * from _gc_next in unreachable.
 */
static void
move_legacy_finalizers(PyGC_Head *unreachable, PyGC_Head *finalizers)
{
    PyGC_Head *gc, *next;
    unreachable->_gc_next &= ~NEXT_MASK_UNREACHABLE;

    /* March over unreachable.  Move objects with finalizers into
     * `finalizers`.
     */
    for (gc = GC_NEXT(unreachable); gc != unreachable; gc = next) {
        PyObject *op = FROM_GC(gc);

        _PyObject_ASSERT(op, gc->_gc_next & NEXT_MASK_UNREACHABLE);
        gc->_gc_next &= ~NEXT_MASK_UNREACHABLE;
        next = (PyGC_Head*)gc->_gc_next;

        if (has_legacy_finalizer(op)) {
            gc_clear_collecting(gc);
            gc_list_move(gc, finalizers);
        }
    }
}

/* A traversal callback for move_legacy_finalizer_reachable. */
static int
visit_move(PyObject *op, PyGC_Head *tolist)
{
    if (PyObject_IS_GC(op)) {
        PyGC_Head *gc = AS_GC(op);
        if (gc_is_collecting(gc)) {
            gc_list_move(gc, tolist);
            gc_clear_collecting(gc);
        }
    }
    return 0;
}

/* Move objects that are reachable from finalizers, from the unreachable set
 * into the finalizers set.
 */
static void
move_legacy_finalizer_reachable(PyGC_Head *finalizers)
{
    traverseproc traverse;
    PyGC_Head *gc = GC_NEXT(finalizers);
    for (; gc != finalizers; gc = GC_NEXT(gc)) {
        /* Note that the finalizers list may grow during this. */
        traverse = Py_TYPE(FROM_GC(gc))->tp_traverse;
        (void) traverse(FROM_GC(gc),
                        (visitproc)visit_move,
                        (void *)finalizers);
    }
}

/* Clear all weakrefs to unreachable objects, and if such a weakref has a
 * callback, invoke it if necessary.  Note that it's possible for such
 * weakrefs to be outside the unreachable set -- indeed, those are precisely
 * the weakrefs whose callbacks must be invoked.  See gc_weakref.txt for
 * overview & some details.  Some weakrefs with callbacks may be reclaimed
 * directly by this routine; the number reclaimed is the return value.  Other
 * weakrefs with callbacks may be moved into the `old` generation.  Objects
 * moved into `old` are no longer considered part of the collection; the
 * objects remaining in unreachable still are.  When this returns,
 * no object in `unreachable` is weakly referenced anymore.
 */
static int
handle_weakrefs(PyGC_Head *unreachable, PyGC_Head *old)
{
    PyGC_Head *gc;
    PyObject *op;               /* generally FROM_GC(gc) */
    PyWeakReference *wr;        /* generally a cast of op */
    PyGC_Head wrcb_to_call;     /* weakrefs with callbacks to call */
    PyGC_Head *next;
    int num_freed = 0;

    gc_list_init(&wrcb_to_call);

    /* Clear all weakrefs to the objects in unreachable.  If such a weakref
     * also has a callback, move it into `wrcb_to_call` if the callback
     * needs to be invoked.  Note that we cannot invoke any callbacks until
     * all weakrefs to unreachable objects are cleared, lest the callback
     * resurrect an unreachable object via a still-active weakref.  We
     * make another pass over wrcb_to_call, invoking callbacks, after this
     * pass completes.
     */
    for (gc = GC_NEXT(unreachable); gc != unreachable; gc = next) {
        PyWeakReference **wrlist;

        op = FROM_GC(gc);
        next = GC_NEXT(gc);

        if (PyWeakref_Check(op)) {
            /* A weakref inside the unreachable set must be cleared.  If we
             * allow its callback to execute inside delete_garbage(), it
             * could expose objects that have tp_clear already called on
             * them.  Or, it could resurrect unreachable objects.  One way
             * this can happen is if some container objects do not implement
             * tp_traverse.  Then, wr_object can be outside the unreachable
             * set but can be deallocated as a result of breaking the
             * reference cycle.  If we don't clear the weakref, the callback
             * will run and potentially cause a crash.  See bpo-38006 for
             * one example.
             */
            _PyWeakref_ClearRef((PyWeakReference *)op);
        }

        if (! PyType_SUPPORTS_WEAKREFS(Py_TYPE(op)))
            continue;

        /* It supports weakrefs.  Does it have any? */
        wrlist = (PyWeakReference **)
                                PyObject_GET_WEAKREFS_LISTPTR(op);

        /* `op` may have some weakrefs.  March over the list, clear
         * all the weakrefs, and move the weakrefs with callbacks
         * that must be called into wrcb_to_call.
         */
        for (wr = *wrlist; wr != NULL; wr = *wrlist) {
            PyGC_Head *wrasgc;                  /* AS_GC(wr) */

            /* _PyWeakref_ClearRef clears the weakref but leaves
             * the callback pointer intact.  Obscure:  it also
             * changes *wrlist.
             */
            _PyObject_ASSERT((PyObject *)wr, wr->wr_object == op);
            _PyWeakref_ClearRef(wr);
            _PyObject_ASSERT((PyObject *)wr, wr->wr_object == Py_None);
            if (wr->wr_callback == NULL) {
                /* no callback */
                continue;
            }

            /* Headache time.  `op` is going away, and is weakly referenced by
             * `wr`, which has a callback.  Should the callback be invoked?  If wr
             * is also trash, no:
             *
             * 1. There's no need to call it.  The object and the weakref are
             *    both going away, so it's legitimate to pretend the weakref is
             *    going away first.  The user has to ensure a weakref outlives its
             *    referent if they want a guarantee that the wr callback will get
             *    invoked.
             *
             * 2. It may be catastrophic to call it.  If the callback is also in
             *    cyclic trash (CT), then although the CT is unreachable from
             *    outside the current generation, CT may be reachable from the
             *    callback.  Then the callback could resurrect insane objects.
             *
             * Since the callback is never needed and may be unsafe in this case,
             * wr is simply left in the unreachable set.  Note that because we
             * already called _PyWeakref_ClearRef(wr), its callback will never
             * trigger.
             *
             * OTOH, if wr isn't part of CT, we should invoke the callback:  the
             * weakref outlived the trash.  Note that since wr isn't CT in this
             * case, its callback can't be CT either -- wr acted as an external
             * root to this generation, and therefore its callback did too.  So
             * nothing in CT is reachable from the callback either, so it's hard
             * to imagine how calling it later could create a problem for us.  wr
             * is moved to wrcb_to_call in this case.
             */
            if (gc_is_collecting(AS_GC(wr))) {
                /* it should already have been cleared above */
                assert(wr->wr_object == Py_None);
                continue;
            }

            /* Create a new reference so that wr can't go away
             * before we can process it again.
             */
            Py_INCREF(wr);

            /* Move wr to wrcb_to_call, for the next pass. */
            wrasgc = AS_GC(wr);
            assert(wrasgc != next); /* wrasgc is reachable, but
                                       next isn't, so they can't
                                       be the same */
            gc_list_move(wrasgc, &wrcb_to_call);
        }
    }

    /* Invoke the callbacks we decided to honor.  It's safe to invoke them
     * because they can't reference unreachable objects.
     */
    while (! gc_list_is_empty(&wrcb_to_call)) {
        PyObject *temp;
        PyObject *callback;

        gc = (PyGC_Head*)wrcb_to_call._gc_next;
        op = FROM_GC(gc);
        _PyObject_ASSERT(op, PyWeakref_Check(op));
        wr = (PyWeakReference *)op;
        callback = wr->wr_callback;
        _PyObject_ASSERT(op, callback != NULL);

        /* copy-paste of weakrefobject.c's handle_callback() */
        temp = PyObject_CallFunctionObjArgs(callback, wr, NULL);
        if (temp == NULL)
            PyErr_WriteUnraisable(callback);
        else
            Py_DECREF(temp);

        /* Give up the reference we created in the first pass.  When
         * op's refcount hits 0 (which it may or may not do right now),
         * op's tp_dealloc will decref op->wr_callback too.  Note
         * that the refcount probably will hit 0 now, and because this
         * weakref was reachable to begin with, gc didn't already
         * add it to its count of freed objects.  Example:  a reachable
         * weak value dict maps some key to this reachable weakref.
         * The callback removes this key->weakref mapping from the
         * dict, leaving no other references to the weakref (excepting
         * ours).
         */
        Py_DECREF(op);
        if (wrcb_to_call._gc_next == (uintptr_t)gc) {
            /* object is still alive -- move it */
            gc_list_move(gc, old);
        }
        else {
            ++num_freed;
        }
    }

    return num_freed;
}

static void
debug_cycle(const char *msg, PyObject *op)
{
    PySys_FormatStderr("gc: %s <%s %p>\n",
                       msg, Py_TYPE(op)->tp_name, op);
}

/* Handle uncollectable garbage (cycles with tp_del slots, and stuff reachable
 * only from such cycles).
 * If DEBUG_SAVEALL, all objects in finalizers are appended to the module
 * garbage list (a Python list), else only the objects in finalizers with
 * __del__ methods are appended to garbage.  All objects in finalizers are
 * merged into the old list regardless.
 */
static void
handle_legacy_finalizers(struct _gc_runtime_state *state,
                         PyGC_Head *finalizers, PyGC_Head *old)
{
    assert(!PyErr_Occurred());

    PyGC_Head *gc = GC_NEXT(finalizers);
    if (state->garbage == NULL) {
        state->garbage = PyList_New(0);
        if (state->garbage == NULL)
            Py_FatalError("gc couldn't create gc.garbage list");
    }
    for (; gc != finalizers; gc = GC_NEXT(gc)) {
        PyObject *op = FROM_GC(gc);

        if ((state->debug & DEBUG_SAVEALL) || has_legacy_finalizer(op)) {
            if (PyList_Append(state->garbage, op) < 0) {
                PyErr_Clear();
                break;
            }
        }
    }

    gc_list_merge(finalizers, old);
}

/* Run first-time finalizers (if any) on all the objects in collectable.
 * Note that this may remove some (or even all) of the objects from the
 * list, due to refcounts falling to 0.
 */
static void
finalize_garbage(PyGC_Head *collectable)
{
    destructor finalize;
    PyGC_Head seen;

    /* While we're going through the loop, `finalize(op)` may cause op, or
     * other objects, to be reclaimed via refcounts falling to zero.  So
     * there's little we can rely on about the structure of the input
     * `collectable` list across iterations.  For safety, we always take the
     * first object in that list and move it to a temporary `seen` list.
     * If objects vanish from the `collectable` and `seen` lists we don't
     * care.
     */
    gc_list_init(&seen);

    while (!gc_list_is_empty(collectable)) {
        PyGC_Head *gc = GC_NEXT(collectable);
        PyObject *op = FROM_GC(gc);
        gc_list_move(gc, &seen);
        if (!_PyGCHead_FINALIZED(gc) &&
                (finalize = Py_TYPE(op)->tp_finalize) != NULL) {
            _PyGCHead_SET_FINALIZED(gc);
            Py_INCREF(op);
            finalize(op);
            assert(!PyErr_Occurred());
            Py_DECREF(op);
        }
    }
    gc_list_merge(&seen, collectable);
}

/* Walk the collectable list and check that the objects are really
   unreachable from the outside (some objects could have been resurrected
   by a finalizer). */
static int
check_garbage(PyGC_Head *collectable)
{
    int ret = 0;
    PyGC_Head *gc;
    for (gc = GC_NEXT(collectable); gc != collectable; gc = GC_NEXT(gc)) {
        // Use gc_refs and break gc_prev again.
        gc_set_refs(gc, Py_REFCNT(FROM_GC(gc)));
        _PyObject_ASSERT(FROM_GC(gc), gc_get_refs(gc) != 0);
    }
    subtract_refs(collectable);
    PyGC_Head *prev = collectable;
    for (gc = GC_NEXT(collectable); gc != collectable; gc = GC_NEXT(gc)) {
        _PyObject_ASSERT_WITH_MSG(FROM_GC(gc),
                                  gc_get_refs(gc) >= 0,
                                  "refcount is too small");
        if (gc_get_refs(gc) != 0) {
            ret = -1;
        }
        // Restore gc_prev here.
        _PyGCHead_SET_PREV(gc, prev);
        gc_clear_collecting(gc);
        prev = gc;
    }
    return ret;
}
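
/* Editorial note: "resurrected" means a tp_finalize implementation made the
 * object reachable again.  A hypothetical sketch:
 *
 *     static PyObject *survivors;          // hypothetical global list
 *
 *     static void
 *     my_finalize(PyObject *self)
 *     {
 *         PyList_Append(survivors, self);  // self gains a new reference
 *     }
 *
 * check_garbage() repeats the refcount subtraction after finalizers ran; any
 * leftover external reference makes it return -1, and collect() then merges
 * the whole unreachable set back into `old` instead of deleting it.
 */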

/* Break reference cycles by clearing the containers involved.  This is
 * tricky business as the lists can be changing and we don't know which
 * objects may be freed.  It is possible I screwed something up here.
 */
static void
delete_garbage(struct _gc_runtime_state *state,
               PyGC_Head *collectable, PyGC_Head *old)
{
    assert(!PyErr_Occurred());

    while (!gc_list_is_empty(collectable)) {
        PyGC_Head *gc = GC_NEXT(collectable);
        PyObject *op = FROM_GC(gc);

        _PyObject_ASSERT_WITH_MSG(op, Py_REFCNT(op) > 0,
                                  "refcount is too small");

        if (state->debug & DEBUG_SAVEALL) {
            assert(state->garbage != NULL);
            if (PyList_Append(state->garbage, op) < 0) {
                PyErr_Clear();
            }
        }
        else {
            inquiry clear;
            if ((clear = Py_TYPE(op)->tp_clear) != NULL) {
                Py_INCREF(op);
                (void) clear(op);
                if (PyErr_Occurred()) {
                    _PyErr_WriteUnraisableMsg("in tp_clear of",
                                              (PyObject*)Py_TYPE(op));
                }
                Py_DECREF(op);
            }
        }
        if (GC_NEXT(collectable) == gc) {
            /* object is still alive, move it, it may die later */
            gc_list_move(gc, old);
        }
    }
}

/* Clear all free lists.
 * All free lists are cleared during the collection of the highest generation.
 * Allocated items in a free list may keep a pymalloc arena occupied.
 * Clearing the free lists may give back memory to the OS earlier.
 */
static void
clear_freelists(void)
{
    (void)PyMethod_ClearFreeList();
    (void)PyFrame_ClearFreeList();
    (void)PyCFunction_ClearFreeList();
    (void)PyTuple_ClearFreeList();
    (void)PyUnicode_ClearFreeList();
    (void)PyFloat_ClearFreeList();
    (void)PyList_ClearFreeList();
    (void)PyDict_ClearFreeList();
    (void)PySet_ClearFreeList();
    (void)PyAsyncGen_ClearFreeLists();
    (void)PyContext_ClearFreeList();
}

// Show stats for objects in each generation.
static void
show_stats_each_generations(struct _gc_runtime_state *state)
{
    char buf[100];
    size_t pos = 0;

    for (int i = 0; i < NUM_GENERATIONS && pos < sizeof(buf); i++) {
        pos += PyOS_snprintf(buf+pos, sizeof(buf)-pos,
                             " %"PY_FORMAT_SIZE_T"d",
                             gc_list_size(GEN_HEAD(state, i)));
    }

    PySys_FormatStderr(
        "gc: objects in each generation:%s\n"
        "gc: objects in permanent generation: %zd\n",
        buf, gc_list_size(&state->permanent_generation.head));
}

/* This is the main function.  Read this to understand how the
 * collection process works. */
static Py_ssize_t
collect(struct _gc_runtime_state *state, int generation,
        Py_ssize_t *n_collected, Py_ssize_t *n_uncollectable, int nofail)
{
    int i;
    Py_ssize_t m = 0; /* # objects collected */
    Py_ssize_t n = 0; /* # unreachable objects that couldn't be collected */
    PyGC_Head *young; /* the generation we are examining */
    PyGC_Head *old; /* next older generation */
    PyGC_Head unreachable; /* non-problematic unreachable trash */
    PyGC_Head finalizers;  /* objects with, & reachable from, __del__ */
    PyGC_Head *gc;
    _PyTime_t t1 = 0;   /* initialize to prevent a compiler warning */

    if (state->debug & DEBUG_STATS) {
        PySys_WriteStderr("gc: collecting generation %d...\n", generation);
        show_stats_each_generations(state);
        t1 = _PyTime_GetMonotonicClock();
    }

    if (PyDTrace_GC_START_ENABLED())
        PyDTrace_GC_START(generation);

    /* update collection and allocation counters */
    if (generation+1 < NUM_GENERATIONS)
        state->generations[generation+1].count += 1;
    for (i = 0; i <= generation; i++)
        state->generations[i].count = 0;

    /* merge younger generations with the one we are currently collecting */
    for (i = 0; i < generation; i++) {
        gc_list_merge(GEN_HEAD(state, i), GEN_HEAD(state, generation));
    }

    /* handy references */
    young = GEN_HEAD(state, generation);
    if (generation < NUM_GENERATIONS-1)
        old = GEN_HEAD(state, generation+1);
    else
        old = young;

    validate_list(young, 0);
    validate_list(old, 0);
    /* Using ob_refcnt and gc_refs, calculate which objects in the
     * container set are reachable from outside the set (i.e., have a
     * refcount greater than 0 when all the references within the
     * set are taken into account).
     */
    update_refs(young);  // gc_prev is used for gc_refs
    subtract_refs(young);

    /* Leave everything reachable from outside young in young, and move
     * everything else (in young) to unreachable.
     * NOTE:  This used to move the reachable objects into a reachable
     * set instead.  But most things usually turn out to be reachable,
     * so it's more efficient to move the unreachable things.
     */
    gc_list_init(&unreachable);
    move_unreachable(young, &unreachable);  // gc_prev is pointer again
    validate_list(young, 0);

    untrack_tuples(young);
    /* Move reachable objects to next generation. */
    if (young != old) {
        if (generation == NUM_GENERATIONS - 2) {
            state->long_lived_pending += gc_list_size(young);
        }
        gc_list_merge(young, old);
    }
    else {
        /* We only untrack dicts in full collections, to avoid quadratic
           dict build-up. See issue #14775. */
        untrack_dicts(young);
        state->long_lived_pending = 0;
        state->long_lived_total = gc_list_size(young);
    }

    /* All objects in unreachable are trash, but objects reachable from
     * legacy finalizers (e.g. tp_del) can't safely be deleted.
     */
    gc_list_init(&finalizers);
    // NEXT_MASK_UNREACHABLE is cleared here.
    // After move_legacy_finalizers(), unreachable is a normal list.
    move_legacy_finalizers(&unreachable, &finalizers);
    /* finalizers contains the unreachable objects with a legacy finalizer;
     * unreachable objects reachable *from* those are also uncollectable,
     * and we move those into the finalizers list too.
     */
    move_legacy_finalizer_reachable(&finalizers);

    validate_list(&finalizers, 0);
    validate_list(&unreachable, PREV_MASK_COLLECTING);

    /* Print debugging information. */
    if (state->debug & DEBUG_COLLECTABLE) {
        for (gc = GC_NEXT(&unreachable); gc != &unreachable; gc = GC_NEXT(gc)) {
            debug_cycle("collectable", FROM_GC(gc));
        }
    }

    /* Clear weakrefs and invoke callbacks as necessary. */
    m += handle_weakrefs(&unreachable, old);

    validate_list(old, 0);
    validate_list(&unreachable, PREV_MASK_COLLECTING);

    /* Call tp_finalize on objects which have one. */
    finalize_garbage(&unreachable);

    if (check_garbage(&unreachable)) { // clears PREV_MASK_COLLECTING here
        gc_list_merge(&unreachable, old);
    }
    else {
        /* Call tp_clear on objects in the unreachable set.  This will cause
         * the reference cycles to be broken.  It may also cause some objects
         * in finalizers to be freed.
         */
        m += gc_list_size(&unreachable);
        delete_garbage(state, &unreachable, old);
    }

    /* Collect statistics on uncollectable objects found and print
     * debugging information. */
    for (gc = GC_NEXT(&finalizers); gc != &finalizers; gc = GC_NEXT(gc)) {
        n++;
        if (state->debug & DEBUG_UNCOLLECTABLE)
            debug_cycle("uncollectable", FROM_GC(gc));
    }
    if (state->debug & DEBUG_STATS) {
        double d = _PyTime_AsSecondsDouble(_PyTime_GetMonotonicClock() - t1);
        PySys_WriteStderr(
            "gc: done, %" PY_FORMAT_SIZE_T "d unreachable, "
            "%" PY_FORMAT_SIZE_T "d uncollectable, %.4fs elapsed\n",
            n+m, n, d);
    }

    /* Append instances in the uncollectable set to a Python
     * reachable list of garbage.  The programmer has to deal with
     * this if they insist on creating this type of structure.
     */
    handle_legacy_finalizers(state, &finalizers, old);
    validate_list(old, 0);

    /* Clear free lists only during the collection of the highest
     * generation */
    if (generation == NUM_GENERATIONS-1) {
        clear_freelists();
    }

    if (PyErr_Occurred()) {
        if (nofail) {
            PyErr_Clear();
        }
        else {
            if (gc_str == NULL)
                gc_str = PyUnicode_FromString("garbage collection");
            PyErr_WriteUnraisable(gc_str);
            Py_FatalError("unexpected exception during garbage collection");
        }
    }

    /* Update stats */
    if (n_collected) {
        *n_collected = m;
    }
    if (n_uncollectable) {
        *n_uncollectable = n;
    }

    struct gc_generation_stats *stats = &state->generation_stats[generation];
    stats->collections++;
    stats->collected += m;
    stats->uncollectable += n;

    if (PyDTrace_GC_DONE_ENABLED()) {
        PyDTrace_GC_DONE(n+m);
    }

    assert(!PyErr_Occurred());
    return n+m;
}

/* Invoke progress callbacks to notify clients that garbage collection
 * is starting or stopping
 */
static void
invoke_gc_callback(struct _gc_runtime_state *state, const char *phase,
                   int generation, Py_ssize_t collected,
                   Py_ssize_t uncollectable)
{
    assert(!PyErr_Occurred());

    /* we may get called very early */
    if (state->callbacks == NULL) {
        return;
    }

    /* The local variable cannot be rebound, check it for sanity */
    assert(PyList_CheckExact(state->callbacks));
    PyObject *info = NULL;
    if (PyList_GET_SIZE(state->callbacks) != 0) {
        info = Py_BuildValue("{sisnsn}",
            "generation", generation,
            "collected", collected,
            "uncollectable", uncollectable);
        if (info == NULL) {
            PyErr_WriteUnraisable(NULL);
            return;
        }
    }
    for (Py_ssize_t i=0; i<PyList_GET_SIZE(state->callbacks); i++) {
        PyObject *r, *cb = PyList_GET_ITEM(state->callbacks, i);
        Py_INCREF(cb); /* make sure cb doesn't go away */
        r = PyObject_CallFunction(cb, "sO", phase, info);
        if (r == NULL) {
            PyErr_WriteUnraisable(cb);
        }
        else {
            Py_DECREF(r);
        }
        Py_DECREF(cb);
    }
    Py_XDECREF(info);
    assert(!PyErr_Occurred());
}

/* Perform garbage collection of a generation and invoke
 * progress callbacks.
 */
static Py_ssize_t
collect_with_callback(struct _gc_runtime_state *state, int generation)
{
    assert(!PyErr_Occurred());
    Py_ssize_t result, collected, uncollectable;
    invoke_gc_callback(state, "start", generation, 0, 0);
    result = collect(state, generation, &collected, &uncollectable, 0);
    invoke_gc_callback(state, "stop", generation, collected, uncollectable);
    assert(!PyErr_Occurred());
    return result;
}

static Py_ssize_t
collect_generations(struct _gc_runtime_state *state)
{
    /* Find the oldest generation (highest numbered) where the count
     * exceeds the threshold.  Objects in that generation and
     * generations younger than it will be collected. */
    Py_ssize_t n = 0;
    for (int i = NUM_GENERATIONS-1; i >= 0; i--) {
        if (state->generations[i].count > state->generations[i].threshold) {
            /* Avoid quadratic performance degradation in number
               of tracked objects. See comments at the beginning
               of this file, and issue #4074.
            */
            if (i == NUM_GENERATIONS - 1
                && state->long_lived_pending < state->long_lived_total / 4)
                continue;
            n = collect_with_callback(state, i);
            break;
        }
    }
    return n;
}
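
/* Editorial worked example of the scheduling above, using the default
 * thresholds (700, 10, 10) from _PyGC_Initialize():
 *
 *   - generations[0].count exceeds 700 -> generation 0 is collected, and
 *     collect() bumps generations[1].count by one;
 *   - after about 10 such runs, generations[1].count exceeds 10 ->
 *     generations 0-1 are collected, bumping generations[2].count;
 *   - generation 2 would be due after about 10 of those, but the long_lived
 *     test above defers the full collection until the objects pending
 *     promotion exceed 25% of the long-lived set.
 */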

#include "clinic/gcmodule.c.h"

/*[clinic input]
gc.enable

Enable automatic garbage collection.
[clinic start generated code]*/

static PyObject *
gc_enable_impl(PyObject *module)
/*[clinic end generated code: output=45a427e9dce9155c input=81ac4940ca579707]*/
{
    _PyRuntime.gc.enabled = 1;
    Py_RETURN_NONE;
}

/*[clinic input]
gc.disable

Disable automatic garbage collection.
[clinic start generated code]*/

static PyObject *
gc_disable_impl(PyObject *module)
/*[clinic end generated code: output=97d1030f7aa9d279 input=8c2e5a14e800d83b]*/
{
    _PyRuntime.gc.enabled = 0;
    Py_RETURN_NONE;
}

/*[clinic input]
gc.isenabled -> bool

Returns true if automatic garbage collection is enabled.
[clinic start generated code]*/

static int
gc_isenabled_impl(PyObject *module)
/*[clinic end generated code: output=1874298331c49130 input=30005e0422373b31]*/
{
    return _PyRuntime.gc.enabled;
}

/*[clinic input]
gc.collect -> Py_ssize_t

    generation: int(c_default="NUM_GENERATIONS - 1") = 2

Run the garbage collector.

With no arguments, run a full collection.  The optional argument
may be an integer specifying which generation to collect.  A ValueError
is raised if the generation number is invalid.

The number of unreachable objects is returned.
[clinic start generated code]*/

static Py_ssize_t
gc_collect_impl(PyObject *module, int generation)
/*[clinic end generated code: output=b697e633043233c7 input=40720128b682d879]*/
{
    if (generation < 0 || generation >= NUM_GENERATIONS) {
        PyErr_SetString(PyExc_ValueError, "invalid generation");
        return -1;
    }

    struct _gc_runtime_state *state = &_PyRuntime.gc;
    Py_ssize_t n;
    if (state->collecting) {
        /* already collecting, don't do anything */
        n = 0;
    }
    else {
        state->collecting = 1;
        n = collect_with_callback(state, generation);
        state->collecting = 0;
    }
    return n;
}

/*[clinic input]
gc.set_debug

    flags: int
        An integer that can have the following bits turned on:
          DEBUG_STATS - Print statistics during collection.
          DEBUG_COLLECTABLE - Print collectable objects found.
          DEBUG_UNCOLLECTABLE - Print unreachable but uncollectable objects
            found.
          DEBUG_SAVEALL - Save objects to gc.garbage rather than freeing them.
          DEBUG_LEAK - Debug leaking programs (everything but STATS).
    /

Set the garbage collection debugging flags.

Debugging information is written to sys.stderr.
[clinic start generated code]*/

static PyObject *
gc_set_debug_impl(PyObject *module, int flags)
/*[clinic end generated code: output=7c8366575486b228 input=5e5ce15e84fbed15]*/
{
    _PyRuntime.gc.debug = flags;

    Py_RETURN_NONE;
}

/*[clinic input]
gc.get_debug -> int

Get the garbage collection debugging flags.
[clinic start generated code]*/

static int
gc_get_debug_impl(PyObject *module)
/*[clinic end generated code: output=91242f3506cd1e50 input=91a101e1c3b98366]*/
{
    return _PyRuntime.gc.debug;
}

PyDoc_STRVAR(gc_set_thresh__doc__,
"set_threshold(threshold0, [threshold1, threshold2]) -> None\n"
"\n"
"Sets the collection thresholds.  Setting threshold0 to zero disables\n"
"collection.\n");

static PyObject *
gc_set_threshold(PyObject *self, PyObject *args)
{
    struct _gc_runtime_state *state = &_PyRuntime.gc;
    if (!PyArg_ParseTuple(args, "i|ii:set_threshold",
                          &state->generations[0].threshold,
                          &state->generations[1].threshold,
                          &state->generations[2].threshold))
        return NULL;
    for (int i = 3; i < NUM_GENERATIONS; i++) {
        /* generations higher than 2 get the same threshold */
        state->generations[i].threshold = state->generations[2].threshold;
    }
    Py_RETURN_NONE;
}

/*[clinic input]
gc.get_threshold

Return the current collection thresholds.
[clinic start generated code]*/

static PyObject *
gc_get_threshold_impl(PyObject *module)
/*[clinic end generated code: output=7902bc9f41ecbbd8 input=286d79918034d6e6]*/
{
    struct _gc_runtime_state *state = &_PyRuntime.gc;
    return Py_BuildValue("(iii)",
                         state->generations[0].threshold,
                         state->generations[1].threshold,
                         state->generations[2].threshold);
}

/*[clinic input]
gc.get_count

Return a three-tuple of the current collection counts.
[clinic start generated code]*/

static PyObject *
gc_get_count_impl(PyObject *module)
/*[clinic end generated code: output=354012e67b16398f input=a392794a08251751]*/
{
    struct _gc_runtime_state *state = &_PyRuntime.gc;
    return Py_BuildValue("(iii)",
                         state->generations[0].count,
                         state->generations[1].count,
                         state->generations[2].count);
}

static int
referrersvisit(PyObject* obj, PyObject *objs)
{
    Py_ssize_t i;
    for (i = 0; i < PyTuple_GET_SIZE(objs); i++)
        if (PyTuple_GET_ITEM(objs, i) == obj)
            return 1;
    return 0;
}
1455 
1456 static int
gc_referrers_for(PyObject * objs,PyGC_Head * list,PyObject * resultlist)1457 gc_referrers_for(PyObject *objs, PyGC_Head *list, PyObject *resultlist)
1458 {
1459     PyGC_Head *gc;
1460     PyObject *obj;
1461     traverseproc traverse;
1462     for (gc = GC_NEXT(list); gc != list; gc = GC_NEXT(gc)) {
1463         obj = FROM_GC(gc);
1464         traverse = Py_TYPE(obj)->tp_traverse;
1465         if (obj == objs || obj == resultlist)
1466             continue;
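        /* Editorial note: tracked objects are required to implement
           tp_traverse (a Py_TPFLAGS_HAVE_GC obligation), so no NULL
           check is needed before calling it here. */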
        if (traverse(obj, (visitproc)referrersvisit, objs)) {
            if (PyList_Append(resultlist, obj) < 0)
                return 0; /* error */
        }
    }
    return 1; /* no error */
}

PyDoc_STRVAR(gc_get_referrers__doc__,
"get_referrers(*objs) -> list\n\
Return the list of objects that directly refer to any of objs.");

static PyObject *
gc_get_referrers(PyObject *self, PyObject *args)
{
    int i;
    if (PySys_Audit("gc.get_referrers", "(O)", args) < 0) {
        return NULL;
    }

    PyObject *result = PyList_New(0);
    if (!result) return NULL;

    struct _gc_runtime_state *state = &_PyRuntime.gc;
    for (i = 0; i < NUM_GENERATIONS; i++) {
        if (!(gc_referrers_for(args, GEN_HEAD(state, i), result))) {
            Py_DECREF(result);
            return NULL;
        }
    }
    return result;
}

/* Append obj to list; return true if error (out of memory), false if OK. */
static int
referentsvisit(PyObject *obj, PyObject *list)
{
    return PyList_Append(list, obj) < 0;
}

PyDoc_STRVAR(gc_get_referents__doc__,
"get_referents(*objs) -> list\n\
Return the list of objects that are directly referred to by objs.");

static PyObject *
gc_get_referents(PyObject *self, PyObject *args)
{
    Py_ssize_t i;
    if (PySys_Audit("gc.get_referents", "(O)", args) < 0) {
        return NULL;
    }
    PyObject *result = PyList_New(0);

    if (result == NULL)
        return NULL;

    for (i = 0; i < PyTuple_GET_SIZE(args); i++) {
        traverseproc traverse;
        PyObject *obj = PyTuple_GET_ITEM(args, i);

        if (! PyObject_IS_GC(obj))
            continue;
        traverse = Py_TYPE(obj)->tp_traverse;
        if (! traverse)
            continue;
        if (traverse(obj, (visitproc)referentsvisit, result)) {
            Py_DECREF(result);
            return NULL;
        }
    }
    return result;
}

/*[clinic input]
gc.get_objects
    generation: Py_ssize_t(accept={int, NoneType}, c_default="-1") = None
        Generation to extract the objects from.

Return a list of objects tracked by the collector (excluding the list returned).

If generation is not None, return only the objects tracked by the collector
that are in that generation.
[clinic start generated code]*/

static PyObject *
gc_get_objects_impl(PyObject *module, Py_ssize_t generation)
/*[clinic end generated code: output=48b35fea4ba6cb0e input=ef7da9df9806754c]*/
{
    int i;
    PyObject* result;
    struct _gc_runtime_state *state = &_PyRuntime.gc;

    if (PySys_Audit("gc.get_objects", "n", generation) < 0) {
        return NULL;
    }

    result = PyList_New(0);
    if (result == NULL) {
        return NULL;
    }

    /* If generation is passed, we extract only that generation */
    if (generation != -1) {
        if (generation >= NUM_GENERATIONS) {
            PyErr_Format(PyExc_ValueError,
                         "generation parameter must be less than the number of "
                         "available generations (%i)",
                         NUM_GENERATIONS);
            goto error;
        }

        if (generation < 0) {
            PyErr_SetString(PyExc_ValueError,
                            "generation parameter cannot be negative");
            goto error;
        }

        if (append_objects(result, GEN_HEAD(state, generation))) {
            goto error;
        }

        return result;
    }

    /* If generation is not passed or None, get all objects from all generations */
    for (i = 0; i < NUM_GENERATIONS; i++) {
        if (append_objects(result, GEN_HEAD(state, i))) {
            goto error;
        }
    }
    return result;

error:
    Py_DECREF(result);
    return NULL;
}

/*[clinic input]
gc.get_stats

Return a list of dictionaries containing per-generation statistics.
[clinic start generated code]*/

static PyObject *
gc_get_stats_impl(PyObject *module)
/*[clinic end generated code: output=a8ab1d8a5d26f3ab input=1ef4ed9d17b1a470]*/
{
    int i;
    struct gc_generation_stats stats[NUM_GENERATIONS], *st;

    /* To get consistent values despite allocations while constructing
       the result list, we use a snapshot of the running stats. */
    struct _gc_runtime_state *state = &_PyRuntime.gc;
    for (i = 0; i < NUM_GENERATIONS; i++) {
        stats[i] = state->generation_stats[i];
    }

    PyObject *result = PyList_New(0);
    if (result == NULL)
        return NULL;

    for (i = 0; i < NUM_GENERATIONS; i++) {
        PyObject *dict;
        st = &stats[i];
        dict = Py_BuildValue("{snsnsn}",
                             "collections", st->collections,
                             "collected", st->collected,
                             "uncollectable", st->uncollectable
                            );
        if (dict == NULL)
            goto error;
        if (PyList_Append(result, dict)) {
            Py_DECREF(dict);
            goto error;
        }
        Py_DECREF(dict);
    }
    return result;

error:
    Py_XDECREF(result);
    return NULL;
}


/*[clinic input]
gc.is_tracked

    obj: object
    /

Returns true if the object is tracked by the garbage collector.

Simple atomic objects will return false.
[clinic start generated code]*/

static PyObject *
gc_is_tracked(PyObject *module, PyObject *obj)
/*[clinic end generated code: output=14f0103423b28e31 input=d83057f170ea2723]*/
{
    PyObject *result;

    if (PyObject_IS_GC(obj) && _PyObject_GC_IS_TRACKED(obj))
        result = Py_True;
    else
        result = Py_False;
    Py_INCREF(result);
    return result;
}

/*[clinic input]
gc.freeze

Freeze all currently tracked objects and ignore them for future collections.

This can be used before a POSIX fork() call to make the gc copy-on-write friendly.
Note: collection before a POSIX fork() call may free pages for future allocation,
which can cause copy-on-write.
[clinic start generated code]*/

static PyObject *
gc_freeze_impl(PyObject *module)
/*[clinic end generated code: output=502159d9cdc4c139 input=b602b16ac5febbe5]*/
{
    struct _gc_runtime_state *state = &_PyRuntime.gc;
    for (int i = 0; i < NUM_GENERATIONS; ++i) {
        gc_list_merge(GEN_HEAD(state, i), &state->permanent_generation.head);
        state->generations[i].count = 0;
    }
    Py_RETURN_NONE;
}
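
/* A hedged pre-fork sketch (editorial): collect first so the freeze does
 * not capture garbage, then freeze, then fork.  The "example_" helper is
 * hypothetical, and fork() itself would come from <unistd.h>, which this
 * file does not include. */
static void
example_prepare_for_fork(void)
{
    /* Full collection first: pages freed *after* the freeze would be
       reused for new allocations and written to, defeating sharing. */
    (void)PyRun_SimpleString("import gc; gc.collect(); gc.freeze()");
    /* ... fork() here; collections ignore the permanent generation, so
       the frozen objects' GC headers stay unmodified and their pages
       remain shared with the child. */
}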

/*[clinic input]
gc.unfreeze

Unfreeze all objects in the permanent generation.

Put all objects in the permanent generation back into the oldest generation.
[clinic start generated code]*/

static PyObject *
gc_unfreeze_impl(PyObject *module)
/*[clinic end generated code: output=1c15f2043b25e169 input=2dd52b170f4cef6c]*/
{
    struct _gc_runtime_state *state = &_PyRuntime.gc;
    gc_list_merge(&state->permanent_generation.head, GEN_HEAD(state, NUM_GENERATIONS-1));
    Py_RETURN_NONE;
}

/*[clinic input]
gc.get_freeze_count -> Py_ssize_t

Return the number of objects in the permanent generation.
[clinic start generated code]*/

static Py_ssize_t
gc_get_freeze_count_impl(PyObject *module)
/*[clinic end generated code: output=61cbd9f43aa032e1 input=45ffbc65cfe2a6ed]*/
{
    return gc_list_size(&_PyRuntime.gc.permanent_generation.head);
}


PyDoc_STRVAR(gc__doc__,
"This module provides access to the garbage collector for reference cycles.\n"
"\n"
"enable() -- Enable automatic garbage collection.\n"
"disable() -- Disable automatic garbage collection.\n"
"isenabled() -- Returns true if automatic collection is enabled.\n"
"collect() -- Do a full collection right now.\n"
"get_count() -- Return the current collection counts.\n"
"get_stats() -- Return a list of dictionaries containing per-generation stats.\n"
"set_debug() -- Set debugging flags.\n"
"get_debug() -- Get debugging flags.\n"
"set_threshold() -- Set the collection thresholds.\n"
"get_threshold() -- Return the current collection thresholds.\n"
"get_objects() -- Return a list of all objects tracked by the collector.\n"
"is_tracked() -- Returns true if a given object is tracked.\n"
"get_referrers() -- Return the list of objects that refer to an object.\n"
"get_referents() -- Return the list of objects that an object refers to.\n"
"freeze() -- Freeze all tracked objects and ignore them for future collections.\n"
"unfreeze() -- Unfreeze all objects in the permanent generation.\n"
"get_freeze_count() -- Return the number of objects in the permanent generation.\n");

static PyMethodDef GcMethods[] = {
    GC_ENABLE_METHODDEF
    GC_DISABLE_METHODDEF
    GC_ISENABLED_METHODDEF
    GC_SET_DEBUG_METHODDEF
    GC_GET_DEBUG_METHODDEF
    GC_GET_COUNT_METHODDEF
    {"set_threshold",  gc_set_threshold, METH_VARARGS, gc_set_thresh__doc__},
    GC_GET_THRESHOLD_METHODDEF
    GC_COLLECT_METHODDEF
    GC_GET_OBJECTS_METHODDEF
    GC_GET_STATS_METHODDEF
    GC_IS_TRACKED_METHODDEF
    {"get_referrers",  gc_get_referrers, METH_VARARGS,
        gc_get_referrers__doc__},
    {"get_referents",  gc_get_referents, METH_VARARGS,
        gc_get_referents__doc__},
    GC_FREEZE_METHODDEF
    GC_UNFREEZE_METHODDEF
    GC_GET_FREEZE_COUNT_METHODDEF
    {NULL,      NULL}           /* Sentinel */
};

static struct PyModuleDef gcmodule = {
    PyModuleDef_HEAD_INIT,
    "gc",              /* m_name */
    gc__doc__,         /* m_doc */
    -1,                /* m_size */
    GcMethods,         /* m_methods */
    NULL,              /* m_reload */
    NULL,              /* m_traverse */
    NULL,              /* m_clear */
    NULL               /* m_free */
};

PyMODINIT_FUNC
PyInit_gc(void)
{
    PyObject *m;

    m = PyModule_Create(&gcmodule);

    if (m == NULL) {
        return NULL;
    }

    struct _gc_runtime_state *state = &_PyRuntime.gc;
    if (state->garbage == NULL) {
        state->garbage = PyList_New(0);
        if (state->garbage == NULL)
            return NULL;
    }
    Py_INCREF(state->garbage);
    if (PyModule_AddObject(m, "garbage", state->garbage) < 0)
        return NULL;

    if (state->callbacks == NULL) {
        state->callbacks = PyList_New(0);
        if (state->callbacks == NULL)
            return NULL;
    }
    Py_INCREF(state->callbacks);
    if (PyModule_AddObject(m, "callbacks", state->callbacks) < 0)
        return NULL;

#define ADD_INT(NAME) if (PyModule_AddIntConstant(m, #NAME, NAME) < 0) return NULL
    ADD_INT(DEBUG_STATS);
    ADD_INT(DEBUG_COLLECTABLE);
    ADD_INT(DEBUG_UNCOLLECTABLE);
    ADD_INT(DEBUG_SAVEALL);
    ADD_INT(DEBUG_LEAK);
#undef ADD_INT
    return m;
}

/* API to invoke gc.collect() from C */
Py_ssize_t
PyGC_Collect(void)
{
    struct _gc_runtime_state *state = &_PyRuntime.gc;
    if (!state->enabled) {
        return 0;
    }

    Py_ssize_t n;
    if (state->collecting) {
        /* already collecting, don't do anything */
        n = 0;
    }
    else {
        PyObject *exc, *value, *tb;
        state->collecting = 1;
        PyErr_Fetch(&exc, &value, &tb);
        n = collect_with_callback(state, NUM_GENERATIONS - 1);
        PyErr_Restore(exc, value, tb);
        state->collecting = 0;
    }

    return n;
}
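
/* A hedged embedding sketch (editorial): forcing a full collection from C
 * at a known-quiet point, e.g. between requests in an embedded
 * interpreter.  Assumes the caller holds the GIL; the helper name is
 * hypothetical. */
static void
example_force_full_collection(void)
{
    /* PyGC_Collect() returns the number of unreachable objects found,
       or 0 if collection is disabled or already in progress. */
    Py_ssize_t found = PyGC_Collect();
    fprintf(stderr, "gc: full collection found %zd unreachable objects\n",
            found);
}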

Py_ssize_t
_PyGC_CollectIfEnabled(void)
{
    return PyGC_Collect();
}

Py_ssize_t
_PyGC_CollectNoFail(void)
{
    assert(!PyErr_Occurred());

    struct _gc_runtime_state *state = &_PyRuntime.gc;
    Py_ssize_t n;

    /* Ideally, this function is only called on interpreter shutdown,
       and therefore not recursively.  Unfortunately, when there are daemon
       threads, a daemon thread can start a cyclic garbage collection
       during interpreter shutdown (and then never finish it).
       See http://bugs.python.org/issue8713#msg195178 for an example.
       */
    if (state->collecting) {
        n = 0;
    }
    else {
        state->collecting = 1;
        n = collect(state, NUM_GENERATIONS - 1, NULL, NULL, 1);
        state->collecting = 0;
    }
    return n;
}

void
_PyGC_DumpShutdownStats(_PyRuntimeState *runtime)
{
    struct _gc_runtime_state *state = &runtime->gc;
    if (!(state->debug & DEBUG_SAVEALL)
        && state->garbage != NULL && PyList_GET_SIZE(state->garbage) > 0) {
        const char *message;
        if (state->debug & DEBUG_UNCOLLECTABLE)
            message = "gc: %zd uncollectable objects at " \
                "shutdown";
        else
            message = "gc: %zd uncollectable objects at " \
                "shutdown; use gc.set_debug(gc.DEBUG_UNCOLLECTABLE) to list them";
        /* PyErr_WarnFormat does too many things, and we are at shutdown:
           the warnings module's dependencies (e.g. linecache) may be gone
           already. */
        if (PyErr_WarnExplicitFormat(PyExc_ResourceWarning, "gc", 0,
                                     "gc", NULL, message,
                                     PyList_GET_SIZE(state->garbage)))
            PyErr_WriteUnraisable(NULL);
        if (state->debug & DEBUG_UNCOLLECTABLE) {
            PyObject *repr = NULL, *bytes = NULL;
            repr = PyObject_Repr(state->garbage);
            if (!repr || !(bytes = PyUnicode_EncodeFSDefault(repr)))
                PyErr_WriteUnraisable(state->garbage);
            else {
                PySys_WriteStderr(
                    "      %s\n",
                    PyBytes_AS_STRING(bytes)
                    );
            }
            Py_XDECREF(repr);
            Py_XDECREF(bytes);
        }
    }
}

void
_PyGC_Fini(_PyRuntimeState *runtime)
{
    struct _gc_runtime_state *state = &runtime->gc;
    Py_CLEAR(state->garbage);
    Py_CLEAR(state->callbacks);
}

/* for debugging */
void
_PyGC_Dump(PyGC_Head *g)
{
    _PyObject_Dump(FROM_GC(g));
}

/* extension modules might be compiled with GC support so these
   functions must always be available */

void
PyObject_GC_Track(void *op_raw)
{
    PyObject *op = _PyObject_CAST(op_raw);
    if (_PyObject_GC_IS_TRACKED(op)) {
        _PyObject_ASSERT_FAILED_MSG(op,
                                    "object already tracked "
                                    "by the garbage collector");
    }
    _PyObject_GC_TRACK(op);
}

void
PyObject_GC_UnTrack(void *op_raw)
{
    PyObject *op = _PyObject_CAST(op_raw);
    /* Obscure:  the Py_TRASHCAN mechanism requires that we be able to
     * call PyObject_GC_UnTrack twice on an object.
     */
    if (_PyObject_GC_IS_TRACKED(op)) {
        _PyObject_GC_UNTRACK(op);
    }
}
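
/* A hedged sketch (editorial) of the canonical use of the two functions
 * above in an extension type's deallocator.  "MyObject" and its single
 * "item" field are hypothetical.  Untracking first keeps the collector
 * from traversing a half-torn-down object while Py_CLEAR runs arbitrary
 * destructor code. */
typedef struct {
    PyObject_HEAD
    PyObject *item;
} MyObject;

static void
example_my_dealloc(MyObject *self)
{
    PyObject_GC_UnTrack(self);   /* safe even if already untracked */
    Py_CLEAR(self->item);        /* may run arbitrary Python code */
    PyObject_GC_Del(self);
}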

static PyObject *
_PyObject_GC_Alloc(int use_calloc, size_t basicsize)
{
    struct _gc_runtime_state *state = &_PyRuntime.gc;
    PyObject *op;
    PyGC_Head *g;
    size_t size;
    if (basicsize > PY_SSIZE_T_MAX - sizeof(PyGC_Head))
        return PyErr_NoMemory();
    size = sizeof(PyGC_Head) + basicsize;
    if (use_calloc)
        g = (PyGC_Head *)PyObject_Calloc(1, size);
    else
        g = (PyGC_Head *)PyObject_Malloc(size);
    if (g == NULL)
        return PyErr_NoMemory();
    assert(((uintptr_t)g & 3) == 0);  // g must be aligned to a 4-byte boundary
    g->_gc_next = 0;
    g->_gc_prev = 0;
    state->generations[0].count++; /* number of allocated GC objects */
    if (state->generations[0].count > state->generations[0].threshold &&
        state->enabled &&
        state->generations[0].threshold &&
        !state->collecting &&
        !PyErr_Occurred()) {
        state->collecting = 1;
        collect_generations(state);
        state->collecting = 0;
    }
    op = FROM_GC(g);
    return op;
}

PyObject *
_PyObject_GC_Malloc(size_t basicsize)
{
    return _PyObject_GC_Alloc(0, basicsize);
}

PyObject *
_PyObject_GC_Calloc(size_t basicsize)
{
    return _PyObject_GC_Alloc(1, basicsize);
}

PyObject *
_PyObject_GC_New(PyTypeObject *tp)
{
    PyObject *op = _PyObject_GC_Malloc(_PyObject_SIZE(tp));
    if (op != NULL)
        op = PyObject_INIT(op, tp);
    return op;
}
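
/* A hedged sketch (editorial): allocating through the public macro
 * PyObject_GC_New (a thin wrapper over _PyObject_GC_New above) and only
 * tracking the object once its fields are in a traversable state.
 * Reuses the hypothetical MyObject layout from the deallocation sketch. */
static MyObject *
example_my_alloc(PyTypeObject *type)
{
    MyObject *self = PyObject_GC_New(MyObject, type);
    if (self == NULL) {
        return NULL;
    }
    self->item = NULL;                    /* initialize before tracking */
    PyObject_GC_Track((PyObject *)self);  /* now visible to the collector */
    return self;
}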

PyVarObject *
_PyObject_GC_NewVar(PyTypeObject *tp, Py_ssize_t nitems)
{
    size_t size;
    PyVarObject *op;

    if (nitems < 0) {
        PyErr_BadInternalCall();
        return NULL;
    }
    size = _PyObject_VAR_SIZE(tp, nitems);
    op = (PyVarObject *) _PyObject_GC_Malloc(size);
    if (op != NULL)
        op = PyObject_INIT_VAR(op, tp, nitems);
    return op;
}

PyVarObject *
_PyObject_GC_Resize(PyVarObject *op, Py_ssize_t nitems)
{
    const size_t basicsize = _PyObject_VAR_SIZE(Py_TYPE(op), nitems);
    _PyObject_ASSERT((PyObject *)op, !_PyObject_GC_IS_TRACKED(op));
    if (basicsize > PY_SSIZE_T_MAX - sizeof(PyGC_Head)) {
        return (PyVarObject *)PyErr_NoMemory();
    }

    PyGC_Head *g = AS_GC(op);
    g = (PyGC_Head *)PyObject_REALLOC(g, sizeof(PyGC_Head) + basicsize);
    if (g == NULL)
        return (PyVarObject *)PyErr_NoMemory();
    op = (PyVarObject *) FROM_GC(g);
    Py_SIZE(op) = nitems;
    return op;
}

void
PyObject_GC_Del(void *op)
{
    PyGC_Head *g = AS_GC(op);
    if (_PyObject_GC_IS_TRACKED(op)) {
        gc_list_remove(g);
    }
    struct _gc_runtime_state *state = &_PyRuntime.gc;
    if (state->generations[0].count > 0) {
        state->generations[0].count--;
    }
    PyObject_FREE(g);
}