1 /* Parse tree node implementation */
2 
3 #include "Python.h"
4 #include "node.h"
5 #include "errcode.h"
6 
7 node *
PyNode_New(int type)8 PyNode_New(int type)
9 {
10     node *n = (node *) PyObject_MALLOC(1 * sizeof(node));
11     if (n == NULL)
12         return NULL;
13     n->n_type = type;
14     n->n_str = NULL;
15     n->n_lineno = 0;
16     n->n_end_lineno = 0;
17     n->n_end_col_offset = -1;
18     n->n_nchildren = 0;
19     n->n_child = NULL;
20     return n;
21 }
22 
23 /* See comments at XXXROUNDUP below.  Returns -1 on overflow. */
24 static int
fancy_roundup(int n)25 fancy_roundup(int n)
26 {
27     /* Round up to the closest power of 2 >= n. */
28     int result = 256;
29     assert(n > 128);
30     while (result < n) {
31         result <<= 1;
32         if (result <= 0)
33             return -1;
34     }
35     return result;
36 }
37 
38 /* A gimmick to make massive numbers of reallocs quicker.  The result is
39  * a number >= the input.  In PyNode_AddChild, it's used like so, when
40  * we're about to add child number current_size + 1:
41  *
42  *     if XXXROUNDUP(current_size) < XXXROUNDUP(current_size + 1):
43  *         allocate space for XXXROUNDUP(current_size + 1) total children
44  *     else:
45  *         we already have enough space
46  *
47  * Since a node starts out empty, we must have
48  *
49  *     XXXROUNDUP(0) < XXXROUNDUP(1)
50  *
51  * so that we allocate space for the first child.  One-child nodes are very
52  * common (presumably that would change if we used a more abstract form
53  * of syntax tree), so to avoid wasting memory it's desirable that
54  * XXXROUNDUP(1) == 1.  That in turn forces XXXROUNDUP(0) == 0.
55  *
56  * Else for 2 <= n <= 128, we round up to the closest multiple of 4.  Why 4?
57  * Rounding up to a multiple of an exact power of 2 is very efficient, and
58  * most nodes with more than one child have <= 4 kids.
59  *
60  * Else we call fancy_roundup() to grow proportionately to n.  We've got an
61  * extreme case then (like test_longexp.py), and on many platforms doing
62  * anything less than proportional growth leads to exorbitant runtime
63  * (e.g., MacPython), or extreme fragmentation of user address space (e.g.,
64  * Win98).
65  *
66  * In a run of compileall across the 2.3a0 Lib directory, Andrew MacIntyre
67  * reported that, with this scheme, 89% of PyObject_REALLOC calls in
68  * PyNode_AddChild passed 1 for the size, and 9% passed 4.  So this usually
69  * wastes very little memory, but is very effective at sidestepping
70  * platform-realloc disasters on vulnerable platforms.
71  *
72  * Note that this would be straightforward if a node stored its current
73  * capacity.  The code is tricky to avoid that.
74  */
75 #define XXXROUNDUP(n) ((n) <= 1 ? (n) :                         \
76                (n) <= 128 ? (int)_Py_SIZE_ROUND_UP((n), 4) :    \
77                fancy_roundup(n))
78 
79 
80 void
_PyNode_FinalizeEndPos(node * n)81 _PyNode_FinalizeEndPos(node *n)
82 {
83     int nch = NCH(n);
84     node *last;
85     if (nch == 0) {
86         return;
87     }
88     last = CHILD(n, nch - 1);
89     _PyNode_FinalizeEndPos(last);
90     n->n_end_lineno = last->n_end_lineno;
91     n->n_end_col_offset = last->n_end_col_offset;
92 }
93 
94 int
PyNode_AddChild(node * n1,int type,char * str,int lineno,int col_offset,int end_lineno,int end_col_offset)95 PyNode_AddChild(node *n1, int type, char *str, int lineno, int col_offset,
96                 int end_lineno, int end_col_offset)
97 {
98     const int nch = n1->n_nchildren;
99     int current_capacity;
100     int required_capacity;
101     node *n;
102 
103     // finalize end position of previous node (if any)
104     if (nch > 0) {
105         _PyNode_FinalizeEndPos(CHILD(n1, nch - 1));
106     }
107 
108     if (nch == INT_MAX || nch < 0)
109         return E_OVERFLOW;
110 
111     current_capacity = XXXROUNDUP(nch);
112     required_capacity = XXXROUNDUP(nch + 1);
113     if (current_capacity < 0 || required_capacity < 0)
114         return E_OVERFLOW;
115     if (current_capacity < required_capacity) {
116         if ((size_t)required_capacity > SIZE_MAX / sizeof(node)) {
117             return E_NOMEM;
118         }
119         n = n1->n_child;
120         n = (node *) PyObject_REALLOC(n,
121                                       required_capacity * sizeof(node));
122         if (n == NULL)
123             return E_NOMEM;
124         n1->n_child = n;
125     }
126 
127     n = &n1->n_child[n1->n_nchildren++];
128     n->n_type = type;
129     n->n_str = str;
130     n->n_lineno = lineno;
131     n->n_col_offset = col_offset;
132     n->n_end_lineno = end_lineno;  // this and below will be updates after all children are added.
133     n->n_end_col_offset = end_col_offset;
134     n->n_nchildren = 0;
135     n->n_child = NULL;
136     return 0;
137 }
138 
139 /* Forward */
140 static void freechildren(node *);
141 static Py_ssize_t sizeofchildren(node *n);
142 
143 
144 void
PyNode_Free(node * n)145 PyNode_Free(node *n)
146 {
147     if (n != NULL) {
148         freechildren(n);
149         PyObject_FREE(n);
150     }
151 }
152 
153 Py_ssize_t
_PyNode_SizeOf(node * n)154 _PyNode_SizeOf(node *n)
155 {
156     Py_ssize_t res = 0;
157 
158     if (n != NULL)
159         res = sizeof(node) + sizeofchildren(n);
160     return res;
161 }
162 
163 static void
freechildren(node * n)164 freechildren(node *n)
165 {
166     int i;
167     for (i = NCH(n); --i >= 0; )
168         freechildren(CHILD(n, i));
169     if (n->n_child != NULL)
170         PyObject_FREE(n->n_child);
171     if (STR(n) != NULL)
172         PyObject_FREE(STR(n));
173 }
174 
175 static Py_ssize_t
sizeofchildren(node * n)176 sizeofchildren(node *n)
177 {
178     Py_ssize_t res = 0;
179     int i;
180     for (i = NCH(n); --i >= 0; )
181         res += sizeofchildren(CHILD(n, i));
182     if (n->n_child != NULL)
183         /* allocated size of n->n_child array */
184         res += XXXROUNDUP(NCH(n)) * sizeof(node);
185     if (STR(n) != NULL)
186         res += strlen(STR(n)) + 1;
187     return res;
188 }
189