1 /* Parse tree node implementation */
2 
3 #include "Python.h"
4 #include "node.h"
5 #include "errcode.h"
6 
7 node *
PyNode_New(int type)8 PyNode_New(int type)
9 {
10     node *n = (node *) PyObject_MALLOC(1 * sizeof(node));
11     if (n == NULL)
12         return NULL;
13     n->n_type = type;
14     n->n_str = NULL;
15     n->n_lineno = 0;
16     n->n_end_lineno = 0;
17     n->n_col_offset = 0;
18     n->n_end_col_offset = -1;
19     n->n_nchildren = 0;
20     n->n_child = NULL;
21     return n;
22 }
23 
24 /* See comments at XXXROUNDUP below.  Returns -1 on overflow. */
25 static int
fancy_roundup(int n)26 fancy_roundup(int n)
27 {
28     /* Round up to the closest power of 2 >= n. */
29     int result = 256;
30     assert(n > 128);
31     while (result < n) {
32         result <<= 1;
33         if (result <= 0)
34             return -1;
35     }
36     return result;
37 }
38 
39 /* A gimmick to make massive numbers of reallocs quicker.  The result is
40  * a number >= the input.  In PyNode_AddChild, it's used like so, when
41  * we're about to add child number current_size + 1:
42  *
43  *     if XXXROUNDUP(current_size) < XXXROUNDUP(current_size + 1):
44  *         allocate space for XXXROUNDUP(current_size + 1) total children
45  *     else:
46  *         we already have enough space
47  *
48  * Since a node starts out empty, we must have
49  *
50  *     XXXROUNDUP(0) < XXXROUNDUP(1)
51  *
52  * so that we allocate space for the first child.  One-child nodes are very
53  * common (presumably that would change if we used a more abstract form
54  * of syntax tree), so to avoid wasting memory it's desirable that
55  * XXXROUNDUP(1) == 1.  That in turn forces XXXROUNDUP(0) == 0.
56  *
57  * Else for 2 <= n <= 128, we round up to the closest multiple of 4.  Why 4?
58  * Rounding up to a multiple of an exact power of 2 is very efficient, and
59  * most nodes with more than one child have <= 4 kids.
60  *
61  * Else we call fancy_roundup() to grow proportionately to n.  We've got an
62  * extreme case then (like test_longexp.py), and on many platforms doing
63  * anything less than proportional growth leads to exorbitant runtime
64  * (e.g., MacPython), or extreme fragmentation of user address space (e.g.,
65  * Win98).
66  *
67  * In a run of compileall across the 2.3a0 Lib directory, Andrew MacIntyre
68  * reported that, with this scheme, 89% of PyObject_REALLOC calls in
69  * PyNode_AddChild passed 1 for the size, and 9% passed 4.  So this usually
70  * wastes very little memory, but is very effective at sidestepping
71  * platform-realloc disasters on vulnerable platforms.
72  *
73  * Note that this would be straightforward if a node stored its current
74  * capacity.  The code is tricky to avoid that.
75  */
76 #define XXXROUNDUP(n) ((n) <= 1 ? (n) :                         \
77                (n) <= 128 ? (int)_Py_SIZE_ROUND_UP((n), 4) :    \
78                fancy_roundup(n))
79 
80 
81 void
_PyNode_FinalizeEndPos(node * n)82 _PyNode_FinalizeEndPos(node *n)
83 {
84     int nch = NCH(n);
85     node *last;
86     if (nch == 0) {
87         return;
88     }
89     last = CHILD(n, nch - 1);
90     _PyNode_FinalizeEndPos(last);
91     n->n_end_lineno = last->n_end_lineno;
92     n->n_end_col_offset = last->n_end_col_offset;
93 }
94 
95 int
PyNode_AddChild(node * n1,int type,char * str,int lineno,int col_offset,int end_lineno,int end_col_offset)96 PyNode_AddChild(node *n1, int type, char *str, int lineno, int col_offset,
97                 int end_lineno, int end_col_offset)
98 {
99     const int nch = n1->n_nchildren;
100     int current_capacity;
101     int required_capacity;
102     node *n;
103 
104     // finalize end position of previous node (if any)
105     if (nch > 0) {
106         _PyNode_FinalizeEndPos(CHILD(n1, nch - 1));
107     }
108 
109     if (nch == INT_MAX || nch < 0)
110         return E_OVERFLOW;
111 
112     current_capacity = XXXROUNDUP(nch);
113     required_capacity = XXXROUNDUP(nch + 1);
114     if (current_capacity < 0 || required_capacity < 0)
115         return E_OVERFLOW;
116     if (current_capacity < required_capacity) {
117         if ((size_t)required_capacity > SIZE_MAX / sizeof(node)) {
118             return E_NOMEM;
119         }
120         n = n1->n_child;
121         n = (node *) PyObject_REALLOC(n,
122                                       required_capacity * sizeof(node));
123         if (n == NULL)
124             return E_NOMEM;
125         n1->n_child = n;
126     }
127 
128     n = &n1->n_child[n1->n_nchildren++];
129     n->n_type = type;
130     n->n_str = str;
131     n->n_lineno = lineno;
132     n->n_col_offset = col_offset;
133     n->n_end_lineno = end_lineno;  // this and below will be updates after all children are added.
134     n->n_end_col_offset = end_col_offset;
135     n->n_nchildren = 0;
136     n->n_child = NULL;
137     return 0;
138 }
139 
140 /* Forward */
141 static void freechildren(node *);
142 static Py_ssize_t sizeofchildren(node *n);
143 
144 
145 void
PyNode_Free(node * n)146 PyNode_Free(node *n)
147 {
148     if (n != NULL) {
149         freechildren(n);
150         PyObject_FREE(n);
151     }
152 }
153 
154 Py_ssize_t
_PyNode_SizeOf(node * n)155 _PyNode_SizeOf(node *n)
156 {
157     Py_ssize_t res = 0;
158 
159     if (n != NULL)
160         res = sizeof(node) + sizeofchildren(n);
161     return res;
162 }
163 
164 static void
freechildren(node * n)165 freechildren(node *n)
166 {
167     int i;
168     for (i = NCH(n); --i >= 0; )
169         freechildren(CHILD(n, i));
170     if (n->n_child != NULL)
171         PyObject_FREE(n->n_child);
172     if (STR(n) != NULL)
173         PyObject_FREE(STR(n));
174 }
175 
176 static Py_ssize_t
sizeofchildren(node * n)177 sizeofchildren(node *n)
178 {
179     Py_ssize_t res = 0;
180     int i;
181     for (i = NCH(n); --i >= 0; )
182         res += sizeofchildren(CHILD(n, i));
183     if (n->n_child != NULL)
184         /* allocated size of n->n_child array */
185         res += XXXROUNDUP(NCH(n)) * sizeof(node);
186     if (STR(n) != NULL)
187         res += strlen(STR(n)) + 1;
188     return res;
189 }
190