1 #ifndef TREE_SITTER_SUBTREE_H_
2 #define TREE_SITTER_SUBTREE_H_
3 
4 #ifdef __cplusplus
5 extern "C" {
6 #endif
7 
8 #include <limits.h>
9 #include <stdbool.h>
10 #include <stdio.h>
11 #include "./length.h"
12 #include "./array.h"
13 #include "./error_costs.h"
14 #include "tree_sitter/api.h"
15 #include "tree_sitter/parser.h"
16 
17 #define TS_TREE_STATE_NONE USHRT_MAX
18 #define NULL_SUBTREE ((Subtree) {.ptr = NULL})
19 
20 // The serialized state of an external scanner.
21 //
22 // Every time an external token subtree is created after a call to an
23 // external scanner, the scanner's `serialize` function is called to
24 // retrieve a serialized copy of its state. The bytes are then copied
25 // onto the subtree itself so that the scanner's state can later be
26 // restored using its `deserialize` function.
27 //
28 // Small byte arrays are stored inline, and long ones are allocated
29 // separately on the heap.
30 typedef struct {
31   union {
32     char *long_data;
33     char short_data[24];
34   };
35   uint32_t length;
36 } ExternalScannerState;
37 
38 // A compact representation of a subtree.
39 //
40 // This representation is used for small leaf nodes that are not
41 // errors, and were not created by an external scanner.
42 typedef struct {
43   bool is_inline : 1;
44   bool visible : 1;
45   bool named : 1;
46   bool extra : 1;
47   bool has_changes : 1;
48   bool is_missing : 1;
49   bool is_keyword : 1;
50   uint8_t symbol;
51   uint8_t padding_bytes;
52   uint8_t size_bytes;
53   uint8_t padding_columns;
54   uint8_t padding_rows : 4;
55   uint8_t lookahead_bytes : 4;
56   uint16_t parse_state;
57 } SubtreeInlineData;
58 
59 // A heap-allocated representation of a subtree.
60 //
61 // This representation is used for parent nodes, external tokens,
62 // errors, and other leaf nodes whose data is too large to fit into
63 // the inlinen representation.
64 typedef struct {
65   volatile uint32_t ref_count;
66   Length padding;
67   Length size;
68   uint32_t lookahead_bytes;
69   uint32_t error_cost;
70   uint32_t child_count;
71   TSSymbol symbol;
72   TSStateId parse_state;
73 
74   bool visible : 1;
75   bool named : 1;
76   bool extra : 1;
77   bool fragile_left : 1;
78   bool fragile_right : 1;
79   bool has_changes : 1;
80   bool has_external_tokens : 1;
81   bool depends_on_column: 1;
82   bool is_missing : 1;
83   bool is_keyword : 1;
84 
85   union {
86     // Non-terminal subtrees (`child_count > 0`)
87     struct {
88       uint32_t visible_child_count;
89       uint32_t named_child_count;
90       uint32_t node_count;
91       uint32_t repeat_depth;
92       int32_t dynamic_precedence;
93       uint16_t production_id;
94       struct {
95         TSSymbol symbol;
96         TSStateId parse_state;
97       } first_leaf;
98     };
99 
100     // External terminal subtrees (`child_count == 0 && has_external_tokens`)
101     ExternalScannerState external_scanner_state;
102 
103     // Error terminal subtrees (`child_count == 0 && symbol == ts_builtin_sym_error`)
104     int32_t lookahead_char;
105   };
106 } SubtreeHeapData;
107 
108 // The fundamental building block of a syntax tree.
109 typedef union {
110   SubtreeInlineData data;
111   const SubtreeHeapData *ptr;
112 } Subtree;
113 
114 // Like Subtree, but mutable.
115 typedef union {
116   SubtreeInlineData data;
117   SubtreeHeapData *ptr;
118 } MutableSubtree;
119 
120 typedef Array(Subtree) SubtreeArray;
121 typedef Array(MutableSubtree) MutableSubtreeArray;
122 
123 typedef struct {
124   MutableSubtreeArray free_trees;
125   MutableSubtreeArray tree_stack;
126 } SubtreePool;
127 
128 void ts_external_scanner_state_init(ExternalScannerState *, const char *, unsigned);
129 const char *ts_external_scanner_state_data(const ExternalScannerState *);
130 
131 void ts_subtree_array_copy(SubtreeArray, SubtreeArray *);
132 void ts_subtree_array_clear(SubtreePool *, SubtreeArray *);
133 void ts_subtree_array_delete(SubtreePool *, SubtreeArray *);
134 void ts_subtree_array_remove_trailing_extras(SubtreeArray *, SubtreeArray *);
135 void ts_subtree_array_reverse(SubtreeArray *);
136 
137 SubtreePool ts_subtree_pool_new(uint32_t capacity);
138 void ts_subtree_pool_delete(SubtreePool *);
139 
140 Subtree ts_subtree_new_leaf(
141   SubtreePool *, TSSymbol, Length, Length, uint32_t,
142   TSStateId, bool, bool, bool, const TSLanguage *
143 );
144 Subtree ts_subtree_new_error(
145   SubtreePool *, int32_t, Length, Length, uint32_t, TSStateId, const TSLanguage *
146 );
147 MutableSubtree ts_subtree_new_node(TSSymbol, SubtreeArray *, unsigned, const TSLanguage *);
148 Subtree ts_subtree_new_error_node(SubtreeArray *, bool, const TSLanguage *);
149 Subtree ts_subtree_new_missing_leaf(SubtreePool *, TSSymbol, Length, const TSLanguage *);
150 MutableSubtree ts_subtree_make_mut(SubtreePool *, Subtree);
151 void ts_subtree_retain(Subtree);
152 void ts_subtree_release(SubtreePool *, Subtree);
153 bool ts_subtree_eq(Subtree, Subtree);
154 int ts_subtree_compare(Subtree, Subtree);
155 void ts_subtree_set_symbol(MutableSubtree *, TSSymbol, const TSLanguage *);
156 void ts_subtree_summarize(MutableSubtree, const Subtree *, uint32_t, const TSLanguage *);
157 void ts_subtree_summarize_children(MutableSubtree, const TSLanguage *);
158 void ts_subtree_balance(Subtree, SubtreePool *, const TSLanguage *);
159 Subtree ts_subtree_edit(Subtree, const TSInputEdit *edit, SubtreePool *);
160 char *ts_subtree_string(Subtree, const TSLanguage *, bool include_all);
161 void ts_subtree_print_dot_graph(Subtree, const TSLanguage *, FILE *);
162 Subtree ts_subtree_last_external_token(Subtree);
163 bool ts_subtree_external_scanner_state_eq(Subtree, Subtree);
164 
165 #define SUBTREE_GET(self, name) (self.data.is_inline ? self.data.name : self.ptr->name)
166 
ts_subtree_symbol(Subtree self)167 static inline TSSymbol ts_subtree_symbol(Subtree self) { return SUBTREE_GET(self, symbol); }
ts_subtree_visible(Subtree self)168 static inline bool ts_subtree_visible(Subtree self) { return SUBTREE_GET(self, visible); }
ts_subtree_named(Subtree self)169 static inline bool ts_subtree_named(Subtree self) { return SUBTREE_GET(self, named); }
ts_subtree_extra(Subtree self)170 static inline bool ts_subtree_extra(Subtree self) { return SUBTREE_GET(self, extra); }
ts_subtree_has_changes(Subtree self)171 static inline bool ts_subtree_has_changes(Subtree self) { return SUBTREE_GET(self, has_changes); }
ts_subtree_missing(Subtree self)172 static inline bool ts_subtree_missing(Subtree self) { return SUBTREE_GET(self, is_missing); }
ts_subtree_is_keyword(Subtree self)173 static inline bool ts_subtree_is_keyword(Subtree self) { return SUBTREE_GET(self, is_keyword); }
ts_subtree_parse_state(Subtree self)174 static inline TSStateId ts_subtree_parse_state(Subtree self) { return SUBTREE_GET(self, parse_state); }
ts_subtree_lookahead_bytes(Subtree self)175 static inline uint32_t ts_subtree_lookahead_bytes(Subtree self) { return SUBTREE_GET(self, lookahead_bytes); }
176 
177 #undef SUBTREE_GET
178 
179 // Get the size needed to store a heap-allocated subtree with the given
180 // number of children.
ts_subtree_alloc_size(uint32_t child_count)181 static inline size_t ts_subtree_alloc_size(uint32_t child_count) {
182   return child_count * sizeof(Subtree) + sizeof(SubtreeHeapData);
183 }
184 
185 // Get a subtree's children, which are allocated immediately before the
186 // tree's own heap data.
187 #define ts_subtree_children(self) \
188   ((self).data.is_inline ? NULL : (Subtree *)((self).ptr) - (self).ptr->child_count)
189 
ts_subtree_set_extra(MutableSubtree * self,bool is_extra)190 static inline void ts_subtree_set_extra(MutableSubtree *self, bool is_extra) {
191   if (self->data.is_inline) {
192     self->data.extra = is_extra;
193   } else {
194     self->ptr->extra = is_extra;
195   }
196 }
197 
ts_subtree_leaf_symbol(Subtree self)198 static inline TSSymbol ts_subtree_leaf_symbol(Subtree self) {
199   if (self.data.is_inline) return self.data.symbol;
200   if (self.ptr->child_count == 0) return self.ptr->symbol;
201   return self.ptr->first_leaf.symbol;
202 }
203 
ts_subtree_leaf_parse_state(Subtree self)204 static inline TSStateId ts_subtree_leaf_parse_state(Subtree self) {
205   if (self.data.is_inline) return self.data.parse_state;
206   if (self.ptr->child_count == 0) return self.ptr->parse_state;
207   return self.ptr->first_leaf.parse_state;
208 }
209 
ts_subtree_padding(Subtree self)210 static inline Length ts_subtree_padding(Subtree self) {
211   if (self.data.is_inline) {
212     Length result = {self.data.padding_bytes, {self.data.padding_rows, self.data.padding_columns}};
213     return result;
214   } else {
215     return self.ptr->padding;
216   }
217 }
218 
ts_subtree_size(Subtree self)219 static inline Length ts_subtree_size(Subtree self) {
220   if (self.data.is_inline) {
221     Length result = {self.data.size_bytes, {0, self.data.size_bytes}};
222     return result;
223   } else {
224     return self.ptr->size;
225   }
226 }
227 
ts_subtree_total_size(Subtree self)228 static inline Length ts_subtree_total_size(Subtree self) {
229   return length_add(ts_subtree_padding(self), ts_subtree_size(self));
230 }
231 
ts_subtree_total_bytes(Subtree self)232 static inline uint32_t ts_subtree_total_bytes(Subtree self) {
233   return ts_subtree_total_size(self).bytes;
234 }
235 
ts_subtree_child_count(Subtree self)236 static inline uint32_t ts_subtree_child_count(Subtree self) {
237   return self.data.is_inline ? 0 : self.ptr->child_count;
238 }
239 
ts_subtree_repeat_depth(Subtree self)240 static inline uint32_t ts_subtree_repeat_depth(Subtree self) {
241   return self.data.is_inline ? 0 : self.ptr->repeat_depth;
242 }
243 
ts_subtree_node_count(Subtree self)244 static inline uint32_t ts_subtree_node_count(Subtree self) {
245   return (self.data.is_inline || self.ptr->child_count == 0) ? 1 : self.ptr->node_count;
246 }
247 
ts_subtree_visible_child_count(Subtree self)248 static inline uint32_t ts_subtree_visible_child_count(Subtree self) {
249   if (ts_subtree_child_count(self) > 0) {
250     return self.ptr->visible_child_count;
251   } else {
252     return 0;
253   }
254 }
255 
ts_subtree_error_cost(Subtree self)256 static inline uint32_t ts_subtree_error_cost(Subtree self) {
257   if (ts_subtree_missing(self)) {
258     return ERROR_COST_PER_MISSING_TREE + ERROR_COST_PER_RECOVERY;
259   } else {
260     return self.data.is_inline ? 0 : self.ptr->error_cost;
261   }
262 }
263 
ts_subtree_dynamic_precedence(Subtree self)264 static inline int32_t ts_subtree_dynamic_precedence(Subtree self) {
265   return (self.data.is_inline || self.ptr->child_count == 0) ? 0 : self.ptr->dynamic_precedence;
266 }
267 
ts_subtree_production_id(Subtree self)268 static inline uint16_t ts_subtree_production_id(Subtree self) {
269   if (ts_subtree_child_count(self) > 0) {
270     return self.ptr->production_id;
271   } else {
272     return 0;
273   }
274 }
275 
ts_subtree_fragile_left(Subtree self)276 static inline bool ts_subtree_fragile_left(Subtree self) {
277   return self.data.is_inline ? false : self.ptr->fragile_left;
278 }
279 
ts_subtree_fragile_right(Subtree self)280 static inline bool ts_subtree_fragile_right(Subtree self) {
281   return self.data.is_inline ? false : self.ptr->fragile_right;
282 }
283 
ts_subtree_has_external_tokens(Subtree self)284 static inline bool ts_subtree_has_external_tokens(Subtree self) {
285   return self.data.is_inline ? false : self.ptr->has_external_tokens;
286 }
287 
ts_subtree_depends_on_column(Subtree self)288 static inline bool ts_subtree_depends_on_column(Subtree self) {
289   return self.data.is_inline ? false : self.ptr->depends_on_column;
290 }
291 
ts_subtree_is_fragile(Subtree self)292 static inline bool ts_subtree_is_fragile(Subtree self) {
293   return self.data.is_inline ? false : (self.ptr->fragile_left || self.ptr->fragile_right);
294 }
295 
ts_subtree_is_error(Subtree self)296 static inline bool ts_subtree_is_error(Subtree self) {
297   return ts_subtree_symbol(self) == ts_builtin_sym_error;
298 }
299 
ts_subtree_is_eof(Subtree self)300 static inline bool ts_subtree_is_eof(Subtree self) {
301   return ts_subtree_symbol(self) == ts_builtin_sym_end;
302 }
303 
ts_subtree_from_mut(MutableSubtree self)304 static inline Subtree ts_subtree_from_mut(MutableSubtree self) {
305   Subtree result;
306   result.data = self.data;
307   return result;
308 }
309 
ts_subtree_to_mut_unsafe(Subtree self)310 static inline MutableSubtree ts_subtree_to_mut_unsafe(Subtree self) {
311   MutableSubtree result;
312   result.data = self.data;
313   return result;
314 }
315 
316 #ifdef __cplusplus
317 }
318 #endif
319 
320 #endif  // TREE_SITTER_SUBTREE_H_
321