1 #ifndef TREE_SITTER_LANGUAGE_H_
2 #define TREE_SITTER_LANGUAGE_H_
3
4 #ifdef __cplusplus
5 extern "C" {
6 #endif
7
8 #include "./subtree.h"
9 #include "tree_sitter/parser.h"
10
// Symbol value located directly below `ts_builtin_sym_error`.
// `ts_language_next_state` handles both values identically (it yields state 0).
#define ts_builtin_sym_error_repeat (ts_builtin_sym_error - 1)
12
13 typedef struct {
14 const TSParseAction *actions;
15 uint32_t action_count;
16 bool is_reusable;
17 } TableEntry;
18
19 typedef struct {
20 const TSLanguage *language;
21 const uint16_t *data;
22 const uint16_t *group_end;
23 TSStateId state;
24 uint16_t table_value;
25 uint16_t section_index;
26 uint16_t group_count;
27 bool is_small_state;
28
29 const TSParseAction *actions;
30 TSSymbol symbol;
31 TSStateId next_state;
32 uint16_t action_count;
33 } LookaheadIterator;
34
35 void ts_language_table_entry(const TSLanguage *, TSStateId, TSSymbol, TableEntry *);
36
37 TSSymbolMetadata ts_language_symbol_metadata(const TSLanguage *, TSSymbol);
38
39 TSSymbol ts_language_public_symbol(const TSLanguage *, TSSymbol);
40
// Reports whether `symbol` lies strictly between 0 and
// `external_token_count + 1`. Symbol 0 is never considered external.
static inline bool ts_language_is_symbol_external(const TSLanguage *self, TSSymbol symbol) {
  if (symbol == 0) {
    return false;
  }
  return symbol < self->external_token_count + 1;
}
44
ts_language_actions(const TSLanguage * self,TSStateId state,TSSymbol symbol,uint32_t * count)45 static inline const TSParseAction *ts_language_actions(
46 const TSLanguage *self,
47 TSStateId state,
48 TSSymbol symbol,
49 uint32_t *count
50 ) {
51 TableEntry entry;
52 ts_language_table_entry(self, state, symbol, &entry);
53 *count = entry.action_count;
54 return entry.actions;
55 }
56
// Returns true when the entry for `symbol` in `state` has at least one
// action and the first of those actions is a reduce action.
static inline bool ts_language_has_reduce_action(
  const TSLanguage *self,
  TSStateId state,
  TSSymbol symbol
) {
  TableEntry lookup_result;
  ts_language_table_entry(self, state, symbol, &lookup_result);

  // Only inspect the first action once we know the list is not empty.
  if (lookup_result.action_count == 0) {
    return false;
  }
  return lookup_result.actions[0].type == TSParseActionTypeReduce;
}
66
67 // Lookup the table value for a given symbol and state.
68 //
69 // For non-terminal symbols, the table value represents a successor state.
70 // For terminal symbols, it represents an index in the actions table.
71 // For 'large' parse states, this is a direct lookup. For 'small' parse
72 // states, this requires searching through the symbol groups to find
73 // the given symbol.
ts_language_lookup(const TSLanguage * self,TSStateId state,TSSymbol symbol)74 static inline uint16_t ts_language_lookup(
75 const TSLanguage *self,
76 TSStateId state,
77 TSSymbol symbol
78 ) {
79 if (state >= self->large_state_count) {
80 uint32_t index = self->small_parse_table_map[state - self->large_state_count];
81 const uint16_t *data = &self->small_parse_table[index];
82 uint16_t group_count = *(data++);
83 for (unsigned i = 0; i < group_count; i++) {
84 uint16_t section_value = *(data++);
85 uint16_t symbol_count = *(data++);
86 for (unsigned i = 0; i < symbol_count; i++) {
87 if (*(data++) == symbol) return section_value;
88 }
89 }
90 return 0;
91 } else {
92 return self->parse_table[state * self->symbol_count + symbol];
93 }
94 }
95
// Returns true when the parse table holds a non-zero value for `symbol`
// in `state`, as reported by `ts_language_lookup`.
static inline bool ts_language_has_actions(
  const TSLanguage *self,
  TSStateId state,
  TSSymbol symbol
) {
  uint16_t table_value = ts_language_lookup(self, state, symbol);
  return table_value != 0;
}
103
104 // Iterate over all of the symbols that are valid in the given state.
105 //
106 // For 'large' parse states, this just requires iterating through
107 // all possible symbols and checking the parse table for each one.
108 // For 'small' parse states, this exploits the structure of the
109 // table to only visit the valid symbols.
ts_language_lookaheads(const TSLanguage * self,TSStateId state)110 static inline LookaheadIterator ts_language_lookaheads(
111 const TSLanguage *self,
112 TSStateId state
113 ) {
114 bool is_small_state = state >= self->large_state_count;
115 const uint16_t *data;
116 const uint16_t *group_end = NULL;
117 uint16_t group_count = 0;
118 if (is_small_state) {
119 uint32_t index = self->small_parse_table_map[state - self->large_state_count];
120 data = &self->small_parse_table[index];
121 group_end = data + 1;
122 group_count = *data;
123 } else {
124 data = &self->parse_table[state * self->symbol_count] - 1;
125 }
126 return (LookaheadIterator) {
127 .language = self,
128 .data = data,
129 .group_end = group_end,
130 .group_count = group_count,
131 .is_small_state = is_small_state,
132 .symbol = UINT16_MAX,
133 .next_state = 0,
134 };
135 }
136
// Advance the iterator to the next symbol with a table entry.
//
// Returns false once there is nothing left to visit. On success,
// `self->symbol` holds the symbol and either `actions` / `action_count`
// (token symbols) or `next_state` (all other symbols) describe its entry.
static inline bool ts_lookahead_iterator_next(LookaheadIterator *self) {
  const TSLanguage *language = self->language;

  if (!self->is_small_state) {
    // Large parse states: step through every symbol until one is found
    // whose table value is non-zero.
    for (;;) {
      self->data += 1;
      self->symbol += 1;
      if (self->symbol >= language->symbol_count) return false;
      self->table_value = *self->data;
      if (self->table_value != 0) break;
    }
  } else {
    // Small parse states: valid symbols are listed explicitly, grouped by
    // their table value, so the value only changes when a new group starts.
    self->data += 1;

    if (self->data != self->group_end) {
      // Still inside the current group; the values derived from the group's
      // table value remain valid, so only the symbol changes.
      self->symbol = *self->data;
      return true;
    }

    if (self->group_count == 0) return false;
    self->group_count -= 1;

    // Group header: the shared table value followed by the symbol count.
    self->table_value = self->data[0];
    unsigned symbol_count = self->data[1];
    self->data += 2;

    self->group_end = self->data + symbol_count;
    self->symbol = *self->data;
  }

  // Depending on whether the symbol is terminal or non-terminal, the table
  // value represents either a list of actions or a successor state.
  bool is_token = self->symbol < language->token_count;
  if (!is_token) {
    self->action_count = 0;
    self->next_state = self->table_value;
    return true;
  }

  // The first entry holds the count; the actions follow directly after it.
  const TSParseActionEntry *header = &language->parse_actions[self->table_value];
  self->action_count = header->entry.count;
  self->actions = (const TSParseAction *)(header + 1);
  self->next_state = 0;
  return true;
}
180
// Compute the state reached from `state` on `symbol`.
//
// - Error symbols (`ts_builtin_sym_error`, `ts_builtin_sym_error_repeat`)
//   always yield 0.
// - Token symbols: the last action in the entry decides. A shift yields its
//   target state, or `state` itself when the shift is marked extra; anything
//   else (including an empty entry) yields 0.
// - All other symbols: the table value from `ts_language_lookup`.
static inline TSStateId ts_language_next_state(
  const TSLanguage *self,
  TSStateId state,
  TSSymbol symbol
) {
  if (symbol == ts_builtin_sym_error || symbol == ts_builtin_sym_error_repeat) {
    return 0;
  }

  if (symbol >= self->token_count) {
    return ts_language_lookup(self, state, symbol);
  }

  uint32_t action_total;
  const TSParseAction *action_list = ts_language_actions(self, state, symbol, &action_total);
  if (action_total == 0) {
    return 0;
  }

  TSParseAction last_action = action_list[action_total - 1];
  if (last_action.type != TSParseActionTypeShift) {
    return 0;
  }
  if (last_action.shift.extra) {
    return state;
  }
  return last_action.shift.state;
}
202
ts_language_enabled_external_tokens(const TSLanguage * self,unsigned external_scanner_state)203 static inline const bool *ts_language_enabled_external_tokens(
204 const TSLanguage *self,
205 unsigned external_scanner_state
206 ) {
207 if (external_scanner_state == 0) {
208 return NULL;
209 } else {
210 return self->external_scanner.states + self->external_token_count * external_scanner_state;
211 }
212 }
213
ts_language_alias_sequence(const TSLanguage * self,uint32_t production_id)214 static inline const TSSymbol *ts_language_alias_sequence(
215 const TSLanguage *self,
216 uint32_t production_id
217 ) {
218 return production_id ?
219 &self->alias_sequences[production_id * self->max_alias_sequence_length] :
220 NULL;
221 }
222
ts_language_alias_at(const TSLanguage * self,uint32_t production_id,uint32_t child_index)223 static inline TSSymbol ts_language_alias_at(
224 const TSLanguage *self,
225 uint32_t production_id,
226 uint32_t child_index
227 ) {
228 return production_id ?
229 self->alias_sequences[production_id * self->max_alias_sequence_length + child_index] :
230 0;
231 }
232
ts_language_field_map(const TSLanguage * self,uint32_t production_id,const TSFieldMapEntry ** start,const TSFieldMapEntry ** end)233 static inline void ts_language_field_map(
234 const TSLanguage *self,
235 uint32_t production_id,
236 const TSFieldMapEntry **start,
237 const TSFieldMapEntry **end
238 ) {
239 if (self->field_count == 0) {
240 *start = NULL;
241 *end = NULL;
242 return;
243 }
244
245 TSFieldMapSlice slice = self->field_map_slices[production_id];
246 *start = &self->field_map_entries[slice.index];
247 *end = &self->field_map_entries[slice.index] + slice.length;
248 }
249
// Reports the half-open range [*start, *end) of symbols associated with
// `original_symbol`.
//
// The default result is the single `public_symbol_map` slot of the symbol.
// `alias_map` is then scanned; it is a sequence of records of the form
//   symbol, count, alias[count]
// terminated by a 0 symbol. The scan also stops at the first record whose
// symbol is greater than `original_symbol`. If a record for the symbol is
// found, its alias list replaces the default range.
static inline void ts_language_aliases_for_symbol(
  const TSLanguage *self,
  TSSymbol original_symbol,
  const TSSymbol **start,
  const TSSymbol **end
) {
  *start = &self->public_symbol_map[original_symbol];
  *end = *start + 1;

  const uint16_t *cursor = self->alias_map;
  for (;;) {
    TSSymbol record_symbol = *cursor++;
    if (record_symbol == 0) return;
    if (record_symbol > original_symbol) return;

    uint16_t alias_count = *cursor++;
    if (record_symbol == original_symbol) {
      *start = cursor;
      *end = cursor + alias_count;
      return;
    }

    // Skip over this record's aliases to reach the next record.
    cursor += alias_count;
  }
}
272
273
274 #ifdef __cplusplus
275 }
276 #endif
277
278 #endif // TREE_SITTER_LANGUAGE_H_
279