1 #ifndef TREE_SITTER_LANGUAGE_H_
2 #define TREE_SITTER_LANGUAGE_H_
3 
4 #ifdef __cplusplus
5 extern "C" {
6 #endif
7 
8 #include "./subtree.h"
9 #include "tree_sitter/parser.h"
10 
// Symbol id one below `ts_builtin_sym_error`. `ts_language_next_state`
// treats it the same way as the error symbol: both yield state 0.
#define ts_builtin_sym_error_repeat (ts_builtin_sym_error - 1)
12 
// Output record filled in by `ts_language_table_entry` for one
// (state, symbol) pair.
typedef struct {
  // Array of `action_count` parse actions; callers in this header index
  // it from 0 to `action_count - 1`.
  const TSParseAction *actions;
  // Number of elements available through `actions`.
  uint32_t action_count;
  // NOTE(review): written by `ts_language_table_entry`; its exact meaning
  // is not visible in this header - confirm against the implementation.
  bool is_reusable;
} TableEntry;
18 
// Cursor over the symbols that have a non-zero parse-table value in one
// parse state. Created by `ts_language_lookaheads` and advanced by
// `ts_lookahead_iterator_next`.
//
// The first group of fields is traversal state; the second group holds
// the result of the most recent successful advance.
typedef struct {
  // Language whose tables are being walked.
  const TSLanguage *language;
  // Current read position inside the parse table (large states) or the
  // small parse table (small states).
  const uint16_t *data;
  // Small states only: one past the last symbol of the current group.
  const uint16_t *group_end;
  // Not assigned by the helpers in this header beyond zero-initialization.
  TSStateId state;
  // Raw table value associated with the current symbol.
  uint16_t table_value;
  // Not assigned by the helpers in this header beyond zero-initialization.
  uint16_t section_index;
  // Small states only: number of groups that have not been entered yet.
  uint16_t group_count;
  // True when the state index is >= the language's `large_state_count`.
  bool is_small_state;

  // Actions for the current symbol when it is a terminal.
  const TSParseAction *actions;
  // Symbol produced by the most recent advance.
  TSSymbol symbol;
  // Successor state when the current symbol is a non-terminal, else 0.
  TSStateId next_state;
  // Number of entries in `actions`; 0 for non-terminals.
  uint16_t action_count;
} LookaheadIterator;
34 
// Fills the `TableEntry` passed as the last argument for the given state
// and symbol. Callers in this header read `actions` and `action_count`
// from it afterwards. Defined outside this header.
void ts_language_table_entry(const TSLanguage *, TSStateId, TSSymbol, TableEntry *);

// Defined outside this header. Presumably returns the metadata record for
// the given symbol - confirm against the implementation.
TSSymbolMetadata ts_language_symbol_metadata(const TSLanguage *, TSSymbol);

// Defined outside this header. Presumably maps an internal symbol to its
// public counterpart - confirm against the implementation.
TSSymbol ts_language_public_symbol(const TSLanguage *, TSSymbol);
40 
// Report whether `symbol` lies in the range 1..external_token_count
// (inclusive). Symbol 0 is never considered external.
static inline bool ts_language_is_symbol_external(const TSLanguage *self, TSSymbol symbol) {
  if (symbol == 0) {
    return false;
  }
  return symbol < self->external_token_count + 1;
}
44 
ts_language_actions(const TSLanguage * self,TSStateId state,TSSymbol symbol,uint32_t * count)45 static inline const TSParseAction *ts_language_actions(
46   const TSLanguage *self,
47   TSStateId state,
48   TSSymbol symbol,
49   uint32_t *count
50 ) {
51   TableEntry entry;
52   ts_language_table_entry(self, state, symbol, &entry);
53   *count = entry.action_count;
54   return entry.actions;
55 }
56 
// Report whether the first action listed for `symbol` in `state` is a
// reduce action. Returns false when there are no actions at all.
static inline bool ts_language_has_reduce_action(
  const TSLanguage *self,
  TSStateId state,
  TSSymbol symbol
) {
  TableEntry found;
  ts_language_table_entry(self, state, symbol, &found);

  if (found.action_count == 0) {
    return false;
  }
  return found.actions[0].type == TSParseActionTypeReduce;
}
66 
67 // Lookup the table value for a given symbol and state.
68 //
69 // For non-terminal symbols, the table value represents a successor state.
70 // For terminal symbols, it represents an index in the actions table.
71 // For 'large' parse states, this is a direct lookup. For 'small' parse
72 // states, this requires searching through the symbol groups to find
73 // the given symbol.
ts_language_lookup(const TSLanguage * self,TSStateId state,TSSymbol symbol)74 static inline uint16_t ts_language_lookup(
75   const TSLanguage *self,
76   TSStateId state,
77   TSSymbol symbol
78 ) {
79   if (state >= self->large_state_count) {
80     uint32_t index = self->small_parse_table_map[state - self->large_state_count];
81     const uint16_t *data = &self->small_parse_table[index];
82     uint16_t group_count = *(data++);
83     for (unsigned i = 0; i < group_count; i++) {
84       uint16_t section_value = *(data++);
85       uint16_t symbol_count = *(data++);
86       for (unsigned i = 0; i < symbol_count; i++) {
87         if (*(data++) == symbol) return section_value;
88       }
89     }
90     return 0;
91   } else {
92     return self->parse_table[state * self->symbol_count + symbol];
93   }
94 }
95 
// Report whether the parse table holds a non-zero value for `symbol`
// in `state`.
static inline bool ts_language_has_actions(
  const TSLanguage *self,
  TSStateId state,
  TSSymbol symbol
) {
  uint16_t table_value = ts_language_lookup(self, state, symbol);
  return table_value != 0;
}
103 
104 // Iterate over all of the symbols that are valid in the given state.
105 //
106 // For 'large' parse states, this just requires iterating through
107 // all possible symbols and checking the parse table for each one.
108 // For 'small' parse states, this exploits the structure of the
109 // table to only visit the valid symbols.
ts_language_lookaheads(const TSLanguage * self,TSStateId state)110 static inline LookaheadIterator ts_language_lookaheads(
111   const TSLanguage *self,
112   TSStateId state
113 ) {
114   bool is_small_state = state >= self->large_state_count;
115   const uint16_t *data;
116   const uint16_t *group_end = NULL;
117   uint16_t group_count = 0;
118   if (is_small_state) {
119     uint32_t index = self->small_parse_table_map[state - self->large_state_count];
120     data = &self->small_parse_table[index];
121     group_end = data + 1;
122     group_count = *data;
123   } else {
124     data = &self->parse_table[state * self->symbol_count] - 1;
125   }
126   return (LookaheadIterator) {
127     .language = self,
128     .data = data,
129     .group_end = group_end,
130     .group_count = group_count,
131     .is_small_state = is_small_state,
132     .symbol = UINT16_MAX,
133     .next_state = 0,
134   };
135 }
136 
// Advance the iterator to the next valid symbol.
//
// Returns false once the state's symbols are exhausted. On success,
// `symbol` is updated. `table_value`, `actions`, `action_count` and
// `next_state` are refreshed whenever a new table value is read; within a
// small-state group they are left as they were, since every symbol in the
// group shares the same value.
static inline bool ts_lookahead_iterator_next(LookaheadIterator *self) {
  if (self->is_small_state) {
    // Small parse states list their valid symbols explicitly, grouped by
    // table value.
    self->data++;
    if (self->data != self->group_end) {
      // Still inside the current group: only the symbol changes.
      self->symbol = *self->data;
      return true;
    }

    // Reached the end of a group; move on to the next one, if any.
    if (self->group_count == 0) return false;
    self->group_count--;

    // Group header: the shared table value followed by the symbol count.
    self->table_value = self->data[0];
    unsigned symbol_count = self->data[1];
    self->data += 2;
    self->group_end = self->data + symbol_count;
    self->symbol = *self->data;
  } else {
    // Large parse states: step through every symbol until one with a
    // non-zero table value is found.
    for (;;) {
      self->data++;
      self->symbol++;
      if (self->symbol >= self->language->symbol_count) return false;
      self->table_value = *self->data;
      if (self->table_value) break;
    }
  }

  // Depending on whether the symbol is terminal or non-terminal, the table
  // value is either a successor state or an index into the action list.
  if (self->symbol >= self->language->token_count) {
    self->action_count = 0;
    self->next_state = self->table_value;
    return true;
  }

  // The entry at `table_value` holds the count; the actions follow it.
  const TSParseActionEntry *header = &self->language->parse_actions[self->table_value];
  self->action_count = header->entry.count;
  self->actions = (const TSParseAction *)(header + 1);
  self->next_state = 0;
  return true;
}
180 
// Compute the state reached from `state` on `symbol`.
//
// - The error and error-repeat symbols always yield 0.
// - For non-terminals (symbol >= token_count) the table value itself is
//   the successor state.
// - For terminals, the last listed action decides: a shift yields its
//   target state (or `state` itself for an extra shift); anything else,
//   or no actions at all, yields 0.
static inline TSStateId ts_language_next_state(
  const TSLanguage *self,
  TSStateId state,
  TSSymbol symbol
) {
  if (symbol == ts_builtin_sym_error || symbol == ts_builtin_sym_error_repeat) {
    return 0;
  }

  if (symbol >= self->token_count) {
    return ts_language_lookup(self, state, symbol);
  }

  uint32_t action_total;
  const TSParseAction *action_list = ts_language_actions(self, state, symbol, &action_total);
  if (action_total == 0) {
    return 0;
  }

  const TSParseAction *last = &action_list[action_total - 1];
  if (last->type != TSParseActionTypeShift) {
    return 0;
  }
  if (last->shift.extra) {
    return state;
  }
  return last->shift.state;
}
202 
ts_language_enabled_external_tokens(const TSLanguage * self,unsigned external_scanner_state)203 static inline const bool *ts_language_enabled_external_tokens(
204   const TSLanguage *self,
205   unsigned external_scanner_state
206 ) {
207   if (external_scanner_state == 0) {
208     return NULL;
209   } else {
210     return self->external_scanner.states + self->external_token_count * external_scanner_state;
211   }
212 }
213 
ts_language_alias_sequence(const TSLanguage * self,uint32_t production_id)214 static inline const TSSymbol *ts_language_alias_sequence(
215   const TSLanguage *self,
216   uint32_t production_id
217 ) {
218   return production_id ?
219     &self->alias_sequences[production_id * self->max_alias_sequence_length] :
220     NULL;
221 }
222 
ts_language_alias_at(const TSLanguage * self,uint32_t production_id,uint32_t child_index)223 static inline TSSymbol ts_language_alias_at(
224   const TSLanguage *self,
225   uint32_t production_id,
226   uint32_t child_index
227 ) {
228   return production_id ?
229     self->alias_sequences[production_id * self->max_alias_sequence_length + child_index] :
230     0;
231 }
232 
ts_language_field_map(const TSLanguage * self,uint32_t production_id,const TSFieldMapEntry ** start,const TSFieldMapEntry ** end)233 static inline void ts_language_field_map(
234   const TSLanguage *self,
235   uint32_t production_id,
236   const TSFieldMapEntry **start,
237   const TSFieldMapEntry **end
238 ) {
239   if (self->field_count == 0) {
240     *start = NULL;
241     *end = NULL;
242     return;
243   }
244 
245   TSFieldMapSlice slice = self->field_map_slices[production_id];
246   *start = &self->field_map_entries[slice.index];
247   *end = &self->field_map_entries[slice.index] + slice.length;
248 }
249 
// Find the range of symbols that `original_symbol` may appear as.
//
// By default the range is the single entry for the symbol in
// `public_symbol_map`. `alias_map` is then scanned; it is laid out as
// repeated records of
//   symbol, count, <count entries>
// and the scan stops at a 0 symbol or at the first symbol greater than
// `original_symbol`. If a record for `original_symbol` is found, the
// range is replaced by that record's entries.
static inline void ts_language_aliases_for_symbol(
  const TSLanguage *self,
  TSSymbol original_symbol,
  const TSSymbol **start,
  const TSSymbol **end
) {
  const TSSymbol *fallback = &self->public_symbol_map[original_symbol];
  *start = fallback;
  *end = fallback + 1;

  const TSSymbol *cursor = self->alias_map;
  for (;;) {
    TSSymbol candidate = *cursor++;
    if (candidate == 0 || candidate > original_symbol) {
      return;
    }

    uint16_t alias_count = *cursor++;
    if (candidate == original_symbol) {
      *start = cursor;
      *end = cursor + alias_count;
      return;
    }

    // Not the record we want: skip over its entries.
    cursor += alias_count;
  }
}
272 
273 
274 #ifdef __cplusplus
275 }
276 #endif
277 
278 #endif  // TREE_SITTER_LANGUAGE_H_
279