1 #include "tree_sitter/api.h"
2 #include "./alloc.h"
3 #include "./array.h"
4 #include "./language.h"
5 #include "./point.h"
6 #include "./tree_cursor.h"
7 #include "./unicode.h"
8 #include <wctype.h>
9
10 // #define DEBUG_ANALYZE_QUERY
11 // #define LOG(...) fprintf(stderr, __VA_ARGS__)
12 #define LOG(...)
13
14 #define MAX_STEP_CAPTURE_COUNT 3
15 #define MAX_STATE_PREDECESSOR_COUNT 100
16 #define MAX_ANALYSIS_STATE_DEPTH 12
17 #define MAX_NEGATED_FIELD_COUNT 8
18
19 /*
20 * Stream - A sequence of unicode characters derived from a UTF8 string.
21 * This struct is used in parsing queries from S-expressions.
22 */
23 typedef struct {
24 const char *input;
25 const char *start;
26 const char *end;
27 int32_t next;
28 uint8_t next_size;
29 } Stream;
30
31 /*
32 * QueryStep - A step in the process of matching a query. Each node within
33 * a query S-expression corresponds to one of these steps. An entire pattern
34 * is represented as a sequence of these steps. The basic properties of a
35 * node are represented by these fields:
36 * - `symbol` - The grammar symbol to match. A zero value represents the
37 * wildcard symbol, '_'.
38 * - `field` - The field name to match. A zero value means that a field name
39 * was not specified.
40 * - `capture_ids` - An array of integers representing the names of captures
41 * associated with this node in the pattern, terminated by a `NONE` value.
42 * - `depth` - The depth where this node occurs in the pattern. The root node
43 * of the pattern has depth zero.
44 *
45 * For simple patterns, steps are matched in sequential order. But in order to
46 * handle alternative/repeated/optional sub-patterns, query steps are not always
47 * structured as a linear sequence; they sometimes need to split and merge. This
48 * is done using the following fields:
49 * - `alternative_index` - The index of a different query step that serves as
50 * an alternative to this step. A `NONE` value represents no alternative.
51 * When a query state reaches a step with an alternative index, the state
52 * is duplicated, with one copy remaining at the original step, and one copy
53 * moving to the alternative step. The alternative may have its own alternative
54 * step, so this splitting is an iterative process.
55 * - `is_dead_end` - Indication that this state cannot be passed directly, and
56 * exists only in order to redirect to an alternative index, with no splitting.
57 * - `is_pass_through` - Indication that state has no matching logic of its own,
58 * and exists only to split a state. One copy of the state advances immediately
59 * to the next step, and one moves to the alternative step.
60 *
61 * Steps have some additional fields in order to handle the `.` (or "anchor") operator,
62 * which forbids additional child nodes:
63 * - `is_immediate` - Indication that the node matching this step cannot be preceded
64 * by other sibling nodes that weren't specified in the pattern.
65 * - `is_last_child` - Indicates that the node matching this step cannot have any
66 * subsequent named siblings.
67 */
68 typedef struct {
69 TSSymbol symbol;
70 TSSymbol supertype_symbol;
71 TSFieldId field;
72 uint16_t capture_ids[MAX_STEP_CAPTURE_COUNT];
73 uint16_t depth;
74 uint16_t alternative_index;
75 uint16_t negated_field_list_id;
76 bool contains_captures: 1;
77 bool is_immediate: 1;
78 bool is_last_child: 1;
79 bool is_pass_through: 1;
80 bool is_dead_end: 1;
81 bool alternative_is_immediate: 1;
82 bool is_definite: 1;
83 } QueryStep;
84
85 /*
86 * Slice - A slice of an external array. Within a query, capture names,
87 * literal string values, and predicate step informations are stored in three
88 * contiguous arrays. Individual captures, string values, and predicates are
89 * represented as slices of these three arrays.
90 */
91 typedef struct {
92 uint32_t offset;
93 uint32_t length;
94 } Slice;
95
96 /*
97 * SymbolTable - a two-way mapping of strings to ids.
98 */
99 typedef struct {
100 Array(char) characters;
101 Array(Slice) slices;
102 } SymbolTable;
103
104 /*
105 * PatternEntry - Information about the starting point for matching a particular
106 * pattern. These entries are stored in a 'pattern map' - a sorted array that
107 * makes it possible to efficiently lookup patterns based on the symbol for their
108 * first step. The entry consists of the following fields:
109 * - `pattern_index` - the index of the pattern within the query
110 * - `step_index` - the index of the pattern's first step in the shared `steps` array
111 * - `is_rooted` - whether or not the pattern has a single root node. This property
112 * affects decisions about whether or not to start the pattern for nodes outside
113 * of a QueryCursor's range restriction.
114 */
115 typedef struct {
116 uint16_t step_index;
117 uint16_t pattern_index;
118 bool is_rooted;
119 } PatternEntry;
120
121 typedef struct {
122 Slice steps;
123 Slice predicate_steps;
124 uint32_t start_byte;
125 } QueryPattern;
126
127 typedef struct {
128 uint32_t byte_offset;
129 uint16_t step_index;
130 } StepOffset;
131
132 /*
133 * QueryState - The state of an in-progress match of a particular pattern
134 * in a query. While executing, a `TSQueryCursor` must keep track of a number
135 * of possible in-progress matches. Each of those possible matches is
136 * represented as one of these states. Fields:
137 * - `id` - A numeric id that is exposed to the public API. This allows the
138 * caller to remove a given match, preventing any more of its captures
139 * from being returned.
140 * - `start_depth` - The depth in the tree where the first step of the state's
141 * pattern was matched.
142 * - `pattern_index` - The pattern that the state is matching.
143 * - `consumed_capture_count` - The number of captures from this match that
144 * have already been returned.
145 * - `capture_list_id` - A numeric id that can be used to retrieve the state's
146 * list of captures from the `CaptureListPool`.
147 * - `seeking_immediate_match` - A flag that indicates that the state's next
148 * step must be matched by the very next sibling. This is used when
149 * processing repetitions.
150 * - `has_in_progress_alternatives` - A flag that indicates that there is are
151 * other states that have the same captures as this state, but are at
152 * different steps in their pattern. This means that in order to obey the
153 * 'longest-match' rule, this state should not be returned as a match until
154 * it is clear that there can be no other alternative match with more captures.
155 */
156 typedef struct {
157 uint32_t id;
158 uint32_t capture_list_id;
159 uint16_t start_depth;
160 uint16_t step_index;
161 uint16_t pattern_index;
162 uint16_t consumed_capture_count: 12;
163 bool seeking_immediate_match: 1;
164 bool has_in_progress_alternatives: 1;
165 bool dead: 1;
166 bool needs_parent: 1;
167 } QueryState;
168
169 typedef Array(TSQueryCapture) CaptureList;
170
171 /*
172 * CaptureListPool - A collection of *lists* of captures. Each query state needs
173 * to maintain its own list of captures. To avoid repeated allocations, this struct
174 * maintains a fixed set of capture lists, and keeps track of which ones are
175 * currently in use by a query state.
176 */
177 typedef struct {
178 Array(CaptureList) list;
179 CaptureList empty_list;
180 // The maximum number of capture lists that we are allowed to allocate. We
181 // never allow `list` to allocate more entries than this, dropping pending
182 // matches if needed to stay under the limit.
183 uint32_t max_capture_list_count;
184 // The number of capture lists allocated in `list` that are not currently in
185 // use. We reuse those existing-but-unused capture lists before trying to
186 // allocate any new ones. We use an invalid value (UINT32_MAX) for a capture
187 // list's length to indicate that it's not in use.
188 uint32_t free_capture_list_count;
189 } CaptureListPool;
190
191 /*
192 * AnalysisState - The state needed for walking the parse table when analyzing
193 * a query pattern, to determine at which steps the pattern might fail to match.
194 */
195 typedef struct {
196 TSStateId parse_state;
197 TSSymbol parent_symbol;
198 uint16_t child_index;
199 TSFieldId field_id: 15;
200 bool done: 1;
201 } AnalysisStateEntry;
202
203 typedef struct {
204 AnalysisStateEntry stack[MAX_ANALYSIS_STATE_DEPTH];
205 uint16_t depth;
206 uint16_t step_index;
207 } AnalysisState;
208
209 typedef Array(AnalysisState) AnalysisStateSet;
210
211 /*
212 * AnalysisSubgraph - A subset of the states in the parse table that are used
213 * in constructing nodes with a certain symbol. Each state is accompanied by
214 * some information about the possible node that could be produced in
215 * downstream states.
216 */
217 typedef struct {
218 TSStateId state;
219 uint8_t production_id;
220 uint8_t child_index: 7;
221 bool done: 1;
222 } AnalysisSubgraphNode;
223
224 typedef struct {
225 TSSymbol symbol;
226 Array(TSStateId) start_states;
227 Array(AnalysisSubgraphNode) nodes;
228 } AnalysisSubgraph;
229
230 /*
231 * StatePredecessorMap - A map that stores the predecessors of each parse state.
232 * This is used during query analysis to determine which parse states can lead
233 * to which reduce actions.
234 */
235 typedef struct {
236 TSStateId *contents;
237 } StatePredecessorMap;
238
239 /*
240 * TSQuery - A tree query, compiled from a string of S-expressions. The query
241 * itself is immutable. The mutable state used in the process of executing the
242 * query is stored in a `TSQueryCursor`.
243 */
244 struct TSQuery {
245 SymbolTable captures;
246 SymbolTable predicate_values;
247 Array(QueryStep) steps;
248 Array(PatternEntry) pattern_map;
249 Array(TSQueryPredicateStep) predicate_steps;
250 Array(QueryPattern) patterns;
251 Array(StepOffset) step_offsets;
252 Array(TSFieldId) negated_fields;
253 Array(char) string_buffer;
254 const TSLanguage *language;
255 uint16_t wildcard_root_pattern_count;
256 };
257
258 /*
259 * TSQueryCursor - A stateful struct used to execute a query on a tree.
260 */
261 struct TSQueryCursor {
262 const TSQuery *query;
263 TSTreeCursor cursor;
264 Array(QueryState) states;
265 Array(QueryState) finished_states;
266 CaptureListPool capture_list_pool;
267 uint32_t depth;
268 uint32_t start_byte;
269 uint32_t end_byte;
270 TSPoint start_point;
271 TSPoint end_point;
272 uint32_t next_state_id;
273 bool ascending;
274 bool halted;
275 bool did_exceed_match_limit;
276 };
277
278 static const TSQueryError PARENT_DONE = -1;
279 static const uint16_t PATTERN_DONE_MARKER = UINT16_MAX;
280 static const uint16_t NONE = UINT16_MAX;
281 static const TSSymbol WILDCARD_SYMBOL = 0;
282 static const TSSymbol NAMED_WILDCARD_SYMBOL = UINT16_MAX - 1;
283
284 /**********
285 * Stream
286 **********/
287
288 // Advance to the next unicode code point in the stream.
stream_advance(Stream * self)289 static bool stream_advance(Stream *self) {
290 self->input += self->next_size;
291 if (self->input < self->end) {
292 uint32_t size = ts_decode_utf8(
293 (const uint8_t *)self->input,
294 self->end - self->input,
295 &self->next
296 );
297 if (size > 0) {
298 self->next_size = size;
299 return true;
300 }
301 } else {
302 self->next_size = 0;
303 self->next = '\0';
304 }
305 return false;
306 }
307
308 // Reset the stream to the given input position, represented as a pointer
309 // into the input string.
stream_reset(Stream * self,const char * input)310 static void stream_reset(Stream *self, const char *input) {
311 self->input = input;
312 self->next_size = 0;
313 stream_advance(self);
314 }
315
stream_new(const char * string,uint32_t length)316 static Stream stream_new(const char *string, uint32_t length) {
317 Stream self = {
318 .next = 0,
319 .input = string,
320 .start = string,
321 .end = string + length,
322 };
323 stream_advance(&self);
324 return self;
325 }
326
stream_skip_whitespace(Stream * self)327 static void stream_skip_whitespace(Stream *self) {
328 for (;;) {
329 if (iswspace(self->next)) {
330 stream_advance(self);
331 } else if (self->next == ';') {
332 // skip over comments
333 stream_advance(self);
334 while (self->next && self->next != '\n') {
335 if (!stream_advance(self)) break;
336 }
337 } else {
338 break;
339 }
340 }
341 }
342
stream_is_ident_start(Stream * self)343 static bool stream_is_ident_start(Stream *self) {
344 return iswalnum(self->next) || self->next == '_' || self->next == '-';
345 }
346
stream_scan_identifier(Stream * stream)347 static void stream_scan_identifier(Stream *stream) {
348 do {
349 stream_advance(stream);
350 } while (
351 iswalnum(stream->next) ||
352 stream->next == '_' ||
353 stream->next == '-' ||
354 stream->next == '.' ||
355 stream->next == '?' ||
356 stream->next == '!'
357 );
358 }
359
stream_offset(Stream * self)360 static uint32_t stream_offset(Stream *self) {
361 return self->input - self->start;
362 }
363
364 /******************
365 * CaptureListPool
366 ******************/
367
capture_list_pool_new(void)368 static CaptureListPool capture_list_pool_new(void) {
369 return (CaptureListPool) {
370 .list = array_new(),
371 .empty_list = array_new(),
372 .max_capture_list_count = UINT32_MAX,
373 .free_capture_list_count = 0,
374 };
375 }
376
capture_list_pool_reset(CaptureListPool * self)377 static void capture_list_pool_reset(CaptureListPool *self) {
378 for (uint16_t i = 0; i < self->list.size; i++) {
379 // This invalid size means that the list is not in use.
380 self->list.contents[i].size = UINT32_MAX;
381 }
382 self->free_capture_list_count = self->list.size;
383 }
384
capture_list_pool_delete(CaptureListPool * self)385 static void capture_list_pool_delete(CaptureListPool *self) {
386 for (uint16_t i = 0; i < self->list.size; i++) {
387 array_delete(&self->list.contents[i]);
388 }
389 array_delete(&self->list);
390 }
391
capture_list_pool_get(const CaptureListPool * self,uint16_t id)392 static const CaptureList *capture_list_pool_get(const CaptureListPool *self, uint16_t id) {
393 if (id >= self->list.size) return &self->empty_list;
394 return &self->list.contents[id];
395 }
396
capture_list_pool_get_mut(CaptureListPool * self,uint16_t id)397 static CaptureList *capture_list_pool_get_mut(CaptureListPool *self, uint16_t id) {
398 assert(id < self->list.size);
399 return &self->list.contents[id];
400 }
401
capture_list_pool_is_empty(const CaptureListPool * self)402 static bool capture_list_pool_is_empty(const CaptureListPool *self) {
403 // The capture list pool is empty if all allocated lists are in use, and we
404 // have reached the maximum allowed number of allocated lists.
405 return self->free_capture_list_count == 0 && self->list.size >= self->max_capture_list_count;
406 }
407
capture_list_pool_acquire(CaptureListPool * self)408 static uint16_t capture_list_pool_acquire(CaptureListPool *self) {
409 // First see if any already allocated capture list is currently unused.
410 if (self->free_capture_list_count > 0) {
411 for (uint16_t i = 0; i < self->list.size; i++) {
412 if (self->list.contents[i].size == UINT32_MAX) {
413 array_clear(&self->list.contents[i]);
414 self->free_capture_list_count--;
415 return i;
416 }
417 }
418 }
419
420 // Otherwise allocate and initialize a new capture list, as long as that
421 // doesn't put us over the requested maximum.
422 uint32_t i = self->list.size;
423 if (i >= self->max_capture_list_count) {
424 return NONE;
425 }
426 CaptureList list;
427 array_init(&list);
428 array_push(&self->list, list);
429 return i;
430 }
431
capture_list_pool_release(CaptureListPool * self,uint16_t id)432 static void capture_list_pool_release(CaptureListPool *self, uint16_t id) {
433 if (id >= self->list.size) return;
434 self->list.contents[id].size = UINT32_MAX;
435 self->free_capture_list_count++;
436 }
437
438 /**************
439 * SymbolTable
440 **************/
441
symbol_table_new(void)442 static SymbolTable symbol_table_new(void) {
443 return (SymbolTable) {
444 .characters = array_new(),
445 .slices = array_new(),
446 };
447 }
448
symbol_table_delete(SymbolTable * self)449 static void symbol_table_delete(SymbolTable *self) {
450 array_delete(&self->characters);
451 array_delete(&self->slices);
452 }
453
symbol_table_id_for_name(const SymbolTable * self,const char * name,uint32_t length)454 static int symbol_table_id_for_name(
455 const SymbolTable *self,
456 const char *name,
457 uint32_t length
458 ) {
459 for (unsigned i = 0; i < self->slices.size; i++) {
460 Slice slice = self->slices.contents[i];
461 if (
462 slice.length == length &&
463 !strncmp(&self->characters.contents[slice.offset], name, length)
464 ) return i;
465 }
466 return -1;
467 }
468
symbol_table_name_for_id(const SymbolTable * self,uint16_t id,uint32_t * length)469 static const char *symbol_table_name_for_id(
470 const SymbolTable *self,
471 uint16_t id,
472 uint32_t *length
473 ) {
474 Slice slice = self->slices.contents[id];
475 *length = slice.length;
476 return &self->characters.contents[slice.offset];
477 }
478
symbol_table_insert_name(SymbolTable * self,const char * name,uint32_t length)479 static uint16_t symbol_table_insert_name(
480 SymbolTable *self,
481 const char *name,
482 uint32_t length
483 ) {
484 int id = symbol_table_id_for_name(self, name, length);
485 if (id >= 0) return (uint16_t)id;
486 Slice slice = {
487 .offset = self->characters.size,
488 .length = length,
489 };
490 array_grow_by(&self->characters, length + 1);
491 memcpy(&self->characters.contents[slice.offset], name, length);
492 self->characters.contents[self->characters.size - 1] = 0;
493 array_push(&self->slices, slice);
494 return self->slices.size - 1;
495 }
496
497 /************
498 * QueryStep
499 ************/
500
query_step__new(TSSymbol symbol,uint16_t depth,bool is_immediate)501 static QueryStep query_step__new(
502 TSSymbol symbol,
503 uint16_t depth,
504 bool is_immediate
505 ) {
506 return (QueryStep) {
507 .symbol = symbol,
508 .depth = depth,
509 .field = 0,
510 .capture_ids = {NONE, NONE, NONE},
511 .alternative_index = NONE,
512 .negated_field_list_id = 0,
513 .contains_captures = false,
514 .is_last_child = false,
515 .is_pass_through = false,
516 .is_dead_end = false,
517 .is_definite = false,
518 .is_immediate = is_immediate,
519 .alternative_is_immediate = false,
520 };
521 }
522
query_step__add_capture(QueryStep * self,uint16_t capture_id)523 static void query_step__add_capture(QueryStep *self, uint16_t capture_id) {
524 for (unsigned i = 0; i < MAX_STEP_CAPTURE_COUNT; i++) {
525 if (self->capture_ids[i] == NONE) {
526 self->capture_ids[i] = capture_id;
527 break;
528 }
529 }
530 }
531
query_step__remove_capture(QueryStep * self,uint16_t capture_id)532 static void query_step__remove_capture(QueryStep *self, uint16_t capture_id) {
533 for (unsigned i = 0; i < MAX_STEP_CAPTURE_COUNT; i++) {
534 if (self->capture_ids[i] == capture_id) {
535 self->capture_ids[i] = NONE;
536 while (i + 1 < MAX_STEP_CAPTURE_COUNT) {
537 if (self->capture_ids[i + 1] == NONE) break;
538 self->capture_ids[i] = self->capture_ids[i + 1];
539 self->capture_ids[i + 1] = NONE;
540 i++;
541 }
542 break;
543 }
544 }
545 }
546
547 /**********************
548 * StatePredecessorMap
549 **********************/
550
state_predecessor_map_new(const TSLanguage * language)551 static inline StatePredecessorMap state_predecessor_map_new(const TSLanguage *language) {
552 return (StatePredecessorMap) {
553 .contents = ts_calloc(language->state_count * (MAX_STATE_PREDECESSOR_COUNT + 1), sizeof(TSStateId)),
554 };
555 }
556
state_predecessor_map_delete(StatePredecessorMap * self)557 static inline void state_predecessor_map_delete(StatePredecessorMap *self) {
558 ts_free(self->contents);
559 }
560
state_predecessor_map_add(StatePredecessorMap * self,TSStateId state,TSStateId predecessor)561 static inline void state_predecessor_map_add(
562 StatePredecessorMap *self,
563 TSStateId state,
564 TSStateId predecessor
565 ) {
566 unsigned index = state * (MAX_STATE_PREDECESSOR_COUNT + 1);
567 TSStateId *count = &self->contents[index];
568 if (*count == 0 || (*count < MAX_STATE_PREDECESSOR_COUNT && self->contents[index + *count] != predecessor)) {
569 (*count)++;
570 self->contents[index + *count] = predecessor;
571 }
572 }
573
state_predecessor_map_get(const StatePredecessorMap * self,TSStateId state,unsigned * count)574 static inline const TSStateId *state_predecessor_map_get(
575 const StatePredecessorMap *self,
576 TSStateId state,
577 unsigned *count
578 ) {
579 unsigned index = state * (MAX_STATE_PREDECESSOR_COUNT + 1);
580 *count = self->contents[index];
581 return &self->contents[index + 1];
582 }
583
584 /****************
585 * AnalysisState
586 ****************/
587
analysis_state__recursion_depth(const AnalysisState * self)588 static unsigned analysis_state__recursion_depth(const AnalysisState *self) {
589 unsigned result = 0;
590 for (unsigned i = 0; i < self->depth; i++) {
591 TSSymbol symbol = self->stack[i].parent_symbol;
592 for (unsigned j = 0; j < i; j++) {
593 if (self->stack[j].parent_symbol == symbol) {
594 result++;
595 break;
596 }
597 }
598 }
599 return result;
600 }
601
analysis_state__compare_position(const AnalysisState * self,const AnalysisState * other)602 static inline int analysis_state__compare_position(
603 const AnalysisState *self,
604 const AnalysisState *other
605 ) {
606 for (unsigned i = 0; i < self->depth; i++) {
607 if (i >= other->depth) return -1;
608 if (self->stack[i].child_index < other->stack[i].child_index) return -1;
609 if (self->stack[i].child_index > other->stack[i].child_index) return 1;
610 }
611 if (self->depth < other->depth) return 1;
612 return 0;
613 }
614
analysis_state__compare(const AnalysisState * self,const AnalysisState * other)615 static inline int analysis_state__compare(
616 const AnalysisState *self,
617 const AnalysisState *other
618 ) {
619 int result = analysis_state__compare_position(self, other);
620 if (result != 0) return result;
621 for (unsigned i = 0; i < self->depth; i++) {
622 if (self->stack[i].parent_symbol < other->stack[i].parent_symbol) return -1;
623 if (self->stack[i].parent_symbol > other->stack[i].parent_symbol) return 1;
624 if (self->stack[i].parse_state < other->stack[i].parse_state) return -1;
625 if (self->stack[i].parse_state > other->stack[i].parse_state) return 1;
626 if (self->stack[i].field_id < other->stack[i].field_id) return -1;
627 if (self->stack[i].field_id > other->stack[i].field_id) return 1;
628 }
629 if (self->step_index < other->step_index) return -1;
630 if (self->step_index > other->step_index) return 1;
631 return 0;
632 }
633
analysis_state__top(AnalysisState * self)634 static inline AnalysisStateEntry *analysis_state__top(AnalysisState *self) {
635 return &self->stack[self->depth - 1];
636 }
637
analysis_state__has_supertype(AnalysisState * self,TSSymbol symbol)638 static inline bool analysis_state__has_supertype(AnalysisState *self, TSSymbol symbol) {
639 for (unsigned i = 0; i < self->depth; i++) {
640 if (self->stack[i].parent_symbol == symbol) return true;
641 }
642 return false;
643 }
644
645 /***********************
646 * AnalysisSubgraphNode
647 ***********************/
648
analysis_subgraph_node__compare(const AnalysisSubgraphNode * self,const AnalysisSubgraphNode * other)649 static inline int analysis_subgraph_node__compare(const AnalysisSubgraphNode *self, const AnalysisSubgraphNode *other) {
650 if (self->state < other->state) return -1;
651 if (self->state > other->state) return 1;
652 if (self->child_index < other->child_index) return -1;
653 if (self->child_index > other->child_index) return 1;
654 if (self->done < other->done) return -1;
655 if (self->done > other->done) return 1;
656 if (self->production_id < other->production_id) return -1;
657 if (self->production_id > other->production_id) return 1;
658 return 0;
659 }
660
661 /*********
662 * Query
663 *********/
664
665 // The `pattern_map` contains a mapping from TSSymbol values to indices in the
666 // `steps` array. For a given syntax node, the `pattern_map` makes it possible
667 // to quickly find the starting steps of all of the patterns whose root matches
668 // that node. Each entry has two fields: a `pattern_index`, which identifies one
669 // of the patterns in the query, and a `step_index`, which indicates the start
670 // offset of that pattern's steps within the `steps` array.
671 //
672 // The entries are sorted by the patterns' root symbols, and lookups use a
673 // binary search. This ensures that the cost of this initial lookup step
674 // scales logarithmically with the number of patterns in the query.
675 //
676 // This returns `true` if the symbol is present and `false` otherwise.
677 // If the symbol is not present `*result` is set to the index where the
678 // symbol should be inserted.
ts_query__pattern_map_search(const TSQuery * self,TSSymbol needle,uint32_t * result)679 static inline bool ts_query__pattern_map_search(
680 const TSQuery *self,
681 TSSymbol needle,
682 uint32_t *result
683 ) {
684 uint32_t base_index = self->wildcard_root_pattern_count;
685 uint32_t size = self->pattern_map.size - base_index;
686 if (size == 0) {
687 *result = base_index;
688 return false;
689 }
690 while (size > 1) {
691 uint32_t half_size = size / 2;
692 uint32_t mid_index = base_index + half_size;
693 TSSymbol mid_symbol = self->steps.contents[
694 self->pattern_map.contents[mid_index].step_index
695 ].symbol;
696 if (needle > mid_symbol) base_index = mid_index;
697 size -= half_size;
698 }
699
700 TSSymbol symbol = self->steps.contents[
701 self->pattern_map.contents[base_index].step_index
702 ].symbol;
703
704 if (needle > symbol) {
705 base_index++;
706 if (base_index < self->pattern_map.size) {
707 symbol = self->steps.contents[
708 self->pattern_map.contents[base_index].step_index
709 ].symbol;
710 }
711 }
712
713 *result = base_index;
714 return needle == symbol;
715 }
716
717 // Insert a new pattern's start index into the pattern map, maintaining
718 // the pattern map's ordering invariant.
ts_query__pattern_map_insert(TSQuery * self,TSSymbol symbol,PatternEntry new_entry)719 static inline void ts_query__pattern_map_insert(
720 TSQuery *self,
721 TSSymbol symbol,
722 PatternEntry new_entry
723 ) {
724 uint32_t index;
725 ts_query__pattern_map_search(self, symbol, &index);
726
727 // Ensure that the entries are sorted not only by symbol, but also
728 // by pattern_index. This way, states for earlier patterns will be
729 // initiated first, which allows the ordering of the states array
730 // to be maintained more efficiently.
731 while (index < self->pattern_map.size) {
732 PatternEntry *entry = &self->pattern_map.contents[index];
733 if (
734 self->steps.contents[entry->step_index].symbol == symbol &&
735 entry->pattern_index < new_entry.pattern_index
736 ) {
737 index++;
738 } else {
739 break;
740 }
741 }
742
743 array_insert(&self->pattern_map, index, new_entry);
744 }
745
ts_query__analyze_patterns(TSQuery * self,unsigned * error_offset)746 static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) {
747 // Identify all of the patterns in the query that have child patterns, both at the
748 // top level and nested within other larger patterns. Record the step index where
749 // each pattern starts.
750 Array(uint32_t) parent_step_indices = array_new();
751 for (unsigned i = 0; i < self->steps.size; i++) {
752 QueryStep *step = &self->steps.contents[i];
753 if (i + 1 < self->steps.size) {
754 QueryStep *next_step = &self->steps.contents[i + 1];
755 if (
756 step->symbol != WILDCARD_SYMBOL &&
757 step->symbol != NAMED_WILDCARD_SYMBOL &&
758 next_step->depth > step->depth &&
759 next_step->depth != PATTERN_DONE_MARKER
760 ) {
761 array_push(&parent_step_indices, i);
762 }
763 }
764 if (step->depth > 0) {
765 step->is_definite = true;
766 }
767 }
768
769 // For every parent symbol in the query, initialize an 'analysis subgraph'.
770 // This subgraph lists all of the states in the parse table that are directly
771 // involved in building subtrees for this symbol.
772 //
773 // In addition to the parent symbols in the query, construct subgraphs for all
774 // of the hidden symbols in the grammar, because these might occur within
775 // one of the parent nodes, such that their children appear to belong to the
776 // parent.
777 Array(AnalysisSubgraph) subgraphs = array_new();
778 for (unsigned i = 0; i < parent_step_indices.size; i++) {
779 uint32_t parent_step_index = parent_step_indices.contents[i];
780 TSSymbol parent_symbol = self->steps.contents[parent_step_index].symbol;
781 AnalysisSubgraph subgraph = { .symbol = parent_symbol };
782 array_insert_sorted_by(&subgraphs, .symbol, subgraph);
783 }
784 for (TSSymbol sym = self->language->token_count; sym < self->language->symbol_count; sym++) {
785 if (!ts_language_symbol_metadata(self->language, sym).visible) {
786 AnalysisSubgraph subgraph = { .symbol = sym };
787 array_insert_sorted_by(&subgraphs, .symbol, subgraph);
788 }
789 }
790
791 // Scan the parse table to find the data needed to populate these subgraphs.
792 // Collect three things during this scan:
793 // 1) All of the parse states where one of these symbols can start.
794 // 2) All of the parse states where one of these symbols can end, along
795 // with information about the node that would be created.
796 // 3) A list of predecessor states for each state.
797 StatePredecessorMap predecessor_map = state_predecessor_map_new(self->language);
798 for (TSStateId state = 1; state < self->language->state_count; state++) {
799 unsigned subgraph_index, exists;
800 LookaheadIterator lookahead_iterator = ts_language_lookaheads(self->language, state);
801 while (ts_lookahead_iterator_next(&lookahead_iterator)) {
802 if (lookahead_iterator.action_count) {
803 for (unsigned i = 0; i < lookahead_iterator.action_count; i++) {
804 const TSParseAction *action = &lookahead_iterator.actions[i];
805 if (action->type == TSParseActionTypeReduce) {
806 const TSSymbol *aliases, *aliases_end;
807 ts_language_aliases_for_symbol(
808 self->language,
809 action->reduce.symbol,
810 &aliases,
811 &aliases_end
812 );
813 for (const TSSymbol *symbol = aliases; symbol < aliases_end; symbol++) {
814 array_search_sorted_by(
815 &subgraphs,
816 .symbol,
817 *symbol,
818 &subgraph_index,
819 &exists
820 );
821 if (exists) {
822 AnalysisSubgraph *subgraph = &subgraphs.contents[subgraph_index];
823 if (subgraph->nodes.size == 0 || array_back(&subgraph->nodes)->state != state) {
824 array_push(&subgraph->nodes, ((AnalysisSubgraphNode) {
825 .state = state,
826 .production_id = action->reduce.production_id,
827 .child_index = action->reduce.child_count,
828 .done = true,
829 }));
830 }
831 }
832 }
833 } else if (action->type == TSParseActionTypeShift && !action->shift.extra) {
834 TSStateId next_state = action->shift.state;
835 state_predecessor_map_add(&predecessor_map, next_state, state);
836 }
837 }
838 } else if (lookahead_iterator.next_state != 0) {
839 if (lookahead_iterator.next_state != state) {
840 state_predecessor_map_add(&predecessor_map, lookahead_iterator.next_state, state);
841 }
842 const TSSymbol *aliases, *aliases_end;
843 ts_language_aliases_for_symbol(
844 self->language,
845 lookahead_iterator.symbol,
846 &aliases,
847 &aliases_end
848 );
849 for (const TSSymbol *symbol = aliases; symbol < aliases_end; symbol++) {
850 array_search_sorted_by(
851 &subgraphs,
852 .symbol,
853 *symbol,
854 &subgraph_index,
855 &exists
856 );
857 if (exists) {
858 AnalysisSubgraph *subgraph = &subgraphs.contents[subgraph_index];
859 if (
860 subgraph->start_states.size == 0 ||
861 *array_back(&subgraph->start_states) != state
862 )
863 array_push(&subgraph->start_states, state);
864 }
865 }
866 }
867 }
868 }
869
870 // For each subgraph, compute the preceding states by walking backward
871 // from the end states using the predecessor map.
872 Array(AnalysisSubgraphNode) next_nodes = array_new();
873 for (unsigned i = 0; i < subgraphs.size; i++) {
874 AnalysisSubgraph *subgraph = &subgraphs.contents[i];
875 if (subgraph->nodes.size == 0) {
876 array_delete(&subgraph->start_states);
877 array_erase(&subgraphs, i);
878 i--;
879 continue;
880 }
881 array_assign(&next_nodes, &subgraph->nodes);
882 while (next_nodes.size > 0) {
883 AnalysisSubgraphNode node = array_pop(&next_nodes);
884 if (node.child_index > 1) {
885 unsigned predecessor_count;
886 const TSStateId *predecessors = state_predecessor_map_get(
887 &predecessor_map,
888 node.state,
889 &predecessor_count
890 );
891 for (unsigned j = 0; j < predecessor_count; j++) {
892 AnalysisSubgraphNode predecessor_node = {
893 .state = predecessors[j],
894 .child_index = node.child_index - 1,
895 .production_id = node.production_id,
896 .done = false,
897 };
898 unsigned index, exists;
899 array_search_sorted_with(
900 &subgraph->nodes, analysis_subgraph_node__compare, &predecessor_node,
901 &index, &exists
902 );
903 if (!exists) {
904 array_insert(&subgraph->nodes, index, predecessor_node);
905 array_push(&next_nodes, predecessor_node);
906 }
907 }
908 }
909 }
910 }
911
912 #ifdef DEBUG_ANALYZE_QUERY
913 printf("\nSubgraphs:\n");
914 for (unsigned i = 0; i < subgraphs.size; i++) {
915 AnalysisSubgraph *subgraph = &subgraphs.contents[i];
916 printf(" %u, %s:\n", subgraph->symbol, ts_language_symbol_name(self->language, subgraph->symbol));
917 for (unsigned j = 0; j < subgraph->start_states.size; j++) {
918 printf(
919 " {state: %u}\n",
920 subgraph->start_states.contents[j]
921 );
922 }
923 for (unsigned j = 0; j < subgraph->nodes.size; j++) {
924 AnalysisSubgraphNode *node = &subgraph->nodes.contents[j];
925 printf(
926 " {state: %u, child_index: %u, production_id: %u, done: %d}\n",
927 node->state, node->child_index, node->production_id, node->done
928 );
929 }
930 printf("\n");
931 }
932 #endif
933
934 // For each non-terminal pattern, determine if the pattern can successfully match,
935 // and identify all of the possible children within the pattern where matching could fail.
936 bool all_patterns_are_valid = true;
937 AnalysisStateSet states = array_new();
938 AnalysisStateSet next_states = array_new();
939 AnalysisStateSet deeper_states = array_new();
940 Array(uint16_t) final_step_indices = array_new();
941 for (unsigned i = 0; i < parent_step_indices.size; i++) {
942 uint16_t parent_step_index = parent_step_indices.contents[i];
943 uint16_t parent_depth = self->steps.contents[parent_step_index].depth;
944 TSSymbol parent_symbol = self->steps.contents[parent_step_index].symbol;
945 if (parent_symbol == ts_builtin_sym_error) continue;
946
947 // Find the subgraph that corresponds to this pattern's root symbol. If the pattern's
948 // root symbols is not a non-terminal, then return an error.
949 unsigned subgraph_index, exists;
950 array_search_sorted_by(&subgraphs, .symbol, parent_symbol, &subgraph_index, &exists);
951 if (!exists) {
952 unsigned first_child_step_index = parent_step_index + 1;
953 uint32_t i, exists;
954 array_search_sorted_by(&self->step_offsets, .step_index, first_child_step_index, &i, &exists);
955 assert(exists);
956 *error_offset = self->step_offsets.contents[i].byte_offset;
957 all_patterns_are_valid = false;
958 break;
959 }
960
961 // Initialize an analysis state at every parse state in the table where
962 // this parent symbol can occur.
963 AnalysisSubgraph *subgraph = &subgraphs.contents[subgraph_index];
964 array_clear(&states);
965 array_clear(&deeper_states);
966 for (unsigned j = 0; j < subgraph->start_states.size; j++) {
967 TSStateId parse_state = subgraph->start_states.contents[j];
968 array_push(&states, ((AnalysisState) {
969 .step_index = parent_step_index + 1,
970 .stack = {
971 [0] = {
972 .parse_state = parse_state,
973 .parent_symbol = parent_symbol,
974 .child_index = 0,
975 .field_id = 0,
976 .done = false,
977 },
978 },
979 .depth = 1,
980 }));
981 }
982
983 // Walk the subgraph for this non-terminal, tracking all of the possible
984 // sequences of progress within the pattern.
985 bool can_finish_pattern = false;
986 bool did_exceed_max_depth = false;
987 unsigned recursion_depth_limit = 0;
988 unsigned prev_final_step_count = 0;
989 array_clear(&final_step_indices);
990 for (;;) {
991 #ifdef DEBUG_ANALYZE_QUERY
992 printf("Final step indices:");
993 for (unsigned j = 0; j < final_step_indices.size; j++) {
994 printf(" %4u", final_step_indices.contents[j]);
995 }
996 printf("\nWalk states for %u %s:\n", i, ts_language_symbol_name(self->language, parent_symbol));
997 for (unsigned j = 0; j < states.size; j++) {
998 AnalysisState *state = &states.contents[j];
999 printf(" %3u: step: %u, stack: [", j, state->step_index);
1000 for (unsigned k = 0; k < state->depth; k++) {
1001 printf(
1002 " {%s, child: %u, state: %4u",
1003 self->language->symbol_names[state->stack[k].parent_symbol],
1004 state->stack[k].child_index,
1005 state->stack[k].parse_state
1006 );
1007 if (state->stack[k].field_id) printf(", field: %s", self->language->field_names[state->stack[k].field_id]);
1008 if (state->stack[k].done) printf(", DONE");
1009 printf("}");
1010 }
1011 printf(" ]\n");
1012 }
1013 #endif
1014
1015 // If no further progress can be made within the current recursion depth limit, then
1016 // bump the depth limit by one, and continue to process the states the exceeded the
1017 // limit. But only allow this if progress has been made since the last time the depth
1018 // limit was increased.
1019 if (states.size == 0) {
1020 if (deeper_states.size > 0 && final_step_indices.size > prev_final_step_count) {
1021 #ifdef DEBUG_ANALYZE_QUERY
1022 printf("Increase recursion depth limit to %u\n", recursion_depth_limit + 1);
1023 #endif
1024
1025 prev_final_step_count = final_step_indices.size;
1026 recursion_depth_limit++;
1027 AnalysisStateSet _states = states;
1028 states = deeper_states;
1029 deeper_states = _states;
1030 continue;
1031 }
1032
1033 break;
1034 }
1035
1036 array_clear(&next_states);
1037 for (unsigned j = 0; j < states.size; j++) {
1038 AnalysisState * const state = &states.contents[j];
1039
1040 // For efficiency, it's important to avoid processing the same analysis state more
1041 // than once. To achieve this, keep the states in order of ascending position within
1042 // their hypothetical syntax trees. In each iteration of this loop, start by advancing
1043 // the states that have made the least progress. Avoid advancing states that have already
1044 // made more progress.
1045 if (next_states.size > 0) {
1046 int comparison = analysis_state__compare_position(state, array_back(&next_states));
1047 if (comparison == 0) {
1048 array_insert_sorted_with(&next_states, analysis_state__compare, *state);
1049 continue;
1050 } else if (comparison > 0) {
1051 while (j < states.size) {
1052 array_push(&next_states, states.contents[j]);
1053 j++;
1054 }
1055 break;
1056 }
1057 }
1058
1059 const TSStateId parse_state = analysis_state__top(state)->parse_state;
1060 const TSSymbol parent_symbol = analysis_state__top(state)->parent_symbol;
1061 const TSFieldId parent_field_id = analysis_state__top(state)->field_id;
1062 const unsigned child_index = analysis_state__top(state)->child_index;
1063 const QueryStep * const step = &self->steps.contents[state->step_index];
1064
1065 unsigned subgraph_index, exists;
1066 array_search_sorted_by(&subgraphs, .symbol, parent_symbol, &subgraph_index, &exists);
1067 if (!exists) continue;
1068 const AnalysisSubgraph *subgraph = &subgraphs.contents[subgraph_index];
1069
1070 // Follow every possible path in the parse table, but only visit states that
1071 // are part of the subgraph for the current symbol.
1072 LookaheadIterator lookahead_iterator = ts_language_lookaheads(self->language, parse_state);
1073 while (ts_lookahead_iterator_next(&lookahead_iterator)) {
1074 TSSymbol sym = lookahead_iterator.symbol;
1075
1076 TSStateId next_parse_state;
1077 if (lookahead_iterator.action_count) {
1078 const TSParseAction *action = &lookahead_iterator.actions[lookahead_iterator.action_count - 1];
1079 if (action->type == TSParseActionTypeShift) {
1080 next_parse_state = action->shift.extra ? parse_state : action->shift.state;
1081 } else {
1082 continue;
1083 }
1084 } else if (lookahead_iterator.next_state != 0) {
1085 next_parse_state = lookahead_iterator.next_state;
1086 } else {
1087 continue;
1088 }
1089
1090 AnalysisSubgraphNode successor = {
1091 .state = next_parse_state,
1092 .child_index = child_index + 1,
1093 };
1094 unsigned node_index;
1095 array_search_sorted_with(
1096 &subgraph->nodes,
1097 analysis_subgraph_node__compare, &successor,
1098 &node_index, &exists
1099 );
1100 while (node_index < subgraph->nodes.size) {
1101 AnalysisSubgraphNode *node = &subgraph->nodes.contents[node_index++];
1102 if (node->state != successor.state || node->child_index != successor.child_index) break;
1103
1104 // Use the subgraph to determine what alias and field will eventually be applied
1105 // to this child node.
1106 TSSymbol alias = ts_language_alias_at(self->language, node->production_id, child_index);
1107 TSSymbol visible_symbol = alias
1108 ? alias
1109 : self->language->symbol_metadata[sym].visible
1110 ? self->language->public_symbol_map[sym]
1111 : 0;
1112 TSFieldId field_id = parent_field_id;
1113 if (!field_id) {
1114 const TSFieldMapEntry *field_map, *field_map_end;
1115 ts_language_field_map(self->language, node->production_id, &field_map, &field_map_end);
1116 for (; field_map != field_map_end; field_map++) {
1117 if (!field_map->inherited && field_map->child_index == child_index) {
1118 field_id = field_map->field_id;
1119 break;
1120 }
1121 }
1122 }
1123
1124 // Create a new state that has advanced past this hypothetical subtree.
1125 AnalysisState next_state = *state;
1126 analysis_state__top(&next_state)->child_index++;
1127 analysis_state__top(&next_state)->parse_state = successor.state;
1128 if (node->done) analysis_state__top(&next_state)->done = true;
1129
1130 // Determine if this hypothetical child node would match the current step
1131 // of the query pattern.
1132 bool does_match = false;
1133 if (visible_symbol) {
1134 does_match = true;
1135 if (step->symbol == NAMED_WILDCARD_SYMBOL) {
1136 if (!self->language->symbol_metadata[visible_symbol].named) does_match = false;
1137 } else if (step->symbol != WILDCARD_SYMBOL) {
1138 if (step->symbol != visible_symbol) does_match = false;
1139 }
1140 if (step->field && step->field != field_id) {
1141 does_match = false;
1142 }
1143 if (
1144 step->supertype_symbol &&
1145 !analysis_state__has_supertype(state, step->supertype_symbol)
1146 ) does_match = false;
1147 }
1148
1149 // If this is a hidden child, then push a new entry to the stack, in order to
1150 // walk through the children of this child.
1151 else if (sym >= self->language->token_count) {
1152 if (next_state.depth + 1 >= MAX_ANALYSIS_STATE_DEPTH) {
1153 did_exceed_max_depth = true;
1154 continue;
1155 }
1156
1157 next_state.depth++;
1158 analysis_state__top(&next_state)->parse_state = parse_state;
1159 analysis_state__top(&next_state)->child_index = 0;
1160 analysis_state__top(&next_state)->parent_symbol = sym;
1161 analysis_state__top(&next_state)->field_id = field_id;
1162 analysis_state__top(&next_state)->done = false;
1163
1164 if (analysis_state__recursion_depth(&next_state) > recursion_depth_limit) {
1165 array_insert_sorted_with(&deeper_states, analysis_state__compare, next_state);
1166 continue;
1167 }
1168 }
1169
1170 // Pop from the stack when this state reached the end of its current syntax node.
1171 while (next_state.depth > 0 && analysis_state__top(&next_state)->done) {
1172 next_state.depth--;
1173 }
1174
1175 // If this hypothetical child did match the current step of the query pattern,
1176 // then advance to the next step at the current depth. This involves skipping
1177 // over any descendant steps of the current child.
1178 const QueryStep *next_step = step;
1179 if (does_match) {
1180 for (;;) {
1181 next_state.step_index++;
1182 next_step = &self->steps.contents[next_state.step_index];
1183 if (
1184 next_step->depth == PATTERN_DONE_MARKER ||
1185 next_step->depth <= parent_depth + 1
1186 ) break;
1187 }
1188 } else if (next_parse_state == parse_state) {
1189 continue;
1190 }
1191
1192 for (;;) {
1193 // Skip pass-through states. Although these states have alternatives, they are only
1194 // used to implement repetitions, and query analysis does not need to process
1195 // repetitions in order to determine whether steps are possible and definite.
1196 if (next_step->is_pass_through) {
1197 next_state.step_index++;
1198 next_step++;
1199 continue;
1200 }
1201
1202 // If the pattern is finished or hypothetical parent node is complete, then
1203 // record that matching can terminate at this step of the pattern. Otherwise,
1204 // add this state to the list of states to process on the next iteration.
1205 if (!next_step->is_dead_end) {
1206 bool did_finish_pattern = self->steps.contents[next_state.step_index].depth != parent_depth + 1;
1207 if (did_finish_pattern) can_finish_pattern = true;
1208 if (did_finish_pattern || next_state.depth == 0) {
1209 array_insert_sorted_by(&final_step_indices, , next_state.step_index);
1210 } else {
1211 array_insert_sorted_with(&next_states, analysis_state__compare, next_state);
1212 }
1213 }
1214
1215 // If the state has advanced to a step with an alternative step, then add another state
1216 // at that alternative step. This process is simpler than the process of actually matching a
1217 // pattern during query exection, because for the purposes of query analysis, there is no
1218 // need to process repetitions.
1219 if (
1220 does_match &&
1221 next_step->alternative_index != NONE &&
1222 next_step->alternative_index > next_state.step_index
1223 ) {
1224 next_state.step_index = next_step->alternative_index;
1225 next_step = &self->steps.contents[next_state.step_index];
1226 } else {
1227 break;
1228 }
1229 }
1230 }
1231 }
1232 }
1233
1234 AnalysisStateSet _states = states;
1235 states = next_states;
1236 next_states = _states;
1237 }
1238
1239 // Mark as indefinite any step where a match terminated.
1240 // Later, this property will be propagated to all of the step's predecessors.
1241 for (unsigned j = 0; j < final_step_indices.size; j++) {
1242 uint32_t final_step_index = final_step_indices.contents[j];
1243 QueryStep *step = &self->steps.contents[final_step_index];
1244 if (
1245 step->depth != PATTERN_DONE_MARKER &&
1246 step->depth > parent_depth &&
1247 !step->is_dead_end
1248 ) {
1249 step->is_definite = false;
1250 }
1251 }
1252
1253 if (did_exceed_max_depth) {
1254 for (unsigned j = parent_step_index + 1; j < self->steps.size; j++) {
1255 QueryStep *step = &self->steps.contents[j];
1256 if (
1257 step->depth <= parent_depth ||
1258 step->depth == PATTERN_DONE_MARKER
1259 ) break;
1260 if (!step->is_dead_end) {
1261 step->is_definite = false;
1262 }
1263 }
1264 }
1265
1266 // If this pattern cannot match, store the pattern index so that it can be
1267 // returned to the caller.
1268 if (all_patterns_are_valid && !can_finish_pattern && !did_exceed_max_depth) {
1269 assert(final_step_indices.size > 0);
1270 uint16_t impossible_step_index = *array_back(&final_step_indices);
1271 uint32_t i, exists;
1272 array_search_sorted_by(&self->step_offsets, .step_index, impossible_step_index, &i, &exists);
1273 if (i >= self->step_offsets.size) i = self->step_offsets.size - 1;
1274 *error_offset = self->step_offsets.contents[i].byte_offset;
1275 all_patterns_are_valid = false;
1276 break;
1277 }
1278 }
1279
1280 // Mark as indefinite any step with captures that are used in predicates.
1281 Array(uint16_t) predicate_capture_ids = array_new();
1282 for (unsigned i = 0; i < self->patterns.size; i++) {
1283 QueryPattern *pattern = &self->patterns.contents[i];
1284
1285 // Gather all of the captures that are used in predicates for this pattern.
1286 array_clear(&predicate_capture_ids);
1287 for (
1288 unsigned start = pattern->predicate_steps.offset,
1289 end = start + pattern->predicate_steps.length,
1290 j = start; j < end; j++
1291 ) {
1292 TSQueryPredicateStep *step = &self->predicate_steps.contents[j];
1293 if (step->type == TSQueryPredicateStepTypeCapture) {
1294 array_insert_sorted_by(&predicate_capture_ids, , step->value_id);
1295 }
1296 }
1297
1298 // Find all of the steps that have these captures.
1299 for (
1300 unsigned start = pattern->steps.offset,
1301 end = start + pattern->steps.length,
1302 j = start; j < end; j++
1303 ) {
1304 QueryStep *step = &self->steps.contents[j];
1305 for (unsigned k = 0; k < MAX_STEP_CAPTURE_COUNT; k++) {
1306 uint16_t capture_id = step->capture_ids[k];
1307 if (capture_id == NONE) break;
1308 unsigned index, exists;
1309 array_search_sorted_by(&predicate_capture_ids, , capture_id, &index, &exists);
1310 if (exists) {
1311 step->is_definite = false;
1312 break;
1313 }
1314 }
1315 }
1316 }
1317
1318 // Propagate indefiniteness backwards.
1319 bool done = self->steps.size == 0;
1320 while (!done) {
1321 done = true;
1322 for (unsigned i = self->steps.size - 1; i > 0; i--) {
1323 QueryStep *step = &self->steps.contents[i];
1324
1325 // Determine if this step is definite or has definite alternatives.
1326 bool is_definite = false;
1327 for (;;) {
1328 if (step->is_definite) {
1329 is_definite = true;
1330 break;
1331 }
1332 if (step->alternative_index == NONE || step->alternative_index < i) {
1333 break;
1334 }
1335 step = &self->steps.contents[step->alternative_index];
1336 }
1337
1338 // If not, mark its predecessor as indefinite.
1339 if (!is_definite) {
1340 QueryStep *prev_step = &self->steps.contents[i - 1];
1341 if (
1342 !prev_step->is_dead_end &&
1343 prev_step->depth != PATTERN_DONE_MARKER &&
1344 prev_step->is_definite
1345 ) {
1346 prev_step->is_definite = false;
1347 done = false;
1348 }
1349 }
1350 }
1351 }
1352
1353 #ifdef DEBUG_ANALYZE_QUERY
1354 printf("Steps:\n");
1355 for (unsigned i = 0; i < self->steps.size; i++) {
1356 QueryStep *step = &self->steps.contents[i];
1357 if (step->depth == PATTERN_DONE_MARKER) {
1358 printf(" %u: DONE\n", i);
1359 } else {
1360 printf(
1361 " %u: {symbol: %s, field: %s, is_definite: %d}\n",
1362 i,
1363 (step->symbol == WILDCARD_SYMBOL || step->symbol == NAMED_WILDCARD_SYMBOL)
1364 ? "ANY"
1365 : ts_language_symbol_name(self->language, step->symbol),
1366 (step->field ? ts_language_field_name_for_id(self->language, step->field) : "-"),
1367 step->is_definite
1368 );
1369 }
1370 }
1371 #endif
1372
1373 // Cleanup
1374 for (unsigned i = 0; i < subgraphs.size; i++) {
1375 array_delete(&subgraphs.contents[i].start_states);
1376 array_delete(&subgraphs.contents[i].nodes);
1377 }
1378 array_delete(&subgraphs);
1379 array_delete(&next_nodes);
1380 array_delete(&states);
1381 array_delete(&next_states);
1382 array_delete(&deeper_states);
1383 array_delete(&final_step_indices);
1384 array_delete(&parent_step_indices);
1385 array_delete(&predicate_capture_ids);
1386 state_predecessor_map_delete(&predecessor_map);
1387
1388 return all_patterns_are_valid;
1389 }
1390
ts_query__finalize_steps(TSQuery * self)1391 static void ts_query__finalize_steps(TSQuery *self) {
1392 for (unsigned i = 0; i < self->steps.size; i++) {
1393 QueryStep *step = &self->steps.contents[i];
1394 uint32_t depth = step->depth;
1395 if (step->capture_ids[0] != NONE) {
1396 step->contains_captures = true;
1397 } else {
1398 step->contains_captures = false;
1399 for (unsigned j = i + 1; j < self->steps.size; j++) {
1400 QueryStep *s = &self->steps.contents[j];
1401 if (s->depth == PATTERN_DONE_MARKER || s->depth <= depth) break;
1402 if (s->capture_ids[0] != NONE) step->contains_captures = true;
1403 }
1404 }
1405 }
1406 }
1407
ts_query__add_negated_fields(TSQuery * self,uint16_t step_index,TSFieldId * field_ids,uint16_t field_count)1408 static void ts_query__add_negated_fields(
1409 TSQuery *self,
1410 uint16_t step_index,
1411 TSFieldId *field_ids,
1412 uint16_t field_count
1413 ) {
1414 QueryStep *step = &self->steps.contents[step_index];
1415
1416 // The negated field array stores a list of field lists, separated by zeros.
1417 // Try to find the start index of an existing list that matches this new list.
1418 bool failed_match = false;
1419 unsigned match_count = 0;
1420 unsigned start_i = 0;
1421 for (unsigned i = 0; i < self->negated_fields.size; i++) {
1422 TSFieldId existing_field_id = self->negated_fields.contents[i];
1423
1424 // At each zero value, terminate the match attempt. If we've exactly
1425 // matched the new field list, then reuse this index. Otherwise,
1426 // start over the matching process.
1427 if (existing_field_id == 0) {
1428 if (match_count == field_count) {
1429 step->negated_field_list_id = start_i;
1430 return;
1431 } else {
1432 start_i = i + 1;
1433 match_count = 0;
1434 failed_match = false;
1435 }
1436 }
1437
1438 // If the existing list matches our new list so far, then advance
1439 // to the next element of the new list.
1440 else if (
1441 match_count < field_count &&
1442 existing_field_id == field_ids[match_count] &&
1443 !failed_match
1444 ) {
1445 match_count++;
1446 }
1447
1448 // Otherwise, this existing list has failed to match.
1449 else {
1450 match_count = 0;
1451 failed_match = true;
1452 }
1453 }
1454
1455 step->negated_field_list_id = self->negated_fields.size;
1456 array_extend(&self->negated_fields, field_count, field_ids);
1457 array_push(&self->negated_fields, 0);
1458 }
1459
ts_query__parse_string_literal(TSQuery * self,Stream * stream)1460 static TSQueryError ts_query__parse_string_literal(
1461 TSQuery *self,
1462 Stream *stream
1463 ) {
1464 const char *string_start = stream->input;
1465 if (stream->next != '"') return TSQueryErrorSyntax;
1466 stream_advance(stream);
1467 const char *prev_position = stream->input;
1468
1469 bool is_escaped = false;
1470 array_clear(&self->string_buffer);
1471 for (;;) {
1472 if (is_escaped) {
1473 is_escaped = false;
1474 switch (stream->next) {
1475 case 'n':
1476 array_push(&self->string_buffer, '\n');
1477 break;
1478 case 'r':
1479 array_push(&self->string_buffer, '\r');
1480 break;
1481 case 't':
1482 array_push(&self->string_buffer, '\t');
1483 break;
1484 case '0':
1485 array_push(&self->string_buffer, '\0');
1486 break;
1487 default:
1488 array_extend(&self->string_buffer, stream->next_size, stream->input);
1489 break;
1490 }
1491 prev_position = stream->input + stream->next_size;
1492 } else {
1493 if (stream->next == '\\') {
1494 array_extend(&self->string_buffer, (stream->input - prev_position), prev_position);
1495 prev_position = stream->input + 1;
1496 is_escaped = true;
1497 } else if (stream->next == '"') {
1498 array_extend(&self->string_buffer, (stream->input - prev_position), prev_position);
1499 stream_advance(stream);
1500 return TSQueryErrorNone;
1501 } else if (stream->next == '\n') {
1502 stream_reset(stream, string_start);
1503 return TSQueryErrorSyntax;
1504 }
1505 }
1506 if (!stream_advance(stream)) {
1507 stream_reset(stream, string_start);
1508 return TSQueryErrorSyntax;
1509 }
1510 }
1511 }
1512
1513 // Parse a single predicate associated with a pattern, adding it to the
1514 // query's internal `predicate_steps` array. Predicates are arbitrary
1515 // S-expressions associated with a pattern which are meant to be handled at
1516 // a higher level of abstraction, such as the Rust/JavaScript bindings. They
1517 // can contain '@'-prefixed capture names, double-quoted strings, and bare
1518 // symbols, which also represent strings.
ts_query__parse_predicate(TSQuery * self,Stream * stream)1519 static TSQueryError ts_query__parse_predicate(
1520 TSQuery *self,
1521 Stream *stream
1522 ) {
1523 if (!stream_is_ident_start(stream)) return TSQueryErrorSyntax;
1524 const char *predicate_name = stream->input;
1525 stream_scan_identifier(stream);
1526 uint32_t length = stream->input - predicate_name;
1527 uint16_t id = symbol_table_insert_name(
1528 &self->predicate_values,
1529 predicate_name,
1530 length
1531 );
1532 array_push(&self->predicate_steps, ((TSQueryPredicateStep) {
1533 .type = TSQueryPredicateStepTypeString,
1534 .value_id = id,
1535 }));
1536 stream_skip_whitespace(stream);
1537
1538 for (;;) {
1539 if (stream->next == ')') {
1540 stream_advance(stream);
1541 stream_skip_whitespace(stream);
1542 array_push(&self->predicate_steps, ((TSQueryPredicateStep) {
1543 .type = TSQueryPredicateStepTypeDone,
1544 .value_id = 0,
1545 }));
1546 break;
1547 }
1548
1549 // Parse an '@'-prefixed capture name
1550 else if (stream->next == '@') {
1551 stream_advance(stream);
1552
1553 // Parse the capture name
1554 if (!stream_is_ident_start(stream)) return TSQueryErrorSyntax;
1555 const char *capture_name = stream->input;
1556 stream_scan_identifier(stream);
1557 uint32_t length = stream->input - capture_name;
1558
1559 // Add the capture id to the first step of the pattern
1560 int capture_id = symbol_table_id_for_name(
1561 &self->captures,
1562 capture_name,
1563 length
1564 );
1565 if (capture_id == -1) {
1566 stream_reset(stream, capture_name);
1567 return TSQueryErrorCapture;
1568 }
1569
1570 array_push(&self->predicate_steps, ((TSQueryPredicateStep) {
1571 .type = TSQueryPredicateStepTypeCapture,
1572 .value_id = capture_id,
1573 }));
1574 }
1575
1576 // Parse a string literal
1577 else if (stream->next == '"') {
1578 TSQueryError e = ts_query__parse_string_literal(self, stream);
1579 if (e) return e;
1580 uint16_t id = symbol_table_insert_name(
1581 &self->predicate_values,
1582 self->string_buffer.contents,
1583 self->string_buffer.size
1584 );
1585 array_push(&self->predicate_steps, ((TSQueryPredicateStep) {
1586 .type = TSQueryPredicateStepTypeString,
1587 .value_id = id,
1588 }));
1589 }
1590
1591 // Parse a bare symbol
1592 else if (stream_is_ident_start(stream)) {
1593 const char *symbol_start = stream->input;
1594 stream_scan_identifier(stream);
1595 uint32_t length = stream->input - symbol_start;
1596 uint16_t id = symbol_table_insert_name(
1597 &self->predicate_values,
1598 symbol_start,
1599 length
1600 );
1601 array_push(&self->predicate_steps, ((TSQueryPredicateStep) {
1602 .type = TSQueryPredicateStepTypeString,
1603 .value_id = id,
1604 }));
1605 }
1606
1607 else {
1608 return TSQueryErrorSyntax;
1609 }
1610
1611 stream_skip_whitespace(stream);
1612 }
1613
1614 return 0;
1615 }
1616
1617 // Read one S-expression pattern from the stream, and incorporate it into
1618 // the query's internal state machine representation. For nested patterns,
1619 // this function calls itself recursively.
ts_query__parse_pattern(TSQuery * self,Stream * stream,uint32_t depth,bool is_immediate)1620 static TSQueryError ts_query__parse_pattern(
1621 TSQuery *self,
1622 Stream *stream,
1623 uint32_t depth,
1624 bool is_immediate
1625 ) {
1626 if (stream->next == 0) return TSQueryErrorSyntax;
1627 if (stream->next == ')' || stream->next == ']') return PARENT_DONE;
1628
1629 const uint32_t starting_step_index = self->steps.size;
1630
1631 // Store the byte offset of each step in the query.
1632 if (
1633 self->step_offsets.size == 0 ||
1634 array_back(&self->step_offsets)->step_index != starting_step_index
1635 ) {
1636 array_push(&self->step_offsets, ((StepOffset) {
1637 .step_index = starting_step_index,
1638 .byte_offset = stream_offset(stream),
1639 }));
1640 }
1641
1642 // An open bracket is the start of an alternation.
1643 if (stream->next == '[') {
1644 stream_advance(stream);
1645 stream_skip_whitespace(stream);
1646
1647 // Parse each branch, and add a placeholder step in between the branches.
1648 Array(uint32_t) branch_step_indices = array_new();
1649 for (;;) {
1650 uint32_t start_index = self->steps.size;
1651 TSQueryError e = ts_query__parse_pattern(
1652 self,
1653 stream,
1654 depth,
1655 is_immediate
1656 );
1657
1658 if (e == PARENT_DONE && stream->next == ']' && branch_step_indices.size > 0) {
1659 stream_advance(stream);
1660 break;
1661 } else if (e) {
1662 if (e == PARENT_DONE) e = TSQueryErrorSyntax;
1663 array_delete(&branch_step_indices);
1664 return e;
1665 }
1666
1667 array_push(&branch_step_indices, start_index);
1668 array_push(&self->steps, query_step__new(0, depth, false));
1669 }
1670 (void)array_pop(&self->steps);
1671
1672 // For all of the branches except for the last one, add the subsequent branch as an
1673 // alternative, and link the end of the branch to the current end of the steps.
1674 for (unsigned i = 0; i < branch_step_indices.size - 1; i++) {
1675 uint32_t step_index = branch_step_indices.contents[i];
1676 uint32_t next_step_index = branch_step_indices.contents[i + 1];
1677 QueryStep *start_step = &self->steps.contents[step_index];
1678 QueryStep *end_step = &self->steps.contents[next_step_index - 1];
1679 start_step->alternative_index = next_step_index;
1680 end_step->alternative_index = self->steps.size;
1681 end_step->is_dead_end = true;
1682 }
1683
1684 array_delete(&branch_step_indices);
1685 }
1686
1687 // An open parenthesis can be the start of three possible constructs:
1688 // * A grouped sequence
1689 // * A predicate
1690 // * A named node
1691 else if (stream->next == '(') {
1692 stream_advance(stream);
1693 stream_skip_whitespace(stream);
1694
1695 // If this parenthesis is followed by a node, then it represents a grouped sequence.
1696 if (stream->next == '(' || stream->next == '"' || stream->next == '[') {
1697 bool child_is_immediate = false;
1698 for (;;) {
1699 if (stream->next == '.') {
1700 child_is_immediate = true;
1701 stream_advance(stream);
1702 stream_skip_whitespace(stream);
1703 }
1704 TSQueryError e = ts_query__parse_pattern(
1705 self,
1706 stream,
1707 depth,
1708 child_is_immediate
1709 );
1710 if (e == PARENT_DONE && stream->next == ')') {
1711 stream_advance(stream);
1712 break;
1713 } else if (e) {
1714 return e;
1715 }
1716
1717 child_is_immediate = false;
1718 }
1719 }
1720
1721 // A dot/pound character indicates the start of a predicate.
1722 else if (stream->next == '.' || stream->next == '#') {
1723 stream_advance(stream);
1724 return ts_query__parse_predicate(self, stream);
1725 }
1726
1727 // Otherwise, this parenthesis is the start of a named node.
1728 else {
1729 TSSymbol symbol;
1730
1731 // TODO - remove.
1732 // For temporary backward compatibility, handle '*' as a wildcard.
1733 if (stream->next == '*') {
1734 symbol = depth > 0 ? NAMED_WILDCARD_SYMBOL : WILDCARD_SYMBOL;
1735 stream_advance(stream);
1736 }
1737
1738 // Parse a normal node name
1739 else if (stream_is_ident_start(stream)) {
1740 const char *node_name = stream->input;
1741 stream_scan_identifier(stream);
1742 uint32_t length = stream->input - node_name;
1743
1744 // TODO - remove.
1745 // For temporary backward compatibility, handle predicates without the leading '#' sign.
1746 if (length > 0 && (node_name[length - 1] == '!' || node_name[length - 1] == '?')) {
1747 stream_reset(stream, node_name);
1748 return ts_query__parse_predicate(self, stream);
1749 }
1750
1751 // Parse the wildcard symbol
1752 else if (length == 1 && node_name[0] == '_') {
1753 symbol = depth > 0 ? NAMED_WILDCARD_SYMBOL : WILDCARD_SYMBOL;
1754 }
1755
1756 else {
1757 symbol = ts_language_symbol_for_name(
1758 self->language,
1759 node_name,
1760 length,
1761 true
1762 );
1763 if (!symbol) {
1764 stream_reset(stream, node_name);
1765 return TSQueryErrorNodeType;
1766 }
1767 }
1768 } else {
1769 return TSQueryErrorSyntax;
1770 }
1771
1772 // Add a step for the node.
1773 array_push(&self->steps, query_step__new(symbol, depth, is_immediate));
1774 if (ts_language_symbol_metadata(self->language, symbol).supertype) {
1775 QueryStep *step = array_back(&self->steps);
1776 step->supertype_symbol = step->symbol;
1777 step->symbol = NAMED_WILDCARD_SYMBOL;
1778 }
1779
1780 stream_skip_whitespace(stream);
1781
1782 if (stream->next == '/') {
1783 stream_advance(stream);
1784 if (!stream_is_ident_start(stream)) {
1785 return TSQueryErrorSyntax;
1786 }
1787
1788 const char *node_name = stream->input;
1789 stream_scan_identifier(stream);
1790 uint32_t length = stream->input - node_name;
1791
1792 QueryStep *step = array_back(&self->steps);
1793 step->symbol = ts_language_symbol_for_name(
1794 self->language,
1795 node_name,
1796 length,
1797 true
1798 );
1799 if (!step->symbol) {
1800 stream_reset(stream, node_name);
1801 return TSQueryErrorNodeType;
1802 }
1803
1804 stream_skip_whitespace(stream);
1805 }
1806
1807 // Parse the child patterns
1808 bool child_is_immediate = false;
1809 uint16_t last_child_step_index = 0;
1810 uint16_t negated_field_count = 0;
1811 TSFieldId negated_field_ids[MAX_NEGATED_FIELD_COUNT];
1812 for (;;) {
1813 // Parse a negated field assertion
1814 if (stream->next == '!') {
1815 stream_advance(stream);
1816 stream_skip_whitespace(stream);
1817 if (!stream_is_ident_start(stream)) return TSQueryErrorSyntax;
1818 const char *field_name = stream->input;
1819 stream_scan_identifier(stream);
1820 uint32_t length = stream->input - field_name;
1821 stream_skip_whitespace(stream);
1822
1823 TSFieldId field_id = ts_language_field_id_for_name(
1824 self->language,
1825 field_name,
1826 length
1827 );
1828 if (!field_id) {
1829 stream->input = field_name;
1830 return TSQueryErrorField;
1831 }
1832
1833 // Keep the field ids sorted.
1834 if (negated_field_count < MAX_NEGATED_FIELD_COUNT) {
1835 negated_field_ids[negated_field_count] = field_id;
1836 negated_field_count++;
1837 }
1838
1839 continue;
1840 }
1841
1842 // Parse a sibling anchor
1843 if (stream->next == '.') {
1844 child_is_immediate = true;
1845 stream_advance(stream);
1846 stream_skip_whitespace(stream);
1847 }
1848
1849 uint16_t step_index = self->steps.size;
1850 TSQueryError e = ts_query__parse_pattern(
1851 self,
1852 stream,
1853 depth + 1,
1854 child_is_immediate
1855 );
1856 if (e == PARENT_DONE && stream->next == ')') {
1857 if (child_is_immediate) {
1858 if (last_child_step_index == 0) {
1859 return TSQueryErrorSyntax;
1860 }
1861 self->steps.contents[last_child_step_index].is_last_child = true;
1862 }
1863
1864 if (negated_field_count) {
1865 ts_query__add_negated_fields(
1866 self,
1867 starting_step_index,
1868 negated_field_ids,
1869 negated_field_count
1870 );
1871 }
1872
1873 stream_advance(stream);
1874 break;
1875 } else if (e) {
1876 return e;
1877 }
1878
1879 last_child_step_index = step_index;
1880 child_is_immediate = false;
1881 }
1882 }
1883 }
1884
1885 // Parse a wildcard pattern
1886 else if (
1887 stream->next == '_' ||
1888
1889 // TODO remove.
1890 // For temporary backward compatibility, handle '*' as a wildcard.
1891 stream->next == '*'
1892 ) {
1893 stream_advance(stream);
1894 stream_skip_whitespace(stream);
1895
1896 // Add a step that matches any kind of node
1897 array_push(&self->steps, query_step__new(WILDCARD_SYMBOL, depth, is_immediate));
1898 }
1899
1900 // Parse a double-quoted anonymous leaf node expression
1901 else if (stream->next == '"') {
1902 const char *string_start = stream->input;
1903 TSQueryError e = ts_query__parse_string_literal(self, stream);
1904 if (e) return e;
1905
1906 // Add a step for the node
1907 TSSymbol symbol = ts_language_symbol_for_name(
1908 self->language,
1909 self->string_buffer.contents,
1910 self->string_buffer.size,
1911 false
1912 );
1913 if (!symbol) {
1914 stream_reset(stream, string_start + 1);
1915 return TSQueryErrorNodeType;
1916 }
1917 array_push(&self->steps, query_step__new(symbol, depth, is_immediate));
1918 }
1919
1920 // Parse a field-prefixed pattern
1921 else if (stream_is_ident_start(stream)) {
1922 // Parse the field name
1923 const char *field_name = stream->input;
1924 stream_scan_identifier(stream);
1925 uint32_t length = stream->input - field_name;
1926 stream_skip_whitespace(stream);
1927
1928 if (stream->next != ':') {
1929 stream_reset(stream, field_name);
1930 return TSQueryErrorSyntax;
1931 }
1932 stream_advance(stream);
1933 stream_skip_whitespace(stream);
1934
1935 // Parse the pattern
1936 TSQueryError e = ts_query__parse_pattern(
1937 self,
1938 stream,
1939 depth,
1940 is_immediate
1941 );
1942 if (e == PARENT_DONE) return TSQueryErrorSyntax;
1943 if (e) return e;
1944
1945 // Add the field name to the first step of the pattern
1946 TSFieldId field_id = ts_language_field_id_for_name(
1947 self->language,
1948 field_name,
1949 length
1950 );
1951 if (!field_id) {
1952 stream->input = field_name;
1953 return TSQueryErrorField;
1954 }
1955
1956 uint32_t step_index = starting_step_index;
1957 QueryStep *step = &self->steps.contents[step_index];
1958 for (;;) {
1959 step->field = field_id;
1960 if (
1961 step->alternative_index != NONE &&
1962 step->alternative_index > step_index &&
1963 step->alternative_index < self->steps.size
1964 ) {
1965 step_index = step->alternative_index;
1966 step = &self->steps.contents[step_index];
1967 } else {
1968 break;
1969 }
1970 }
1971 }
1972
1973 else {
1974 return TSQueryErrorSyntax;
1975 }
1976
1977 stream_skip_whitespace(stream);
1978
1979 // Parse suffixes modifiers for this pattern
1980 for (;;) {
1981 // Parse the one-or-more operator.
1982 if (stream->next == '+') {
1983 stream_advance(stream);
1984 stream_skip_whitespace(stream);
1985
1986 QueryStep repeat_step = query_step__new(WILDCARD_SYMBOL, depth, false);
1987 repeat_step.alternative_index = starting_step_index;
1988 repeat_step.is_pass_through = true;
1989 repeat_step.alternative_is_immediate = true;
1990 array_push(&self->steps, repeat_step);
1991 }
1992
1993 // Parse the zero-or-more repetition operator.
1994 else if (stream->next == '*') {
1995 stream_advance(stream);
1996 stream_skip_whitespace(stream);
1997
1998 QueryStep repeat_step = query_step__new(WILDCARD_SYMBOL, depth, false);
1999 repeat_step.alternative_index = starting_step_index;
2000 repeat_step.is_pass_through = true;
2001 repeat_step.alternative_is_immediate = true;
2002 array_push(&self->steps, repeat_step);
2003
2004 QueryStep *step = &self->steps.contents[starting_step_index];
2005 while (step->alternative_index != NONE) {
2006 step = &self->steps.contents[step->alternative_index];
2007 }
2008 step->alternative_index = self->steps.size;
2009 }
2010
2011 // Parse the optional operator.
2012 else if (stream->next == '?') {
2013 stream_advance(stream);
2014 stream_skip_whitespace(stream);
2015
2016 QueryStep *step = &self->steps.contents[starting_step_index];
2017 while (step->alternative_index != NONE) {
2018 step = &self->steps.contents[step->alternative_index];
2019 }
2020 step->alternative_index = self->steps.size;
2021 }
2022
2023 // Parse an '@'-prefixed capture pattern
2024 else if (stream->next == '@') {
2025 stream_advance(stream);
2026 if (!stream_is_ident_start(stream)) return TSQueryErrorSyntax;
2027 const char *capture_name = stream->input;
2028 stream_scan_identifier(stream);
2029 uint32_t length = stream->input - capture_name;
2030 stream_skip_whitespace(stream);
2031
2032 // Add the capture id to the first step of the pattern
2033 uint16_t capture_id = symbol_table_insert_name(
2034 &self->captures,
2035 capture_name,
2036 length
2037 );
2038
2039 uint32_t step_index = starting_step_index;
2040 for (;;) {
2041 QueryStep *step = &self->steps.contents[step_index];
2042 query_step__add_capture(step, capture_id);
2043 if (
2044 step->alternative_index != NONE &&
2045 step->alternative_index > step_index &&
2046 step->alternative_index < self->steps.size
2047 ) {
2048 step_index = step->alternative_index;
2049 step = &self->steps.contents[step_index];
2050 } else {
2051 break;
2052 }
2053 }
2054 }
2055
2056 // No more suffix modifiers
2057 else {
2058 break;
2059 }
2060 }
2061
2062 return 0;
2063 }
2064
ts_query_new(const TSLanguage * language,const char * source,uint32_t source_len,uint32_t * error_offset,TSQueryError * error_type)2065 TSQuery *ts_query_new(
2066 const TSLanguage *language,
2067 const char *source,
2068 uint32_t source_len,
2069 uint32_t *error_offset,
2070 TSQueryError *error_type
2071 ) {
2072 if (
2073 !language ||
2074 language->version > TREE_SITTER_LANGUAGE_VERSION ||
2075 language->version < TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION
2076 ) {
2077 *error_type = TSQueryErrorLanguage;
2078 return NULL;
2079 }
2080
2081 TSQuery *self = ts_malloc(sizeof(TSQuery));
2082 *self = (TSQuery) {
2083 .steps = array_new(),
2084 .pattern_map = array_new(),
2085 .captures = symbol_table_new(),
2086 .predicate_values = symbol_table_new(),
2087 .predicate_steps = array_new(),
2088 .patterns = array_new(),
2089 .step_offsets = array_new(),
2090 .string_buffer = array_new(),
2091 .negated_fields = array_new(),
2092 .wildcard_root_pattern_count = 0,
2093 .language = language,
2094 };
2095
2096 array_push(&self->negated_fields, 0);
2097
2098 // Parse all of the S-expressions in the given string.
2099 Stream stream = stream_new(source, source_len);
2100 stream_skip_whitespace(&stream);
2101 while (stream.input < stream.end) {
2102 uint32_t pattern_index = self->patterns.size;
2103 uint32_t start_step_index = self->steps.size;
2104 uint32_t start_predicate_step_index = self->predicate_steps.size;
2105 array_push(&self->patterns, ((QueryPattern) {
2106 .steps = (Slice) {.offset = start_step_index},
2107 .predicate_steps = (Slice) {.offset = start_predicate_step_index},
2108 .start_byte = stream_offset(&stream),
2109 }));
2110 *error_type = ts_query__parse_pattern(self, &stream, 0, false);
2111 array_push(&self->steps, query_step__new(0, PATTERN_DONE_MARKER, false));
2112
2113 QueryPattern *pattern = array_back(&self->patterns);
2114 pattern->steps.length = self->steps.size - start_step_index;
2115 pattern->predicate_steps.length = self->predicate_steps.size - start_predicate_step_index;
2116
2117 // If any pattern could not be parsed, then report the error information
2118 // and terminate.
2119 if (*error_type) {
2120 if (*error_type == PARENT_DONE) *error_type = TSQueryErrorSyntax;
2121 *error_offset = stream_offset(&stream);
2122 ts_query_delete(self);
2123 return NULL;
2124 }
2125
2126 // Maintain a map that can look up patterns for a given root symbol.
2127 uint16_t wildcard_root_alternative_index = NONE;
2128 for (;;) {
2129 QueryStep *step = &self->steps.contents[start_step_index];
2130
2131 // If a pattern has a wildcard at its root, but it has a non-wildcard child,
2132 // then optimize the matching process by skipping matching the wildcard.
2133 // Later, during the matching process, the query cursor will check that
2134 // there is a parent node, and capture it if necessary.
2135 if (step->symbol == WILDCARD_SYMBOL && step->depth == 0) {
2136 QueryStep *second_step = &self->steps.contents[start_step_index + 1];
2137 if (second_step->symbol != WILDCARD_SYMBOL && second_step->depth == 1) {
2138 wildcard_root_alternative_index = step->alternative_index;
2139 start_step_index += 1;
2140 step = second_step;
2141 }
2142 }
2143
2144 // Determine whether the pattern has a single root node. This affects
2145 // decisions about whether or not to start matching the pattern when
2146 // a query cursor has a range restriction.
2147 bool is_rooted = true;
2148 uint32_t start_depth = step->depth;
2149 for (uint32_t step_index = start_step_index + 1; step_index < self->steps.size; step_index++) {
2150 QueryStep *step = &self->steps.contents[step_index];
2151 if (step->depth == start_depth) {
2152 is_rooted = false;
2153 break;
2154 }
2155 }
2156
2157 ts_query__pattern_map_insert(self, step->symbol, (PatternEntry) {
2158 .step_index = start_step_index,
2159 .pattern_index = pattern_index,
2160 .is_rooted = is_rooted
2161 });
2162 if (step->symbol == WILDCARD_SYMBOL) {
2163 self->wildcard_root_pattern_count++;
2164 }
2165
2166 // If there are alternatives or options at the root of the pattern,
2167 // then add multiple entries to the pattern map.
2168 if (step->alternative_index != NONE) {
2169 start_step_index = step->alternative_index;
2170 step->alternative_index = NONE;
2171 } else if (wildcard_root_alternative_index != NONE) {
2172 start_step_index = wildcard_root_alternative_index;
2173 wildcard_root_alternative_index = NONE;
2174 } else {
2175 break;
2176 }
2177 }
2178 }
2179
2180 if (!ts_query__analyze_patterns(self, error_offset)) {
2181 *error_type = TSQueryErrorStructure;
2182 ts_query_delete(self);
2183 return NULL;
2184 }
2185
2186 ts_query__finalize_steps(self);
2187 array_delete(&self->string_buffer);
2188 return self;
2189 }
2190
ts_query_delete(TSQuery * self)2191 void ts_query_delete(TSQuery *self) {
2192 if (self) {
2193 array_delete(&self->steps);
2194 array_delete(&self->pattern_map);
2195 array_delete(&self->predicate_steps);
2196 array_delete(&self->patterns);
2197 array_delete(&self->step_offsets);
2198 array_delete(&self->string_buffer);
2199 array_delete(&self->negated_fields);
2200 symbol_table_delete(&self->captures);
2201 symbol_table_delete(&self->predicate_values);
2202 ts_free(self);
2203 }
2204 }
2205
ts_query_pattern_count(const TSQuery * self)2206 uint32_t ts_query_pattern_count(const TSQuery *self) {
2207 return self->patterns.size;
2208 }
2209
ts_query_capture_count(const TSQuery * self)2210 uint32_t ts_query_capture_count(const TSQuery *self) {
2211 return self->captures.slices.size;
2212 }
2213
ts_query_string_count(const TSQuery * self)2214 uint32_t ts_query_string_count(const TSQuery *self) {
2215 return self->predicate_values.slices.size;
2216 }
2217
ts_query_capture_name_for_id(const TSQuery * self,uint32_t index,uint32_t * length)2218 const char *ts_query_capture_name_for_id(
2219 const TSQuery *self,
2220 uint32_t index,
2221 uint32_t *length
2222 ) {
2223 return symbol_table_name_for_id(&self->captures, index, length);
2224 }
2225
ts_query_string_value_for_id(const TSQuery * self,uint32_t index,uint32_t * length)2226 const char *ts_query_string_value_for_id(
2227 const TSQuery *self,
2228 uint32_t index,
2229 uint32_t *length
2230 ) {
2231 return symbol_table_name_for_id(&self->predicate_values, index, length);
2232 }
2233
ts_query_predicates_for_pattern(const TSQuery * self,uint32_t pattern_index,uint32_t * step_count)2234 const TSQueryPredicateStep *ts_query_predicates_for_pattern(
2235 const TSQuery *self,
2236 uint32_t pattern_index,
2237 uint32_t *step_count
2238 ) {
2239 Slice slice = self->patterns.contents[pattern_index].predicate_steps;
2240 *step_count = slice.length;
2241 if (self->predicate_steps.contents == NULL) {
2242 return NULL;
2243 }
2244 return &self->predicate_steps.contents[slice.offset];
2245 }
2246
ts_query_start_byte_for_pattern(const TSQuery * self,uint32_t pattern_index)2247 uint32_t ts_query_start_byte_for_pattern(
2248 const TSQuery *self,
2249 uint32_t pattern_index
2250 ) {
2251 return self->patterns.contents[pattern_index].start_byte;
2252 }
2253
ts_query_step_is_definite(const TSQuery * self,uint32_t byte_offset)2254 bool ts_query_step_is_definite(
2255 const TSQuery *self,
2256 uint32_t byte_offset
2257 ) {
2258 uint32_t step_index = UINT32_MAX;
2259 for (unsigned i = 0; i < self->step_offsets.size; i++) {
2260 StepOffset *step_offset = &self->step_offsets.contents[i];
2261 if (step_offset->byte_offset > byte_offset) break;
2262 step_index = step_offset->step_index;
2263 }
2264 if (step_index < self->steps.size) {
2265 return self->steps.contents[step_index].is_definite;
2266 } else {
2267 return false;
2268 }
2269 }
2270
ts_query_disable_capture(TSQuery * self,const char * name,uint32_t length)2271 void ts_query_disable_capture(
2272 TSQuery *self,
2273 const char *name,
2274 uint32_t length
2275 ) {
2276 // Remove capture information for any pattern step that previously
2277 // captured with the given name.
2278 int id = symbol_table_id_for_name(&self->captures, name, length);
2279 if (id != -1) {
2280 for (unsigned i = 0; i < self->steps.size; i++) {
2281 QueryStep *step = &self->steps.contents[i];
2282 query_step__remove_capture(step, id);
2283 }
2284 ts_query__finalize_steps(self);
2285 }
2286 }
2287
ts_query_disable_pattern(TSQuery * self,uint32_t pattern_index)2288 void ts_query_disable_pattern(
2289 TSQuery *self,
2290 uint32_t pattern_index
2291 ) {
2292 // Remove the given pattern from the pattern map. Its steps will still
2293 // be in the `steps` array, but they will never be read.
2294 for (unsigned i = 0; i < self->pattern_map.size; i++) {
2295 PatternEntry *pattern = &self->pattern_map.contents[i];
2296 if (pattern->pattern_index == pattern_index) {
2297 array_erase(&self->pattern_map, i);
2298 i--;
2299 }
2300 }
2301 }
2302
2303 /***************
2304 * QueryCursor
2305 ***************/
2306
ts_query_cursor_new(void)2307 TSQueryCursor *ts_query_cursor_new(void) {
2308 TSQueryCursor *self = ts_malloc(sizeof(TSQueryCursor));
2309 *self = (TSQueryCursor) {
2310 .did_exceed_match_limit = false,
2311 .ascending = false,
2312 .halted = false,
2313 .states = array_new(),
2314 .finished_states = array_new(),
2315 .capture_list_pool = capture_list_pool_new(),
2316 .start_byte = 0,
2317 .end_byte = UINT32_MAX,
2318 .start_point = {0, 0},
2319 .end_point = POINT_MAX,
2320 };
2321 array_reserve(&self->states, 8);
2322 array_reserve(&self->finished_states, 8);
2323 return self;
2324 }
2325
ts_query_cursor_delete(TSQueryCursor * self)2326 void ts_query_cursor_delete(TSQueryCursor *self) {
2327 array_delete(&self->states);
2328 array_delete(&self->finished_states);
2329 ts_tree_cursor_delete(&self->cursor);
2330 capture_list_pool_delete(&self->capture_list_pool);
2331 ts_free(self);
2332 }
2333
ts_query_cursor_did_exceed_match_limit(const TSQueryCursor * self)2334 bool ts_query_cursor_did_exceed_match_limit(const TSQueryCursor *self) {
2335 return self->did_exceed_match_limit;
2336 }
2337
ts_query_cursor_match_limit(const TSQueryCursor * self)2338 uint32_t ts_query_cursor_match_limit(const TSQueryCursor *self) {
2339 return self->capture_list_pool.max_capture_list_count;
2340 }
2341
ts_query_cursor_set_match_limit(TSQueryCursor * self,uint32_t limit)2342 void ts_query_cursor_set_match_limit(TSQueryCursor *self, uint32_t limit) {
2343 self->capture_list_pool.max_capture_list_count = limit;
2344 }
2345
ts_query_cursor_exec(TSQueryCursor * self,const TSQuery * query,TSNode node)2346 void ts_query_cursor_exec(
2347 TSQueryCursor *self,
2348 const TSQuery *query,
2349 TSNode node
2350 ) {
2351 array_clear(&self->states);
2352 array_clear(&self->finished_states);
2353 ts_tree_cursor_reset(&self->cursor, node);
2354 capture_list_pool_reset(&self->capture_list_pool);
2355 self->next_state_id = 0;
2356 self->depth = 0;
2357 self->ascending = false;
2358 self->halted = false;
2359 self->query = query;
2360 self->did_exceed_match_limit = false;
2361 }
2362
ts_query_cursor_set_byte_range(TSQueryCursor * self,uint32_t start_byte,uint32_t end_byte)2363 void ts_query_cursor_set_byte_range(
2364 TSQueryCursor *self,
2365 uint32_t start_byte,
2366 uint32_t end_byte
2367 ) {
2368 if (end_byte == 0) {
2369 end_byte = UINT32_MAX;
2370 }
2371 self->start_byte = start_byte;
2372 self->end_byte = end_byte;
2373 }
2374
ts_query_cursor_set_point_range(TSQueryCursor * self,TSPoint start_point,TSPoint end_point)2375 void ts_query_cursor_set_point_range(
2376 TSQueryCursor *self,
2377 TSPoint start_point,
2378 TSPoint end_point
2379 ) {
2380 if (end_point.row == 0 && end_point.column == 0) {
2381 end_point = POINT_MAX;
2382 }
2383 self->start_point = start_point;
2384 self->end_point = end_point;
2385 }
2386
2387 // Search through all of the in-progress states, and find the captured
2388 // node that occurs earliest in the document.
ts_query_cursor__first_in_progress_capture(TSQueryCursor * self,uint32_t * state_index,uint32_t * byte_offset,uint32_t * pattern_index,bool * is_definite)2389 static bool ts_query_cursor__first_in_progress_capture(
2390 TSQueryCursor *self,
2391 uint32_t *state_index,
2392 uint32_t *byte_offset,
2393 uint32_t *pattern_index,
2394 bool *is_definite
2395 ) {
2396 bool result = false;
2397 *state_index = UINT32_MAX;
2398 *byte_offset = UINT32_MAX;
2399 *pattern_index = UINT32_MAX;
2400 for (unsigned i = 0; i < self->states.size; i++) {
2401 QueryState *state = &self->states.contents[i];
2402 if (state->dead) continue;
2403
2404 const CaptureList *captures = capture_list_pool_get(
2405 &self->capture_list_pool,
2406 state->capture_list_id
2407 );
2408 if (state->consumed_capture_count >= captures->size) {
2409 continue;
2410 }
2411
2412 TSNode node = captures->contents[state->consumed_capture_count].node;
2413 if (
2414 ts_node_end_byte(node) <= self->start_byte ||
2415 point_lte(ts_node_end_point(node), self->start_point)
2416 ) {
2417 state->consumed_capture_count++;
2418 i--;
2419 continue;
2420 }
2421
2422 uint32_t node_start_byte = ts_node_start_byte(node);
2423 if (
2424 !result ||
2425 node_start_byte < *byte_offset ||
2426 (node_start_byte == *byte_offset && state->pattern_index < *pattern_index)
2427 ) {
2428 QueryStep *step = &self->query->steps.contents[state->step_index];
2429 if (is_definite) {
2430 *is_definite = step->is_definite;
2431 } else if (step->is_definite) {
2432 continue;
2433 }
2434
2435 result = true;
2436 *state_index = i;
2437 *byte_offset = node_start_byte;
2438 *pattern_index = state->pattern_index;
2439 }
2440 }
2441 return result;
2442 }
2443
2444 // Determine which node is first in a depth-first traversal
ts_query_cursor__compare_nodes(TSNode left,TSNode right)2445 int ts_query_cursor__compare_nodes(TSNode left, TSNode right) {
2446 if (left.id != right.id) {
2447 uint32_t left_start = ts_node_start_byte(left);
2448 uint32_t right_start = ts_node_start_byte(right);
2449 if (left_start < right_start) return -1;
2450 if (left_start > right_start) return 1;
2451 uint32_t left_node_count = ts_node_end_byte(left);
2452 uint32_t right_node_count = ts_node_end_byte(right);
2453 if (left_node_count > right_node_count) return -1;
2454 if (left_node_count < right_node_count) return 1;
2455 }
2456 return 0;
2457 }
2458
2459 // Determine if either state contains a superset of the other state's captures.
ts_query_cursor__compare_captures(TSQueryCursor * self,QueryState * left_state,QueryState * right_state,bool * left_contains_right,bool * right_contains_left)2460 void ts_query_cursor__compare_captures(
2461 TSQueryCursor *self,
2462 QueryState *left_state,
2463 QueryState *right_state,
2464 bool *left_contains_right,
2465 bool *right_contains_left
2466 ) {
2467 const CaptureList *left_captures = capture_list_pool_get(
2468 &self->capture_list_pool,
2469 left_state->capture_list_id
2470 );
2471 const CaptureList *right_captures = capture_list_pool_get(
2472 &self->capture_list_pool,
2473 right_state->capture_list_id
2474 );
2475 *left_contains_right = true;
2476 *right_contains_left = true;
2477 unsigned i = 0, j = 0;
2478 for (;;) {
2479 if (i < left_captures->size) {
2480 if (j < right_captures->size) {
2481 TSQueryCapture *left = &left_captures->contents[i];
2482 TSQueryCapture *right = &right_captures->contents[j];
2483 if (left->node.id == right->node.id && left->index == right->index) {
2484 i++;
2485 j++;
2486 } else {
2487 switch (ts_query_cursor__compare_nodes(left->node, right->node)) {
2488 case -1:
2489 *right_contains_left = false;
2490 i++;
2491 break;
2492 case 1:
2493 *left_contains_right = false;
2494 j++;
2495 break;
2496 default:
2497 *right_contains_left = false;
2498 *left_contains_right = false;
2499 i++;
2500 j++;
2501 break;
2502 }
2503 }
2504 } else {
2505 *right_contains_left = false;
2506 break;
2507 }
2508 } else {
2509 if (j < right_captures->size) {
2510 *left_contains_right = false;
2511 }
2512 break;
2513 }
2514 }
2515 }
2516
ts_query_cursor__add_state(TSQueryCursor * self,const PatternEntry * pattern)2517 static void ts_query_cursor__add_state(
2518 TSQueryCursor *self,
2519 const PatternEntry *pattern
2520 ) {
2521 QueryStep *step = &self->query->steps.contents[pattern->step_index];
2522 uint32_t start_depth = self->depth - step->depth;
2523
2524 // Keep the states array in ascending order of start_depth and pattern_index,
2525 // so that it can be processed more efficiently elsewhere. Usually, there is
2526 // no work to do here because of two facts:
2527 // * States with lower start_depth are naturally added first due to the
2528 // order in which nodes are visited.
2529 // * Earlier patterns are naturally added first because of the ordering of the
2530 // pattern_map data structure that's used to initiate matches.
2531 //
2532 // This loop is only needed in cases where two conditions hold:
2533 // * A pattern consists of more than one sibling node, so that its states
2534 // remain in progress after exiting the node that started the match.
2535 // * The first node in the pattern matches against multiple nodes at the
2536 // same depth.
2537 //
2538 // An example of this is the pattern '((comment)* (function))'. If multiple
2539 // `comment` nodes appear in a row, then we may initiate a new state for this
2540 // pattern while another state for the same pattern is already in progress.
2541 // If there are multiple patterns like this in a query, then this loop will
2542 // need to execute in order to keep the states ordered by pattern_index.
2543 uint32_t index = self->states.size;
2544 while (index > 0) {
2545 QueryState *prev_state = &self->states.contents[index - 1];
2546 if (prev_state->start_depth < start_depth) break;
2547 if (prev_state->start_depth == start_depth) {
2548 if (prev_state->pattern_index < pattern->pattern_index) break;
2549 if (prev_state->pattern_index == pattern->pattern_index) {
2550 // Avoid inserting an unnecessary duplicate state, which would be
2551 // immediately pruned by the longest-match criteria.
2552 if (prev_state->step_index == pattern->step_index) return;
2553 }
2554 }
2555 index--;
2556 }
2557
2558 LOG(
2559 " start state. pattern:%u, step:%u\n",
2560 pattern->pattern_index,
2561 pattern->step_index
2562 );
2563 array_insert(&self->states, index, ((QueryState) {
2564 .capture_list_id = NONE,
2565 .step_index = pattern->step_index,
2566 .pattern_index = pattern->pattern_index,
2567 .start_depth = start_depth,
2568 .consumed_capture_count = 0,
2569 .seeking_immediate_match = true,
2570 .has_in_progress_alternatives = false,
2571 .needs_parent = step->depth == 1,
2572 .dead = false,
2573 }));
2574 }
2575
2576 // Acquire a capture list for this state. If there are no capture lists left in the
2577 // pool, this will steal the capture list from another existing state, and mark that
2578 // other state as 'dead'.
ts_query_cursor__prepare_to_capture(TSQueryCursor * self,QueryState * state,unsigned state_index_to_preserve)2579 static CaptureList *ts_query_cursor__prepare_to_capture(
2580 TSQueryCursor *self,
2581 QueryState *state,
2582 unsigned state_index_to_preserve
2583 ) {
2584 if (state->capture_list_id == NONE) {
2585 state->capture_list_id = capture_list_pool_acquire(&self->capture_list_pool);
2586
2587 // If there are no capture lists left in the pool, then terminate whichever
2588 // state has captured the earliest node in the document, and steal its
2589 // capture list.
2590 if (state->capture_list_id == NONE) {
2591 self->did_exceed_match_limit = true;
2592 uint32_t state_index, byte_offset, pattern_index;
2593 if (
2594 ts_query_cursor__first_in_progress_capture(
2595 self,
2596 &state_index,
2597 &byte_offset,
2598 &pattern_index,
2599 NULL
2600 ) &&
2601 state_index != state_index_to_preserve
2602 ) {
2603 LOG(
2604 " abandon state. index:%u, pattern:%u, offset:%u.\n",
2605 state_index, pattern_index, byte_offset
2606 );
2607 QueryState *other_state = &self->states.contents[state_index];
2608 state->capture_list_id = other_state->capture_list_id;
2609 other_state->capture_list_id = NONE;
2610 other_state->dead = true;
2611 CaptureList *list = capture_list_pool_get_mut(
2612 &self->capture_list_pool,
2613 state->capture_list_id
2614 );
2615 array_clear(list);
2616 return list;
2617 } else {
2618 LOG(" ran out of capture lists");
2619 return NULL;
2620 }
2621 }
2622 }
2623 return capture_list_pool_get_mut(&self->capture_list_pool, state->capture_list_id);
2624 }
2625
ts_query_cursor__capture(TSQueryCursor * self,QueryState * state,QueryStep * step,TSNode node)2626 static void ts_query_cursor__capture(
2627 TSQueryCursor *self,
2628 QueryState *state,
2629 QueryStep *step,
2630 TSNode node
2631 ) {
2632 if (state->dead) return;
2633 CaptureList *capture_list = ts_query_cursor__prepare_to_capture(self, state, UINT32_MAX);
2634 if (!capture_list) {
2635 state->dead = true;
2636 return;
2637 }
2638
2639 for (unsigned j = 0; j < MAX_STEP_CAPTURE_COUNT; j++) {
2640 uint16_t capture_id = step->capture_ids[j];
2641 if (step->capture_ids[j] == NONE) break;
2642 array_push(capture_list, ((TSQueryCapture) { node, capture_id }));
2643 LOG(
2644 " capture node. type:%s, pattern:%u, capture_id:%u, capture_count:%u\n",
2645 ts_node_type(node),
2646 state->pattern_index,
2647 capture_id,
2648 capture_list->size
2649 );
2650 }
2651 }
2652
2653 // Duplicate the given state and insert the newly-created state immediately after
2654 // the given state in the `states` array. Ensures that the given state reference is
2655 // still valid, even if the states array is reallocated.
ts_query_cursor__copy_state(TSQueryCursor * self,QueryState ** state_ref)2656 static QueryState *ts_query_cursor__copy_state(
2657 TSQueryCursor *self,
2658 QueryState **state_ref
2659 ) {
2660 const QueryState *state = *state_ref;
2661 uint32_t state_index = state - self->states.contents;
2662 QueryState copy = *state;
2663 copy.capture_list_id = NONE;
2664
2665 // If the state has captures, copy its capture list.
2666 if (state->capture_list_id != NONE) {
2667 CaptureList *new_captures = ts_query_cursor__prepare_to_capture(self, ©, state_index);
2668 if (!new_captures) return NULL;
2669 const CaptureList *old_captures = capture_list_pool_get(
2670 &self->capture_list_pool,
2671 state->capture_list_id
2672 );
2673 array_push_all(new_captures, old_captures);
2674 }
2675
2676 array_insert(&self->states, state_index + 1, copy);
2677 *state_ref = &self->states.contents[state_index];
2678 return &self->states.contents[state_index + 1];
2679 }
2680
2681 // Walk the tree, processing patterns until at least one pattern finishes,
2682 // If one or more patterns finish, return `true` and store their states in the
2683 // `finished_states` array. Multiple patterns can finish on the same node. If
2684 // there are no more matches, return `false`.
ts_query_cursor__advance(TSQueryCursor * self,bool stop_on_definite_step)2685 static inline bool ts_query_cursor__advance(
2686 TSQueryCursor *self,
2687 bool stop_on_definite_step
2688 ) {
2689 bool did_match = false;
2690 for (;;) {
2691 if (self->halted) {
2692 while (self->states.size > 0) {
2693 QueryState state = array_pop(&self->states);
2694 capture_list_pool_release(
2695 &self->capture_list_pool,
2696 state.capture_list_id
2697 );
2698 }
2699 }
2700
2701 if (did_match || self->halted) return did_match;
2702
2703 // Exit the current node.
2704 if (self->ascending) {
2705 LOG("leave node. type:%s\n", ts_node_type(ts_tree_cursor_current_node(&self->cursor)));
2706
2707 // Leave this node by stepping to its next sibling or to its parent.
2708 if (ts_tree_cursor_goto_next_sibling(&self->cursor)) {
2709 self->ascending = false;
2710 } else if (ts_tree_cursor_goto_parent(&self->cursor)) {
2711 self->depth--;
2712 } else {
2713 LOG("halt at root\n");
2714 self->halted = true;
2715 }
2716
2717 // After leaving a node, remove any states that cannot make further progress.
2718 uint32_t deleted_count = 0;
2719 for (unsigned i = 0, n = self->states.size; i < n; i++) {
2720 QueryState *state = &self->states.contents[i];
2721 QueryStep *step = &self->query->steps.contents[state->step_index];
2722
2723 // If a state completed its pattern inside of this node, but was deferred from finishing
2724 // in order to search for longer matches, mark it as finished.
2725 if (step->depth == PATTERN_DONE_MARKER) {
2726 if (state->start_depth > self->depth || self->halted) {
2727 LOG(" finish pattern %u\n", state->pattern_index);
2728 state->id = self->next_state_id++;
2729 array_push(&self->finished_states, *state);
2730 did_match = true;
2731 deleted_count++;
2732 continue;
2733 }
2734 }
2735
2736 // If a state needed to match something within this node, then remove that state
2737 // as it has failed to match.
2738 else if ((uint32_t)state->start_depth + (uint32_t)step->depth > self->depth) {
2739 LOG(
2740 " failed to match. pattern:%u, step:%u\n",
2741 state->pattern_index,
2742 state->step_index
2743 );
2744 capture_list_pool_release(
2745 &self->capture_list_pool,
2746 state->capture_list_id
2747 );
2748 deleted_count++;
2749 continue;
2750 }
2751
2752 if (deleted_count > 0) {
2753 self->states.contents[i - deleted_count] = *state;
2754 }
2755 }
2756 self->states.size -= deleted_count;
2757 }
2758
2759 // Enter a new node.
2760 else {
2761 // Get the properties of the current node.
2762 TSNode node = ts_tree_cursor_current_node(&self->cursor);
2763 TSNode parent_node = ts_tree_cursor_parent_node(&self->cursor);
2764 TSSymbol symbol = ts_node_symbol(node);
2765 bool is_named = ts_node_is_named(node);
2766 bool has_later_siblings;
2767 bool has_later_named_siblings;
2768 bool can_have_later_siblings_with_this_field;
2769 TSFieldId field_id = 0;
2770 TSSymbol supertypes[8] = {0};
2771 unsigned supertype_count = 8;
2772 ts_tree_cursor_current_status(
2773 &self->cursor,
2774 &field_id,
2775 &has_later_siblings,
2776 &has_later_named_siblings,
2777 &can_have_later_siblings_with_this_field,
2778 supertypes,
2779 &supertype_count
2780 );
2781 LOG(
2782 "enter node. type:%s, field:%s, row:%u state_count:%u, finished_state_count:%u\n",
2783 ts_node_type(node),
2784 ts_language_field_name_for_id(self->query->language, field_id),
2785 ts_node_start_point(node).row,
2786 self->states.size,
2787 self->finished_states.size
2788 );
2789
2790 bool node_intersects_range = (
2791 ts_node_end_byte(node) > self->start_byte &&
2792 ts_node_start_byte(node) < self->end_byte &&
2793 point_gt(ts_node_end_point(node), self->start_point) &&
2794 point_lt(ts_node_start_point(node), self->end_point)
2795 );
2796
2797 bool parent_intersects_range = ts_node_is_null(parent_node) || (
2798 ts_node_end_byte(parent_node) > self->start_byte &&
2799 ts_node_start_byte(parent_node) < self->end_byte &&
2800 point_gt(ts_node_end_point(parent_node), self->start_point) &&
2801 point_lt(ts_node_start_point(parent_node), self->end_point)
2802 );
2803
2804 // Add new states for any patterns whose root node is a wildcard.
2805 for (unsigned i = 0; i < self->query->wildcard_root_pattern_count; i++) {
2806 PatternEntry *pattern = &self->query->pattern_map.contents[i];
2807
2808 // If this node matches the first step of the pattern, then add a new
2809 // state at the start of this pattern.
2810 QueryStep *step = &self->query->steps.contents[pattern->step_index];
2811 if (
2812 (node_intersects_range || (!pattern->is_rooted && parent_intersects_range)) &&
2813 (!step->field || field_id == step->field) &&
2814 (!step->supertype_symbol || supertype_count > 0)
2815 ) {
2816 ts_query_cursor__add_state(self, pattern);
2817 }
2818 }
2819
2820 // Add new states for any patterns whose root node matches this node.
2821 unsigned i;
2822 if (ts_query__pattern_map_search(self->query, symbol, &i)) {
2823 PatternEntry *pattern = &self->query->pattern_map.contents[i];
2824
2825 QueryStep *step = &self->query->steps.contents[pattern->step_index];
2826 do {
2827 // If this node matches the first step of the pattern, then add a new
2828 // state at the start of this pattern.
2829 if (
2830 (node_intersects_range || (!pattern->is_rooted && parent_intersects_range)) &&
2831 (!step->field || field_id == step->field)
2832 ) {
2833 ts_query_cursor__add_state(self, pattern);
2834 }
2835
2836 // Advance to the next pattern whose root node matches this node.
2837 i++;
2838 if (i == self->query->pattern_map.size) break;
2839 pattern = &self->query->pattern_map.contents[i];
2840 step = &self->query->steps.contents[pattern->step_index];
2841 } while (step->symbol == symbol);
2842 }
2843
2844 // Update all of the in-progress states with current node.
2845 for (unsigned i = 0, copy_count = 0; i < self->states.size; i += 1 + copy_count) {
2846 QueryState *state = &self->states.contents[i];
2847 QueryStep *step = &self->query->steps.contents[state->step_index];
2848 state->has_in_progress_alternatives = false;
2849 copy_count = 0;
2850
2851 // Check that the node matches all of the criteria for the next
2852 // step of the pattern.
2853 if ((uint32_t)state->start_depth + (uint32_t)step->depth != self->depth) continue;
2854
2855 // Determine if this node matches this step of the pattern, and also
2856 // if this node can have later siblings that match this step of the
2857 // pattern.
2858 bool node_does_match =
2859 step->symbol == symbol ||
2860 step->symbol == WILDCARD_SYMBOL ||
2861 (step->symbol == NAMED_WILDCARD_SYMBOL && is_named);
2862 bool later_sibling_can_match = has_later_siblings;
2863 if ((step->is_immediate && is_named) || state->seeking_immediate_match) {
2864 later_sibling_can_match = false;
2865 }
2866 if (step->is_last_child && has_later_named_siblings) {
2867 node_does_match = false;
2868 }
2869 if (step->supertype_symbol) {
2870 bool has_supertype = false;
2871 for (unsigned j = 0; j < supertype_count; j++) {
2872 if (supertypes[j] == step->supertype_symbol) {
2873 has_supertype = true;
2874 break;
2875 }
2876 }
2877 if (!has_supertype) node_does_match = false;
2878 }
2879 if (step->field) {
2880 if (step->field == field_id) {
2881 if (!can_have_later_siblings_with_this_field) {
2882 later_sibling_can_match = false;
2883 }
2884 } else {
2885 node_does_match = false;
2886 }
2887 }
2888
2889 if (step->negated_field_list_id) {
2890 TSFieldId *negated_field_ids = &self->query->negated_fields.contents[step->negated_field_list_id];
2891 for (;;) {
2892 TSFieldId negated_field_id = *negated_field_ids;
2893 if (negated_field_id) {
2894 negated_field_ids++;
2895 if (ts_node_child_by_field_id(node, negated_field_id).id) {
2896 node_does_match = false;
2897 break;
2898 }
2899 } else {
2900 break;
2901 }
2902 }
2903 }
2904
2905 // Remove states immediately if it is ever clear that they cannot match.
2906 if (!node_does_match) {
2907 if (!later_sibling_can_match) {
2908 LOG(
2909 " discard state. pattern:%u, step:%u\n",
2910 state->pattern_index,
2911 state->step_index
2912 );
2913 capture_list_pool_release(
2914 &self->capture_list_pool,
2915 state->capture_list_id
2916 );
2917 array_erase(&self->states, i);
2918 i--;
2919 }
2920 continue;
2921 }
2922
2923 // Some patterns can match their root node in multiple ways, capturing different
2924 // children. If this pattern step could match later children within the same
2925 // parent, then this query state cannot simply be updated in place. It must be
2926 // split into two states: one that matches this node, and one which skips over
2927 // this node, to preserve the possibility of matching later siblings.
2928 if (later_sibling_can_match && (step->contains_captures || !step->is_definite)) {
2929 if (ts_query_cursor__copy_state(self, &state)) {
2930 LOG(
2931 " split state for capture. pattern:%u, step:%u\n",
2932 state->pattern_index,
2933 state->step_index
2934 );
2935 copy_count++;
2936 }
2937 }
2938
2939 // If this pattern started with a wildcard, such that the pattern map
2940 // actually points to the *second* step of the pattern, then check
2941 // that the node has a parent, and capture the parent node if necessary.
2942 if (state->needs_parent) {
2943 TSNode parent = ts_tree_cursor_parent_node(&self->cursor);
2944 if (ts_node_is_null(parent)) {
2945 LOG(" missing parent node\n");
2946 state->dead = true;
2947 } else {
2948 state->needs_parent = false;
2949 QueryStep *skipped_wildcard_step = step;
2950 do {
2951 skipped_wildcard_step--;
2952 } while (
2953 skipped_wildcard_step->is_dead_end ||
2954 skipped_wildcard_step->is_pass_through ||
2955 skipped_wildcard_step->depth > 0
2956 );
2957 if (skipped_wildcard_step->capture_ids[0] != NONE) {
2958 LOG(" capture wildcard parent\n");
2959 ts_query_cursor__capture(
2960 self,
2961 state,
2962 skipped_wildcard_step,
2963 parent
2964 );
2965 }
2966 }
2967 }
2968
2969 // If the current node is captured in this pattern, add it to the capture list.
2970 if (step->capture_ids[0] != NONE) {
2971 ts_query_cursor__capture(self, state, step, node);
2972 }
2973
2974 if (state->dead) {
2975 array_erase(&self->states, i);
2976 i--;
2977 continue;
2978 }
2979
2980 // Advance this state to the next step of its pattern.
2981 state->step_index++;
2982 state->seeking_immediate_match = false;
2983 LOG(
2984 " advance state. pattern:%u, step:%u\n",
2985 state->pattern_index,
2986 state->step_index
2987 );
2988
2989 QueryStep *next_step = &self->query->steps.contents[state->step_index];
2990 if (stop_on_definite_step && next_step->is_definite) did_match = true;
2991
2992 // If this state's next step has an alternative step, then copy the state in order
2993 // to pursue both alternatives. The alternative step itself may have an alternative,
2994 // so this is an interative process.
2995 unsigned end_index = i + 1;
2996 for (unsigned j = i; j < end_index; j++) {
2997 QueryState *state = &self->states.contents[j];
2998 QueryStep *next_step = &self->query->steps.contents[state->step_index];
2999 if (next_step->alternative_index != NONE) {
3000 // A "dead-end" step exists only to add a non-sequential jump into the step sequence,
3001 // via its alternative index. When a state reaches a dead-end step, it jumps straight
3002 // to the step's alternative.
3003 if (next_step->is_dead_end) {
3004 state->step_index = next_step->alternative_index;
3005 j--;
3006 continue;
3007 }
3008
3009 // A "pass-through" step exists only to add a branch into the step sequence,
3010 // via its alternative_index. When a state reaches a pass-through step, it splits
3011 // in order to process the alternative step, and then it advances to the next step.
3012 if (next_step->is_pass_through) {
3013 state->step_index++;
3014 j--;
3015 }
3016
3017 QueryState *copy = ts_query_cursor__copy_state(self, &state);
3018 if (copy) {
3019 LOG(
3020 " split state for branch. pattern:%u, from_step:%u, to_step:%u, immediate:%d, capture_count: %u\n",
3021 copy->pattern_index,
3022 copy->step_index,
3023 next_step->alternative_index,
3024 next_step->alternative_is_immediate,
3025 capture_list_pool_get(&self->capture_list_pool, copy->capture_list_id)->size
3026 );
3027 end_index++;
3028 copy_count++;
3029 copy->step_index = next_step->alternative_index;
3030 if (next_step->alternative_is_immediate) {
3031 copy->seeking_immediate_match = true;
3032 }
3033 }
3034 }
3035 }
3036 }
3037
3038 for (unsigned i = 0; i < self->states.size; i++) {
3039 QueryState *state = &self->states.contents[i];
3040 if (state->dead) {
3041 array_erase(&self->states, i);
3042 i--;
3043 continue;
3044 }
3045
3046 // Enfore the longest-match criteria. When a query pattern contains optional or
3047 // repeated nodes, this is necessary to avoid multiple redundant states, where
3048 // one state has a strict subset of another state's captures.
3049 bool did_remove = false;
3050 for (unsigned j = i + 1; j < self->states.size; j++) {
3051 QueryState *other_state = &self->states.contents[j];
3052
3053 // Query states are kept in ascending order of start_depth and pattern_index.
3054 // Since the longest-match criteria is only used for deduping matches of the same
3055 // pattern and root node, we only need to perform pairwise comparisons within a
3056 // small slice of the states array.
3057 if (
3058 other_state->start_depth != state->start_depth ||
3059 other_state->pattern_index != state->pattern_index
3060 ) break;
3061
3062 bool left_contains_right, right_contains_left;
3063 ts_query_cursor__compare_captures(
3064 self,
3065 state,
3066 other_state,
3067 &left_contains_right,
3068 &right_contains_left
3069 );
3070 if (left_contains_right) {
3071 if (state->step_index == other_state->step_index) {
3072 LOG(
3073 " drop shorter state. pattern: %u, step_index: %u\n",
3074 state->pattern_index,
3075 state->step_index
3076 );
3077 capture_list_pool_release(&self->capture_list_pool, other_state->capture_list_id);
3078 array_erase(&self->states, j);
3079 j--;
3080 continue;
3081 }
3082 other_state->has_in_progress_alternatives = true;
3083 }
3084 if (right_contains_left) {
3085 if (state->step_index == other_state->step_index) {
3086 LOG(
3087 " drop shorter state. pattern: %u, step_index: %u\n",
3088 state->pattern_index,
3089 state->step_index
3090 );
3091 capture_list_pool_release(&self->capture_list_pool, state->capture_list_id);
3092 array_erase(&self->states, i);
3093 i--;
3094 did_remove = true;
3095 break;
3096 }
3097 state->has_in_progress_alternatives = true;
3098 }
3099 }
3100
3101 // If there the state is at the end of its pattern, remove it from the list
3102 // of in-progress states and add it to the list of finished states.
3103 if (!did_remove) {
3104 LOG(
3105 " keep state. pattern: %u, start_depth: %u, step_index: %u, capture_count: %u\n",
3106 state->pattern_index,
3107 state->start_depth,
3108 state->step_index,
3109 capture_list_pool_get(&self->capture_list_pool, state->capture_list_id)->size
3110 );
3111 QueryStep *next_step = &self->query->steps.contents[state->step_index];
3112 if (next_step->depth == PATTERN_DONE_MARKER) {
3113 if (state->has_in_progress_alternatives) {
3114 LOG(" defer finishing pattern %u\n", state->pattern_index);
3115 } else {
3116 LOG(" finish pattern %u\n", state->pattern_index);
3117 state->id = self->next_state_id++;
3118 array_push(&self->finished_states, *state);
3119 array_erase(&self->states, state - self->states.contents);
3120 did_match = true;
3121 i--;
3122 }
3123 }
3124 }
3125 }
3126
3127 // When the current node ends prior to the desired start offset,
3128 // only descend for the purpose of continuing in-progress matches.
3129 bool should_descend = node_intersects_range;
3130 if (!should_descend) {
3131 for (unsigned i = 0; i < self->states.size; i++) {
3132 QueryState *state = &self->states.contents[i];;
3133 QueryStep *next_step = &self->query->steps.contents[state->step_index];
3134 if (
3135 next_step->depth != PATTERN_DONE_MARKER &&
3136 state->start_depth + next_step->depth > self->depth
3137 ) {
3138 should_descend = true;
3139 break;
3140 }
3141 }
3142 }
3143
3144 if (!should_descend) {
3145 LOG(
3146 " not descending. node end byte: %u, start byte: %u\n",
3147 ts_node_end_byte(node),
3148 self->start_byte
3149 );
3150 }
3151
3152 if (should_descend && ts_tree_cursor_goto_first_child(&self->cursor)) {
3153 self->depth++;
3154 } else {
3155 self->ascending = true;
3156 }
3157 }
3158 }
3159 }
3160
ts_query_cursor_next_match(TSQueryCursor * self,TSQueryMatch * match)3161 bool ts_query_cursor_next_match(
3162 TSQueryCursor *self,
3163 TSQueryMatch *match
3164 ) {
3165 if (self->finished_states.size == 0) {
3166 if (!ts_query_cursor__advance(self, false)) {
3167 return false;
3168 }
3169 }
3170
3171 QueryState *state = &self->finished_states.contents[0];
3172 match->id = state->id;
3173 match->pattern_index = state->pattern_index;
3174 const CaptureList *captures = capture_list_pool_get(
3175 &self->capture_list_pool,
3176 state->capture_list_id
3177 );
3178 match->captures = captures->contents;
3179 match->capture_count = captures->size;
3180 capture_list_pool_release(&self->capture_list_pool, state->capture_list_id);
3181 array_erase(&self->finished_states, 0);
3182 return true;
3183 }
3184
ts_query_cursor_remove_match(TSQueryCursor * self,uint32_t match_id)3185 void ts_query_cursor_remove_match(
3186 TSQueryCursor *self,
3187 uint32_t match_id
3188 ) {
3189 for (unsigned i = 0; i < self->finished_states.size; i++) {
3190 const QueryState *state = &self->finished_states.contents[i];
3191 if (state->id == match_id) {
3192 capture_list_pool_release(
3193 &self->capture_list_pool,
3194 state->capture_list_id
3195 );
3196 array_erase(&self->finished_states, i);
3197 return;
3198 }
3199 }
3200 }
3201
ts_query_cursor_next_capture(TSQueryCursor * self,TSQueryMatch * match,uint32_t * capture_index)3202 bool ts_query_cursor_next_capture(
3203 TSQueryCursor *self,
3204 TSQueryMatch *match,
3205 uint32_t *capture_index
3206 ) {
3207 // The goal here is to return captures in order, even though they may not
3208 // be discovered in order, because patterns can overlap. Search for matches
3209 // until there is a finished capture that is before any unfinished capture.
3210 for (;;) {
3211 // First, find the earliest capture in an unfinished match.
3212 uint32_t first_unfinished_capture_byte;
3213 uint32_t first_unfinished_pattern_index;
3214 uint32_t first_unfinished_state_index;
3215 bool first_unfinished_state_is_definite = false;
3216 ts_query_cursor__first_in_progress_capture(
3217 self,
3218 &first_unfinished_state_index,
3219 &first_unfinished_capture_byte,
3220 &first_unfinished_pattern_index,
3221 &first_unfinished_state_is_definite
3222 );
3223
3224 // Then find the earliest capture in a finished match. It must occur
3225 // before the first capture in an *unfinished* match.
3226 QueryState *first_finished_state = NULL;
3227 uint32_t first_finished_capture_byte = first_unfinished_capture_byte;
3228 uint32_t first_finished_pattern_index = first_unfinished_pattern_index;
3229 for (unsigned i = 0; i < self->finished_states.size;) {
3230 QueryState *state = &self->finished_states.contents[i];
3231 const CaptureList *captures = capture_list_pool_get(
3232 &self->capture_list_pool,
3233 state->capture_list_id
3234 );
3235
3236 // Remove states whose captures are all consumed.
3237 if (state->consumed_capture_count >= captures->size) {
3238 capture_list_pool_release(
3239 &self->capture_list_pool,
3240 state->capture_list_id
3241 );
3242 array_erase(&self->finished_states, i);
3243 continue;
3244 }
3245
3246 // Skip captures that precede the cursor's start byte.
3247 TSNode node = captures->contents[state->consumed_capture_count].node;
3248 if (ts_node_end_byte(node) <= self->start_byte) {
3249 state->consumed_capture_count++;
3250 continue;
3251 }
3252
3253 uint32_t node_start_byte = ts_node_start_byte(node);
3254 if (
3255 node_start_byte < first_finished_capture_byte ||
3256 (
3257 node_start_byte == first_finished_capture_byte &&
3258 state->pattern_index < first_finished_pattern_index
3259 )
3260 ) {
3261 first_finished_state = state;
3262 first_finished_capture_byte = node_start_byte;
3263 first_finished_pattern_index = state->pattern_index;
3264 }
3265 i++;
3266 }
3267
3268 // If there is finished capture that is clearly before any unfinished
3269 // capture, then return its match, and its capture index. Internally
3270 // record the fact that the capture has been 'consumed'.
3271 QueryState *state;
3272 if (first_finished_state) {
3273 state = first_finished_state;
3274 } else if (first_unfinished_state_is_definite) {
3275 state = &self->states.contents[first_unfinished_state_index];
3276 } else {
3277 state = NULL;
3278 }
3279
3280 if (state) {
3281 match->id = state->id;
3282 match->pattern_index = state->pattern_index;
3283 const CaptureList *captures = capture_list_pool_get(
3284 &self->capture_list_pool,
3285 state->capture_list_id
3286 );
3287 match->captures = captures->contents;
3288 match->capture_count = captures->size;
3289 *capture_index = state->consumed_capture_count;
3290 state->consumed_capture_count++;
3291 return true;
3292 }
3293
3294 if (capture_list_pool_is_empty(&self->capture_list_pool)) {
3295 LOG(
3296 " abandon state. index:%u, pattern:%u, offset:%u.\n",
3297 first_unfinished_state_index,
3298 first_unfinished_pattern_index,
3299 first_unfinished_capture_byte
3300 );
3301 capture_list_pool_release(
3302 &self->capture_list_pool,
3303 self->states.contents[first_unfinished_state_index].capture_list_id
3304 );
3305 array_erase(&self->states, first_unfinished_state_index);
3306 }
3307
3308 // If there are no finished matches that are ready to be returned, then
3309 // continue finding more matches.
3310 if (
3311 !ts_query_cursor__advance(self, true) &&
3312 self->finished_states.size == 0
3313 ) return false;
3314 }
3315 }
3316
3317 #undef LOG
3318