1 /* DOM node selection */
2
3 #ifdef HAVE_CONFIG_H
4 #include "config.h"
5 #endif
6
7 #include "elinks.h"
8
9 #include "dom/css/scanner.h"
10 #include "dom/dom.h"
11 #include "dom/node.h"
12 #include "dom/scanner.h"
13 #include "dom/select.h"
14 #include "dom/stack.h"
15 #include "dom/string.h"
16 #include "util/memory.h"
17
18
19 /* Selector parsing: */
20
21 /* Maps the content of a scanner token to a pseudo-class or -element ID. */
22 static enum dom_select_pseudo
get_dom_select_pseudo(struct dom_scanner_token * token)23 get_dom_select_pseudo(struct dom_scanner_token *token)
24 {
25 static struct {
26 struct dom_string string;
27 enum dom_select_pseudo pseudo;
28 } pseudo_info[] = {
29
30 #define INIT_DOM_SELECT_PSEUDO_STRING(str, type) \
31 { INIT_DOM_STRING(str, -1), DOM_SELECT_PSEUDO_##type }
32
33 INIT_DOM_SELECT_PSEUDO_STRING("first-line", FIRST_LINE),
34 INIT_DOM_SELECT_PSEUDO_STRING("first-letter", FIRST_LETTER),
35 INIT_DOM_SELECT_PSEUDO_STRING("selection", SELECTION),
36 INIT_DOM_SELECT_PSEUDO_STRING("after", AFTER),
37 INIT_DOM_SELECT_PSEUDO_STRING("before", BEFORE),
38 INIT_DOM_SELECT_PSEUDO_STRING("link", LINK),
39 INIT_DOM_SELECT_PSEUDO_STRING("visited", VISITED),
40 INIT_DOM_SELECT_PSEUDO_STRING("active", ACTIVE),
41 INIT_DOM_SELECT_PSEUDO_STRING("hover", HOVER),
42 INIT_DOM_SELECT_PSEUDO_STRING("focus", FOCUS),
43 INIT_DOM_SELECT_PSEUDO_STRING("target", TARGET),
44 INIT_DOM_SELECT_PSEUDO_STRING("enabled", ENABLED),
45 INIT_DOM_SELECT_PSEUDO_STRING("disabled", DISABLED),
46 INIT_DOM_SELECT_PSEUDO_STRING("checked", CHECKED),
47 INIT_DOM_SELECT_PSEUDO_STRING("indeterminate", INDETERMINATE),
48
49 /* Content pseudo-classes: */
50
51 INIT_DOM_SELECT_PSEUDO_STRING("contains", CONTAINS),
52
53 /* Structural pseudo-classes: */
54
55 INIT_DOM_SELECT_PSEUDO_STRING("nth-child", NTH_CHILD),
56 INIT_DOM_SELECT_PSEUDO_STRING("nth-last-child", NTH_LAST_CHILD),
57 INIT_DOM_SELECT_PSEUDO_STRING("first-child", FIRST_CHILD),
58 INIT_DOM_SELECT_PSEUDO_STRING("last-child", LAST_CHILD),
59 INIT_DOM_SELECT_PSEUDO_STRING("only-child", ONLY_CHILD),
60
61 INIT_DOM_SELECT_PSEUDO_STRING("nth-of-type", NTH_TYPE),
62 INIT_DOM_SELECT_PSEUDO_STRING("nth-last-of-type",NTH_LAST_TYPE),
63 INIT_DOM_SELECT_PSEUDO_STRING("first-of-type", FIRST_TYPE),
64 INIT_DOM_SELECT_PSEUDO_STRING("last-of-type", LAST_TYPE),
65 INIT_DOM_SELECT_PSEUDO_STRING("only-of-type", ONLY_TYPE),
66
67 INIT_DOM_SELECT_PSEUDO_STRING("root", ROOT),
68 INIT_DOM_SELECT_PSEUDO_STRING("empty", EMPTY),
69
70 #undef INIT_DOM_SELECT_PSEUDO_STRING
71
72 };
73 int i;
74
75 for (i = 0; i < sizeof_array(pseudo_info); i++)
76 if (!dom_string_casecmp(&pseudo_info[i].string, &token->string))
77 return pseudo_info[i].pseudo;
78
79 return DOM_SELECT_PSEUDO_UNKNOWN;
80 }
81
82 /* Parses attribute selector. For example '[foo="bar"]' or '[foo|="boo"]'. */
83 static enum dom_exception_code
parse_dom_select_attribute(struct dom_select_node * sel,struct dom_scanner * scanner)84 parse_dom_select_attribute(struct dom_select_node *sel, struct dom_scanner *scanner)
85 {
86 struct dom_scanner_token *token = get_dom_scanner_token(scanner);
87
88 /* Get '['. */
89
90 if (token->type != '[')
91 return DOM_ERR_INVALID_STATE;
92
93 /* Get the attribute name. */
94
95 token = get_next_dom_scanner_token(scanner);
96 if (!token || token->type != CSS_TOKEN_IDENT)
97 return DOM_ERR_SYNTAX;
98
99 copy_dom_string(&sel->node.string, &token->string);
100
101 /* Get the optional '=' combo or ending ']'. */
102
103 token = get_next_dom_scanner_token(scanner);
104 if (!token) return DOM_ERR_SYNTAX;
105
106 switch (token->type) {
107 case ']':
108 sel->match.attribute |= DOM_SELECT_ATTRIBUTE_ANY;
109 return DOM_ERR_NONE;
110
111 case CSS_TOKEN_SELECT_SPACE_LIST:
112 sel->match.attribute |= DOM_SELECT_ATTRIBUTE_SPACE_LIST;
113 break;
114
115 case CSS_TOKEN_SELECT_HYPHEN_LIST:
116 sel->match.attribute |= DOM_SELECT_ATTRIBUTE_HYPHEN_LIST;
117 break;
118
119 case CSS_TOKEN_SELECT_BEGIN:
120 sel->match.attribute |= DOM_SELECT_ATTRIBUTE_BEGIN;
121 break;
122
123 case CSS_TOKEN_SELECT_END:
124 sel->match.attribute |= DOM_SELECT_ATTRIBUTE_END;
125 break;
126
127 case CSS_TOKEN_SELECT_CONTAINS:
128 sel->match.attribute |= DOM_SELECT_ATTRIBUTE_CONTAINS;
129 break;
130
131 default:
132 return DOM_ERR_SYNTAX;
133 }
134
135 /* Get the required value. */
136
137 token = get_next_dom_scanner_token(scanner);
138 if (!token) return DOM_ERR_SYNTAX;
139
140 switch (token->type) {
141 case CSS_TOKEN_IDENT:
142 case CSS_TOKEN_STRING:
143 copy_dom_string(&sel->node.data.attribute.value, &token->string);
144 break;
145
146 default:
147 return DOM_ERR_SYNTAX;
148 }
149
150 /* Get the ending ']'. */
151
152 token = get_next_dom_scanner_token(scanner);
153 if (token && token->type == ']')
154 return DOM_ERR_NONE;
155
156 return DOM_ERR_SYNTAX;
157 }
158
159 /* Parse:
160 *
161 * 0n+1 / 1
162 * 2n+0 / 2n
163 * 2n+1
164 * -0n+2
165 * -0n+1 / -1
166 * 1n+0 / n+0 / n
167 * 0n+0
168 */
169
170 /* FIXME: Move somewhere else? dom/scanner.h? */
171 static size_t
get_scanner_token_number(struct dom_scanner_token * token)172 get_scanner_token_number(struct dom_scanner_token *token)
173 {
174 size_t number = 0;
175
176 while (token->string.length > 0 && isdigit(token->string.string[0])) {
177 size_t old_number = number;
178
179 number *= 10;
180
181 /* -E2BIG */
182 if (old_number > number)
183 return -1;
184
185 number += token->string.string[0] - '0';
186 skip_dom_scanner_token_char(token);
187 }
188
189 return number;
190 }
191
192 /* Parses the '(...)' part of ':nth-of-type(...)' and ':nth-child(...)'. */
193 static enum dom_exception_code
parse_dom_select_nth_arg(struct dom_select_nth_match * nth,struct dom_scanner * scanner)194 parse_dom_select_nth_arg(struct dom_select_nth_match *nth, struct dom_scanner *scanner)
195 {
196 struct dom_scanner_token *token = get_next_dom_scanner_token(scanner);
197 int sign = 1;
198 int number = -1;
199
200 if (!token || token->type != '(')
201 return DOM_ERR_SYNTAX;
202
203 token = get_next_dom_scanner_token(scanner);
204 if (!token)
205 return DOM_ERR_SYNTAX;
206
207 switch (token->type) {
208 case CSS_TOKEN_IDENT:
209 if (dom_scanner_token_contains(token, "even")) {
210 nth->step = 2;
211 nth->index = 0;
212
213 } else if (dom_scanner_token_contains(token, "odd")) {
214 nth->step = 2;
215 nth->index = 1;
216
217 } else {
218 /* Check for 'n' ident below. */
219 break;
220 }
221
222 if (skip_css_tokens(scanner, ')'))
223 return DOM_ERR_NONE;
224
225 return DOM_ERR_SYNTAX;
226
227 case '-':
228 sign = -1;
229
230 token = get_next_dom_scanner_token(scanner);
231 if (!token) return DOM_ERR_SYNTAX;
232
233 if (token->type != CSS_TOKEN_IDENT)
234 break;
235
236 if (token->type != CSS_TOKEN_NUMBER)
237 return DOM_ERR_SYNTAX;
238 /* Fall-through */
239
240 case CSS_TOKEN_NUMBER:
241 number = get_scanner_token_number(token);
242 if (number < 0)
243 return DOM_ERR_INVALID_STATE;
244
245 token = get_next_dom_scanner_token(scanner);
246 if (!token) return DOM_ERR_SYNTAX;
247 break;
248
249 default:
250 return DOM_ERR_SYNTAX;
251 }
252
253 /* The rest can contain n+ part */
254 switch (token->type) {
255 case CSS_TOKEN_IDENT:
256 if (!dom_scanner_token_contains(token, "n"))
257 return DOM_ERR_SYNTAX;
258
259 nth->step = sign * number;
260
261 token = get_next_dom_scanner_token(scanner);
262 if (!token) return DOM_ERR_SYNTAX;
263
264 if (token->type != '+')
265 break;
266
267 token = get_next_dom_scanner_token(scanner);
268 if (!token) return DOM_ERR_SYNTAX;
269
270 if (token->type != CSS_TOKEN_NUMBER)
271 break;
272
273 number = get_scanner_token_number(token);
274 if (number < 0)
275 return DOM_ERR_INVALID_STATE;
276
277 nth->index = sign * number;
278 break;
279
280 default:
281 nth->step = 0;
282 nth->index = sign * number;
283 }
284
285 if (skip_css_tokens(scanner, ')'))
286 return DOM_ERR_NONE;
287
288 return DOM_ERR_SYNTAX;
289 }
290
291 /* Parse a pseudo-class or -element with the syntax: ':<ident>'. */
292 static enum dom_exception_code
parse_dom_select_pseudo(struct dom_select * select,struct dom_select_node * sel,struct dom_scanner * scanner)293 parse_dom_select_pseudo(struct dom_select *select, struct dom_select_node *sel,
294 struct dom_scanner *scanner)
295 {
296 struct dom_scanner_token *token = get_dom_scanner_token(scanner);
297 enum dom_select_pseudo pseudo;
298 enum dom_exception_code code;
299
300 /* Skip double :'s in front of some pseudo's (::first-line, etc.) */
301 do {
302 token = get_next_dom_scanner_token(scanner);
303 } while (token && token->type == ':');
304
305 if (!token || token->type != CSS_TOKEN_IDENT)
306 return DOM_ERR_SYNTAX;
307
308 pseudo = get_dom_select_pseudo(token);
309 switch (pseudo) {
310 case DOM_SELECT_PSEUDO_UNKNOWN:
311 return DOM_ERR_NOT_FOUND;
312
313 case DOM_SELECT_PSEUDO_CONTAINS:
314 /* FIXME: E:contains("text") */
315 break;
316
317 case DOM_SELECT_PSEUDO_NTH_CHILD:
318 case DOM_SELECT_PSEUDO_NTH_LAST_CHILD:
319 code = parse_dom_select_nth_arg(&sel->nth_child, scanner);
320 if (code != DOM_ERR_NONE)
321 return code;
322
323 sel->match.element |= DOM_SELECT_ELEMENT_NTH_CHILD;
324 break;
325
326 case DOM_SELECT_PSEUDO_FIRST_CHILD:
327 sel->match.element |= DOM_SELECT_ELEMENT_NTH_CHILD;
328 set_dom_select_nth_match(&sel->nth_child, 0, 1);
329 break;
330
331 case DOM_SELECT_PSEUDO_LAST_CHILD:
332 sel->match.element |= DOM_SELECT_ELEMENT_NTH_CHILD;
333 set_dom_select_nth_match(&sel->nth_child, 0, -1);
334 break;
335
336 case DOM_SELECT_PSEUDO_ONLY_CHILD:
337 sel->match.element |= DOM_SELECT_ELEMENT_NTH_CHILD;
338 set_dom_select_nth_match(&sel->nth_child, 0, 0);
339 break;
340
341 case DOM_SELECT_PSEUDO_NTH_TYPE:
342 case DOM_SELECT_PSEUDO_NTH_LAST_TYPE:
343 code = parse_dom_select_nth_arg(&sel->nth_type, scanner);
344 if (code != DOM_ERR_NONE)
345 return code;
346
347 sel->match.element |= DOM_SELECT_ELEMENT_NTH_TYPE;
348 break;
349
350 case DOM_SELECT_PSEUDO_FIRST_TYPE:
351 sel->match.element |= DOM_SELECT_ELEMENT_NTH_TYPE;
352 set_dom_select_nth_match(&sel->nth_type, 0, 1);
353 break;
354
355 case DOM_SELECT_PSEUDO_LAST_TYPE:
356 sel->match.element |= DOM_SELECT_ELEMENT_NTH_TYPE;
357 set_dom_select_nth_match(&sel->nth_type, 0, -1);
358 break;
359
360 case DOM_SELECT_PSEUDO_ONLY_TYPE:
361 sel->match.element |= DOM_SELECT_ELEMENT_NTH_TYPE;
362 set_dom_select_nth_match(&sel->nth_type, 0, 0);
363 break;
364
365 case DOM_SELECT_PSEUDO_ROOT:
366 sel->match.element |= DOM_SELECT_ELEMENT_ROOT;
367 break;
368
369 case DOM_SELECT_PSEUDO_EMPTY:
370 sel->match.element |= DOM_SELECT_ELEMENT_EMPTY;
371 break;
372
373 default:
374 /* It's a bitflag! */
375 select->pseudo |= pseudo;
376 }
377
378 return DOM_ERR_NONE;
379 }
380
381 /* The element relation flags are mutual exclusive. This macro can be used
382 * for checking if anyflag is set. */
383 #define get_element_relation(sel) \
384 ((sel)->match.element & DOM_SELECT_RELATION_FLAGS)
385
386 /* Parse a CSS3 selector and add selector nodes to the @select struct. */
387 static enum dom_exception_code
parse_dom_select(struct dom_select * select,struct dom_stack * stack,struct dom_string * string)388 parse_dom_select(struct dom_select *select, struct dom_stack *stack,
389 struct dom_string *string)
390 {
391 struct dom_scanner scanner;
392 struct dom_select_node sel;
393
394 init_dom_scanner(&scanner, &dom_css_scanner_info, string, 0, 0);
395
396 memset(&sel, 0, sizeof(sel));
397
398 while (dom_scanner_has_tokens(&scanner)) {
399 struct dom_scanner_token *token = get_dom_scanner_token(&scanner);
400 enum dom_exception_code code;
401 struct dom_select_node *select_node;
402
403 assert(token);
404
405 if (token->type == '{'
406 || token->type == '}'
407 || token->type == ';'
408 || token->type == ',')
409 break;
410
411 /* Examine the selector fragment */
412
413 switch (token->type) {
414 case CSS_TOKEN_IDENT:
415 sel.node.type = DOM_NODE_ELEMENT;
416 copy_dom_string(&sel.node.string, &token->string);
417 if (dom_scanner_token_contains(token, "*"))
418 sel.match.element |= DOM_SELECT_ELEMENT_UNIVERSAL;
419 break;
420
421 case CSS_TOKEN_HASH:
422 case CSS_TOKEN_HEX_COLOR:
423 /* ID fragment */
424 sel.node.type = DOM_NODE_ATTRIBUTE;
425 sel.match.attribute |= DOM_SELECT_ATTRIBUTE_ID;
426 /* Skip the leading '#'. */
427 skip_dom_scanner_token_char(token);
428 break;
429
430 case '[':
431 sel.node.type = DOM_NODE_ATTRIBUTE;
432 code = parse_dom_select_attribute(&sel, &scanner);
433 if (code != DOM_ERR_NONE)
434 return code;
435 break;
436
437 case '.':
438 token = get_next_dom_scanner_token(&scanner);
439 if (!token || token->type != CSS_TOKEN_IDENT)
440 return DOM_ERR_SYNTAX;
441
442 sel.node.type = DOM_NODE_ATTRIBUTE;
443 sel.match.attribute |= DOM_SELECT_ATTRIBUTE_SPACE_LIST;
444 set_dom_string(&sel.node.string, "class", -1);
445 copy_dom_string(&sel.node.data.attribute.value, &token->string);
446 break;
447
448 case ':':
449 code = parse_dom_select_pseudo(select, &sel, &scanner);
450 if (code != DOM_ERR_NONE)
451 return code;
452 break;
453
454 case '>':
455 if (get_element_relation(&sel) != DOM_SELECT_RELATION_DESCENDANT)
456 return DOM_ERR_SYNTAX;
457 sel.match.element |= DOM_SELECT_RELATION_DIRECT_CHILD;
458 break;
459
460 case '+':
461 if (get_element_relation(&sel) != DOM_SELECT_RELATION_DESCENDANT)
462 return DOM_ERR_SYNTAX;
463 sel.match.element |= DOM_SELECT_RELATION_DIRECT_ADJACENT;
464 break;
465
466 case '~':
467 if (get_element_relation(&sel) != DOM_SELECT_RELATION_DESCENDANT)
468 return DOM_ERR_SYNTAX;
469 sel.match.element |= DOM_SELECT_RELATION_INDIRECT_ADJACENT;
470 break;
471
472 default:
473 return DOM_ERR_SYNTAX;
474 }
475
476 skip_dom_scanner_token(&scanner);
477
478 if (sel.node.type == DOM_NODE_UNKNOWN)
479 continue;
480
481 select_node = mem_calloc(1, sizeof(*select_node));
482 copy_struct(select_node, &sel);
483
484 if (!dom_stack_is_empty(stack)) {
485 struct dom_node *node = &select_node->node;
486 struct dom_node *parent = get_dom_stack_top(stack)->node;
487 struct dom_node_list **list = get_dom_node_list(parent, node);
488 int sort = (node->type == DOM_NODE_ATTRIBUTE);
489 int index;
490
491 assertm(list, "Adding node to bad parent [%d -> %d]",
492 node->type, parent->type);
493
494 index = *list && (*list)->size > 0 && sort
495 ? get_dom_node_map_index(*list, node) : -1;
496
497 if (!add_to_dom_node_list(list, node, index)) {
498 done_dom_node(node);
499 return DOM_ERR_INVALID_STATE;
500 }
501
502 node->parent = parent;
503
504 } else {
505 assert(!select->selector);
506 select->selector = select_node;
507 }
508
509 if (!push_dom_node(stack, &select_node->node))
510 return DOM_ERR_INVALID_STATE;
511
512 if (select_node->node.type != DOM_NODE_ELEMENT)
513 pop_dom_node(stack);
514
515 memset(&sel, 0, sizeof(sel));
516 }
517
518 if (select->selector)
519 return DOM_ERR_NONE;
520
521 return DOM_ERR_INVALID_STATE;
522 }
523
524 /* Basically this is just a wrapper for parse_dom_select() to ease error
525 * handling. */
526 struct dom_select *
init_dom_select(enum dom_select_syntax syntax,struct dom_string * string)527 init_dom_select(enum dom_select_syntax syntax, struct dom_string *string)
528 {
529 struct dom_select *select = mem_calloc(1, sizeof(select));
530 struct dom_stack stack;
531 enum dom_exception_code code;
532
533 init_dom_stack(&stack, DOM_STACK_KEEP_NODES);
534 add_dom_stack_tracer(&stack, "init-select: ");
535
536 code = parse_dom_select(select, &stack, string);
537 done_dom_stack(&stack);
538
539 if (code == DOM_ERR_NONE)
540 return select;
541
542 done_dom_select(select);
543
544 return NULL;
545 }
546
547 void
done_dom_select(struct dom_select * select)548 done_dom_select(struct dom_select *select)
549 {
550 if (select->selector) {
551 struct dom_node *node = (struct dom_node *) select->selector;
552
553 /* This will recursively free all children select nodes. */
554 done_dom_node(node);
555 }
556
557 mem_free(select);
558 }
559
560
561 /* DOM node selection: */
562
563 /* This struct stores data related to the 'application' of a DOM selector
564 * on a DOM tree or stream. */
565 struct dom_select_data {
566 /* The selector matching stack. The various selector nodes are pushed
567 * onto this stack as they are matched (and later popped when they are
568 * no longer 'reachable', that is, has been popped from the DOM tree or
569 * stream. This way the selector can match each selector node multiple
570 * times and the selection is a simple matter of matching the current
571 * node against each state on this stack. */
572 struct dom_stack stack;
573
574 /* Reference to the selector. */
575 struct dom_select *select;
576
577 /* The list of nodes who have been matched / selected. */
578 struct dom_node_list *list;
579 };
580
581 /* This state struct is used for the select data stack and holds info about the
582 * node that was matched. */
583 struct dom_select_state {
584 /* The matched node. This is always an element node. */
585 struct dom_node *node;
586 };
587
588 /* Get a child node of a given type. By design, a selector node can
589 * only have one child per type of node. */
590 static struct dom_select_node *
get_child_dom_select_node(struct dom_select_node * selector,enum dom_node_type type)591 get_child_dom_select_node(struct dom_select_node *selector,
592 enum dom_node_type type)
593 {
594 struct dom_node_list *children = selector->node.data.element.children;
595 struct dom_node *node;
596 int index;
597
598 if (!children)
599 return NULL;
600
601 foreach_dom_node (children, node, index) {
602 if (node->type == type)
603 return (struct dom_select_node *) node;
604 }
605
606 return NULL;
607 }
608
609
610 #define has_attribute_match(selector, name) \
611 ((selector)->match.attribute & (name))
612
613 static int
match_attribute_value(struct dom_select_node * selector,struct dom_node * node)614 match_attribute_value(struct dom_select_node *selector, struct dom_node *node)
615 {
616 struct dom_string str;
617 struct dom_string *selvalue = &selector->node.data.attribute.value;
618 struct dom_string *value = &node->data.attribute.value;
619 unsigned char separator;
620 int do_compare;
621
622 assert(selvalue->length);
623
624 /* The attribute selector value should atleast be contained in the
625 * attribute value. */
626 if (value->length < selvalue->length)
627 return 0;
628
629 /* The following three matching methods requires the selector value to
630 * match a substring at a well-defined offset. */
631
632 if (has_attribute_match(selector, DOM_SELECT_ATTRIBUTE_EXACT)) {
633 return !dom_string_casecmp(value, selvalue);
634 }
635
636 if (has_attribute_match(selector, DOM_SELECT_ATTRIBUTE_BEGIN)) {
637 set_dom_string(&str, value->string, selvalue->length);
638
639 return !dom_string_casecmp(&str, selvalue);
640 }
641
642 if (has_attribute_match(selector, DOM_SELECT_ATTRIBUTE_END)) {
643 size_t offset = value->length - selvalue->length;
644
645 set_dom_string(&str, value->string + offset, selvalue->length);
646
647 return !dom_string_casecmp(&str, selvalue);
648 }
649
650 /* The 3 following matching methods requires the selector value to be a
651 * substring of the value enclosed in a specific separator (with the
652 * begining and ending of the attribute value both being valid
653 * separators). */
654
655 set_dom_string(&str, value->string, value->length);
656
657 if (has_attribute_match(selector, DOM_SELECT_ATTRIBUTE_HYPHEN_LIST)) {
658 separator = '-';
659
660 } else if (has_attribute_match(selector, DOM_SELECT_ATTRIBUTE_CONTAINS)) {
661 separator = '\0';
662
663 } if (has_attribute_match(selector, DOM_SELECT_ATTRIBUTE_SPACE_LIST)) {
664 separator = ' ';
665
666 } else {
667 INTERNAL("No attribute selector matching method defined");
668 return 0;
669 }
670
671 do_compare = 1;
672
673 do {
674 if (do_compare
675 && !dom_string_ncasecmp(&str, selvalue, selvalue->length)) {
676 /* "Contains" matches no matter what comes after. */
677 if (str.length == selvalue->length)
678 return 1;
679
680 switch (separator) {
681 case '\0':
682 /* "Contains" matches no matter what comes after. */
683 return 1;
684
685 case '-':
686 if (str.string[str.length] == separator)
687 return 1;
688 break;
689
690 default:
691 if (isspace(str.string[str.length]))
692 return 1;
693 }
694 }
695
696 switch (separator) {
697 case '\0':
698 do_compare = 1;
699 break;
700
701 case '-':
702 do_compare = (str.string[0] == '-');
703 break;
704
705 default:
706 do_compare = isspace(str.string[0]);
707 }
708
709 str.length--, str.string++;
710
711 } while (str.length >= selvalue->length);
712
713 return 0;
714 }
715
716 /* Match the attribute of an element @node against attribute selector nodes
717 * of a given @base. */
718 static int
match_attribute_selectors(struct dom_select_node * base,struct dom_node * node)719 match_attribute_selectors(struct dom_select_node *base, struct dom_node *node)
720 {
721 struct dom_node_list *attrs = node->data.element.map;
722 struct dom_node_list *selnodes = base->node.data.element.map;
723 struct dom_node *selnode;
724 size_t index;
725
726 assert(base->node.type == DOM_NODE_ELEMENT
727 && node->type == DOM_NODE_ELEMENT);
728
729 /* If there are no attribute selectors that is a clean match ... */
730 if (!selnodes)
731 return 1;
732
733 /* ... the opposite goes if there are no attributes to match. */
734 if (!attrs)
735 return 0;
736
737 foreach_dom_node (selnodes, selnode, index) {
738 struct dom_select_node *selector = (void *) selnode;
739 struct dom_node *attr;
740
741 if (has_attribute_match(selector, DOM_SELECT_ATTRIBUTE_ID)) {
742 size_t idindex;
743
744 attr = NULL;
745 foreach_dom_node (attrs, attr, idindex) {
746 if (attr->data.attribute.id)
747 break;
748 }
749
750 if (!is_dom_node_list_member(attrs, idindex))
751 attr = NULL;
752
753 } else {
754 attr = get_dom_node_map_entry(attrs, DOM_NODE_ATTRIBUTE,
755 selnode->data.attribute.type,
756 &selnode->string);
757 }
758
759 if (!attr)
760 return 0;
761
762 if (has_attribute_match(selector, DOM_SELECT_ATTRIBUTE_ANY))
763 continue;
764
765 if (!match_attribute_value(selector, attr))
766 return 0;
767 }
768
769 return 1;
770 }
771
772 /* XXX: Assume the first context is the one! */
773 #define get_dom_select_state(stack, state) \
774 ((struct dom_select_state *) get_dom_stack_state_data((stack)->contexts[0], state))
775
776 static int
match_element_relation(struct dom_select_node * selector,struct dom_node * node,struct dom_stack * stack)777 match_element_relation(struct dom_select_node *selector, struct dom_node *node,
778 struct dom_stack *stack)
779 {
780 struct dom_stack_state *state;
781 enum dom_select_element_match relation = get_element_relation(selector);
782 int i, index;
783
784 assert(relation);
785
786 /* When matching any relation there must be a parent, either so that
787 * the node is a descendant or it is possible to check for siblings. */
788 if (!node->parent)
789 return 0;
790
791 if (relation != DOM_SELECT_RELATION_DIRECT_CHILD) {
792 /* When looking for preceeding siblings of the current node,
793 * the current node cannot be first or not in the list (-1). */
794 index = get_dom_node_list_index(node->parent, node);
795 if (index < 1)
796 return 0;
797 } else {
798 index = -1;
799 }
800
801 /* Find states which hold the parent of the current selector
802 * and check if the parent selector's node is the parent of the
803 * current node. */
804 foreachback_dom_stack_state(stack, state, i) {
805 struct dom_node *selnode;
806
807 /* We are only interested in states which hold the parent of
808 * the current selector. */
809 if (state->node != selector->node.parent)
810 continue;
811
812 selnode = get_dom_select_state(stack, state)->node;
813
814 if (relation == DOM_SELECT_RELATION_DIRECT_CHILD) {
815 /* Check if the parent selector's node is the parent of
816 * the current node. */
817 if (selnode == node->parent)
818 return 1;
819
820 } else {
821 int sibindex;
822
823 /* Check if they are siblings. */
824 if (selnode->parent != node->parent)
825 continue;
826
827 sibindex = get_dom_node_list_index(node->parent, selnode);
828
829 if (relation == DOM_SELECT_RELATION_DIRECT_ADJACENT) {
830 /* Check if the sibling node immediately
831 * preceeds the current node. */
832 if (sibindex + 1 == index)
833 return 1;
834
835 } else { /* DOM_SELECT_RELATION_INDIRECT_ADJACENT */
836 /* Check if the sibling node preceeds the
837 * current node. */
838 if (sibindex < index)
839 return 1;
840 }
841 }
842 }
843
844 return 0;
845 }
846
847 #define has_element_match(selector, name) \
848 ((selector)->match.element & (name))
849
850 static int
match_element_selector(struct dom_select_node * selector,struct dom_node * node,struct dom_stack * stack)851 match_element_selector(struct dom_select_node *selector, struct dom_node *node,
852 struct dom_stack *stack)
853 {
854 assert(node && node->type == DOM_NODE_ELEMENT);
855
856 if (!has_element_match(selector, DOM_SELECT_ELEMENT_UNIVERSAL)
857 && dom_node_casecmp(&selector->node, node))
858 return 0;
859
860 if (get_element_relation(selector) != DOM_SELECT_RELATION_DESCENDANT
861 && !match_element_relation(selector, node, stack))
862 return 0;
863
864 /* Root nodes either have no parents or are the single child of the
865 * document node. */
866 if (has_element_match(selector, DOM_SELECT_ELEMENT_ROOT)
867 && node->parent) {
868 if (node->parent->type != DOM_NODE_DOCUMENT
869 || node->parent->data.document.children->size > 1)
870 return 0;
871 }
872
873 if (has_element_match(selector, DOM_SELECT_ELEMENT_EMPTY)
874 && node->data.element.children
875 && node->data.element.children->size > 0)
876 return 0;
877
878 if (has_element_match(selector, DOM_SELECT_ELEMENT_NTH_CHILD)) {
879 /* FIXME */
880 return 0;
881 }
882
883 if (has_element_match(selector, DOM_SELECT_ELEMENT_NTH_TYPE)) {
884 /* FIXME */
885 return 0;
886 }
887
888 /* Check attribute selectors. */
889 if (selector->node.data.element.map
890 && !match_attribute_selectors(selector, node))
891 return 0;
892
893 return 1;
894 }
895
896
897 #define get_dom_select_data(stack) ((stack)->current->data)
898
899 /* Matches an element node being visited against the current selector stack. */
900 static void
dom_select_push_element(struct dom_stack * stack,struct dom_node * node,void * data)901 dom_select_push_element(struct dom_stack *stack, struct dom_node *node, void *data)
902 {
903 struct dom_select_data *select_data = get_dom_select_data(stack);
904 struct dom_stack_state *state;
905 int pos;
906
907 foreach_dom_stack_state(&select_data->stack, state, pos) {
908 struct dom_select_node *selector = (void *) state->node;
909
910 /* FIXME: Since the same dom_select_node can be multiple times
911 * on the select_data->stack, cache what select nodes was
912 * matches so that it is only checked once. */
913
914 if (!match_element_selector(selector, node, &select_data->stack))
915 continue;
916
917 WDBG("Matched element: %.*s.", node->string.length, node->string.string);
918 /* This node is matched, so push the next selector node to
919 * match on the stack. */
920 selector = get_child_dom_select_node(selector, DOM_NODE_ELEMENT);
921 if (selector)
922 push_dom_node(&select_data->stack, &selector->node);
923 }
924 }
925
926 /* Ensures that nodes, no longer 'reachable' on the stack do not have any
927 * states associated with them on the select data stack. */
928 static void
dom_select_pop_element(struct dom_stack * stack,struct dom_node * node,void * data)929 dom_select_pop_element(struct dom_stack *stack, struct dom_node *node, void *data)
930 {
931 struct dom_select_data *select_data = get_dom_select_data(stack);
932 struct dom_stack_state *state;
933 int index;
934
935 stack = &select_data->stack;
936
937 foreachback_dom_stack_state (stack, state, index) {
938 struct dom_select_state *select_state;
939
940 select_state = get_dom_select_state(stack, state);
941 if (select_state->node == node) {
942 pop_dom_state(stack, state);
943 WDBG("Remove element.");
944 continue;
945 }
946 }
947 }
948
949 /* For now this is only for matching the ':contains(<string>)' pseudo-class.
950 * Any node which can contain text and thus characters from the given <string>
951 * are handled in this common callback. */
952 static void
dom_select_push_text(struct dom_stack * stack,struct dom_node * node,void * data)953 dom_select_push_text(struct dom_stack *stack, struct dom_node *node, void *data)
954 {
955 struct dom_select_data *select_data = get_dom_select_data(stack);
956 struct dom_stack_state *state = get_dom_stack_top(&select_data->stack);
957 struct dom_select_node *selector = (void *) state->node;
958 struct dom_select_node *text_sel = get_child_dom_select_node(selector, DOM_NODE_TEXT);
959 struct dom_string *text;
960
961 WDBG("Text node: %d chars", node->string.length);
962
963 if (!text_sel)
964 return;
965
966 text = &text_sel->node.string;
967
968 switch (node->type) {
969 case DOM_NODE_TEXT:
970 case DOM_NODE_CDATA_SECTION:
971 case DOM_NODE_ENTITY_REFERENCE:
972 break;
973 default:
974 ERROR("Unhandled type");
975 }
976 }
977
978 /* Context info for interacting with the DOM tree or stream stack. */
979 static struct dom_stack_context_info dom_select_context_info = {
980 /* Object size: */ 0,
981 /* Push: */
982 {
983 /* */ NULL,
984 /* DOM_NODE_ELEMENT */ dom_select_push_element,
985 /* DOM_NODE_ATTRIBUTE */ NULL,
986 /* DOM_NODE_TEXT */ dom_select_push_text,
987 /* DOM_NODE_CDATA_SECTION */ dom_select_push_text,
988 /* DOM_NODE_ENTITY_REFERENCE */ dom_select_push_text,
989 /* DOM_NODE_ENTITY */ NULL,
990 /* DOM_NODE_PROC_INSTRUCTION */ NULL,
991 /* DOM_NODE_COMMENT */ NULL,
992 /* DOM_NODE_DOCUMENT */ NULL,
993 /* DOM_NODE_DOCUMENT_TYPE */ NULL,
994 /* DOM_NODE_DOCUMENT_FRAGMENT */ NULL,
995 /* DOM_NODE_NOTATION */ NULL,
996 },
997 /* Pop: */
998 {
999 /* */ NULL,
1000 /* DOM_NODE_ELEMENT */ dom_select_pop_element,
1001 /* DOM_NODE_ATTRIBUTE */ NULL,
1002 /* DOM_NODE_TEXT */ NULL,
1003 /* DOM_NODE_CDATA_SECTION */ NULL,
1004 /* DOM_NODE_ENTITY_REFERENCE */ NULL,
1005 /* DOM_NODE_ENTITY */ NULL,
1006 /* DOM_NODE_PROC_INSTRUCTION */ NULL,
1007 /* DOM_NODE_COMMENT */ NULL,
1008 /* DOM_NODE_DOCUMENT */ NULL,
1009 /* DOM_NODE_DOCUMENT_TYPE */ NULL,
1010 /* DOM_NODE_DOCUMENT_FRAGMENT */ NULL,
1011 /* DOM_NODE_NOTATION */ NULL,
1012 }
1013 };
1014
1015 /* Context info related to the private select data stack of matched nodes. */
1016 static struct dom_stack_context_info dom_select_data_context_info = {
1017 /* Object size: */ sizeof(struct dom_select_state),
1018 /* Push: */
1019 {
1020 /* */ NULL,
1021 /* DOM_NODE_ELEMENT */ NULL,
1022 /* DOM_NODE_ATTRIBUTE */ NULL,
1023 /* DOM_NODE_TEXT */ NULL,
1024 /* DOM_NODE_CDATA_SECTION */ NULL,
1025 /* DOM_NODE_ENTITY_REFERENCE */ NULL,
1026 /* DOM_NODE_ENTITY */ NULL,
1027 /* DOM_NODE_PROC_INSTRUCTION */ NULL,
1028 /* DOM_NODE_COMMENT */ NULL,
1029 /* DOM_NODE_DOCUMENT */ NULL,
1030 /* DOM_NODE_DOCUMENT_TYPE */ NULL,
1031 /* DOM_NODE_DOCUMENT_FRAGMENT */ NULL,
1032 /* DOM_NODE_NOTATION */ NULL,
1033 },
1034 /* Pop: */
1035 {
1036 /* */ NULL,
1037 /* DOM_NODE_ELEMENT */ NULL,
1038 /* DOM_NODE_ATTRIBUTE */ NULL,
1039 /* DOM_NODE_TEXT */ NULL,
1040 /* DOM_NODE_CDATA_SECTION */ NULL,
1041 /* DOM_NODE_ENTITY_REFERENCE */ NULL,
1042 /* DOM_NODE_ENTITY */ NULL,
1043 /* DOM_NODE_PROC_INSTRUCTION */ NULL,
1044 /* DOM_NODE_COMMENT */ NULL,
1045 /* DOM_NODE_DOCUMENT */ NULL,
1046 /* DOM_NODE_DOCUMENT_TYPE */ NULL,
1047 /* DOM_NODE_DOCUMENT_FRAGMENT */ NULL,
1048 /* DOM_NODE_NOTATION */ NULL,
1049 }
1050 };
1051
1052
1053 struct dom_node_list *
select_dom_nodes(struct dom_select * select,struct dom_node * root)1054 select_dom_nodes(struct dom_select *select, struct dom_node *root)
1055 {
1056 struct dom_select_data select_data;
1057 struct dom_stack stack;
1058
1059 memset(&select_data, 0, sizeof(select_data));
1060
1061 select_data.select = select;;
1062
1063 init_dom_stack(&stack, DOM_STACK_KEEP_NODES);
1064 add_dom_stack_context(&stack, &select_data,
1065 &dom_select_context_info);
1066 add_dom_stack_tracer(&stack, "select-tree: ");
1067
1068 init_dom_stack(&select_data.stack, DOM_STACK_KEEP_NODES);
1069 add_dom_stack_context(&select_data.stack, &select_data,
1070 &dom_select_data_context_info);
1071 add_dom_stack_tracer(&select_data.stack, "select-match: ");
1072
1073 if (push_dom_node(&select_data.stack, &select->selector->node)) {
1074 get_dom_stack_top(&select_data.stack)->immutable = 1;
1075 walk_dom_nodes(&stack, root);
1076 }
1077
1078 done_dom_stack(&select_data.stack);
1079 done_dom_stack(&stack);
1080
1081 return select_data.list;
1082 }
1083