1 /* DOM node selection */
2 
3 #ifdef HAVE_CONFIG_H
4 #include "config.h"
5 #endif
6 
7 #include "elinks.h"
8 
9 #include "dom/css/scanner.h"
10 #include "dom/dom.h"
11 #include "dom/node.h"
12 #include "dom/scanner.h"
13 #include "dom/select.h"
14 #include "dom/stack.h"
15 #include "dom/string.h"
16 #include "util/memory.h"
17 
18 
19 /* Selector parsing: */
20 
21 /* Maps the content of a scanner token to a pseudo-class or -element ID. */
22 static enum dom_select_pseudo
get_dom_select_pseudo(struct dom_scanner_token * token)23 get_dom_select_pseudo(struct dom_scanner_token *token)
24 {
25 	static struct {
26 		struct dom_string string;
27 		enum dom_select_pseudo pseudo;
28 	} pseudo_info[] = {
29 
30 #define INIT_DOM_SELECT_PSEUDO_STRING(str, type) \
31 	{ INIT_DOM_STRING(str, -1), DOM_SELECT_PSEUDO_##type }
32 
33 	INIT_DOM_SELECT_PSEUDO_STRING("first-line",	FIRST_LINE),
34 	INIT_DOM_SELECT_PSEUDO_STRING("first-letter",	FIRST_LETTER),
35 	INIT_DOM_SELECT_PSEUDO_STRING("selection",	SELECTION),
36 	INIT_DOM_SELECT_PSEUDO_STRING("after",		AFTER),
37 	INIT_DOM_SELECT_PSEUDO_STRING("before",		BEFORE),
38 	INIT_DOM_SELECT_PSEUDO_STRING("link",		LINK),
39 	INIT_DOM_SELECT_PSEUDO_STRING("visited",	VISITED),
40 	INIT_DOM_SELECT_PSEUDO_STRING("active",		ACTIVE),
41 	INIT_DOM_SELECT_PSEUDO_STRING("hover",		HOVER),
42 	INIT_DOM_SELECT_PSEUDO_STRING("focus",		FOCUS),
43 	INIT_DOM_SELECT_PSEUDO_STRING("target",		TARGET),
44 	INIT_DOM_SELECT_PSEUDO_STRING("enabled",	ENABLED),
45 	INIT_DOM_SELECT_PSEUDO_STRING("disabled",	DISABLED),
46 	INIT_DOM_SELECT_PSEUDO_STRING("checked",	CHECKED),
47 	INIT_DOM_SELECT_PSEUDO_STRING("indeterminate",	INDETERMINATE),
48 
49 	/* Content pseudo-classes: */
50 
51 	INIT_DOM_SELECT_PSEUDO_STRING("contains",	CONTAINS),
52 
53 	/* Structural pseudo-classes: */
54 
55 	INIT_DOM_SELECT_PSEUDO_STRING("nth-child",	NTH_CHILD),
56 	INIT_DOM_SELECT_PSEUDO_STRING("nth-last-child",	NTH_LAST_CHILD),
57 	INIT_DOM_SELECT_PSEUDO_STRING("first-child",	FIRST_CHILD),
58 	INIT_DOM_SELECT_PSEUDO_STRING("last-child",	LAST_CHILD),
59 	INIT_DOM_SELECT_PSEUDO_STRING("only-child",	ONLY_CHILD),
60 
61 	INIT_DOM_SELECT_PSEUDO_STRING("nth-of-type",	NTH_TYPE),
62 	INIT_DOM_SELECT_PSEUDO_STRING("nth-last-of-type",NTH_LAST_TYPE),
63 	INIT_DOM_SELECT_PSEUDO_STRING("first-of-type",	FIRST_TYPE),
64 	INIT_DOM_SELECT_PSEUDO_STRING("last-of-type",	LAST_TYPE),
65 	INIT_DOM_SELECT_PSEUDO_STRING("only-of-type",	ONLY_TYPE),
66 
67 	INIT_DOM_SELECT_PSEUDO_STRING("root",		ROOT),
68 	INIT_DOM_SELECT_PSEUDO_STRING("empty",		EMPTY),
69 
70 #undef INIT_DOM_SELECT_PSEUDO_STRING
71 
72 	};
73 	int i;
74 
75 	for (i = 0; i < sizeof_array(pseudo_info); i++)
76 		if (!dom_string_casecmp(&pseudo_info[i].string, &token->string))
77 			return pseudo_info[i].pseudo;
78 
79 	return DOM_SELECT_PSEUDO_UNKNOWN;
80 }
81 
82 /* Parses attribute selector. For example '[foo="bar"]' or '[foo|="boo"]'. */
83 static enum dom_exception_code
parse_dom_select_attribute(struct dom_select_node * sel,struct dom_scanner * scanner)84 parse_dom_select_attribute(struct dom_select_node *sel, struct dom_scanner *scanner)
85 {
86 	struct dom_scanner_token *token = get_dom_scanner_token(scanner);
87 
88 	/* Get '['. */
89 
90 	if (token->type != '[')
91 		return DOM_ERR_INVALID_STATE;
92 
93 	/* Get the attribute name. */
94 
95 	token = get_next_dom_scanner_token(scanner);
96 	if (!token || token->type != CSS_TOKEN_IDENT)
97 		return DOM_ERR_SYNTAX;
98 
99 	copy_dom_string(&sel->node.string, &token->string);
100 
101 	/* Get the optional '=' combo or ending ']'. */
102 
103 	token = get_next_dom_scanner_token(scanner);
104 	if (!token) return DOM_ERR_SYNTAX;
105 
106 	switch (token->type) {
107 	case ']':
108 		sel->match.attribute |= DOM_SELECT_ATTRIBUTE_ANY;
109 		return DOM_ERR_NONE;
110 
111 	case CSS_TOKEN_SELECT_SPACE_LIST:
112 		sel->match.attribute |= DOM_SELECT_ATTRIBUTE_SPACE_LIST;
113 		break;
114 
115 	case CSS_TOKEN_SELECT_HYPHEN_LIST:
116 		sel->match.attribute |= DOM_SELECT_ATTRIBUTE_HYPHEN_LIST;
117 		break;
118 
119 	case CSS_TOKEN_SELECT_BEGIN:
120 		sel->match.attribute |= DOM_SELECT_ATTRIBUTE_BEGIN;
121 		break;
122 
123 	case CSS_TOKEN_SELECT_END:
124 		sel->match.attribute |= DOM_SELECT_ATTRIBUTE_END;
125 		break;
126 
127 	case CSS_TOKEN_SELECT_CONTAINS:
128 		sel->match.attribute |= DOM_SELECT_ATTRIBUTE_CONTAINS;
129 		break;
130 
131 	default:
132 		return DOM_ERR_SYNTAX;
133 	}
134 
135 	/* Get the required value. */
136 
137 	token = get_next_dom_scanner_token(scanner);
138 	if (!token) return DOM_ERR_SYNTAX;
139 
140 	switch (token->type) {
141 	case CSS_TOKEN_IDENT:
142 	case CSS_TOKEN_STRING:
143 		copy_dom_string(&sel->node.data.attribute.value, &token->string);
144 		break;
145 
146 	default:
147 		return DOM_ERR_SYNTAX;
148 	}
149 
150 	/* Get the ending ']'. */
151 
152 	token = get_next_dom_scanner_token(scanner);
153 	if (token && token->type == ']')
154 		return DOM_ERR_NONE;
155 
156 	return DOM_ERR_SYNTAX;
157 }
158 
159 /* Parse:
160  *
161  *   0n+1 / 1
162  *   2n+0 / 2n
163  *   2n+1
164  *  -0n+2
165  *  -0n+1 / -1
166  *   1n+0 / n+0 / n
167  *   0n+0
168  */
169 
170 /* FIXME: Move somewhere else? dom/scanner.h? */
171 static size_t
get_scanner_token_number(struct dom_scanner_token * token)172 get_scanner_token_number(struct dom_scanner_token *token)
173 {
174 	size_t number = 0;
175 
176 	while (token->string.length > 0 && isdigit(token->string.string[0])) {
177 		size_t old_number = number;
178 
179 		number *= 10;
180 
181 		/* -E2BIG */
182 		if (old_number > number)
183 			return -1;
184 
185 		number += token->string.string[0] - '0';
186 		skip_dom_scanner_token_char(token);
187 	}
188 
189 	return number;
190 }
191 
192 /* Parses the '(...)' part of ':nth-of-type(...)' and ':nth-child(...)'. */
193 static enum dom_exception_code
parse_dom_select_nth_arg(struct dom_select_nth_match * nth,struct dom_scanner * scanner)194 parse_dom_select_nth_arg(struct dom_select_nth_match *nth, struct dom_scanner *scanner)
195 {
196 	struct dom_scanner_token *token = get_next_dom_scanner_token(scanner);
197 	int sign = 1;
198 	int number = -1;
199 
200 	if (!token || token->type != '(')
201 		return DOM_ERR_SYNTAX;
202 
203 	token = get_next_dom_scanner_token(scanner);
204 	if (!token)
205 		return DOM_ERR_SYNTAX;
206 
207 	switch (token->type) {
208 	case CSS_TOKEN_IDENT:
209 		if (dom_scanner_token_contains(token, "even")) {
210 			nth->step = 2;
211 			nth->index = 0;
212 
213 		} else if (dom_scanner_token_contains(token, "odd")) {
214 			nth->step = 2;
215 			nth->index = 1;
216 
217 		} else {
218 			/* Check for 'n' ident below. */
219 			break;
220 		}
221 
222 		if (skip_css_tokens(scanner, ')'))
223 			return DOM_ERR_NONE;
224 
225 		return DOM_ERR_SYNTAX;
226 
227 	case '-':
228 		sign = -1;
229 
230 		token = get_next_dom_scanner_token(scanner);
231 		if (!token) return DOM_ERR_SYNTAX;
232 
233 		if (token->type != CSS_TOKEN_IDENT)
234 			break;
235 
236 		if (token->type != CSS_TOKEN_NUMBER)
237 			return DOM_ERR_SYNTAX;
238 		/* Fall-through */
239 
240 	case CSS_TOKEN_NUMBER:
241 		number = get_scanner_token_number(token);
242 		if (number < 0)
243 			return DOM_ERR_INVALID_STATE;
244 
245 		token = get_next_dom_scanner_token(scanner);
246 		if (!token) return DOM_ERR_SYNTAX;
247 		break;
248 
249 	default:
250 		return DOM_ERR_SYNTAX;
251 	}
252 
253 	/* The rest can contain n+ part */
254 	switch (token->type) {
255 	case CSS_TOKEN_IDENT:
256 		if (!dom_scanner_token_contains(token, "n"))
257 			return DOM_ERR_SYNTAX;
258 
259 		nth->step = sign * number;
260 
261 		token = get_next_dom_scanner_token(scanner);
262 		if (!token) return DOM_ERR_SYNTAX;
263 
264 		if (token->type != '+')
265 			break;
266 
267 		token = get_next_dom_scanner_token(scanner);
268 		if (!token) return DOM_ERR_SYNTAX;
269 
270 		if (token->type != CSS_TOKEN_NUMBER)
271 			break;
272 
273 		number = get_scanner_token_number(token);
274 		if (number < 0)
275 			return DOM_ERR_INVALID_STATE;
276 
277 		nth->index = sign * number;
278 		break;
279 
280 	default:
281 		nth->step  = 0;
282 		nth->index = sign * number;
283 	}
284 
285 	if (skip_css_tokens(scanner, ')'))
286 		return DOM_ERR_NONE;
287 
288 	return DOM_ERR_SYNTAX;
289 }
290 
291 /* Parse a pseudo-class or -element with the syntax: ':<ident>'. */
292 static enum dom_exception_code
parse_dom_select_pseudo(struct dom_select * select,struct dom_select_node * sel,struct dom_scanner * scanner)293 parse_dom_select_pseudo(struct dom_select *select, struct dom_select_node *sel,
294 			struct dom_scanner *scanner)
295 {
296 	struct dom_scanner_token *token = get_dom_scanner_token(scanner);
297 	enum dom_select_pseudo pseudo;
298 	enum dom_exception_code code;
299 
300 	/* Skip double :'s in front of some pseudo's (::first-line, etc.) */
301 	do {
302 		token = get_next_dom_scanner_token(scanner);
303 	} while (token && token->type == ':');
304 
305 	if (!token || token->type != CSS_TOKEN_IDENT)
306 		return DOM_ERR_SYNTAX;
307 
308 	pseudo = get_dom_select_pseudo(token);
309 	switch (pseudo) {
310 	case DOM_SELECT_PSEUDO_UNKNOWN:
311 		return DOM_ERR_NOT_FOUND;
312 
313 	case DOM_SELECT_PSEUDO_CONTAINS:
314 		/* FIXME: E:contains("text") */
315 		break;
316 
317 	case DOM_SELECT_PSEUDO_NTH_CHILD:
318 	case DOM_SELECT_PSEUDO_NTH_LAST_CHILD:
319 		code = parse_dom_select_nth_arg(&sel->nth_child, scanner);
320 		if (code != DOM_ERR_NONE)
321 			return code;
322 
323 		sel->match.element |= DOM_SELECT_ELEMENT_NTH_CHILD;
324 		break;
325 
326 	case DOM_SELECT_PSEUDO_FIRST_CHILD:
327 		sel->match.element |= DOM_SELECT_ELEMENT_NTH_CHILD;
328 		set_dom_select_nth_match(&sel->nth_child, 0, 1);
329 		break;
330 
331 	case DOM_SELECT_PSEUDO_LAST_CHILD:
332 		sel->match.element |= DOM_SELECT_ELEMENT_NTH_CHILD;
333 		set_dom_select_nth_match(&sel->nth_child, 0, -1);
334 		break;
335 
336 	case DOM_SELECT_PSEUDO_ONLY_CHILD:
337 		sel->match.element |= DOM_SELECT_ELEMENT_NTH_CHILD;
338 		set_dom_select_nth_match(&sel->nth_child, 0, 0);
339 		break;
340 
341 	case DOM_SELECT_PSEUDO_NTH_TYPE:
342 	case DOM_SELECT_PSEUDO_NTH_LAST_TYPE:
343 		code = parse_dom_select_nth_arg(&sel->nth_type, scanner);
344 		if (code != DOM_ERR_NONE)
345 			return code;
346 
347 		sel->match.element |= DOM_SELECT_ELEMENT_NTH_TYPE;
348 		break;
349 
350 	case DOM_SELECT_PSEUDO_FIRST_TYPE:
351 		sel->match.element |= DOM_SELECT_ELEMENT_NTH_TYPE;
352 		set_dom_select_nth_match(&sel->nth_type, 0, 1);
353 		break;
354 
355 	case DOM_SELECT_PSEUDO_LAST_TYPE:
356 		sel->match.element |= DOM_SELECT_ELEMENT_NTH_TYPE;
357 		set_dom_select_nth_match(&sel->nth_type, 0, -1);
358 		break;
359 
360 	case DOM_SELECT_PSEUDO_ONLY_TYPE:
361 		sel->match.element |= DOM_SELECT_ELEMENT_NTH_TYPE;
362 		set_dom_select_nth_match(&sel->nth_type, 0, 0);
363 		break;
364 
365 	case DOM_SELECT_PSEUDO_ROOT:
366 		sel->match.element |= DOM_SELECT_ELEMENT_ROOT;
367 		break;
368 
369 	case DOM_SELECT_PSEUDO_EMPTY:
370 		sel->match.element |= DOM_SELECT_ELEMENT_EMPTY;
371 		break;
372 
373 	default:
374 		/* It's a bitflag! */
375 		select->pseudo |= pseudo;
376 	}
377 
378 	return DOM_ERR_NONE;
379 }
380 
381 /* The element relation flags are mutual exclusive. This macro can be used
382  * for checking if anyflag is set. */
383 #define get_element_relation(sel) \
384 	((sel)->match.element & DOM_SELECT_RELATION_FLAGS)
385 
386 /* Parse a CSS3 selector and add selector nodes to the @select struct. */
387 static enum dom_exception_code
parse_dom_select(struct dom_select * select,struct dom_stack * stack,struct dom_string * string)388 parse_dom_select(struct dom_select *select, struct dom_stack *stack,
389 		 struct dom_string *string)
390 {
391 	struct dom_scanner scanner;
392 	struct dom_select_node sel;
393 
394 	init_dom_scanner(&scanner, &dom_css_scanner_info, string, 0, 0);
395 
396 	memset(&sel, 0, sizeof(sel));
397 
398 	while (dom_scanner_has_tokens(&scanner)) {
399 		struct dom_scanner_token *token = get_dom_scanner_token(&scanner);
400 		enum dom_exception_code code;
401 		struct dom_select_node *select_node;
402 
403 		assert(token);
404 
405 		if (token->type == '{'
406 		    || token->type == '}'
407 		    || token->type == ';'
408 		    || token->type == ',')
409 			break;
410 
411 		/* Examine the selector fragment */
412 
413 		switch (token->type) {
414 		case CSS_TOKEN_IDENT:
415 			sel.node.type = DOM_NODE_ELEMENT;
416 			copy_dom_string(&sel.node.string, &token->string);
417 			if (dom_scanner_token_contains(token, "*"))
418 				sel.match.element |= DOM_SELECT_ELEMENT_UNIVERSAL;
419 			break;
420 
421 		case CSS_TOKEN_HASH:
422 		case CSS_TOKEN_HEX_COLOR:
423 			/* ID fragment */
424 			sel.node.type = DOM_NODE_ATTRIBUTE;
425 			sel.match.attribute |= DOM_SELECT_ATTRIBUTE_ID;
426 			/* Skip the leading '#'. */
427 			skip_dom_scanner_token_char(token);
428 			break;
429 
430 		case '[':
431 			sel.node.type = DOM_NODE_ATTRIBUTE;
432 			code = parse_dom_select_attribute(&sel, &scanner);
433 			if (code != DOM_ERR_NONE)
434 				return code;
435 			break;
436 
437 		case '.':
438 			token = get_next_dom_scanner_token(&scanner);
439 			if (!token || token->type != CSS_TOKEN_IDENT)
440 				return DOM_ERR_SYNTAX;
441 
442 			sel.node.type = DOM_NODE_ATTRIBUTE;
443 			sel.match.attribute |= DOM_SELECT_ATTRIBUTE_SPACE_LIST;
444 			set_dom_string(&sel.node.string, "class", -1);
445 			copy_dom_string(&sel.node.data.attribute.value, &token->string);
446 			break;
447 
448 		case ':':
449 			code = parse_dom_select_pseudo(select, &sel, &scanner);
450 			if (code != DOM_ERR_NONE)
451 				return code;
452 			break;
453 
454 		case '>':
455 			if (get_element_relation(&sel) != DOM_SELECT_RELATION_DESCENDANT)
456 				return DOM_ERR_SYNTAX;
457 			sel.match.element |= DOM_SELECT_RELATION_DIRECT_CHILD;
458 			break;
459 
460 		case '+':
461 			if (get_element_relation(&sel) != DOM_SELECT_RELATION_DESCENDANT)
462 				return DOM_ERR_SYNTAX;
463 			sel.match.element |= DOM_SELECT_RELATION_DIRECT_ADJACENT;
464 			break;
465 
466 		case '~':
467 			if (get_element_relation(&sel) != DOM_SELECT_RELATION_DESCENDANT)
468 				return DOM_ERR_SYNTAX;
469 			sel.match.element |= DOM_SELECT_RELATION_INDIRECT_ADJACENT;
470 			break;
471 
472 		default:
473 			return DOM_ERR_SYNTAX;
474 		}
475 
476 		skip_dom_scanner_token(&scanner);
477 
478 		if (sel.node.type == DOM_NODE_UNKNOWN)
479 			continue;
480 
481 		select_node = mem_calloc(1, sizeof(*select_node));
482 		copy_struct(select_node, &sel);
483 
484 		if (!dom_stack_is_empty(stack)) {
485 			struct dom_node *node = &select_node->node;
486 			struct dom_node *parent = get_dom_stack_top(stack)->node;
487 			struct dom_node_list **list = get_dom_node_list(parent, node);
488 			int sort = (node->type == DOM_NODE_ATTRIBUTE);
489 			int index;
490 
491 			assertm(list, "Adding node to bad parent [%d -> %d]",
492 				node->type, parent->type);
493 
494 			index = *list && (*list)->size > 0 && sort
495 				? get_dom_node_map_index(*list, node) : -1;
496 
497 			if (!add_to_dom_node_list(list, node, index)) {
498 				done_dom_node(node);
499 				return DOM_ERR_INVALID_STATE;
500 			}
501 
502 			node->parent = parent;
503 
504 		} else {
505 			assert(!select->selector);
506 			select->selector = select_node;
507 		}
508 
509 		if (!push_dom_node(stack, &select_node->node))
510 			return DOM_ERR_INVALID_STATE;
511 
512 		if (select_node->node.type != DOM_NODE_ELEMENT)
513 			pop_dom_node(stack);
514 
515 		memset(&sel, 0, sizeof(sel));
516 	}
517 
518 	if (select->selector)
519 		return DOM_ERR_NONE;
520 
521 	return DOM_ERR_INVALID_STATE;
522 }
523 
524 /* Basically this is just a wrapper for parse_dom_select() to ease error
525  * handling. */
526 struct dom_select *
init_dom_select(enum dom_select_syntax syntax,struct dom_string * string)527 init_dom_select(enum dom_select_syntax syntax, struct dom_string *string)
528 {
529 	struct dom_select *select = mem_calloc(1, sizeof(select));
530 	struct dom_stack stack;
531 	enum dom_exception_code code;
532 
533 	init_dom_stack(&stack, DOM_STACK_KEEP_NODES);
534 	add_dom_stack_tracer(&stack, "init-select: ");
535 
536 	code = parse_dom_select(select, &stack, string);
537 	done_dom_stack(&stack);
538 
539 	if (code == DOM_ERR_NONE)
540 		return select;
541 
542 	done_dom_select(select);
543 
544 	return NULL;
545 }
546 
547 void
done_dom_select(struct dom_select * select)548 done_dom_select(struct dom_select *select)
549 {
550 	if (select->selector) {
551 		struct dom_node *node = (struct dom_node *) select->selector;
552 
553 		/* This will recursively free all children select nodes. */
554 		done_dom_node(node);
555 	}
556 
557 	mem_free(select);
558 }
559 
560 
561 /* DOM node selection: */
562 
563 /* This struct stores data related to the 'application' of a DOM selector
564  * on a DOM tree or stream. */
565 struct dom_select_data {
566 	/* The selector matching stack. The various selector nodes are pushed
567 	 * onto this stack as they are matched (and later popped when they are
568 	 * no longer 'reachable', that is, has been popped from the DOM tree or
569 	 * stream. This way the selector can match each selector node multiple
570 	 * times and the selection is a simple matter of matching the current
571 	 * node against each state on this stack. */
572 	struct dom_stack stack;
573 
574 	/* Reference to the selector. */
575 	struct dom_select *select;
576 
577 	/* The list of nodes who have been matched / selected. */
578 	struct dom_node_list *list;
579 };
580 
581 /* This state struct is used for the select data stack and holds info about the
582  * node that was matched. */
583 struct dom_select_state {
584 	/* The matched node. This is always an element node. */
585 	struct dom_node *node;
586 };
587 
588 /* Get a child node of a given type. By design, a selector node can
589  * only have one child per type of node. */
590 static struct dom_select_node *
get_child_dom_select_node(struct dom_select_node * selector,enum dom_node_type type)591 get_child_dom_select_node(struct dom_select_node *selector,
592 			  enum dom_node_type type)
593 {
594 	struct dom_node_list *children = selector->node.data.element.children;
595 	struct dom_node *node;
596 	int index;
597 
598 	if (!children)
599 		return NULL;
600 
601 	foreach_dom_node (children, node, index) {
602 		if (node->type == type)
603 			return (struct dom_select_node *) node;
604 	}
605 
606 	return NULL;
607 }
608 
609 
610 #define has_attribute_match(selector, name) \
611 	((selector)->match.attribute & (name))
612 
613 static int
match_attribute_value(struct dom_select_node * selector,struct dom_node * node)614 match_attribute_value(struct dom_select_node *selector, struct dom_node *node)
615 {
616 	struct dom_string str;
617 	struct dom_string *selvalue = &selector->node.data.attribute.value;
618 	struct dom_string *value = &node->data.attribute.value;
619 	unsigned char separator;
620 	int do_compare;
621 
622 	assert(selvalue->length);
623 
624 	/* The attribute selector value should atleast be contained in the
625 	 * attribute value. */
626 	if (value->length < selvalue->length)
627 		return 0;
628 
629 	/* The following three matching methods requires the selector value to
630 	 * match a substring at a well-defined offset. */
631 
632 	if (has_attribute_match(selector, DOM_SELECT_ATTRIBUTE_EXACT)) {
633 		return !dom_string_casecmp(value, selvalue);
634 	}
635 
636 	if (has_attribute_match(selector, DOM_SELECT_ATTRIBUTE_BEGIN)) {
637 		set_dom_string(&str, value->string, selvalue->length);
638 
639 		return !dom_string_casecmp(&str, selvalue);
640 	}
641 
642 	if (has_attribute_match(selector, DOM_SELECT_ATTRIBUTE_END)) {
643 		size_t offset = value->length - selvalue->length;
644 
645 		set_dom_string(&str, value->string + offset, selvalue->length);
646 
647 		return !dom_string_casecmp(&str, selvalue);
648 	}
649 
650 	/* The 3 following matching methods requires the selector value to be a
651 	 * substring of the value enclosed in a specific separator (with the
652 	 * begining and ending of the attribute value both being valid
653 	 * separators). */
654 
655 	set_dom_string(&str, value->string, value->length);
656 
657 	if (has_attribute_match(selector, DOM_SELECT_ATTRIBUTE_HYPHEN_LIST)) {
658 		separator = '-';
659 
660 	} else if (has_attribute_match(selector, DOM_SELECT_ATTRIBUTE_CONTAINS)) {
661 		separator = '\0';
662 
663 	} if (has_attribute_match(selector, DOM_SELECT_ATTRIBUTE_SPACE_LIST)) {
664 		separator = ' ';
665 
666 	} else {
667 		INTERNAL("No attribute selector matching method defined");
668 		return 0;
669 	}
670 
671 	do_compare = 1;
672 
673 	do {
674 		if (do_compare
675 		    && !dom_string_ncasecmp(&str, selvalue, selvalue->length)) {
676 			/* "Contains" matches no matter what comes after. */
677 			if (str.length == selvalue->length)
678 				return 1;
679 
680 			switch (separator) {
681 			case '\0':
682 				/* "Contains" matches no matter what comes after. */
683 				return 1;
684 
685 			case '-':
686 				if (str.string[str.length] == separator)
687 					return 1;
688 				break;
689 
690 			default:
691 				if (isspace(str.string[str.length]))
692 					return 1;
693 			}
694 		}
695 
696 		switch (separator) {
697 		case '\0':
698 			do_compare = 1;
699 			break;
700 
701 		case '-':
702 			do_compare = (str.string[0] == '-');
703 			break;
704 
705 		default:
706 			do_compare = isspace(str.string[0]);
707 		}
708 
709 		str.length--, str.string++;
710 
711 	} while (str.length >= selvalue->length);
712 
713 	return 0;
714 }
715 
716 /* Match the attribute of an element @node against attribute selector nodes
717  * of a given @base. */
718 static int
match_attribute_selectors(struct dom_select_node * base,struct dom_node * node)719 match_attribute_selectors(struct dom_select_node *base, struct dom_node *node)
720 {
721 	struct dom_node_list *attrs = node->data.element.map;
722 	struct dom_node_list *selnodes = base->node.data.element.map;
723 	struct dom_node *selnode;
724 	size_t index;
725 
726 	assert(base->node.type == DOM_NODE_ELEMENT
727 	       && node->type == DOM_NODE_ELEMENT);
728 
729 	/* If there are no attribute selectors that is a clean match ... */
730 	if (!selnodes)
731 		return 1;
732 
733 	/* ... the opposite goes if there are no attributes to match. */
734 	if (!attrs)
735 		return 0;
736 
737 	foreach_dom_node (selnodes, selnode, index) {
738 		struct dom_select_node *selector = (void *) selnode;
739 		struct dom_node *attr;
740 
741 		if (has_attribute_match(selector, DOM_SELECT_ATTRIBUTE_ID)) {
742 			size_t idindex;
743 
744 			attr = NULL;
745 			foreach_dom_node (attrs, attr, idindex) {
746 				if (attr->data.attribute.id)
747 					break;
748 			}
749 
750 			if (!is_dom_node_list_member(attrs, idindex))
751 				attr = NULL;
752 
753 		} else {
754 			attr = get_dom_node_map_entry(attrs, DOM_NODE_ATTRIBUTE,
755 						      selnode->data.attribute.type,
756 						      &selnode->string);
757 		}
758 
759 		if (!attr)
760 			return 0;
761 
762 		if (has_attribute_match(selector, DOM_SELECT_ATTRIBUTE_ANY))
763 			continue;
764 
765 		if (!match_attribute_value(selector, attr))
766 			return 0;
767 	}
768 
769 	return 1;
770 }
771 
772 /* XXX: Assume the first context is the one! */
773 #define get_dom_select_state(stack, state) \
774 	((struct dom_select_state *) get_dom_stack_state_data((stack)->contexts[0], state))
775 
776 static int
match_element_relation(struct dom_select_node * selector,struct dom_node * node,struct dom_stack * stack)777 match_element_relation(struct dom_select_node *selector, struct dom_node *node,
778 		       struct dom_stack *stack)
779 {
780 	struct dom_stack_state *state;
781 	enum dom_select_element_match relation = get_element_relation(selector);
782 	int i, index;
783 
784 	assert(relation);
785 
786 	/* When matching any relation there must be a parent, either so that
787 	 * the node is a descendant or it is possible to check for siblings. */
788 	if (!node->parent)
789 		return 0;
790 
791 	if (relation != DOM_SELECT_RELATION_DIRECT_CHILD) {
792 		/* When looking for preceeding siblings of the current node,
793 		 * the current node cannot be first or not in the list (-1). */
794 		index = get_dom_node_list_index(node->parent, node);
795 		if (index < 1)
796 			return 0;
797 	} else {
798 		index = -1;
799 	}
800 
801 	/* Find states which hold the parent of the current selector
802 	 * and check if the parent selector's node is the parent of the
803 	 * current node. */
804 	foreachback_dom_stack_state(stack, state, i) {
805 		struct dom_node *selnode;
806 
807 		/* We are only interested in states which hold the parent of
808 		 * the current selector. */
809 		if (state->node != selector->node.parent)
810 			continue;
811 
812 		selnode = get_dom_select_state(stack, state)->node;
813 
814 		if (relation == DOM_SELECT_RELATION_DIRECT_CHILD) {
815 			/* Check if the parent selector's node is the parent of
816 			 * the current node. */
817 			if (selnode == node->parent)
818 				return 1;
819 
820 		} else {
821 			int sibindex;
822 
823 			/* Check if they are siblings. */
824 			if (selnode->parent != node->parent)
825 				continue;
826 
827 			sibindex = get_dom_node_list_index(node->parent, selnode);
828 
829 			if (relation == DOM_SELECT_RELATION_DIRECT_ADJACENT) {
830 				/* Check if the sibling node immediately
831 				 * preceeds the current node. */
832 				if (sibindex + 1 == index)
833 					return 1;
834 
835 			} else { /* DOM_SELECT_RELATION_INDIRECT_ADJACENT */
836 				/* Check if the sibling node preceeds the
837 				 * current node. */
838 				if (sibindex < index)
839 					return 1;
840 			}
841 		}
842 	}
843 
844 	return 0;
845 }
846 
847 #define has_element_match(selector, name) \
848 	((selector)->match.element & (name))
849 
850 static int
match_element_selector(struct dom_select_node * selector,struct dom_node * node,struct dom_stack * stack)851 match_element_selector(struct dom_select_node *selector, struct dom_node *node,
852 		       struct dom_stack *stack)
853 {
854 	assert(node && node->type == DOM_NODE_ELEMENT);
855 
856 	if (!has_element_match(selector, DOM_SELECT_ELEMENT_UNIVERSAL)
857 	    && dom_node_casecmp(&selector->node, node))
858 		return 0;
859 
860 	if (get_element_relation(selector) != DOM_SELECT_RELATION_DESCENDANT
861 	    && !match_element_relation(selector, node, stack))
862 		return 0;
863 
864 	/* Root nodes either have no parents or are the single child of the
865 	 * document node. */
866 	if (has_element_match(selector, DOM_SELECT_ELEMENT_ROOT)
867 	    && node->parent) {
868 		if (node->parent->type != DOM_NODE_DOCUMENT
869 		    || node->parent->data.document.children->size > 1)
870 			return 0;
871 	}
872 
873 	if (has_element_match(selector, DOM_SELECT_ELEMENT_EMPTY)
874 	    && node->data.element.children
875 	    && node->data.element.children->size > 0)
876 		return 0;
877 
878 	if (has_element_match(selector, DOM_SELECT_ELEMENT_NTH_CHILD)) {
879 		/* FIXME */
880 		return 0;
881 	}
882 
883 	if (has_element_match(selector, DOM_SELECT_ELEMENT_NTH_TYPE)) {
884 		/* FIXME */
885 		return 0;
886 	}
887 
888 	/* Check attribute selectors. */
889 	if (selector->node.data.element.map
890 	    && !match_attribute_selectors(selector, node))
891 		return 0;
892 
893 	return 1;
894 }
895 
896 
897 #define get_dom_select_data(stack) ((stack)->current->data)
898 
899 /* Matches an element node being visited against the current selector stack. */
900 static void
dom_select_push_element(struct dom_stack * stack,struct dom_node * node,void * data)901 dom_select_push_element(struct dom_stack *stack, struct dom_node *node, void *data)
902 {
903 	struct dom_select_data *select_data = get_dom_select_data(stack);
904 	struct dom_stack_state *state;
905 	int pos;
906 
907 	foreach_dom_stack_state(&select_data->stack, state, pos) {
908 		struct dom_select_node *selector = (void *) state->node;
909 
910 		/* FIXME: Since the same dom_select_node can be multiple times
911 		 * on the select_data->stack, cache what select nodes was
912 		 * matches so that it is only checked once. */
913 
914 		if (!match_element_selector(selector, node, &select_data->stack))
915 			continue;
916 
917 		WDBG("Matched element: %.*s.", node->string.length, node->string.string);
918 		/* This node is matched, so push the next selector node to
919 		 * match on the stack. */
920 		selector = get_child_dom_select_node(selector, DOM_NODE_ELEMENT);
921 		if (selector)
922 			push_dom_node(&select_data->stack, &selector->node);
923 	}
924 }
925 
926 /* Ensures that nodes, no longer 'reachable' on the stack do not have any
927  * states associated with them on the select data stack. */
928 static void
dom_select_pop_element(struct dom_stack * stack,struct dom_node * node,void * data)929 dom_select_pop_element(struct dom_stack *stack, struct dom_node *node, void *data)
930 {
931 	struct dom_select_data *select_data = get_dom_select_data(stack);
932 	struct dom_stack_state *state;
933 	int index;
934 
935 	stack = &select_data->stack;
936 
937 	foreachback_dom_stack_state (stack, state, index) {
938 		struct dom_select_state *select_state;
939 
940 		select_state = get_dom_select_state(stack, state);
941 		if (select_state->node == node) {
942 			pop_dom_state(stack, state);
943 			WDBG("Remove element.");
944 			continue;
945 		}
946 	}
947 }
948 
949 /* For now this is only for matching the ':contains(<string>)' pseudo-class.
950  * Any node which can contain text and thus characters from the given <string>
951  * are handled in this common callback. */
952 static void
dom_select_push_text(struct dom_stack * stack,struct dom_node * node,void * data)953 dom_select_push_text(struct dom_stack *stack, struct dom_node *node, void *data)
954 {
955 	struct dom_select_data *select_data = get_dom_select_data(stack);
956 	struct dom_stack_state *state = get_dom_stack_top(&select_data->stack);
957 	struct dom_select_node *selector = (void *) state->node;
958 	struct dom_select_node *text_sel = get_child_dom_select_node(selector, DOM_NODE_TEXT);
959 	struct dom_string *text;
960 
961 	WDBG("Text node: %d chars", node->string.length);
962 
963 	if (!text_sel)
964 		return;
965 
966 	text = &text_sel->node.string;
967 
968 	switch (node->type) {
969 	case DOM_NODE_TEXT:
970 	case DOM_NODE_CDATA_SECTION:
971 	case DOM_NODE_ENTITY_REFERENCE:
972 		break;
973 	default:
974 		ERROR("Unhandled type");
975 	}
976 }
977 
978 /* Context info for interacting with the DOM tree or stream stack. */
979 static struct dom_stack_context_info dom_select_context_info = {
980 	/* Object size: */			0,
981 	/* Push: */
982 	{
983 		/*				*/ NULL,
984 		/* DOM_NODE_ELEMENT		*/ dom_select_push_element,
985 		/* DOM_NODE_ATTRIBUTE		*/ NULL,
986 		/* DOM_NODE_TEXT		*/ dom_select_push_text,
987 		/* DOM_NODE_CDATA_SECTION	*/ dom_select_push_text,
988 		/* DOM_NODE_ENTITY_REFERENCE	*/ dom_select_push_text,
989 		/* DOM_NODE_ENTITY		*/ NULL,
990 		/* DOM_NODE_PROC_INSTRUCTION	*/ NULL,
991 		/* DOM_NODE_COMMENT		*/ NULL,
992 		/* DOM_NODE_DOCUMENT		*/ NULL,
993 		/* DOM_NODE_DOCUMENT_TYPE	*/ NULL,
994 		/* DOM_NODE_DOCUMENT_FRAGMENT	*/ NULL,
995 		/* DOM_NODE_NOTATION		*/ NULL,
996 	},
997 	/* Pop: */
998 	{
999 		/*				*/ NULL,
1000 		/* DOM_NODE_ELEMENT		*/ dom_select_pop_element,
1001 		/* DOM_NODE_ATTRIBUTE		*/ NULL,
1002 		/* DOM_NODE_TEXT		*/ NULL,
1003 		/* DOM_NODE_CDATA_SECTION	*/ NULL,
1004 		/* DOM_NODE_ENTITY_REFERENCE	*/ NULL,
1005 		/* DOM_NODE_ENTITY		*/ NULL,
1006 		/* DOM_NODE_PROC_INSTRUCTION	*/ NULL,
1007 		/* DOM_NODE_COMMENT		*/ NULL,
1008 		/* DOM_NODE_DOCUMENT		*/ NULL,
1009 		/* DOM_NODE_DOCUMENT_TYPE	*/ NULL,
1010 		/* DOM_NODE_DOCUMENT_FRAGMENT	*/ NULL,
1011 		/* DOM_NODE_NOTATION		*/ NULL,
1012 	}
1013 };
1014 
1015 /* Context info related to the private select data stack of matched nodes. */
1016 static struct dom_stack_context_info dom_select_data_context_info = {
1017 	/* Object size: */			sizeof(struct dom_select_state),
1018 	/* Push: */
1019 	{
1020 		/*				*/ NULL,
1021 		/* DOM_NODE_ELEMENT		*/ NULL,
1022 		/* DOM_NODE_ATTRIBUTE		*/ NULL,
1023 		/* DOM_NODE_TEXT		*/ NULL,
1024 		/* DOM_NODE_CDATA_SECTION	*/ NULL,
1025 		/* DOM_NODE_ENTITY_REFERENCE	*/ NULL,
1026 		/* DOM_NODE_ENTITY		*/ NULL,
1027 		/* DOM_NODE_PROC_INSTRUCTION	*/ NULL,
1028 		/* DOM_NODE_COMMENT		*/ NULL,
1029 		/* DOM_NODE_DOCUMENT		*/ NULL,
1030 		/* DOM_NODE_DOCUMENT_TYPE	*/ NULL,
1031 		/* DOM_NODE_DOCUMENT_FRAGMENT	*/ NULL,
1032 		/* DOM_NODE_NOTATION		*/ NULL,
1033 	},
1034 	/* Pop: */
1035 	{
1036 		/*				*/ NULL,
1037 		/* DOM_NODE_ELEMENT		*/ NULL,
1038 		/* DOM_NODE_ATTRIBUTE		*/ NULL,
1039 		/* DOM_NODE_TEXT		*/ NULL,
1040 		/* DOM_NODE_CDATA_SECTION	*/ NULL,
1041 		/* DOM_NODE_ENTITY_REFERENCE	*/ NULL,
1042 		/* DOM_NODE_ENTITY		*/ NULL,
1043 		/* DOM_NODE_PROC_INSTRUCTION	*/ NULL,
1044 		/* DOM_NODE_COMMENT		*/ NULL,
1045 		/* DOM_NODE_DOCUMENT		*/ NULL,
1046 		/* DOM_NODE_DOCUMENT_TYPE	*/ NULL,
1047 		/* DOM_NODE_DOCUMENT_FRAGMENT	*/ NULL,
1048 		/* DOM_NODE_NOTATION		*/ NULL,
1049 	}
1050 };
1051 
1052 
1053 struct dom_node_list *
select_dom_nodes(struct dom_select * select,struct dom_node * root)1054 select_dom_nodes(struct dom_select *select, struct dom_node *root)
1055 {
1056 	struct dom_select_data select_data;
1057 	struct dom_stack stack;
1058 
1059 	memset(&select_data, 0, sizeof(select_data));
1060 
1061 	select_data.select = select;;
1062 
1063 	init_dom_stack(&stack, DOM_STACK_KEEP_NODES);
1064 	add_dom_stack_context(&stack, &select_data,
1065 			      &dom_select_context_info);
1066 	add_dom_stack_tracer(&stack, "select-tree: ");
1067 
1068 	init_dom_stack(&select_data.stack, DOM_STACK_KEEP_NODES);
1069 	add_dom_stack_context(&select_data.stack, &select_data,
1070 			      &dom_select_data_context_info);
1071 	add_dom_stack_tracer(&select_data.stack, "select-match: ");
1072 
1073 	if (push_dom_node(&select_data.stack, &select->selector->node)) {
1074 		get_dom_stack_top(&select_data.stack)->immutable = 1;
1075 		walk_dom_nodes(&stack, root);
1076 	}
1077 
1078 	done_dom_stack(&select_data.stack);
1079 	done_dom_stack(&stack);
1080 
1081 	return select_data.list;
1082 }
1083