1 #ifndef EL_DOM_SELECT_H
2 #define EL_DOM_SELECT_H
3 
4 #include "dom/node.h"
5 
6 
7 /* FIXME: Namespaces; *|E */
8 
/* Flags describing how an element selector node matches. The low bits hold
 * the combinator (relation to the previous element); the remaining bits are
 * independent element tests. */
enum dom_select_element_match {
	/* Combinators: the relation required between two element nodes for
	 * them to match. They are mutually exclusive and
	 * DOM_SELECT_RELATION_DESCENDANT is the default. */

	/* Matches any F descendant of E:		E   F */
	/* Bogus flag (no bit set); it is an easy way to have a default. */
	DOM_SELECT_RELATION_DESCENDANT		= 0,
	/* Matches F being a direct child of E:		E > F */
	DOM_SELECT_RELATION_DIRECT_CHILD	= 1 << 0,
	/* Matches F immediately preceded by E:		E + F */
	DOM_SELECT_RELATION_DIRECT_ADJACENT	= 1 << 1,
	/* Matches F preceded by E:			E ~ F */
	DOM_SELECT_RELATION_INDIRECT_ADJACENT	= 1 << 2,

	/* Mask of all the relation bits above. */
	DOM_SELECT_RELATION_FLAGS =
		  DOM_SELECT_RELATION_DESCENDANT
		| DOM_SELECT_RELATION_DIRECT_CHILD
		| DOM_SELECT_RELATION_DIRECT_ADJACENT
		| DOM_SELECT_RELATION_INDIRECT_ADJACENT,

	/* None of the following are mutually exclusive. They can co-exist
	 * although combining them might not make a lot of sense. */

	/* Matches any element:				* */
	DOM_SELECT_ELEMENT_UNIVERSAL		= 1 << 3,
	/* Matches the root node of the document:	:root or // */
	DOM_SELECT_ELEMENT_ROOT			= 1 << 4,
	/* Matches the empty element (not even text):	:empty */
	DOM_SELECT_ELEMENT_EMPTY		= 1 << 5,

	/* Matches some n-th child of its parent:	:nth-child(n), etc. */
	DOM_SELECT_ELEMENT_NTH_CHILD		= 1 << 6,

	/* Matches some n-th sibling of its type:	:nth-of-type(n), etc. */
	DOM_SELECT_ELEMENT_NTH_TYPE		= 1 << 7,
};
46 
/* How an attribute selector compares the attribute value.
 *
 * The special CSS .bar class attribute syntax is represented as
 * E[class="bar"]. The ID flag will match against any attribute with its
 * boolean id member set. XXX: These flags are ATM mutually exclusive. */
enum dom_select_attribute_match {
	/* Matches any set value:			E[foo] */
	DOM_SELECT_ATTRIBUTE_ANY		= 1 << 0,
	/* Matches exact value "bar":			E[foo="bar"] */
	DOM_SELECT_ATTRIBUTE_EXACT		= 1 << 1,
	/* Matches space separated list "z bar bee":	E[foo~="bar"] */
	DOM_SELECT_ATTRIBUTE_SPACE_LIST		= 1 << 2,
	/* Matches hyphen separated list "z-bar-bee":	E[foo|="bar"] */
	DOM_SELECT_ATTRIBUTE_HYPHEN_LIST	= 1 << 3,
	/* Matches value beginning; "bar-z-bee":	E[foo^="bar"] */
	DOM_SELECT_ATTRIBUTE_BEGIN		= 1 << 4,
	/* Matches value ending; "z-bee-bar":		E[foo$="bar"] */
	DOM_SELECT_ATTRIBUTE_END		= 1 << 5,
	/* Matches value containing; "m33p/bar\++":	E[foo*="bar"] */
	DOM_SELECT_ATTRIBUTE_CONTAINS		= 1 << 6,
	/* Matches exact ID attribute value "bar":	#bar */
	DOM_SELECT_ATTRIBUTE_ID			= 1 << 7,
};
68 
/* Flags for matching on element text. The info about the text to match is
 * stored in a DOM text node. */
enum dom_select_text_match {
	/* Matches E containing substring "foo":	E:contains("foo") */
	DOM_SELECT_TEXT_CONTAINS		= 1 << 0,
};
74 
/* Info about what nth child or type to match. The basic syntax is:
 *
 * 	<step>n<index>
 *
 * with a little syntactic sugar.
 *
 * Examples:
 *
 *    0n+1 / 1		is first child (same as :first-child)
 *    2n+0 / 2n / even	is all even children
 *    2n+1 / odd	is all odd children
 *   -0n+2		is the last two children
 *   -0n+1 / -1		is last child (same as :last-child)
 *    1n+0 / n+0 / n	is all elements of type
 *    0n+0		is only element of type (a special internal syntax
 *    			used when storing nth-info)
 *
 * That is, a zero step (0n) means exact indexing, and non-zero step
 * means stepwise indexing.
 *
 * NOTE(review): both members are unsigned (size_t); how the "counting from
 * the end" forms (-0n+2, -1) are encoded is not visible in this header --
 * confirm against the parser before relying on it.
 */
struct dom_select_nth_match {
	size_t step;	/* The <step> part; zero selects exact indexing. */
	size_t index;	/* The <index> part. */
};

/* Store @nthstep and @nthindex in the nth-match info pointed to by @nth.
 *
 * @nth is evaluated exactly once, so passing an expression with side
 * effects (e.g. a post-incremented pointer) updates a single object. The
 * temporary also makes the compiler check that @nth really is a
 * struct dom_select_nth_match pointer. */
#define set_dom_select_nth_match(nth, nthstep, nthindex) \
	do { \
		struct dom_select_nth_match *dom_select_nth_tmp_ = (nth); \
		\
		dom_select_nth_tmp_->step = (nthstep); \
		dom_select_nth_tmp_->index = (nthindex); \
	} while (0)
102 
103 /* This is supposed to be a simple selector. However, this struct is also used
104  * for holding data for attribute matching and element text matching. */
105 struct dom_select_node {
106 	/* This holds the DOM node which has data about the node being matched.
107 	 * It can be either an element, attribute, or a text node. */
108 	/* XXX: Keep at the top. This is used for translating dom_node
109 	 * reference to dom_select_node. */
110 	struct dom_node node;
111 
112 	/* Only meaningful for element nodes. */
113 	/* FIXME: Don't waste memory for non-element nodes. */
114 	struct dom_select_nth_match nth_child;
115 	struct dom_select_nth_match nth_type;
116 
117 	/* Flags, specifying how the matching should be done. */
118 	union {
119 		enum dom_select_element_match element;
120 		enum dom_select_attribute_match attribute;
121 		enum dom_select_text_match text;
122 	} match;
123 };
124 
125 
/* Pseudo-elements and pseudo-classes. The first group are bit flags; the
 * trailing group (from DOM_SELECT_PSEUDO_CONTAINS on) are plain sequential
 * identifiers. */
enum dom_select_pseudo {
	DOM_SELECT_PSEUDO_UNKNOWN		= 0,

	/* Pseudo-elements: */

	/* Matches first formatted line:		::first-line */
	DOM_SELECT_PSEUDO_FIRST_LINE		= 1 << 0,
	/* Matches first formatted letter:		::first-letter */
	DOM_SELECT_PSEUDO_FIRST_LETTER		= 1 << 1,
	/* Matches text selected by user:		::selection */
	DOM_SELECT_PSEUDO_SELECTION		= 1 << 2,
	/* Matches generated content after an element:	::after */
	DOM_SELECT_PSEUDO_AFTER			= 1 << 3,
	/* Matches generated content before an element:	::before */
	DOM_SELECT_PSEUDO_BEFORE		= 1 << 4,

	/* Pseudo-attributes: */

	/* Link pseudo-classes: */
	DOM_SELECT_PSEUDO_LINK			= 1 << 5,	/* :link */
	DOM_SELECT_PSEUDO_VISITED		= 1 << 6,	/* :visited */

	/* User action pseudo-classes: */
	DOM_SELECT_PSEUDO_ACTIVE		= 1 << 7,	/* :active */
	DOM_SELECT_PSEUDO_HOVER			= 1 << 8,	/* :hover */
	DOM_SELECT_PSEUDO_FOCUS			= 1 << 9,	/* :focus */

	/* Target pseudo-class: */
	DOM_SELECT_PSEUDO_TARGET		= 1 << 10,	/* :target */

	/* UI element states pseudo-classes: */
	DOM_SELECT_PSEUDO_ENABLED		= 1 << 11,	/* :enabled */
	DOM_SELECT_PSEUDO_DISABLED		= 1 << 12,	/* :disabled */
	DOM_SELECT_PSEUDO_CHECKED		= 1 << 13,	/* :checked */
	DOM_SELECT_PSEUDO_INDETERMINATE		= 1 << 14,	/* :indeterminate */

	/* XXX: The following pseudo-classes are not kept in the pseudo member
	 * of the dom_select struct so they should not be bitfields. They are
	 * mostly for parsing purposes. The values simply count up from
	 * DOM_SELECT_PSEUDO_CONTAINS. */

	DOM_SELECT_PSEUDO_CONTAINS		= 10000,

	DOM_SELECT_PSEUDO_NTH_CHILD,
	DOM_SELECT_PSEUDO_NTH_LAST_CHILD,
	DOM_SELECT_PSEUDO_FIRST_CHILD,
	DOM_SELECT_PSEUDO_LAST_CHILD,
	DOM_SELECT_PSEUDO_ONLY_CHILD,

	DOM_SELECT_PSEUDO_NTH_TYPE,
	DOM_SELECT_PSEUDO_NTH_LAST_TYPE,
	DOM_SELECT_PSEUDO_FIRST_TYPE,
	DOM_SELECT_PSEUDO_LAST_TYPE,
	DOM_SELECT_PSEUDO_ONLY_TYPE,

	DOM_SELECT_PSEUDO_ROOT,
	DOM_SELECT_PSEUDO_EMPTY,
};
183 
184 struct dom_select {
185 	struct dom_select_node *selector;
186 	unsigned long specificity;
187 	enum dom_select_pseudo pseudo;
188 };
189 
/* The syntax a selector string is written in. */
enum dom_select_syntax {
	/* CSS selector syntax. Example: 'p a[id=node] a:hover' */
	DOM_SELECT_SYNTAX_CSS	= 0,
	/* Path syntax. Example: '//rss/channel/item' */
	DOM_SELECT_SYNTAX_PATH	= 1,
};
194 
/* Create a selector from @string, which is written in the given @syntax.
 * NOTE(review): presumably returns NULL on failure and the result is to be
 * released with done_dom_select() -- confirm against the implementation. */
struct dom_select *
init_dom_select(enum dom_select_syntax syntax, struct dom_string *string);
197 
/* Counterpart of init_dom_select().
 * NOTE(review): presumably releases @select and everything it owns --
 * confirm against the implementation. */
void
done_dom_select(struct dom_select *select);
199 
/* Run the selector @select on the DOM tree starting at @root.
 * NOTE(review): presumably returns the list of matching nodes; ownership of
 * the list and the no-match result are not visible here -- confirm against
 * the implementation. */
struct dom_node_list *
select_dom_nodes(struct dom_select *select,
		 struct dom_node *root);
202 
203 /*
204  * +------------------------------------------------------------------------------------+
205  * | Pattern               | Meaning                      | Type              | Version |
206  * |-----------------------+------------------------------+-------------------+---------|
207  * | *                     | any element                  | Universal         | 2       |
208  * |                       |                              | selector          |         |
209  * |-----------------------+------------------------------+-------------------+---------|
210  * | E                     | an element of type E         | Type selector     | 1       |
211  * |-----------------------+------------------------------+-------------------+---------|
212  * | E F                   | an F element descendant of   | Descendant        | 1       |
213  * |                       | an E element                 | combinator        |         |
214  * |-----------------------+------------------------------+-------------------+---------|
215  * | E > F                 | an F element child of an E   | Child combinator  | 2       |
216  * |                       | element                      |                   |         |
217  * |-----------------------+------------------------------+-------------------+---------|
218  * | E + F                 | an F element immediately     | Direct adjacent   | 2       |
219  * |                       | preceded by an E element     | combinator        |         |
220  * |-----------------------+------------------------------+-------------------+---------|
221  * | E ~ F                 | an F element preceded by an  | Indirect adjacent | 3       |
222  * |                       | E element                    | combinator        |         |
223  * |-----------------------+------------------------------+-------------------+---------|
224  * | E:root                | an E element, root of the    | Structural        | 3       |
225  * |                       | document                     | pseudo-classes    |         |
226  * |-----------------------+------------------------------+-------------------+---------|
227  * |                       | an E element that has no     | Structural        |         |
228  * | E:empty               | children (including text     | pseudo-classes    | 3       |
229  * |                       | nodes)                       |                   |         |
230  * |-----------------------+------------------------------+-------------------+---------|
231  * | E:first-child         | an E element, first child of | Structural        | 2       |
232  * |                       | its parent                   | pseudo-classes    |         |
233  * |-----------------------+------------------------------+-------------------+---------|
234  * | E:last-child          | an E element, last child of  | Structural        | 3       |
235  * |                       | its parent                   | pseudo-classes    |         |
236  * |-----------------------+------------------------------+-------------------+---------|
237  * | E:nth-child(n)        | an E element, the n-th child | Structural        | 3       |
238  * |                       | of its parent                | pseudo-classes    |         |
239  * |-----------------------+------------------------------+-------------------+---------|
240  * |                       | an E element, the n-th child | Structural        |         |
241  * | E:nth-last-child(n)   | of its parent, counting from | pseudo-classes    | 3       |
242  * |                       | the last one                 |                   |         |
243  * |-----------------------+------------------------------+-------------------+---------|
244  * | E:first-of-type       | an E element, first sibling  | Structural        | 3       |
245  * |                       | of its type                  | pseudo-classes    |         |
246  * |-----------------------+------------------------------+-------------------+---------|
247  * | E:last-of-type        | an E element, last sibling   | Structural        | 3       |
248  * |                       | of its type                  | pseudo-classes    |         |
249  * |-----------------------+------------------------------+-------------------+---------|
250  * | E:nth-of-type(n)      | an E element, the n-th       | Structural        | 3       |
251  * |                       | sibling of its type          | pseudo-classes    |         |
252  * |-----------------------+------------------------------+-------------------+---------|
253  * |                       | an E element, the n-th       | Structural        |         |
254  * | E:nth-last-of-type(n) | sibling of its type,         | pseudo-classes    | 3       |
255  * |                       | counting from the last one   |                   |         |
256  * |-----------------------+------------------------------+-------------------+---------|
257  * | E:only-child          | an E element, only child of  | Structural        | 3       |
258  * |                       | its parent                   | pseudo-classes    |         |
259  * |-----------------------+------------------------------+-------------------+---------|
260  * | E:only-of-type        | an E element, only sibling   | Structural        | 3       |
261  * |                       | of its type                  | pseudo-classes    |         |
262  * |-----------------------+------------------------------+-------------------+---------|
263  * |                       | an E element being the       |                   |         |
264  * | E:link                | source anchor of a hyperlink | The link          |         |
265  * | E:visited             | of which the target is not   | pseudo-classes    | 1       |
266  * |                       | yet visited (:link) or       |                   |         |
267  * |                       | already visited (:visited)   |                   |         |
268  * |-----------------------+------------------------------+-------------------+---------|
269  * | E:active              | an E element during certain  | The user action   |         |
270  * | E:hover               | user actions                 | pseudo-classes    | 1 and 2 |
271  * | E:focus               |                              |                   |         |
272  * |-----------------------+------------------------------+-------------------+---------|
273  * | E:target              | an E element being the       | The target        | 3       |
274  * |                       | target of the referring URI  | pseudo-class      |         |
275  * |-----------------------+------------------------------+-------------------+---------|
276  * |                       | an element of type E in      |                   |         |
277  * | E:lang(fr)            | language "fr" (the document  | The :lang()       | 2       |
278  * | FIXME                 | language specifies how       | pseudo-class      |         |
279  * |                       | language is determined)      |                   |         |
280  * |-----------------------+------------------------------+-------------------+---------|
281  * | E:enabled             | a user interface element E   | The UI element    |         |
282  * | E:disabled            | which is enabled or disabled | states            | 3       |
283  * |                       |                              | pseudo-classes    |         |
284  * |-----------------------+------------------------------+-------------------+---------|
285  * |                       | a user interface element E   |                   |         |
286  * | E:checked             | which is checked or in an    | The UI element    |         |
287  * | E:indeterminate       | indeterminate state (for     | states            | 3       |
288  * |                       | instance a radio-button or   | pseudo-classes    |         |
289  * |                       | checkbox)                    |                   |         |
290  * |-----------------------+------------------------------+-------------------+---------|
291  * |                       | an E element containing the  | Content           |         |
292  * | E:contains("foo")     | substring "foo" in its       | pseudo-class      | 3       |
293  * |                       | textual contents             |                   |         |
294  * |-----------------------+------------------------------+-------------------+---------|
295  * | E::first-line         | the first formatted line of  | The :first-line   | 1       |
296  * |                       | an E element                 | pseudo-element    |         |
297  * |-----------------------+------------------------------+-------------------+---------|
298  * | E::first-letter       | the first formatted letter   | The :first-letter | 1       |
299  * |                       | of an E element              | pseudo-element    |         |
300  * |-----------------------+------------------------------+-------------------+---------|
301  * |                       | the portion of an E element  | The UI element    |         |
302  * | E::selection          | that is currently            | fragments         | 3       |
303  * |                       | selected/highlighted by the  | pseudo-elements   |         |
304  * |                       | user                         |                   |         |
305  * |-----------------------+------------------------------+-------------------+---------|
306  * | E::before             | generated content before an  | The :before       | 2       |
307  * |                       | E element                    | pseudo-element    |         |
308  * |-----------------------+------------------------------+-------------------+---------|
309  * | E::after              | generated content after an E | The :after        | 2       |
310  * |                       | element                      | pseudo-element    |         |
311  * |-----------------------+------------------------------+-------------------+---------|
312  * |                       | an E element whose class is  |                   |         |
313  * | E.warning             | "warning" (the document      | Class selectors   | 1       |
314  * |                       | language specifies how class |                   |         |
315  * |                       | is determined).              |                   |         |
316  * |-----------------------+------------------------------+-------------------+---------|
317  * | E#myid                | an E element with ID equal   | ID selectors      | 1       |
318  * |                       | to "myid".                   |                   |         |
319  * |-----------------------+------------------------------+-------------------+---------|
320  * | E[foo]                | an E element with a "foo"    | Attribute         | 2       |
321  * |                       | attribute                    | selectors         |         |
322  * |-----------------------+------------------------------+-------------------+---------|
323  * |                       | an E element whose "foo"     | Attribute         |         |
324  * | E[foo="bar"]          | attribute value is exactly   | selectors         | 2       |
325  * |                       | equal to "bar"               |                   |         |
326  * |-----------------------+------------------------------+-------------------+---------|
327  * |                       | an E element whose "foo"     |                   |         |
328  * |                       | attribute value is a list of | Attribute         |         |
329  * | E[foo~="bar"]         | space-separated values, one  | selectors         | 2       |
330  * |                       | of which is exactly equal to |                   |         |
331  * |                       | "bar"                        |                   |         |
332  * |-----------------------+------------------------------+-------------------+---------|
333  * |                       | an E element whose "foo"     |                   |         |
334  * | E[foo^="bar"]         | attribute value begins       | Attribute         | 3       |
335  * |                       | exactly with the string      | selectors         |         |
336  * |                       | "bar"                        |                   |         |
337  * |-----------------------+------------------------------+-------------------+---------|
338  * |                       | an E element whose "foo"     | Attribute         |         |
339  * | E[foo$="bar"]         | attribute value ends exactly | selectors         | 3       |
340  * |                       | with the string "bar"        |                   |         |
341  * |-----------------------+------------------------------+-------------------+---------|
342  * |                       | an E element whose "foo"     | Attribute         |         |
343  * | E[foo*="bar"]         | attribute value contains the | selectors         | 3       |
344  * |                       | substring "bar"              |                   |         |
345  * |-----------------------+------------------------------+-------------------+---------|
346  * |                       | an E element whose           |                   |         |
347  * |                       | "hreflang" attribute has a   | Attribute         |         |
348  * | E[hreflang|="en"]     | hyphen-separated list of     | selectors         | 2       |
349  * |                       | values beginning (from the   |                   |         |
350  * |                       | left) with "en"              |                   |         |
351  * |-----------------------+------------------------------+-------------------+---------|
352  * | E:not(s)              | an E element that does not   | Negation          | 3       |
353  * | FIXME                 | match simple selector s      | pseudo-class      |         |
354  * +------------------------------------------------------------------------------------+
355  */
356 
357 #endif
358