1 /*
2 * Type definitions and a parser for CSS selectors.
3 *
4 * The Selector type is a linked list of simple selectors, with the
5 * subject at the head, and its context linked from the "context"
6 * field. The "combinator" field is the relation between this simple
7 * selector and its context.
8 *
9 * To do: Allow multiple, comma-separated selectors
10 *
11 * Author: Bert Bos <bert@w3.org>
12 * Created: 8 July 2001
13 * Version: $Id: selector.c,v 1.17 2021/04/15 21:00:55 bbos Exp $
14 **/
15
#include "config.h"
#include <assert.h>
#include <ctype.h>
#include <limits.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#ifdef HAVE_STRING_H
# include <string.h>
#elif HAVE_STRINGS_H
# include <strings.h>
#endif
#include "export.h"
#include "heap.e"
#include "types.e"
#include "errexit.e"
31
32 EXPORT typedef enum { /* Pseudo-classes & -elements */
33 AttrNode, /* ::attr() */
34 RootSel, NthChild, NthOfType, FirstChild, FirstOfType, Lang,
35 NthLastChild, NthLastOfType, LastChild, LastOfType, OnlyChild,
36 OnlyOfType, Empty,
37 Not, Is,
38 } PseudoType;
39
40 EXPORT typedef struct _PseudoCond {
41 PseudoType type;
42 int a, b; /* :nth-child(an+b) */
43 string s; /* :lang(s) */
44 struct _SimpleSelector *sel; /* :not(sel), :is(sel,...) */
45 struct _PseudoCond *next;
46 } PseudoCond;
47
48 EXPORT typedef enum { /* =, ~=, ^=, $= *= |= */
49 Exists, Equals, Includes, StartsWith, EndsWith, Contains, LangMatch,
50 HasClass, HasID /* ".foo", "#foo" */
51 } Operator;
52
53 EXPORT typedef struct _AttribCond {
54 Operator op;
55 string name; /* If not HasClass/ID */
56 string value; /* If op!=Exists */
57 struct _AttribCond *next;
58 } AttribCond;
59
60 EXPORT typedef enum {
61 Descendant, Child, Adjacent, Sibling /* <space>, >, +, ~ */
62 } Combinator;
63
64 EXPORT typedef struct _SimpleSelector {
65 string name; /* NULL is "*" */
66 AttribCond *attribs;
67 PseudoCond *pseudos;
68 PseudoCond *pseudoelts; /* E.g., ::attr(foo) */
69 Combinator combinator; /* If context not NULL */
70 struct _SimpleSelector *context;
71 struct _SimpleSelector *next; /* Comma-separated selectors */
72 } SimpleSelector, *Selector;
73
/* State -- the states of the parser in parse_selector() (order is fixed) */
typedef enum {
  INIT,					/* Expect a simple selector */
  START_SIMPLE,				/* At start of a simple selector */
  AFTER_SIMPLE,				/* Expect a combinator */
  AFTER_TYPE,				/* After a type selector */
  CLASS,				/* Just seen a '.' */
  ID,					/* Just seen a '#' */
  ATTR,					/* Just seen a '[' */
  PSEUDO,				/* Just seen a ':' */
  AFTER_ATTR,				/* After '[' + name */
  EQ,					/* Expect '=' */
  START_VALUE,				/* After '=' */
  DSTRING,				/* Inside "..." */
  SSTRING,				/* Inside '...' */
  AFTER_VALUE,				/* Expect ']' */
  PSEUDO_ELT,				/* After '::' */
  IN_PSEUDO,				/* After ':nth-...', expect '(' */
  LANG,					/* After ':lang' */
  IN_PSEUDO_,				/* After ':nth-...(' */
  PSEUDO__O,				/* After ':nth-...(o' */
  PSEUDO__E,				/* After ':nth-...(e' */
  AFTER_N,				/* After ':nth-...(an' */
  AFTER_NUM,				/* After ':nth-...(' + number */
  END_PSEUDO,				/* Expect ')' */
  AFTER_PLUS,				/* After 'an+' */
  AFTER_MINUS,				/* After 'an-' */
  PSEUDO__OD,				/* After ':nth-...(od' */
  PSEUDO__ODD,				/* After ':nth-...(odd' */
  PSEUDO__EV,				/* After ':nth-...(ev' */
  PSEUDO__EVE,				/* After ':nth-...(eve' */
  PSEUDO__EVEN,				/* After ':nth-...(even' */
  LANG_,				/* After ':lang(' */
  PSEUDO_ELT_ATTR,			/* After '::attr' */
  PSEUDO_ELT_ATTR_,			/* After '::attr(' */
  IN_PSEUDO_PLUS,			/* After ':nth-...(+' */
  IN_PSEUDO_MINUS,			/* After ':nth-...(-' */
  NOT,					/* After ':not' */
  IS					/* After ':is' or ':where' */
} State;
82
83
84 /* strappc -- append a character to a malloc'ed string */
strappc(string * s,char c)85 static void strappc(string *s, char c)
86 {
87 assert(s);
88 int len = *s ? strlen(*s) : 0;
89 renewarray(*s, len + 2);
90 (*s)[len] = c;
91 (*s)[len+1] = '\0';
92 }
93
94
95 /* push_sel -- allocate memory for a new selector; initialize */
push_sel(Selector * selector,Combinator combinator)96 static void push_sel(Selector *selector, Combinator combinator)
97 {
98 Selector h;
99
100 new(h);
101 h->name = NULL;
102 h->attribs = NULL;
103 h->pseudos = NULL;
104 h->pseudoelts = NULL;
105 h->context = *selector;
106 h->combinator = combinator;
107 h->next = NULL;
108 *selector = h;
109 }
110
/*
 * isnmstart -- check if a character can start an identifier
 *
 * Accepted are ASCII letters, '_', a backslash (start of an escape)
 * and everything that is not less than '\200'.
 */
static bool isnmstart(unsigned int c)
{
  if (c == '_' || c == '\\') return true;
  if ('a' <= c && c <= 'z') return true;
  if ('A' <= c && c <= 'Z') return true;
  return c >= '\200';
}
120
/*
 * isnmchar -- check if a character can be inside an identifier
 *
 * Accepted are ASCII letters and digits, '_', '-', a backslash (start
 * of an escape) and everything that is not less than '\200'.
 */
static bool isnmchar(unsigned int c)
{
  if ('0' <= c && c <= '9') return true;
  if ('a' <= c && c <= 'z') return true;
  if ('A' <= c && c <= 'Z') return true;
  if (c == '_' || c == '-' || c == '\\') return true;
  return c >= '\200';
}
132
133 /* parse_comment -- skip over a comment */
parse_comment(string * s)134 static void parse_comment(string *s)
135 {
136 assert(s && *s && **s == '/');
137 if (*(++*s) != '*') errexit("Unexpected \"/\"\n");
138 for ((*s)++; **s; (*s)++)
139 if (**s == '*' && *(*s+1) == '/') {(*s) += 2; return;}
140 errexit("Comment is missing the closing \"*/\"\n");
141 }
142
143 /* parse_escape -- parse a backslash-escaped character, append UTF-8 to value */
parse_escape(string * s,string * value)144 static void parse_escape(string *s, string *value)
145 {
146 int n;
147
148 assert(value && s && *s && **s == '\\');
149 (*s)++;
150 if (!isxdigit(**s)) {strappc(value, **s); (*s)++; return;}
151 n = hexval(**s); (*s)++;
152 if (isxdigit(**s)) {n = 16 * n + hexval(**s); (*s)++;}
153 if (isxdigit(**s)) {n = 16 * n + hexval(**s); (*s)++;}
154 if (isxdigit(**s)) {n = 16 * n + hexval(**s); (*s)++;}
155 if (isxdigit(**s)) {n = 16 * n + hexval(**s); (*s)++;}
156 if (isxdigit(**s)) {n = 16 * n + hexval(**s); (*s)++;}
157 if (isspace(**s)) (*s)++;
158
159 /* Convert to a UTF-8 string */
160 if (n <= 0x7F) {
161 strappc(value, n);
162 } else if (n <= 0x7FF) {
163 strappc(value, 0xC0 | (n >> 6));
164 strappc(value, 0x80 | (n & 0x3F));
165 } else if (n <= 0xFFFF) {
166 strappc(value, 0xE0 | (n >> 12));
167 strappc(value, 0x80 | ((n >> 6) & 0x3F));
168 strappc(value, 0x80 | (n & 0x3F));
169 } else if (n <= 0x1FFFFF) {
170 strappc(value, 0xF0 | (n >> 18));
171 strappc(value, 0x80 | ((n >> 12) & 0x3F));
172 strappc(value, 0x80 | ((n >> 6) & 0x3F));
173 strappc(value, 0x80 | (n & 0x3F));
174 } else if (n <= 0x3FFFFFF) {
175 strappc(value, 0xF0 | (n >> 24));
176 strappc(value, 0x80 | ((n >> 18) & 0x3F));
177 strappc(value, 0x80 | ((n >> 12) & 0x3F));
178 strappc(value, 0x80 | ((n >> 6) & 0x3F));
179 strappc(value, 0x80 | (n & 0x3F));
180 } else {
181 strappc(value, 0xF0 | (n >> 30));
182 strappc(value, 0x80 | ((n >> 24) & 0x3F));
183 strappc(value, 0x80 | ((n >> 18) & 0x3F));
184 strappc(value, 0x80 | ((n >> 12) & 0x3F));
185 strappc(value, 0x80 | ((n >> 6) & 0x3F));
186 strappc(value, 0x80 | (n & 0x3F));
187 }
188 }
189
190 /* parse_ident -- parse and return an identifier */
parse_ident(string * s)191 static string parse_ident(string *s)
192 {
193 string ident = NULL;
194
195 assert(*s && isnmchar(**s)); /* Not isnmstart(), it may be a HASH (#...) */
196 if (**s == '\\') parse_escape(s, &ident);
197 else strappc(&ident, *(*s)++);
198 while (isnmchar(**s))
199 if (**s == '\\') parse_escape(s, &ident);
200 else strappc(&ident, *(*s)++);
201 return ident;
202 }
203
204 /* parse_int -- parse and return a decimal integer */
parse_int(string * s)205 static int parse_int(string *s)
206 {
207 int n = 0;
208 bool neg = false;
209
210 assert(s && *s && (**s == '-' || **s == '+' || isdigit(**s)));
211 if (**s == '+') (*s)++; else if (**s == '-') {neg = true; (*s)++;}
212 if (!isdigit(**s))errexit("Expected a number after +/- but found \"%c\"\n",**s);
213 while (isdigit(**s)) {
214 n = 10 * n + (**s - '0');
215 if (n < 0) errexit("Cannot handle a number this big\n");
216 (*s)++;
217 }
218 return neg ? -n : n;
219 }
220
221 /* parse_selector -- parse the selector in s */
parse_selector(const string selector,string * rest)222 EXPORT Selector parse_selector(const string selector, string *rest)
223 {
224 State state = INIT;
225 string name;
226 string s = selector;
227 AttribCond *attsel;
228 PseudoCond *pseudosel;
229 Selector sel = NULL;
230 int n = 0; /* Avoid warning about uninitialized */
231
232 push_sel(&sel, Descendant);
233
234 while (*s) {
235 switch (state) {
236 case INIT: /* Expect a simple sel */
237 if (isspace(*s)) s++;
238 else if (*s == '/') parse_comment(&s);
239 else state = START_SIMPLE;
240 break;
241 case AFTER_SIMPLE: /* Expect a combinator */
242 if (isspace(*s)) s++;
243 else if (*s == '/') parse_comment(&s);
244 else if (*s == '+') {s++; push_sel(&sel, Adjacent); state = INIT;}
245 else if (*s == '>') {s++; push_sel(&sel, Child); state = INIT;}
246 else if (*s == '~') {s++; push_sel(&sel, Sibling); state = INIT;}
247 else if (*s == ',') {sel->next = parse_selector(s+1, &s);}
248 else if (*s == ')') {*rest = s; return sel;}
249 else {push_sel(&sel, Descendant); state = INIT;}
250 break;
251 case START_SIMPLE: /* Start simple sel */
252 if (*s == '*') {s++; state = AFTER_TYPE;}
253 else if (*s == '.') {s++; state = CLASS;}
254 else if (*s == '#') {s++; state = ID;}
255 else if (*s == '[') {s++; state = ATTR;}
256 else if (*s == ':') {s++; state = PSEUDO;}
257 else if (isnmstart(*s)) {sel->name = parse_ident(&s); state = AFTER_TYPE;}
258 else errexit("Unexpected \"%c\"\n", *s);
259 break;
260 case AFTER_TYPE: /* After a type sel */
261 if (*s == '/') parse_comment(&s);
262 else if (*s == '.') {s++; state = CLASS;}
263 else if (*s == '#') {s++; state = ID;}
264 else if (*s == '[') {s++; state = ATTR;}
265 else if (*s == ':') {s++; state = PSEUDO;}
266 else if (isnmstart(*s)) errexit("Unexpected \"%c\"\n", *s);
267 else state = AFTER_SIMPLE;
268 break;
269 case CLASS: /* Just seen a '.' */
270 if (*s == '/') parse_comment(&s);
271 else if (isnmstart(*s)) {new(attsel); attsel->op = HasClass;
272 attsel->value = parse_ident(&s); attsel->next = sel->attribs;
273 sel->attribs = attsel; state = AFTER_TYPE;}
274 else errexit("Unexpected \"%c\" after \".\"\n", *s);
275 break;
276 case ID: /* Just seen a '#' */
277 if (isnmchar(*s)) {new(attsel); attsel->op = HasID;
278 attsel->value = parse_ident(&s); attsel->next = sel->attribs;
279 sel->attribs = attsel; state = AFTER_TYPE;}
280 else errexit("Unexpected \"%c\" after \"#\"\n", *s);
281 break;
282 case ATTR: /* Just seen '[' */
283 if (isspace(*s)) s++;
284 else if (*s == '/') parse_comment(&s);
285 else if (isnmstart(*s)) {new(attsel); attsel->name = parse_ident(&s);
286 attsel->next = sel->attribs; sel->attribs = attsel; state = AFTER_ATTR;}
287 else errexit("Unexpected \"%c\" after \"[\"\n", *s);
288 break;
289 case AFTER_ATTR: /* Just seen a '[' + ident */
290 if (isspace(*s)) s++;
291 else if (*s == '/') parse_comment(&s);
292 else if (*s == ']') {s++; sel->attribs->op = Exists; state = AFTER_TYPE;}
293 else if (*s == '~') {s++; sel->attribs->op = Includes; state = EQ;}
294 else if (*s == '|') {s++; sel->attribs->op = LangMatch; state = EQ;}
295 else if (*s == '^') {s++; sel->attribs->op = StartsWith; state = EQ;}
296 else if (*s == '$') {s++; sel->attribs->op = EndsWith; state = EQ;}
297 else if (*s == '*') {s++; sel->attribs->op = Contains; state = EQ;}
298 else {sel->attribs->op = Equals; state = EQ;}
299 break;
300 case EQ: /* Expect '=' */
301 if (*s != '=') errexit("Expected '=' instead of \"%c\"\n", *s);
302 else {s++; sel->attribs->value = NULL; state = START_VALUE;}
303 break;
304 case START_VALUE: /* After '=' */
305 if (isspace(*s)) s++;
306 else if (*s == '/') parse_comment(&s);
307 else if (*s == '"') {s++; state = DSTRING;}
308 else if (*s == '\'') {s++; state = SSTRING;}
309 else if (isnmstart(*s)) {sel->attribs->value = parse_ident(&s);
310 state = AFTER_VALUE;}
311 else errexit("Expected string or name after \"=\" but found \"%c\"\n",*s);
312 break;
313 case DSTRING: /* Inside "..." */
314 if (*s == '"') {
315 s++;
316 if (!sel->attribs->value) sel->attribs->value = newstring("");
317 state = AFTER_VALUE;
318 }
319 else if (*s == '\\') parse_escape(&s, &sel->attribs->value);
320 else {strappc(&sel->attribs->value, *s); s++;}
321 break;
322 case SSTRING: /* Inside "..." */
323 if (*s == '\'') {
324 s++;
325 if (!sel->attribs->value) sel->attribs->value = newstring("");
326 state = AFTER_VALUE;
327 }
328 else if (*s == '\\') parse_escape(&s, &sel->attribs->value);
329 else {strappc(&sel->attribs->value, *s); s++;}
330 break;
331 case AFTER_VALUE: /* Expect ']' */
332 if (isspace(*s)) s++;
333 else if (*s == '/') parse_comment(&s);
334 else if (*s == ']') {s++; state = AFTER_TYPE;}
335 else errexit("Expected ']' instead of \"%c\"\n", *s);
336 break;
337 case PSEUDO: /* After ':' */
338 if (*s == '/') parse_comment(&s);
339 else if (*s == ':') {s++; state = PSEUDO_ELT;}
340 else if (!isnmstart(*s))
341 errexit("Expected a pseudo-class after \":\" but found \"%c\"\n", *s);
342 else {new(pseudosel); pseudosel->next = sel->pseudos;
343 sel->pseudos = pseudosel; name = parse_ident(&s);
344 if (strcasecmp(name, "root") == 0) {
345 pseudosel->type = RootSel; state = AFTER_TYPE;}
346 else if (strcasecmp(name, "nth-child") == 0) {
347 pseudosel->type = NthChild; state = IN_PSEUDO;}
348 else if (strcasecmp(name, "nth-last-child") == 0) {
349 pseudosel->type = NthLastChild; state = IN_PSEUDO;}
350 else if (strcasecmp(name, "nth-of-type") == 0) {
351 pseudosel->type = NthOfType; state = IN_PSEUDO;}
352 else if (strcasecmp(name, "nth-last-of-type") == 0) {
353 pseudosel->type = NthLastOfType; state = IN_PSEUDO;}
354 else if (strcasecmp(name, "first-child") == 0) {
355 pseudosel->type = FirstChild; state = AFTER_TYPE;}
356 else if (strcasecmp(name, "last-child") == 0) {
357 pseudosel->type = LastChild; state = AFTER_TYPE;}
358 else if (strcasecmp(name, "first-of-type") == 0) {
359 pseudosel->type = FirstOfType; state = AFTER_TYPE;}
360 else if (strcasecmp(name, "last-of-type") == 0) {
361 pseudosel->type = LastOfType; state = AFTER_TYPE;}
362 else if (strcasecmp(name, "only-child") == 0) {
363 pseudosel->type = OnlyChild; state = AFTER_TYPE;}
364 else if (strcasecmp(name, "only-of-type") == 0) {
365 pseudosel->type = OnlyOfType; state = AFTER_TYPE;}
366 else if (strcasecmp(name, "empty") == 0) {
367 pseudosel->type = Empty; state = AFTER_TYPE;}
368 else if (strcasecmp(name, "lang") == 0) {
369 pseudosel->type = Lang; state = LANG;}
370 else if (strcasecmp(name, "not") == 0) {
371 pseudosel->type = Not; state = NOT;}
372 else if (strcasecmp(name, "is") == 0
373 || strcasecmp(name, "where") == 0) {
374 pseudosel->type = Is; state = IS;}
375 else errexit("Unknown pseudo-class \"%s\"\n", name);
376 }
377 break;
378 case IN_PSEUDO: /* After ':...', expect '(' */
379 if (*s == '(') {s++; state = IN_PSEUDO_;}
380 else errexit("Expecting a \"(\" but found \"%c\"\n", *s);
381 break;
382 case IN_PSEUDO_: /* Expecting an+b */
383 if (isspace(*s)) s++;
384 else if (*s == '/') parse_comment(&s);
385 else if (*s == 'o' || *s == 'O') {s++; state = PSEUDO__O;}
386 else if (*s == 'e' || *s == 'E') {s++; state = PSEUDO__E;}
387 else if (*s == 'n' || *s == 'N') {sel->pseudos->a = 1; s++;state=AFTER_N;}
388 else if (*s == '+') {s++; state = IN_PSEUDO_PLUS;}
389 else if (*s == '-') {s++; state = IN_PSEUDO_MINUS;}
390 else if (isdigit(*s)) {n = parse_int(&s); state = AFTER_NUM;}
391 else errexit("Expected a number, but found \"%c\"\n", *s);
392 break;
393 case IN_PSEUDO_PLUS: /* After ':pseudo(+' */
394 if (*s == 'n') {sel->pseudos->a = 1; s++; state = AFTER_N;}
395 else if (isdigit(*s)) {n = parse_int(&s); state = AFTER_NUM;}
396 else errexit("Expected a number after \"+\" but found \"%c\"\n", *s);
397 break;
398 case IN_PSEUDO_MINUS: /* After ':pseudo(-' */
399 if (*s == 'n') {sel->pseudos->a = -1; s++; state = AFTER_N;}
400 else if (isdigit(*s)) {n = -parse_int(&s); state = AFTER_NUM;}
401 else errexit("Expected a number after \"-\" but found \"%c\"\n", *s);
402 break;
403 case AFTER_NUM: /* After ':pseudo(' + number */
404 if (*s == 'n' || *s == 'N') {sel->pseudos->a = n; s++; state = AFTER_N;}
405 else {sel->pseudos->a = 0; sel->pseudos->b = n; state = END_PSEUDO;}
406 break;
407 case AFTER_N: /* After ':pseudo(an' */
408 if (isspace(*s)) s++;
409 else if (*s == '+') {s++; state = AFTER_PLUS;}
410 else if (*s == '-') {s++; state = AFTER_MINUS;}
411 else {sel->pseudos->b = 0; state = END_PSEUDO;}
412 break;
413 case AFTER_PLUS: /* After an+ */
414 if (isspace(*s)) s++;
415 else if (isdigit(*s)) {sel->pseudos->b = parse_int(&s); state=END_PSEUDO;}
416 else errexit("Expected a number after the \"+\", but found \"%c\"\n", *s);
417 break;
418 case AFTER_MINUS: /* After an- */
419 if (isspace(*s)) s++;
420 else if (isdigit(*s)) {sel->pseudos->b= -parse_int(&s); state=END_PSEUDO;}
421 else errexit("Expected a number after the \"-\", but found \"%c\"\n", *s);
422 break;
423 case END_PSEUDO: /* Expecting ')' */
424 if (isspace(*s)) s++;
425 else if (*s == '/') parse_comment(&s);
426 else if (*s == ')') {s++; state = AFTER_TYPE;}
427 else errexit("Expected \")\" but found \"%c\"\n", *s);
428 break;
429 case PSEUDO__O: /* After :nth...(o */
430 if (*s == 'd' || *s == 'D') {s++; state = PSEUDO__OD;}
431 else errexit("Illegal character \"%c\" in \":nth-...(\"\n", *s);
432 break;
433 case PSEUDO__OD: /* After :nth...(od */
434 if (*s == 'd' || *s == 'D') {s++; state = PSEUDO__ODD;}
435 else errexit("Illegal character \"%c\" in \":nth-...(\"\n", *s);
436 break;
437 case PSEUDO__ODD: /* After :nth-...(odd */
438 if (!isnmchar(*s)) {state = END_PSEUDO;
439 sel->pseudos->a = 2; sel->pseudos->b = 1;}
440 else errexit("Illegal character \"%c\" in \":nth-...(\"\n", *s);
441 break;
442 case PSEUDO__E: /* After :nth-...(e */
443 if (*s == 'v' || *s == 'V') {s++; state = PSEUDO__EV;}
444 else errexit("Illegal character \"%c\" in \":nth-...(\"\n", *s);
445 break;
446 case PSEUDO__EV: /* After :nth-...(ev */
447 if (*s == 'e' || *s == 'E') {s++; state = PSEUDO__EVE;}
448 else errexit("Illegal character \"%c\" in \":nth-...(\"\n", *s);
449 break;
450 case PSEUDO__EVE: /* After :nth-...(eve */
451 if (*s == 'n' || *s == 'N') {s++; state = PSEUDO__EVEN;}
452 else errexit("Illegal character \"%c\" in \":nth-...(\"\n", *s);
453 break;
454 case PSEUDO__EVEN: /* Afte :nth-...(even */
455 if (!isnmchar(*s)) {state = END_PSEUDO;
456 sel->pseudos->a = 2; sel->pseudos->b = 0;}
457 else errexit("Illegal character \"%c\" in \":nth-...(\"\n", *s);
458 break;
459 case LANG: /* After ':lang' */
460 if (*s == '(') {s++; state = LANG_;}
461 else errexit("Expecting \"(\" after \":lang\" but found \"%c\"\n", *s);
462 break;
463 case LANG_: /* After ':lang(' */
464 if (isspace(*s)) s++;
465 else if (*s == '/') parse_comment(&s);
466 else if (isnmstart(*s)) {sel->pseudos->s = parse_ident(&s);
467 state = END_PSEUDO;}
468 else errexit("Incorrect \":lang(\" at \"%c\"\n", *s);
469 break;
470 case NOT: /* After ':not' */
471 if (*s == '(') {sel->pseudos->sel = parse_selector(s + 1, &s);
472 state = END_PSEUDO;}
473 else errexit("Expecting \"(\" after \":not\" but found \"%c\"\n", *s);
474 break;
475 case IS: /* After ':is' */
476 if (*s == '(') {sel->pseudos->sel = parse_selector(s + 1, &s);
477 state = END_PSEUDO;}
478 else errexit("Expecting \"(\" after \":is\" but found \"%c\"\n", *s);
479 break;
480 case PSEUDO_ELT: /* After '::' */
481 if (*s == '/') parse_comment(&s);
482 else if (!isnmstart(*s))
483 errexit("Expected a pseudo-element after \"::\" but found \"%c\"\n",*s);
484 else {push_sel(&sel, Child); new(pseudosel);
485 pseudosel->next = sel->pseudoelts; sel->pseudoelts = pseudosel;
486 name = parse_ident(&s);
487 if (strcasecmp(name, "attr") == 0) {
488 sel->pseudoelts->type = AttrNode; state = PSEUDO_ELT_ATTR;}
489 else errexit("Unknown pseudo-element \"%s\"\n", name);
490 }
491 break;
492 case PSEUDO_ELT_ATTR: /* After '::attr' */
493 if (*s == '(') {s++; state = PSEUDO_ELT_ATTR_;}
494 else errexit("Expected \"(\" after \"::attr\" but found \"%c\"\n", *s);
495 break;
496 case PSEUDO_ELT_ATTR_: /* After '::attr(' */
497 if (isspace(*s)) s++;
498 else if (*s == '/') parse_comment(&s);
499 else if (!isnmstart(*s)) errexit("Expected a name after \"::attr(\"\n");
500 else {sel->pseudoelts->s = parse_ident(&s); state = END_PSEUDO;}
501 break;
502 default:
503 assert(!"Cannot happen");
504 }
505 }
506 if (state != AFTER_TYPE && state != AFTER_SIMPLE)
507 errexit("Incomplete selector (state %d)\n", state);
508
509 *rest = s;
510 return sel;
511 }
512
/* Forward declaration: dump_simple_selector() below calls dump_selector() */
void dump_selector(FILE *f, Selector s);
514
515 /* utf8toint -- convert a UTF-8 sequence to a character code point */
utf8toint(conststring s,conststring * t)516 static int utf8toint(conststring s, conststring *t)
517 {
518 int n;
519
520 assert(s);
521 assert(*s);
522 if ((*s & 0x80) == 0) {*t = s + 1; return *s;} /* 0xxxxxxx */
523 if ((*s & 0xE0) == 0xC0) n = *s & 0x1F; /* 110xxxxx */
524 else if ((*s & 0xF0) == 0xE0) n = *s & 0xF; /* 1110xxxx */
525 else if ((*s & 0xF8) == 0xF0) n = *s & 0x7; /* 11110xxx */
526 else {*t = s + 1; return *s;} /* Error! */
527 for (s++; *s && (*s & 0x80) == 0x80; s++) n = (n << 6) | (*s & 0x3F);
528 *t = s;
529 return n;
530 }
531
532 /* esc -- print a string, escaping special characters */
esc(FILE * f,conststring s)533 static void esc(FILE *f, conststring s)
534 {
535 assert(s);
536
537 if (!*s) /* Can only happen with a string, not an ident */
538 fprintf(f, "\"\"");
539 else {
540 while (*s) {
541 if (('a' <= *s && *s <= 'z') || ('A' <= *s && *s <= 'Z') ||
542 ('0' <= *s && *s <= '9') || *s == '-' || *s == '_' || *s & 0x80)
543 putc(*(s++), f);
544 else
545 fprintf(f, "\\%X ", utf8toint(s, &s));
546 }
547 }
548 }
549
550 /* dump_simple_selector -- serialize a simple selector */
dump_simple_selector(FILE * f,const SimpleSelector * s)551 EXPORT void dump_simple_selector(FILE *f, const SimpleSelector *s)
552 {
553 AttribCond *a;
554 PseudoCond *p;
555
556 if (s->name) esc(f, s->name); else putc('*', f);
557 for (a = s->attribs; a; a = a->next) {
558 if (a->op == HasClass) {putc('.', f); esc(f, a->value);}
559 else if (a->op == HasID) {putc('#', f); esc(f, a->value);}
560 else {
561 putc('[', f); esc(f, a->name);
562 switch (a->op) {
563 case Exists: break;
564 case Equals: putc('=', f); break;
565 case Includes: fprintf(f, "~="); break;
566 case StartsWith: fprintf(f, "^="); break;
567 case EndsWith: fprintf(f, "$="); break;
568 case Contains: fprintf(f, "*"); break;
569 case LangMatch: fprintf(f, "|="); break;
570 default: assert(!"Cannot happen");
571 }
572 if (a->op != Exists) esc(f, a->value);
573 putc(']', f);
574 }
575 }
576 for (p = s->pseudos; p; p = p->next) {
577 switch (p->type) {
578 case RootSel: fprintf(f, ":root"); break;
579 case NthChild: fprintf(f, ":nth-child(%dn+%d)", p->a, p->b); break;
580 case NthOfType: fprintf(f, ":nth-of-type(%dn+%d)", p->a, p->b); break;
581 case FirstChild: fprintf(f, ":first-child"); break;
582 case FirstOfType: fprintf(f, ":first-of-type"); break;
583 case Lang: fprintf(f, ":lang("); esc(f, p->s); putc(')', f); break;
584 case NthLastChild: fprintf(f, ":nth-last-child(%dn+%d)", p->a, p->b); break;
585 case NthLastOfType: fprintf(f, ":nth-last-of-type(%dn+%d)",p->a,p->b);break;
586 case LastChild: fprintf(f, ":last-child"); break;
587 case LastOfType: fprintf(f, ":last-of-type"); break;
588 case OnlyChild: fprintf(f, ":only-child"); break;
589 case OnlyOfType: fprintf(f, ":only-of-type"); break;
590 case Empty: fprintf(f, ":empty"); break;
591 case Not: fprintf(f,":not("); dump_selector(f,p->sel); fprintf(f,")");break;
592 case Is: fprintf(f,":is("); dump_selector(f,p->sel); fprintf(f,")");break;
593 default: assert(!"Cannot happen");
594 }
595 }
596 for (p = s->pseudoelts; p; p = p->next) {
597 switch (p->type) {
598 case AttrNode: fprintf(f, "::attr("); esc(f, p->s); putc(')', f); break;
599 default: assert(!"Cannot happen");
600 }
601 }
602 }
603
604 /* dump_selector -- serialize a selector */
dump_selector(FILE * f,const Selector s)605 EXPORT void dump_selector(FILE *f, const Selector s)
606 {
607 assert(s);
608
609 if (s->context) {
610 dump_selector(f, s->context);
611 switch (s->combinator) {
612 case Descendant: fprintf(f, " "); break;
613 case Child: fprintf(f, " > "); break;
614 case Adjacent: fprintf(f, " + "); break;
615 case Sibling: fprintf(f, " ~ "); break;
616 default: assert(!"Cannot happen");
617 }
618 }
619 dump_simple_selector(f, s);
620
621 if (s->next) {
622 fprintf(f, ", ");
623 dump_selector(f, s->next);
624 }
625 }
626