1 /*
2  * Type definitions and a parser for CSS selectors.
3  *
4  * The Selector type is a linked list of simple selectors, with the
5  * subject at the head, and its context linked from the "context"
6  * field. The "combinator" field is the relation between this simple
7  * selector and its context.
8  *
9  * To do: Allow multiple, comma-separated selectors
10  *
11  * Author: Bert Bos <bert@w3.org>
12  * Created: 8 July 2001
13  * Version: $Id: selector.c,v 1.17 2021/04/15 21:00:55 bbos Exp $
14  **/
15 
#include "config.h"
#include <assert.h>
#include <ctype.h>
#include <limits.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#ifdef HAVE_STRING_H
#  include <string.h>
#elif HAVE_STRINGS_H
#  include <strings.h>
#endif
#include "export.h"
#include "heap.e"
#include "types.e"
#include "errexit.e"
31 
32 EXPORT typedef enum {				/* Pseudo-classes & -elements */
33   AttrNode,					/* ::attr() */
34   RootSel, NthChild, NthOfType, FirstChild, FirstOfType, Lang,
35   NthLastChild, NthLastOfType, LastChild, LastOfType, OnlyChild,
36   OnlyOfType, Empty,
37   Not, Is,
38 } PseudoType;
39 
40 EXPORT typedef struct _PseudoCond {
41   PseudoType type;
42   int a, b;					/* :nth-child(an+b) */
43   string s;					/* :lang(s) */
44   struct _SimpleSelector *sel;			/* :not(sel), :is(sel,...) */
45   struct _PseudoCond *next;
46 } PseudoCond;
47 
48 EXPORT typedef enum {				/* =, ~=, ^=, $= *= |= */
49   Exists, Equals, Includes, StartsWith, EndsWith, Contains, LangMatch,
50   HasClass, HasID				/* ".foo", "#foo" */
51 } Operator;
52 
53 EXPORT typedef struct _AttribCond {
54   Operator op;
55   string name;					/* If not HasClass/ID */
56   string value;					/* If op!=Exists */
57   struct _AttribCond *next;
58 } AttribCond;
59 
60 EXPORT typedef enum {
61   Descendant, Child, Adjacent, Sibling		/* <space>, >, +, ~ */
62 } Combinator;
63 
64 EXPORT typedef struct _SimpleSelector {
65   string name;					/* NULL is "*" */
66   AttribCond *attribs;
67   PseudoCond *pseudos;
68   PseudoCond *pseudoelts;			/* E.g., ::attr(foo) */
69   Combinator combinator;			/* If context not NULL */
70   struct _SimpleSelector *context;
71   struct _SimpleSelector *next;			/* Comma-separated selectors */
72 } SimpleSelector, *Selector;
73 
/*
 * State -- the states of the parser in parse_selector(). The numeric
 * values show up in the "Incomplete selector" error message, so the
 * order of the constants must not change.
 */
typedef enum {
  INIT,				/* Expect a simple selector */
  START_SIMPLE,			/* At the start of a simple selector */
  AFTER_SIMPLE,			/* Expect a combinator */
  AFTER_TYPE,			/* After a type selector or a condition */
  CLASS,			/* Just seen a '.' */
  ID,				/* Just seen a '#' */
  ATTR,				/* Just seen a '[' */
  PSEUDO,			/* Just seen a ':' */
  AFTER_ATTR,			/* Seen '[' + ident */
  EQ,				/* Expect '=' */
  START_VALUE,			/* After '=' */
  DSTRING,			/* Inside "..." */
  SSTRING,			/* Inside '...' */
  AFTER_VALUE,			/* Expect ']' */
  PSEUDO_ELT,			/* Just seen '::' */
  IN_PSEUDO,			/* After ':nth-...', expect '(' */
  LANG,				/* After ':lang' */
  IN_PSEUDO_,			/* After ':nth-...(' */
  PSEUDO__O,			/* After ':nth-...(o' */
  PSEUDO__E,			/* After ':nth-...(e' */
  AFTER_N,			/* After ':nth-...(an' */
  AFTER_NUM,			/* After ':nth-...(' + number */
  END_PSEUDO,			/* Expect ')' */
  AFTER_PLUS,			/* After 'an+' */
  AFTER_MINUS,			/* After 'an-' */
  PSEUDO__OD,			/* After ':nth-...(od' */
  PSEUDO__ODD,			/* After ':nth-...(odd' */
  PSEUDO__EV,			/* After ':nth-...(ev' */
  PSEUDO__EVE,			/* After ':nth-...(eve' */
  PSEUDO__EVEN,			/* After ':nth-...(even' */
  LANG_,			/* After ':lang(' */
  PSEUDO_ELT_ATTR,		/* After '::attr' */
  PSEUDO_ELT_ATTR_,		/* After '::attr(' */
  IN_PSEUDO_PLUS,		/* After ':nth-...(+' */
  IN_PSEUDO_MINUS,		/* After ':nth-...(-' */
  NOT,				/* After ':not' */
  IS				/* After ':is' or ':where' */
} State;
82 
83 
84 /* strappc -- append a character to a malloc'ed string */
strappc(string * s,char c)85 static void strappc(string *s, char c)
86 {
87   assert(s);
88   int len = *s ? strlen(*s) : 0;
89   renewarray(*s, len + 2);
90   (*s)[len] = c;
91   (*s)[len+1] = '\0';
92 }
93 
94 
95 /* push_sel -- allocate memory for a new selector; initialize */
push_sel(Selector * selector,Combinator combinator)96 static void push_sel(Selector *selector, Combinator combinator)
97 {
98   Selector h;
99 
100   new(h);
101   h->name = NULL;
102   h->attribs = NULL;
103   h->pseudos = NULL;
104   h->pseudoelts = NULL;
105   h->context = *selector;
106   h->combinator = combinator;
107   h->next = NULL;
108   *selector = h;
109 }
110 
/*
 * isnmstart -- check if a character can start an identifier
 *
 * True for ASCII letters, underscore, backslash (start of an escape)
 * and anything that is not below '\200'.
 */
static bool isnmstart(unsigned int c)
{
  if (c == '_' || c == '\\') return true;
  if (c >= 'a' && c <= 'z') return true;
  if (c >= 'A' && c <= 'Z') return true;
  return c >= '\200';
}
120 
/*
 * isnmchar -- check if a character can be inside an identifier
 *
 * True for ASCII letters and digits, underscore, hyphen, backslash
 * (start of an escape) and anything that is not below '\200'.
 */
static bool isnmchar(unsigned int c)
{
  switch (c) {
  case '_':
  case '-':
  case '\\':
    return true;
  default:
    break;
  }
  if (c >= '0' && c <= '9') return true;
  if (c >= 'a' && c <= 'z') return true;
  if (c >= 'A' && c <= 'Z') return true;
  return c >= '\200';
}
132 
133 /* parse_comment -- skip over a comment */
parse_comment(string * s)134 static void parse_comment(string *s)
135 {
136   assert(s && *s && **s == '/');
137   if (*(++*s) != '*') errexit("Unexpected \"/\"\n");
138   for ((*s)++; **s; (*s)++)
139     if (**s == '*' && *(*s+1) == '/') {(*s) += 2; return;}
140   errexit("Comment is missing the closing \"*/\"\n");
141 }
142 
143 /* parse_escape -- parse a backslash-escaped character, append UTF-8 to value */
parse_escape(string * s,string * value)144 static void parse_escape(string *s, string *value)
145 {
146   int n;
147 
148   assert(value && s && *s && **s == '\\');
149   (*s)++;
150   if (!isxdigit(**s)) {strappc(value, **s); (*s)++; return;}
151   n = hexval(**s); (*s)++;
152   if (isxdigit(**s)) {n = 16 * n + hexval(**s); (*s)++;}
153   if (isxdigit(**s)) {n = 16 * n + hexval(**s); (*s)++;}
154   if (isxdigit(**s)) {n = 16 * n + hexval(**s); (*s)++;}
155   if (isxdigit(**s)) {n = 16 * n + hexval(**s); (*s)++;}
156   if (isxdigit(**s)) {n = 16 * n + hexval(**s); (*s)++;}
157   if (isspace(**s)) (*s)++;
158 
159   /* Convert to a UTF-8 string */
160   if (n <= 0x7F) {
161     strappc(value, n);
162   } else if (n <= 0x7FF) {
163     strappc(value, 0xC0 | (n >> 6));
164     strappc(value, 0x80 | (n & 0x3F));
165   } else if (n <= 0xFFFF) {
166     strappc(value, 0xE0 | (n >> 12));
167     strappc(value, 0x80 | ((n >> 6) & 0x3F));
168     strappc(value, 0x80 | (n & 0x3F));
169   } else if (n <= 0x1FFFFF) {
170     strappc(value, 0xF0 | (n >> 18));
171     strappc(value, 0x80 | ((n >> 12) & 0x3F));
172     strappc(value, 0x80 | ((n >> 6) & 0x3F));
173     strappc(value, 0x80 | (n & 0x3F));
174   } else if (n <= 0x3FFFFFF) {
175     strappc(value, 0xF0 | (n >> 24));
176     strappc(value, 0x80 | ((n >> 18) & 0x3F));
177     strappc(value, 0x80 | ((n >> 12) & 0x3F));
178     strappc(value, 0x80 | ((n >> 6) & 0x3F));
179     strappc(value, 0x80 | (n & 0x3F));
180   } else {
181     strappc(value, 0xF0 | (n >> 30));
182     strappc(value, 0x80 | ((n >> 24) & 0x3F));
183     strappc(value, 0x80 | ((n >> 18) & 0x3F));
184     strappc(value, 0x80 | ((n >> 12) & 0x3F));
185     strappc(value, 0x80 | ((n >> 6) & 0x3F));
186     strappc(value, 0x80 | (n & 0x3F));
187   }
188 }
189 
190 /* parse_ident -- parse and return an identifier */
parse_ident(string * s)191 static string parse_ident(string *s)
192 {
193   string ident = NULL;
194 
195   assert(*s && isnmchar(**s));	/* Not isnmstart(), it may be a HASH (#...) */
196   if (**s == '\\') parse_escape(s, &ident);
197   else strappc(&ident, *(*s)++);
198   while (isnmchar(**s))
199     if (**s == '\\') parse_escape(s, &ident);
200     else strappc(&ident, *(*s)++);
201   return ident;
202 }
203 
204 /* parse_int -- parse and return a decimal integer */
parse_int(string * s)205 static int parse_int(string *s)
206 {
207   int n = 0;
208   bool neg = false;
209 
210   assert(s && *s && (**s == '-' || **s == '+' || isdigit(**s)));
211   if (**s == '+') (*s)++; else if (**s == '-') {neg = true; (*s)++;}
212   if (!isdigit(**s))errexit("Expected a number after +/- but found \"%c\"\n",**s);
213   while (isdigit(**s)) {
214     n = 10 * n + (**s - '0');
215     if (n < 0) errexit("Cannot handle a number this big\n");
216     (*s)++;
217   }
218   return neg ? -n : n;
219 }
220 
221 /* parse_selector -- parse the selector in s */
parse_selector(const string selector,string * rest)222 EXPORT Selector parse_selector(const string selector, string *rest)
223 {
224   State state = INIT;
225   string name;
226   string s = selector;
227   AttribCond *attsel;
228   PseudoCond *pseudosel;
229   Selector sel = NULL;
230   int n = 0;			/* Avoid warning about uninitialized */
231 
232   push_sel(&sel, Descendant);
233 
234   while (*s) {
235     switch (state) {
236     case INIT:					/* Expect a simple sel */
237       if (isspace(*s)) s++;
238       else if (*s == '/') parse_comment(&s);
239       else state = START_SIMPLE;
240       break;
241     case AFTER_SIMPLE:				/* Expect a combinator */
242       if (isspace(*s)) s++;
243       else if (*s == '/') parse_comment(&s);
244       else if (*s == '+') {s++; push_sel(&sel, Adjacent); state = INIT;}
245       else if (*s == '>') {s++; push_sel(&sel, Child); state = INIT;}
246       else if (*s == '~') {s++; push_sel(&sel, Sibling); state = INIT;}
247       else if (*s == ',') {sel->next = parse_selector(s+1, &s);}
248       else if (*s == ')') {*rest = s; return sel;}
249       else {push_sel(&sel, Descendant); state = INIT;}
250       break;
251     case START_SIMPLE:				/* Start simple sel */
252       if (*s == '*') {s++; state = AFTER_TYPE;}
253       else if (*s == '.') {s++; state = CLASS;}
254       else if (*s == '#') {s++; state = ID;}
255       else if (*s == '[') {s++; state = ATTR;}
256       else if (*s == ':') {s++; state = PSEUDO;}
257       else if (isnmstart(*s)) {sel->name = parse_ident(&s); state = AFTER_TYPE;}
258       else errexit("Unexpected \"%c\"\n", *s);
259       break;
260     case AFTER_TYPE:				/* After a type sel */
261       if (*s == '/') parse_comment(&s);
262       else if (*s == '.') {s++; state = CLASS;}
263       else if (*s == '#') {s++; state = ID;}
264       else if (*s == '[') {s++; state = ATTR;}
265       else if (*s == ':') {s++; state = PSEUDO;}
266       else if (isnmstart(*s)) errexit("Unexpected \"%c\"\n", *s);
267       else state = AFTER_SIMPLE;
268       break;
269     case CLASS:					/* Just seen a '.' */
270       if (*s == '/') parse_comment(&s);
271       else if (isnmstart(*s)) {new(attsel); attsel->op = HasClass;
272 	attsel->value = parse_ident(&s); attsel->next = sel->attribs;
273 	sel->attribs = attsel; state = AFTER_TYPE;}
274       else errexit("Unexpected \"%c\" after \".\"\n", *s);
275       break;
276     case ID:					/* Just seen a '#' */
277       if (isnmchar(*s)) {new(attsel); attsel->op = HasID;
278 	attsel->value = parse_ident(&s); attsel->next = sel->attribs;
279 	sel->attribs = attsel; state = AFTER_TYPE;}
280       else errexit("Unexpected \"%c\" after \"#\"\n", *s);
281       break;
282     case ATTR:					/* Just seen '[' */
283       if (isspace(*s)) s++;
284       else if (*s == '/') parse_comment(&s);
285       else if (isnmstart(*s)) {new(attsel); attsel->name = parse_ident(&s);
286 	attsel->next = sel->attribs; sel->attribs = attsel; state = AFTER_ATTR;}
287       else errexit("Unexpected \"%c\" after \"[\"\n", *s);
288       break;
289     case AFTER_ATTR:				/* Just seen a '[' + ident */
290       if (isspace(*s)) s++;
291       else if (*s == '/') parse_comment(&s);
292       else if (*s == ']') {s++; sel->attribs->op = Exists; state = AFTER_TYPE;}
293       else if (*s == '~') {s++; sel->attribs->op = Includes; state = EQ;}
294       else if (*s == '|') {s++; sel->attribs->op = LangMatch; state = EQ;}
295       else if (*s == '^') {s++; sel->attribs->op = StartsWith; state = EQ;}
296       else if (*s == '$') {s++; sel->attribs->op = EndsWith; state = EQ;}
297       else if (*s == '*') {s++; sel->attribs->op = Contains; state = EQ;}
298       else {sel->attribs->op = Equals; state = EQ;}
299       break;
300     case EQ:					/* Expect '=' */
301       if (*s != '=') errexit("Expected '=' instead of \"%c\"\n", *s);
302       else {s++; sel->attribs->value = NULL; state = START_VALUE;}
303       break;
304     case START_VALUE:				/* After '=' */
305       if (isspace(*s)) s++;
306       else if (*s == '/') parse_comment(&s);
307       else if (*s == '"') {s++; state = DSTRING;}
308       else if (*s == '\'') {s++; state = SSTRING;}
309       else if (isnmstart(*s)) {sel->attribs->value = parse_ident(&s);
310 	state = AFTER_VALUE;}
311       else errexit("Expected string or name after \"=\" but found \"%c\"\n",*s);
312       break;
313     case DSTRING:				/* Inside "..." */
314       if (*s == '"') {
315         s++;
316         if (!sel->attribs->value) sel->attribs->value = newstring("");
317         state = AFTER_VALUE;
318       }
319       else if (*s == '\\') parse_escape(&s, &sel->attribs->value);
320       else {strappc(&sel->attribs->value, *s); s++;}
321       break;
322     case SSTRING:				/* Inside "..." */
323       if (*s == '\'') {
324         s++;
325         if (!sel->attribs->value) sel->attribs->value = newstring("");
326         state = AFTER_VALUE;
327       }
328       else if (*s == '\\') parse_escape(&s, &sel->attribs->value);
329       else {strappc(&sel->attribs->value, *s); s++;}
330       break;
331     case AFTER_VALUE:				/* Expect ']' */
332       if (isspace(*s)) s++;
333       else if (*s == '/') parse_comment(&s);
334       else if (*s == ']') {s++; state = AFTER_TYPE;}
335       else errexit("Expected ']' instead of \"%c\"\n", *s);
336       break;
337     case PSEUDO:				/* After ':' */
338       if (*s == '/') parse_comment(&s);
339       else if (*s == ':') {s++; state = PSEUDO_ELT;}
340       else if (!isnmstart(*s))
341 	errexit("Expected a pseudo-class after \":\" but found \"%c\"\n", *s);
342       else {new(pseudosel); pseudosel->next = sel->pseudos;
343 	sel->pseudos = pseudosel; name = parse_ident(&s);
344 	if (strcasecmp(name, "root") == 0) {
345 	  pseudosel->type = RootSel; state = AFTER_TYPE;}
346 	else if (strcasecmp(name, "nth-child") == 0) {
347 	  pseudosel->type = NthChild; state = IN_PSEUDO;}
348 	else if (strcasecmp(name, "nth-last-child") == 0) {
349 	  pseudosel->type = NthLastChild; state = IN_PSEUDO;}
350 	else if (strcasecmp(name, "nth-of-type") == 0) {
351 	  pseudosel->type = NthOfType; state = IN_PSEUDO;}
352 	else if (strcasecmp(name, "nth-last-of-type") == 0) {
353 	  pseudosel->type = NthLastOfType; state = IN_PSEUDO;}
354 	else if (strcasecmp(name, "first-child") == 0) {
355 	  pseudosel->type = FirstChild; state = AFTER_TYPE;}
356 	else if (strcasecmp(name, "last-child") == 0) {
357 	  pseudosel->type = LastChild; state = AFTER_TYPE;}
358 	else if (strcasecmp(name, "first-of-type") == 0) {
359 	  pseudosel->type = FirstOfType; state = AFTER_TYPE;}
360 	else if (strcasecmp(name, "last-of-type") == 0) {
361 	  pseudosel->type = LastOfType; state = AFTER_TYPE;}
362 	else if (strcasecmp(name, "only-child") == 0) {
363 	  pseudosel->type = OnlyChild; state = AFTER_TYPE;}
364 	else if (strcasecmp(name, "only-of-type") == 0) {
365 	  pseudosel->type = OnlyOfType; state = AFTER_TYPE;}
366 	else if (strcasecmp(name, "empty") == 0) {
367 	  pseudosel->type = Empty; state = AFTER_TYPE;}
368 	else if (strcasecmp(name, "lang") == 0) {
369 	  pseudosel->type = Lang; state = LANG;}
370 	else if (strcasecmp(name, "not") == 0) {
371 	  pseudosel->type = Not; state = NOT;}
372 	else if (strcasecmp(name, "is") == 0
373 	  || strcasecmp(name, "where") == 0) {
374 	  pseudosel->type = Is; state = IS;}
375 	else errexit("Unknown pseudo-class \"%s\"\n", name);
376       }
377       break;
378     case IN_PSEUDO:				/* After ':...', expect '(' */
379       if (*s == '(') {s++; state = IN_PSEUDO_;}
380       else errexit("Expecting a \"(\" but found \"%c\"\n", *s);
381       break;
382     case IN_PSEUDO_:				/* Expecting an+b */
383       if (isspace(*s)) s++;
384       else if (*s == '/') parse_comment(&s);
385       else if (*s == 'o' || *s == 'O') {s++; state = PSEUDO__O;}
386       else if (*s == 'e' || *s == 'E') {s++; state = PSEUDO__E;}
387       else if (*s == 'n' || *s == 'N') {sel->pseudos->a = 1; s++;state=AFTER_N;}
388       else if (*s == '+') {s++; state = IN_PSEUDO_PLUS;}
389       else if (*s == '-') {s++; state = IN_PSEUDO_MINUS;}
390       else if (isdigit(*s)) {n = parse_int(&s); state = AFTER_NUM;}
391       else errexit("Expected a number, but found \"%c\"\n", *s);
392       break;
393     case IN_PSEUDO_PLUS:			/* After ':pseudo(+' */
394       if (*s == 'n') {sel->pseudos->a = 1; s++; state = AFTER_N;}
395       else if (isdigit(*s)) {n = parse_int(&s); state = AFTER_NUM;}
396       else errexit("Expected a number after \"+\" but found \"%c\"\n", *s);
397       break;
398     case IN_PSEUDO_MINUS:			/* After ':pseudo(-' */
399       if (*s == 'n') {sel->pseudos->a = -1; s++; state = AFTER_N;}
400       else if (isdigit(*s)) {n = -parse_int(&s); state = AFTER_NUM;}
401       else errexit("Expected a number after \"-\" but found \"%c\"\n", *s);
402       break;
403     case AFTER_NUM:				/* After ':pseudo(' + number */
404       if (*s == 'n' || *s == 'N') {sel->pseudos->a = n;	s++; state = AFTER_N;}
405       else {sel->pseudos->a = 0; sel->pseudos->b = n; state = END_PSEUDO;}
406       break;
407     case AFTER_N:				/* After ':pseudo(an' */
408       if (isspace(*s)) s++;
409       else if (*s == '+') {s++; state = AFTER_PLUS;}
410       else if (*s == '-') {s++; state = AFTER_MINUS;}
411       else {sel->pseudos->b = 0; state = END_PSEUDO;}
412       break;
413     case AFTER_PLUS:				/* After an+ */
414       if (isspace(*s)) s++;
415       else if (isdigit(*s)) {sel->pseudos->b = parse_int(&s); state=END_PSEUDO;}
416       else errexit("Expected a number after the \"+\", but found \"%c\"\n", *s);
417       break;
418     case AFTER_MINUS:				/* After an- */
419       if (isspace(*s)) s++;
420       else if (isdigit(*s)) {sel->pseudos->b= -parse_int(&s); state=END_PSEUDO;}
421       else errexit("Expected a number after the \"-\", but found \"%c\"\n", *s);
422       break;
423     case END_PSEUDO:				/* Expecting ')' */
424       if (isspace(*s)) s++;
425       else if (*s == '/') parse_comment(&s);
426       else if (*s == ')') {s++; state = AFTER_TYPE;}
427       else errexit("Expected \")\" but found \"%c\"\n", *s);
428       break;
429     case PSEUDO__O:				/* After :nth...(o */
430       if (*s == 'd' || *s == 'D') {s++; state = PSEUDO__OD;}
431       else errexit("Illegal character \"%c\" in \":nth-...(\"\n", *s);
432       break;
433     case PSEUDO__OD:				/* After :nth...(od */
434       if (*s == 'd' || *s == 'D') {s++; state = PSEUDO__ODD;}
435       else errexit("Illegal character \"%c\" in \":nth-...(\"\n", *s);
436       break;
437     case PSEUDO__ODD:				/* After :nth-...(odd */
438       if (!isnmchar(*s)) {state = END_PSEUDO;
439         sel->pseudos->a = 2; sel->pseudos->b = 1;}
440       else errexit("Illegal character \"%c\" in \":nth-...(\"\n", *s);
441       break;
442     case PSEUDO__E:				/* After :nth-...(e */
443       if (*s == 'v' || *s == 'V') {s++; state = PSEUDO__EV;}
444       else errexit("Illegal character \"%c\" in \":nth-...(\"\n", *s);
445       break;
446     case PSEUDO__EV:				/* After :nth-...(ev */
447       if (*s == 'e' || *s == 'E') {s++; state = PSEUDO__EVE;}
448       else errexit("Illegal character \"%c\" in \":nth-...(\"\n", *s);
449       break;
450     case PSEUDO__EVE:				/* After :nth-...(eve */
451       if (*s == 'n' || *s == 'N') {s++; state = PSEUDO__EVEN;}
452       else errexit("Illegal character \"%c\" in \":nth-...(\"\n", *s);
453       break;
454     case PSEUDO__EVEN:				/* Afte :nth-...(even */
455       if (!isnmchar(*s)) {state = END_PSEUDO;
456         sel->pseudos->a = 2; sel->pseudos->b = 0;}
457       else errexit("Illegal character \"%c\" in \":nth-...(\"\n", *s);
458       break;
459     case LANG:					/* After ':lang' */
460       if (*s == '(') {s++; state = LANG_;}
461       else errexit("Expecting \"(\" after \":lang\" but found \"%c\"\n", *s);
462       break;
463     case LANG_:					/* After ':lang(' */
464       if (isspace(*s)) s++;
465       else if (*s == '/') parse_comment(&s);
466       else if (isnmstart(*s)) {sel->pseudos->s = parse_ident(&s);
467 	state = END_PSEUDO;}
468       else errexit("Incorrect \":lang(\" at \"%c\"\n", *s);
469       break;
470     case NOT:					/* After ':not' */
471       if (*s == '(') {sel->pseudos->sel = parse_selector(s + 1, &s);
472 	state = END_PSEUDO;}
473       else errexit("Expecting \"(\" after \":not\" but found \"%c\"\n", *s);
474       break;
475     case IS:					/* After ':is' */
476       if (*s == '(') {sel->pseudos->sel = parse_selector(s + 1, &s);
477 	state = END_PSEUDO;}
478       else errexit("Expecting \"(\" after \":is\" but found \"%c\"\n", *s);
479       break;
480     case PSEUDO_ELT:				/* After '::' */
481       if (*s == '/') parse_comment(&s);
482       else if (!isnmstart(*s))
483 	errexit("Expected a pseudo-element after \"::\" but found \"%c\"\n",*s);
484       else {push_sel(&sel, Child); new(pseudosel);
485 	pseudosel->next = sel->pseudoelts; sel->pseudoelts = pseudosel;
486 	name = parse_ident(&s);
487 	if (strcasecmp(name, "attr") == 0) {
488 	  sel->pseudoelts->type = AttrNode; state = PSEUDO_ELT_ATTR;}
489 	else errexit("Unknown pseudo-element \"%s\"\n", name);
490       }
491       break;
492     case PSEUDO_ELT_ATTR:			/* After '::attr' */
493       if (*s == '(') {s++; state = PSEUDO_ELT_ATTR_;}
494       else errexit("Expected \"(\" after \"::attr\" but found \"%c\"\n", *s);
495       break;
496     case PSEUDO_ELT_ATTR_:			/* After '::attr(' */
497       if (isspace(*s)) s++;
498       else if (*s == '/') parse_comment(&s);
499       else if (!isnmstart(*s)) errexit("Expected a name after \"::attr(\"\n");
500       else {sel->pseudoelts->s = parse_ident(&s); state = END_PSEUDO;}
501       break;
502     default:
503       assert(!"Cannot happen");
504     }
505   }
506   if (state != AFTER_TYPE && state != AFTER_SIMPLE)
507     errexit("Incomplete selector (state %d)\n", state);
508 
509   *rest = s;
510   return sel;
511 }
512 
513 void dump_selector(FILE *f, Selector s);
514 
515 /* utf8toint -- convert a UTF-8 sequence to a character code point */
utf8toint(conststring s,conststring * t)516 static int utf8toint(conststring s, conststring *t)
517 {
518   int n;
519 
520   assert(s);
521   assert(*s);
522   if ((*s & 0x80) == 0) {*t = s + 1; return *s;} /* 0xxxxxxx */
523   if ((*s & 0xE0) == 0xC0) n = *s & 0x1F;	 /* 110xxxxx */
524   else if ((*s & 0xF0) == 0xE0) n = *s & 0xF;	 /* 1110xxxx */
525   else if ((*s & 0xF8) == 0xF0) n = *s & 0x7;	 /* 11110xxx */
526   else {*t = s + 1; return *s;}	/* Error! */
527   for (s++; *s && (*s & 0x80) == 0x80; s++) n = (n << 6) | (*s & 0x3F);
528   *t = s;
529   return n;
530 }
531 
532 /* esc -- print a string, escaping special characters */
esc(FILE * f,conststring s)533 static void esc(FILE *f, conststring s)
534 {
535   assert(s);
536 
537   if (!*s)	     /* Can only happen with a string, not an ident */
538     fprintf(f, "\"\"");
539   else {
540     while (*s) {
541       if (('a' <= *s && *s <= 'z') || ('A' <= *s && *s <= 'Z') ||
542 	  ('0' <= *s && *s <= '9') || *s == '-' || *s == '_' || *s & 0x80)
543 	putc(*(s++), f);
544       else
545 	fprintf(f, "\\%X ", utf8toint(s, &s));
546     }
547   }
548 }
549 
550 /* dump_simple_selector -- serialize a simple selector */
dump_simple_selector(FILE * f,const SimpleSelector * s)551 EXPORT void dump_simple_selector(FILE *f, const SimpleSelector *s)
552 {
553   AttribCond *a;
554   PseudoCond *p;
555 
556   if (s->name) esc(f, s->name); else putc('*', f);
557   for (a = s->attribs; a; a = a->next) {
558     if (a->op == HasClass) {putc('.', f); esc(f, a->value);}
559     else if (a->op == HasID) {putc('#', f); esc(f, a->value);}
560     else {
561       putc('[', f); esc(f, a->name);
562       switch (a->op) {
563       case Exists: break;
564       case Equals: putc('=', f); break;
565       case Includes: fprintf(f, "~="); break;
566       case StartsWith: fprintf(f, "^="); break;
567       case EndsWith: fprintf(f, "$="); break;
568       case Contains: fprintf(f, "*"); break;
569       case LangMatch: fprintf(f, "|="); break;
570       default: assert(!"Cannot happen");
571       }
572       if (a->op != Exists) esc(f, a->value);
573       putc(']', f);
574     }
575   }
576   for (p = s->pseudos; p; p = p->next) {
577     switch (p->type) {
578     case RootSel: fprintf(f, ":root"); break;
579     case NthChild: fprintf(f, ":nth-child(%dn+%d)", p->a, p->b); break;
580     case NthOfType: fprintf(f, ":nth-of-type(%dn+%d)", p->a, p->b); break;
581     case FirstChild: fprintf(f, ":first-child"); break;
582     case FirstOfType: fprintf(f, ":first-of-type"); break;
583     case Lang: fprintf(f, ":lang("); esc(f, p->s); putc(')', f); break;
584     case NthLastChild: fprintf(f, ":nth-last-child(%dn+%d)", p->a, p->b); break;
585     case NthLastOfType: fprintf(f, ":nth-last-of-type(%dn+%d)",p->a,p->b);break;
586     case LastChild: fprintf(f, ":last-child"); break;
587     case LastOfType: fprintf(f, ":last-of-type"); break;
588     case OnlyChild: fprintf(f, ":only-child"); break;
589     case OnlyOfType: fprintf(f, ":only-of-type"); break;
590     case Empty: fprintf(f, ":empty"); break;
591     case Not: fprintf(f,":not("); dump_selector(f,p->sel); fprintf(f,")");break;
592     case Is: fprintf(f,":is("); dump_selector(f,p->sel); fprintf(f,")");break;
593     default: assert(!"Cannot happen");
594     }
595   }
596   for (p = s->pseudoelts; p; p = p->next) {
597     switch (p->type) {
598     case AttrNode: fprintf(f, "::attr("); esc(f, p->s); putc(')', f); break;
599     default: assert(!"Cannot happen");
600     }
601   }
602 }
603 
604 /* dump_selector -- serialize a selector */
dump_selector(FILE * f,const Selector s)605 EXPORT void dump_selector(FILE *f, const Selector s)
606 {
607   assert(s);
608 
609   if (s->context) {
610     dump_selector(f, s->context);
611     switch (s->combinator) {
612     case Descendant: fprintf(f, " "); break;
613     case Child: fprintf(f, " > "); break;
614     case Adjacent: fprintf(f, " + "); break;
615     case Sibling: fprintf(f, " ~ "); break;
616     default: assert(!"Cannot happen");
617     }
618   }
619   dump_simple_selector(f, s);
620 
621   if (s->next) {
622     fprintf(f, ", ");
623     dump_selector(f, s->next);
624   }
625  }
626