1 #include <inttypes.h>
2 #include <stdbool.h>
3 #include <stdio.h>
4 #include <stdlib.h>
5 #include <string.h>
6
7 #include <parserutils/input/inputstream.h>
8
9 #include <libcss/libcss.h>
10
11 #include "charset/detect.h"
12 #include "lex/lex.h"
13 #include "utils/utils.h"
14
15 #include "testutils.h"
16
/**
 * One expected token, as parsed from a line of a testcase's
 * expected-output section.
 */
typedef struct exp_entry {
	css_token_type type;		/**< Token type the lexer must emit */
#define EXP_ENTRY_TEXT_LEN (128)
	char text[EXP_ENTRY_TEXT_LEN];	/**< Expected token text; length is
					 * given by textLen, no terminator
					 * is stored */
	size_t textLen;			/**< Number of bytes used in text */
	bool hasText;			/**< True if the line contained a ':'
					 * and so the token text must be
					 * compared as well as the type */
} exp_entry;
24
/**
 * State carried between handle_line() calls while a test file is read.
 */
typedef struct line_ctx {
	size_t buflen;		/**< Allocated size of buf, in bytes */
	size_t bufused;		/**< Bytes of testcase input stored in buf */
	uint8_t *buf;		/**< Input data for the current testcase */

	size_t explen;		/**< Allocated capacity of exp, in entries */
	size_t expused;		/**< Entries of exp in use */
	exp_entry *exp;		/**< Expected tokens for the current testcase */

	bool indata;		/**< Currently inside a "#data" section */
	bool inexp;		/**< Currently inside a "#expected" section */
} line_ctx;
37
/* Forward declarations of the file-local helpers defined below. */
static bool handle_line(const char *data, size_t datalen, void *pw);
static void css__parse_expected(line_ctx *ctx, const char *data, size_t len);
static const char *string_from_type(css_token_type type);
static css_token_type string_to_type(const char *data, size_t len);
static void run_test(const uint8_t *data, size_t len,
		exp_entry *exp, size_t explen);
44
main(int argc,char ** argv)45 int main(int argc, char **argv)
46 {
47 line_ctx ctx;
48
49 if (argc != 2) {
50 printf("Usage: %s <filename>\n", argv[0]);
51 return 1;
52 }
53
54 ctx.buflen = css__parse_filesize(argv[1]);
55 if (ctx.buflen == 0)
56 return 1;
57
58 ctx.buf = malloc(ctx.buflen);
59 if (ctx.buf == NULL) {
60 printf("Failed allocating %u bytes\n",
61 (unsigned int) ctx.buflen);
62 return 1;
63 }
64
65 ctx.buf[0] = '\0';
66 ctx.bufused = 0;
67 ctx.explen = 0;
68 ctx.expused = 0;
69 ctx.exp = NULL;
70 ctx.indata = false;
71 ctx.inexp = false;
72
73 assert(css__parse_testfile(argv[1], handle_line, &ctx) == true);
74
75 /* and run final test */
76 if (ctx.bufused > 0)
77 run_test(ctx.buf, ctx.bufused, ctx.exp, ctx.expused);
78
79 free(ctx.buf);
80 free(ctx.exp);
81
82 printf("PASS\n");
83
84 return 0;
85 }
86
handle_line(const char * data,size_t datalen,void * pw)87 bool handle_line(const char *data, size_t datalen, void *pw)
88 {
89 line_ctx *ctx = (line_ctx *) pw;
90
91 if (data[0] == '#') {
92 if (ctx->inexp) {
93 /* This marks end of testcase, so run it */
94
95 run_test(ctx->buf, ctx->bufused,
96 ctx->exp, ctx->expused);
97
98 ctx->buf[0] = '\0';
99 ctx->bufused = 0;
100
101 ctx->expused = 0;
102 }
103
104 if (ctx->indata && strncasecmp(data+1, "expected", 8) == 0) {
105 ctx->indata = false;
106 ctx->inexp = true;
107 } else if (!ctx->indata) {
108 ctx->indata = (strncasecmp(data+1, "data", 4) == 0);
109 ctx->inexp = (strncasecmp(data+1, "expected", 8) == 0);
110 } else {
111 memcpy(ctx->buf + ctx->bufused, data, datalen);
112 ctx->bufused += datalen;
113 }
114 } else {
115 if (ctx->indata) {
116 memcpy(ctx->buf + ctx->bufused, data, datalen);
117 ctx->bufused += datalen;
118 }
119 if (ctx->inexp) {
120 if (data[datalen - 1] == '\n')
121 datalen -= 1;
122
123 css__parse_expected(ctx, data, datalen);
124 }
125 }
126
127 return true;
128 }
129
/**
 * Parse one line of expected output and append it to the context's list
 * of expected tokens.
 *
 * The line has the form "TYPE" or "TYPE:text". Within the text, a
 * backslash escapes the following character: "\n" yields a line feed,
 * "\t" a tab, and any other escaped character stands for itself.
 *
 * \param ctx   Line context to append to
 * \param data  Line contents, without trailing newline
 * \param len   Length of the line, in bytes
 */
void css__parse_expected(line_ctx *ctx, const char *data, size_t len)
{
	const char *end = data + len;
	const char *sep = css__parse_strnchr(data, len, ':');
	css_token_type type;
	exp_entry *entry;

	/* Without a separator the whole line is the type name */
	if (sep == NULL)
		sep = end;

	type = string_to_type(data, sep - data);

	/* Grow the list of expected tokens when it is full */
	if (ctx->expused == ctx->explen) {
		size_t capacity = (ctx->explen == 0 ? 4 : ctx->explen) * 2;
		exp_entry *grown = realloc(ctx->exp,
				capacity * sizeof(exp_entry));

		if (grown == NULL) {
			assert(0 && "No memory for expected tokens");
		}

		ctx->exp = grown;
		ctx->explen = capacity;
	}

	entry = &ctx->exp[ctx->expused];
	entry->type = type;
	entry->textLen = 0;
	entry->hasText = (sep != end);

	if (entry->hasText) {
		bool escaped = false;
		const char *p;

		for (p = sep + 1; p < end; p++) {
			char c = *p;

			if (!escaped && c == '\\') {
				/* Consume the backslash; the next character
				 * is interpreted as an escape */
				escaped = true;
				continue;
			}

			if (escaped) {
				if (c == 'n')
					c = 0xa;
				else if (c == 't')
					c = 0x9;

				escaped = false;
			}

			entry->text[entry->textLen] = c;
			entry->textLen++;

			assert(entry->textLen < EXP_ENTRY_TEXT_LEN);
		}
	}

	ctx->expused++;
}
197
/**
 * Map a token type to a printable name for diagnostics.
 *
 * \param type  Token type to name
 * \return Name of the token type, or "UNKNOWN" if the value lies outside
 *         the name table
 */
const char *string_from_type(css_token_type type)
{
	/* Indexed by the numeric value of the token type */
	static const char * const names[] =
	{
		"IDENT", "ATKEYWORD", "HASH", "FUNCTION", "STRING", "INVALID",
		"URI", "UNICODE-RANGE", "CHAR", "NUMBER", "PERCENTAGE",
		"DIMENSION", "last_intern", "CDO", "CDC", "S", "COMMENT",
		"INCLUDES", "DASHMATCH", "PREFIXMATCH", "SUFFIXMATCH",
		"SUBSTRINGMATCH", "EOF"
	};
	const size_t count = sizeof(names) / sizeof(names[0]);

	/* The cast also turns a negative value into an out-of-range
	 * index, so one comparison covers both directions. */
	if ((size_t) type >= count)
		return "UNKNOWN";

	return names[type];
}
211
string_to_type(const char * data,size_t len)212 css_token_type string_to_type(const char *data, size_t len)
213 {
214 if (len == 5 && strncasecmp(data, "IDENT", len) == 0)
215 return CSS_TOKEN_IDENT;
216 else if (len == 9 && strncasecmp(data, "ATKEYWORD", len) == 0)
217 return CSS_TOKEN_ATKEYWORD;
218 else if (len == 6 && strncasecmp(data, "STRING", len) == 0)
219 return CSS_TOKEN_STRING;
220 else if (len == 7 && strncasecmp(data, "INVALID", len) == 0)
221 return CSS_TOKEN_INVALID_STRING;
222 else if (len == 4 && strncasecmp(data, "HASH", len) == 0)
223 return CSS_TOKEN_HASH;
224 else if (len == 6 && strncasecmp(data, "NUMBER", len) == 0)
225 return CSS_TOKEN_NUMBER;
226 else if (len == 10 && strncasecmp(data, "PERCENTAGE", len) == 0)
227 return CSS_TOKEN_PERCENTAGE;
228 else if (len == 9 && strncasecmp(data, "DIMENSION", len) == 0)
229 return CSS_TOKEN_DIMENSION;
230 else if (len == 3 && strncasecmp(data, "URI", len) == 0)
231 return CSS_TOKEN_URI;
232 else if (len == 13 && strncasecmp(data, "UNICODE-RANGE", len) == 0)
233 return CSS_TOKEN_UNICODE_RANGE;
234 else if (len == 3 && strncasecmp(data, "CDO", len) == 0)
235 return CSS_TOKEN_CDO;
236 else if (len == 3 && strncasecmp(data, "CDC", len) == 0)
237 return CSS_TOKEN_CDC;
238 else if (len == 1 && strncasecmp(data, "S", len) == 0)
239 return CSS_TOKEN_S;
240 else if (len == 7 && strncasecmp(data, "COMMENT", len) == 0)
241 return CSS_TOKEN_COMMENT;
242 else if (len == 8 && strncasecmp(data, "FUNCTION", len) == 0)
243 return CSS_TOKEN_FUNCTION;
244 else if (len == 8 && strncasecmp(data, "INCLUDES", len) == 0)
245 return CSS_TOKEN_INCLUDES;
246 else if (len == 9 && strncasecmp(data, "DASHMATCH", len) == 0)
247 return CSS_TOKEN_DASHMATCH;
248 else if (len == 11 && strncasecmp(data, "PREFIXMATCH", len) == 0)
249 return CSS_TOKEN_PREFIXMATCH;
250 else if (len == 11 && strncasecmp(data, "SUFFIXMATCH", len) == 0)
251 return CSS_TOKEN_SUFFIXMATCH;
252 else if (len == 14 && strncasecmp(data, "SUBSTRINGMATCH", len) == 0)
253 return CSS_TOKEN_SUBSTRINGMATCH;
254 else if (len == 4 && strncasecmp(data, "CHAR", len) == 0)
255 return CSS_TOKEN_CHAR;
256 else
257 return CSS_TOKEN_EOF;
258 }
259
/**
 * Run a single testcase: lex the given input and compare each token
 * against the expected list.
 *
 * Any mismatch in token type, text length or text, and any difference
 * between the number of tokens produced and the number expected, is
 * reported and asserted on.
 *
 * \param data    Input to feed to the lexer
 * \param len     Length of the input, in bytes
 * \param exp     Expected tokens
 * \param explen  Number of entries in exp
 */
void run_test(const uint8_t *data, size_t len, exp_entry *exp, size_t explen)
{
	parserutils_inputstream *input;
	css_lexer *lexer;
	css_error error;
	css_token *tok;
	size_t e;
	static int testnum;	/* Counts testcases across calls, for output */

	assert(parserutils_inputstream_create("UTF-8", CSS_CHARSET_DICTATED,
			css__charset_extract, &input) == PARSERUTILS_OK);

	assert(css__lexer_create(input, &lexer) == CSS_OK);

	assert(parserutils_inputstream_append(input, data, len) ==
			PARSERUTILS_OK);

	/* NOTE(review): the NULL/0 append presumably marks end of input;
	 * confirm against the inputstream API. */
	assert(parserutils_inputstream_append(input, NULL, 0) ==
			PARSERUTILS_OK);

	e = 0;
	testnum++;

	while ((error = css__lexer_get_token(lexer, &tok)) == CSS_OK) {
		const exp_entry *want;

		/* Never index past the expected list: a surplus token is a
		 * test failure, not a licence to read out of bounds. */
		if (e >= explen) {
			printf("%d: Got unexpected token %s [%d, %d]\n",
					testnum, string_from_type(tok->type),
					tok->line, tok->col);
			assert(0 && "More tokens than expected");
		}

		want = &exp[e];

		if (tok->type != want->type) {
			printf("%d: Got token %s, Expected %s [%d, %d]\n",
					testnum, string_from_type(tok->type),
					string_from_type(want->type),
					tok->line, tok->col);
			assert(0 && "Types differ");
		}

		if (want->hasText) {
			if (tok->data.len != want->textLen) {
				printf("%d: Got length %d, Expected %d\n",
						testnum, (int) tok->data.len,
						(int) want->textLen);
				assert(0 && "Text lengths differ");
			}

			/* Lengths are equal here; compare the raw bytes so
			 * that the whole span is always examined. */
			if (tok->data.len > 0 &&
					memcmp(tok->data.data, want->text,
						tok->data.len) != 0) {
				printf("%d: Got data '%.*s', Expected '%.*s'\n",
						testnum,
						(int) tok->data.len,
						tok->data.data,
						(int) want->textLen,
						want->text);
				assert(0 && "Text differs");
			}
		}

		e++;

		if (tok->type == CSS_TOKEN_EOF)
			break;
	}

	/* Every expected token must have been consumed */
	assert(e == explen);

	css__lexer_destroy(lexer);

	parserutils_inputstream_destroy(input);

	printf("Test %d: PASS\n", testnum);
}
324