1 #include <inttypes.h>
2 #include <stdbool.h>
3 #include <stdio.h>
4 #include <stdlib.h>
5 #include <string.h>
6 
7 #include <parserutils/input/inputstream.h>
8 
9 #include <libcss/libcss.h>
10 
11 #include "charset/detect.h"
12 #include "lex/lex.h"
13 #include "utils/utils.h"
14 
15 #include "testutils.h"
16 
/**
 * One expected token, built by css__parse_expected() from a single line of
 * the expected-output section of a test file.
 */
typedef struct exp_entry {
	css_token_type type;	/**< Token type the lexer is expected to emit */
/* Capacity of the text buffer; the parser asserts textLen stays below it */
#define EXP_ENTRY_TEXT_LEN (128)
	char text[EXP_ENTRY_TEXT_LEN];	/**< Expected token text; its length
					 *   is held in textLen and no
					 *   terminator is written */
	size_t textLen;		/**< Number of bytes used in text */
	bool hasText;		/**< True if the expectation line contained
				 *   a ':' separator; only then is the token
				 *   text compared by run_test() */
} exp_entry;
24 
/**
 * State carried between handle_line() invocations while a test file is read.
 */
typedef struct line_ctx {
	size_t buflen;		/**< Allocated size of buf, in bytes */
	size_t bufused;		/**< Bytes of test input accumulated in buf */
	uint8_t *buf;		/**< Input data for the current testcase */

	size_t explen;		/**< Number of entries allocated in exp */
	size_t expused;		/**< Number of entries of exp in use */
	exp_entry *exp;		/**< Expected tokens for the current testcase */

	bool indata;		/**< Currently inside a "#data" section */
	bool inexp;		/**< Currently inside a "#expected" section */
} line_ctx;
37 
/* Forward declarations of the file-local helpers defined below. */

/* Per-line callback handed to css__parse_testfile(); pw is a line_ctx. */
static bool handle_line(const char *data, size_t datalen, void *pw);
/* Append one expected token, parsed from an expectation line, to ctx->exp. */
static void css__parse_expected(line_ctx *ctx, const char *data, size_t len);
/* Map a token type to its printable name. */
static const char *string_from_type(css_token_type type);
/* Map a (case-insensitive) token name to its type. */
static css_token_type string_to_type(const char *data, size_t len);
/* Lex the given input and compare the tokens against the expectations. */
static void run_test(const uint8_t *data, size_t len,
		exp_entry *exp, size_t explen);
44 
main(int argc,char ** argv)45 int main(int argc, char **argv)
46 {
47 	line_ctx ctx;
48 
49 	if (argc != 2) {
50 		printf("Usage: %s <filename>\n", argv[0]);
51 		return 1;
52 	}
53 
54 	ctx.buflen = css__parse_filesize(argv[1]);
55 	if (ctx.buflen == 0)
56 		return 1;
57 
58 	ctx.buf = malloc(ctx.buflen);
59 	if (ctx.buf == NULL) {
60 		printf("Failed allocating %u bytes\n",
61 				(unsigned int) ctx.buflen);
62 		return 1;
63 	}
64 
65 	ctx.buf[0] = '\0';
66 	ctx.bufused = 0;
67 	ctx.explen = 0;
68 	ctx.expused = 0;
69 	ctx.exp = NULL;
70 	ctx.indata = false;
71 	ctx.inexp = false;
72 
73 	assert(css__parse_testfile(argv[1], handle_line, &ctx) == true);
74 
75 	/* and run final test */
76 	if (ctx.bufused > 0)
77 		run_test(ctx.buf, ctx.bufused, ctx.exp, ctx.expused);
78 
79 	free(ctx.buf);
80 	free(ctx.exp);
81 
82 	printf("PASS\n");
83 
84 	return 0;
85 }
86 
handle_line(const char * data,size_t datalen,void * pw)87 bool handle_line(const char *data, size_t datalen, void *pw)
88 {
89 	line_ctx *ctx = (line_ctx *) pw;
90 
91 	if (data[0] == '#') {
92 		if (ctx->inexp) {
93 			/* This marks end of testcase, so run it */
94 
95 			run_test(ctx->buf, ctx->bufused,
96 					ctx->exp, ctx->expused);
97 
98 			ctx->buf[0] = '\0';
99 			ctx->bufused = 0;
100 
101 			ctx->expused = 0;
102 		}
103 
104 		if (ctx->indata && strncasecmp(data+1, "expected", 8) == 0) {
105 			ctx->indata = false;
106 			ctx->inexp = true;
107 		} else if (!ctx->indata) {
108 			ctx->indata = (strncasecmp(data+1, "data", 4) == 0);
109 			ctx->inexp  = (strncasecmp(data+1, "expected", 8) == 0);
110 		} else {
111 			memcpy(ctx->buf + ctx->bufused, data, datalen);
112 			ctx->bufused += datalen;
113 		}
114 	} else {
115 		if (ctx->indata) {
116 			memcpy(ctx->buf + ctx->bufused, data, datalen);
117 			ctx->bufused += datalen;
118 		}
119 		if (ctx->inexp) {
120 			if (data[datalen - 1] == '\n')
121 				datalen -= 1;
122 
123 			css__parse_expected(ctx, data, datalen);
124 		}
125 	}
126 
127 	return true;
128 }
129 
/**
 * Parse one expectation line and append it to the context's token list.
 *
 * The line has the form "TYPE" or "TYPE:text". In the text, a backslash
 * escapes the following character: "\n" becomes a line feed, "\t" a tab,
 * and any other escaped character stands for itself.
 *
 * \param ctx   Context whose expected-token array is extended
 * \param data  Line contents
 * \param len   Length of the line, in bytes
 */
void css__parse_expected(line_ctx *ctx, const char *data, size_t len)
{
	const char *end = data + len;
	const char *sep = css__parse_strnchr(data, len, ':');
	css_token_type type;
	exp_entry *entry;

	/* No separator: the whole line is the type name */
	if (sep == NULL)
		sep = end;

	type = string_to_type(data, sep - data);

	/* Grow the array when full: 8 entries at first, doubling after */
	if (ctx->expused == ctx->explen) {
		size_t base = (ctx->explen != 0) ? ctx->explen : 4;
		exp_entry *grown = realloc(ctx->exp,
				base * 2 * sizeof *grown);

		if (grown == NULL) {
			assert(0 && "No memory for expected tokens");
		}

		ctx->exp = grown;
		ctx->explen = base * 2;
	}

	entry = &ctx->exp[ctx->expused];
	entry->type = type;
	entry->textLen = 0;
	entry->hasText = (sep != end);

	if (sep != end) {
		bool escaped = false;
		const char *cur;

		/* Copy everything after the separator, decoding escapes */
		for (cur = sep + 1; cur < end; cur++) {
			char c = *cur;

			if (!escaped && c == '\\') {
				escaped = true;
				continue;
			}

			if (escaped) {
				if (c == 'n')
					c = 0xa;
				else if (c == 't')
					c = 0x9;
				escaped = false;
			}

			entry->text[entry->textLen] = c;
			entry->textLen++;

			assert(ctx->exp[ctx->expused].textLen <
					EXP_ENTRY_TEXT_LEN);
		}
	}

	ctx->expused++;
}
197 
/**
 * Convert a token type to a printable name, for diagnostics.
 *
 * \param type  Token type to name
 * \return Pointer to a constant string: the entry of the name table at
 *         index \a type, or "UNKNOWN" if \a type lies outside the table
 */
const char *string_from_type(css_token_type type)
{
	/* Indexed by token type value.
	 * NOTE(review): order presumably mirrors the css_token_type
	 * enumeration -- confirm against its declaration. */
	static const char * const names[] =
	{
		"IDENT", "ATKEYWORD", "HASH", "FUNCTION", "STRING", "INVALID",
		"URI", "UNICODE-RANGE", "CHAR", "NUMBER", "PERCENTAGE",
		"DIMENSION", "last_intern", "CDO", "CDC", "S", "COMMENT",
		"INCLUDES", "DASHMATCH", "PREFIXMATCH", "SUFFIXMATCH",
		"SUBSTRINGMATCH", "EOF"
	};

	/* The cast makes a negative value fail the range check too */
	if ((size_t) type >= sizeof names / sizeof names[0])
		return "UNKNOWN";

	return names[type];
}
211 
string_to_type(const char * data,size_t len)212 css_token_type string_to_type(const char *data, size_t len)
213 {
214 	if (len == 5 && strncasecmp(data, "IDENT", len) == 0)
215 		return CSS_TOKEN_IDENT;
216 	else if (len == 9 && strncasecmp(data, "ATKEYWORD", len) == 0)
217 		return CSS_TOKEN_ATKEYWORD;
218 	else if (len == 6 && strncasecmp(data, "STRING", len) == 0)
219 		return CSS_TOKEN_STRING;
220 	else if (len == 7 && strncasecmp(data, "INVALID", len) == 0)
221 		return CSS_TOKEN_INVALID_STRING;
222 	else if (len == 4 && strncasecmp(data, "HASH", len) == 0)
223 		return CSS_TOKEN_HASH;
224 	else if (len == 6 && strncasecmp(data, "NUMBER", len) == 0)
225 		return CSS_TOKEN_NUMBER;
226 	else if (len == 10 && strncasecmp(data, "PERCENTAGE", len) == 0)
227 		return CSS_TOKEN_PERCENTAGE;
228 	else if (len == 9 && strncasecmp(data, "DIMENSION", len) == 0)
229 		return CSS_TOKEN_DIMENSION;
230 	else if (len == 3 && strncasecmp(data, "URI", len) == 0)
231 		return CSS_TOKEN_URI;
232 	else if (len == 13 && strncasecmp(data, "UNICODE-RANGE", len) == 0)
233 		return CSS_TOKEN_UNICODE_RANGE;
234 	else if (len == 3 && strncasecmp(data, "CDO", len) == 0)
235 		return CSS_TOKEN_CDO;
236 	else if (len == 3 && strncasecmp(data, "CDC", len) == 0)
237 		return CSS_TOKEN_CDC;
238 	else if (len == 1 && strncasecmp(data, "S", len) == 0)
239 		return CSS_TOKEN_S;
240 	else if (len == 7 && strncasecmp(data, "COMMENT", len) == 0)
241 		return CSS_TOKEN_COMMENT;
242 	else if (len == 8 && strncasecmp(data, "FUNCTION", len) == 0)
243 		return CSS_TOKEN_FUNCTION;
244 	else if (len == 8 && strncasecmp(data, "INCLUDES", len) == 0)
245 		return CSS_TOKEN_INCLUDES;
246 	else if (len == 9 && strncasecmp(data, "DASHMATCH", len) == 0)
247 		return CSS_TOKEN_DASHMATCH;
248 	else if (len == 11 && strncasecmp(data, "PREFIXMATCH", len) == 0)
249 		return CSS_TOKEN_PREFIXMATCH;
250 	else if (len == 11 && strncasecmp(data, "SUFFIXMATCH", len) == 0)
251 		return CSS_TOKEN_SUFFIXMATCH;
252 	else if (len == 14 && strncasecmp(data, "SUBSTRINGMATCH", len) == 0)
253 		return CSS_TOKEN_SUBSTRINGMATCH;
254 	else if (len == 4 && strncasecmp(data, "CHAR", len) == 0)
255 		return CSS_TOKEN_CHAR;
256 	else
257 		return CSS_TOKEN_EOF;
258 }
259 
/**
 * Run a single testcase: lex the input and compare every token produced
 * against the list of expected tokens.
 *
 * Any mismatch (token type, text length, text content, or token count) is
 * reported on stdout and then trips an assertion.
 *
 * \param data    Input to feed to the lexer
 * \param len     Length of the input, in bytes
 * \param exp     Expected tokens (not accessed beyond \a explen entries)
 * \param explen  Number of expected tokens
 */
void run_test(const uint8_t *data, size_t len, exp_entry *exp, size_t explen)
{
	parserutils_inputstream *input;
	css_lexer *lexer;
	css_error error;
	css_token *tok;
	size_t e;
	static int testnum;	/* Counts testcases across calls, for output */

	assert(parserutils_inputstream_create("UTF-8", CSS_CHARSET_DICTATED,
			css__charset_extract, &input) == PARSERUTILS_OK);

	assert(css__lexer_create(input, &lexer) == CSS_OK);

	assert(parserutils_inputstream_append(input, data, len) ==
			PARSERUTILS_OK);

	/* A second append with no data follows the real input */
	assert(parserutils_inputstream_append(input, NULL, 0) ==
			PARSERUTILS_OK);

	e = 0;
	testnum++;

	while ((error = css__lexer_get_token(lexer, &tok)) == CSS_OK) {
		/* Never read past the expectations: a surplus token is a
		 * test failure, not a reason to index out of bounds */
		if (e >= explen) {
			printf("%d: Got token %s, Expected nothing [%d, %d]\n",
				testnum, string_from_type(tok->type),
				tok->line, tok->col);
			assert(0 && "More tokens than expected");
		}

		if (tok->type != exp[e].type) {
			printf("%d: Got token %s, Expected %s [%d, %d]\n",
				testnum, string_from_type(tok->type),
				string_from_type(exp[e].type),
				tok->line, tok->col);
			assert(0 && "Types differ");
		}

		/* Text is only checked when the expectation supplied some */
		if (exp[e].hasText) {
			if (tok->data.len != exp[e].textLen) {
				printf("%d: Got length %d, Expected %d\n",
					testnum, (int) tok->data.len,
					(int) exp[e].textLen);
				assert(0 && "Text lengths differ");
			}

			/* Lengths are equal here, so compare every byte;
			 * memcmp does not stop early at an embedded NUL */
			if (memcmp(tok->data.data, exp[e].text,
					tok->data.len) != 0) {
				printf("%d: Got data '%.*s', Expected '%.*s'\n",
					testnum,
					(int) tok->data.len, tok->data.data,
					(int) exp[e].textLen, exp[e].text);
				assert(0 && "Text differs");
			}
		}

		e++;

		if (tok->type == CSS_TOKEN_EOF)
			break;
	}

	/* Every expected token must have been produced */
	assert(e == explen);

	css__lexer_destroy(lexer);

	parserutils_inputstream_destroy(input);

	printf("Test %d: PASS\n", testnum);
}
324