1 #include <inttypes.h>
2 #include <stdio.h>
3 #include <stdlib.h>
4 #include <string.h>
5 
6 #include <parserutils/parserutils.h>
7 
8 #include "utils/utils.h"
9 
10 #include "input/filter.h"
11 
12 #include "testutils.h"
13 
main(int argc,char ** argv)14 int main(int argc, char **argv)
15 {
16 	parserutils_filter_optparams params;
17 	parserutils_filter *input;
18 	uint8_t inbuf[64], outbuf[64];
19 	size_t inlen, outlen;
20 	const uint8_t *in = inbuf;
21 	uint8_t *out = outbuf;
22 
23 	UNUSED(argc);
24 	UNUSED(argv);
25 
26 	/* Create input filter */
27 	assert(parserutils__filter_create("UTF-8", &input) == PARSERUTILS_OK);
28 
29 	/* Convert filter to UTF-8 encoding */
30 	params.encoding.name = "UTF-8";
31 	assert(parserutils__filter_setopt(input, PARSERUTILS_FILTER_SET_ENCODING,
32 			(parserutils_filter_optparams *) &params) ==
33 			PARSERUTILS_OK);
34 
35 
36 	/* Simple case - valid input & output buffer large enough */
37 	in = inbuf;
38 	out = outbuf;
39 	strcpy((char *) inbuf, "hell\xc2\xa0o!");
40 	inlen = strlen((const char *) inbuf);
41 	outbuf[0] = '\0';
42 	outlen = 64;
43 
44 	assert(parserutils__filter_process_chunk(input, &in, &inlen,
45 			&out, &outlen) == PARSERUTILS_OK);
46 
47 	printf("'%.*s' %d '%.*s' %d\n", (int) inlen, in, (int) inlen,
48 			(int) (out - ((uint8_t *) outbuf)),
49 			outbuf, (int) outlen);
50 
51 	assert(parserutils__filter_reset(input) == PARSERUTILS_OK);
52 
53 	assert(memcmp(outbuf, "hell\xc2\xa0o!",
54 			SLEN("hell\xc2\xa0o!")) == 0);
55 
56 
57 	/* Too small an output buffer; no encoding edge cases */
58 	in = inbuf;
59 	out = outbuf;
60 	strcpy((char *) inbuf, "hello!");
61 	inlen = strlen((const char *) inbuf);
62 	outbuf[0] = '\0';
63 	outlen = 5;
64 
65 	assert(parserutils__filter_process_chunk(input, &in, &inlen,
66 			&out, &outlen) == PARSERUTILS_NOMEM);
67 
68 	printf("'%.*s' %d '%.*s' %d\n", (int) inlen, in, (int) inlen,
69 			(int) (out - ((uint8_t *) outbuf)),
70 			outbuf, (int) outlen);
71 
72 	outlen = 64 - 5 + outlen;
73 
74 	assert(parserutils__filter_process_chunk(input, &in, &inlen,
75 			&out, &outlen) == PARSERUTILS_OK);
76 
77 	printf("'%.*s' %d '%.*s' %d\n", (int) inlen, in, (int) inlen,
78 			(int) (out - ((uint8_t *) outbuf)),
79 			outbuf, (int) outlen);
80 
81 	assert(parserutils__filter_reset(input) == PARSERUTILS_OK);
82 
83 	assert(memcmp(outbuf, "hello!",
84 			SLEN("hello!")) == 0);
85 
86 
87 	/* Illegal input sequence; output buffer large enough */
88 	in = inbuf;
89 	out = outbuf;
90 	strcpy((char *) inbuf, "hell\x96o!");
91 	inlen = strlen((const char *) inbuf);
92 	outbuf[0] = '\0';
93 	outlen = 64;
94 
95 	/* Input does loose decoding, converting to U+FFFD if illegal
96 	 * input is encountered */
97 	assert(parserutils__filter_process_chunk(input, &in, &inlen,
98 			&out, &outlen) == PARSERUTILS_OK);
99 
100 	printf("'%.*s' %d '%.*s' %d\n", (int) inlen, in, (int) inlen,
101 			(int) (out - ((uint8_t *) outbuf)),
102 			outbuf, (int) outlen);
103 
104 	assert(parserutils__filter_reset(input) == PARSERUTILS_OK);
105 
106 	assert(memcmp(outbuf, "hell\xef\xbf\xbdo!",
107 			SLEN("hell\xef\xbf\xbdo!")) == 0);
108 
109 
110 	/* Input ends mid-sequence */
111 	in = inbuf;
112 	out = outbuf;
113 	strcpy((char *) inbuf, "hell\xc2\xa0o!");
114 	inlen = strlen((const char *) inbuf) - 3;
115 	outbuf[0] = '\0';
116 	outlen = 64;
117 
118 	assert(parserutils__filter_process_chunk(input, &in, &inlen,
119 			&out, &outlen) == PARSERUTILS_OK);
120 
121 	printf("'%.*s' %d '%.*s' %d\n", (int) inlen, in, (int) inlen,
122 			(int) (out - ((uint8_t *) outbuf)),
123 			outbuf, (int) outlen);
124 
125 	inlen += 3;
126 
127 	assert(parserutils__filter_process_chunk(input, &in, &inlen,
128 			&out, &outlen) == PARSERUTILS_OK);
129 
130 	printf("'%.*s' %d '%.*s' %d\n", (int) inlen, in, (int) inlen,
131 			(int) (out - ((uint8_t *) outbuf)),
132 			outbuf, (int) outlen);
133 
134 	assert(parserutils__filter_reset(input) == PARSERUTILS_OK);
135 
136 	assert(memcmp(outbuf, "hell\xc2\xa0o!",
137 			SLEN("hell\xc2\xa0o!")) == 0);
138 
139 
140 	/* Input ends mid-sequence, but second attempt has too small a
141 	 * buffer, but large enough to write out the incomplete character. */
142 	in = inbuf;
143 	out = outbuf;
144 	strcpy((char *) inbuf, "hell\xc2\xa0o!");
145 	inlen = strlen((const char *) inbuf) - 3;
146 	outbuf[0] = '\0';
147 	outlen = 64;
148 
149 	assert(parserutils__filter_process_chunk(input, &in, &inlen,
150 			&out, &outlen) == PARSERUTILS_OK);
151 
152 	printf("'%.*s' %d '%.*s' %d\n", (int) inlen, in, (int) inlen,
153 			(int) (out - ((uint8_t *) outbuf)),
154 			outbuf, (int) outlen);
155 
156 	inlen += 3;
157 	outlen = 3;
158 
159 	assert(parserutils__filter_process_chunk(input, &in, &inlen,
160 			&out, &outlen) == PARSERUTILS_NOMEM);
161 
162 	printf("'%.*s' %d '%.*s' %d\n", (int) inlen, in, (int) inlen,
163 			(int) (out - ((uint8_t *) outbuf)),
164 			outbuf, (int) outlen);
165 
166 	outlen = 64 - 7;
167 
168 	assert(parserutils__filter_process_chunk(input, &in, &inlen,
169 			&out, &outlen) == PARSERUTILS_OK);
170 
171 	printf("'%.*s' %d '%.*s' %d\n", (int) inlen, in, (int) inlen,
172 			(int) (out - ((uint8_t *) outbuf)),
173 			outbuf, (int) outlen);
174 
175 	assert(parserutils__filter_reset(input) == PARSERUTILS_OK);
176 
177 	assert(memcmp(outbuf, "hell\xc2\xa0o!",
178 			SLEN("hell\xc2\xa0o!")) == 0);
179 
180 
181 	/* Input ends mid-sequence, but second attempt has too small a
182 	 * buffer, not large enough to write out the incomplete character. */
183 	in = inbuf;
184 	out = outbuf;
185 	strcpy((char *) inbuf, "hell\xc2\xa0o!");
186 	inlen = strlen((const char *) inbuf) - 3;
187 	outbuf[0] = '\0';
188 	outlen = 64;
189 
190 	assert(parserutils__filter_process_chunk(input, &in, &inlen,
191 			&out, &outlen) == PARSERUTILS_OK);
192 
193 	printf("'%.*s' %d '%.*s' %d\n", (int) inlen, in, (int) inlen,
194 			(int) (out - ((uint8_t *) outbuf)),
195 			outbuf, (int) outlen);
196 
197 	inlen += 3;
198 	outlen = 1;
199 
200 	assert(parserutils__filter_process_chunk(input, &in, &inlen,
201 			&out, &outlen) == PARSERUTILS_NOMEM);
202 
203 	printf("'%.*s' %d '%.*s' %d\n", (int) inlen, in, (int) inlen,
204 			(int) (out - ((uint8_t *) outbuf)),
205 			outbuf, (int) outlen);
206 
207 	outlen = 60;
208 
209 	assert(parserutils__filter_process_chunk(input, &in, &inlen,
210 			&out, &outlen) == PARSERUTILS_OK);
211 
212 	printf("'%.*s' %d '%.*s' %d\n", (int) inlen, in, (int) inlen,
213 			(int) (out - ((uint8_t *) outbuf)),
214 			outbuf, (int) outlen);
215 
216 	assert(parserutils__filter_reset(input) == PARSERUTILS_OK);
217 
218 	assert(memcmp(outbuf, "hell\xc2\xa0o!",
219 			SLEN("hell\xc2\xa0o!")) == 0);
220 
221 
222 	/* Input ends mid-sequence, but second attempt contains
223 	 * invalid character */
224 	in = inbuf;
225 	out = outbuf;
226 	strcpy((char *) inbuf, "hell\xc2\xc2o!");
227 	inlen = strlen((const char *) inbuf) - 3;
228 	outbuf[0] = '\0';
229 	outlen = 64;
230 
231 	assert(parserutils__filter_process_chunk(input, &in, &inlen,
232 			&out, &outlen) == PARSERUTILS_OK);
233 
234 	printf("'%.*s' %d '%.*s' %d\n", (int) inlen, in, (int) inlen,
235 			(int) (out - ((uint8_t *) outbuf)),
236 			outbuf, (int) outlen);
237 
238 	inlen += 3;
239 
240 	/* Input does loose decoding, converting to U+FFFD if illegal
241 	 * input is encountered */
242 	assert(parserutils__filter_process_chunk(input, &in, &inlen,
243 			&out, &outlen) == PARSERUTILS_OK);
244 
245 	printf("'%.*s' %d '%.*s' %d\n", (int) inlen, in, (int) inlen,
246 			(int) (out - ((uint8_t *) outbuf)),
247 			outbuf, (int) outlen);
248 
249 	assert(parserutils__filter_reset(input) == PARSERUTILS_OK);
250 
251 	assert(memcmp(outbuf, "hell\xef\xbf\xbd\xef\xbf\xbdo!",
252 			SLEN("hell\xef\xbf\xbd\xef\xbf\xbdo!")) == 0);
253 
254 
255 	/* Input ends mid-sequence, but second attempt contains another
256 	 * incomplete character */
257 	in = inbuf;
258 	out = outbuf;
259 	strcpy((char *) inbuf, "hell\xc2\xa0\xc2\xa1o!");
260 	inlen = strlen((const char *) inbuf) - 5;
261 	outbuf[0] = '\0';
262 	outlen = 64;
263 
264 	assert(parserutils__filter_process_chunk(input, &in, &inlen,
265 			&out, &outlen) == PARSERUTILS_OK);
266 
267 	printf("'%.*s' %d '%.*s' %d\n", (int) inlen, in, (int) inlen,
268 			(int) (out - ((uint8_t *) outbuf)),
269 			outbuf, (int) outlen);
270 
271 	inlen += 2;
272 
273 	assert(parserutils__filter_process_chunk(input, &in, &inlen,
274 			&out, &outlen) == PARSERUTILS_OK);
275 
276 	printf("'%.*s' %d '%.*s' %d\n", (int) inlen, in, (int) inlen,
277 			(int) (out - ((uint8_t *) outbuf)),
278 			outbuf, (int) outlen);
279 
280 	inlen += 3;
281 
282 	assert(parserutils__filter_process_chunk(input, &in, &inlen,
283 			&out, &outlen) == PARSERUTILS_OK);
284 
285 	printf("'%.*s' %d '%.*s' %d\n", (int) inlen, in, (int) inlen,
286 			(int) (out - ((uint8_t *) outbuf)),
287 			outbuf, (int) outlen);
288 
289 	assert(parserutils__filter_reset(input) == PARSERUTILS_OK);
290 
291 	assert(memcmp(outbuf, "hell\xc2\xa0\xc2\xa1o!",
292 			SLEN("hell\xc2\xa0\xc2\xa1o!")) == 0);
293 
294 
295 	/* Input ends mid-sequence, but second attempt contains insufficient
296 	 * data to complete the incomplete character */
297 	in = inbuf;
298 	out = outbuf;
299 	strcpy((char *) inbuf, "hell\xe2\x80\xa2o!");
300 	inlen = strlen((const char *) inbuf) - 4;
301 	outbuf[0] = '\0';
302 	outlen = 64;
303 
304 	assert(parserutils__filter_process_chunk(input, &in, &inlen,
305 			&out, &outlen) == PARSERUTILS_OK);
306 
307 	printf("'%.*s' %d '%.*s' %d\n", (int) inlen, in, (int) inlen,
308 			(int) (out - ((uint8_t *) outbuf)),
309 			outbuf, (int) outlen);
310 
311 	inlen += 1;
312 
313 	assert(parserutils__filter_process_chunk(input, &in, &inlen,
314 			&out, &outlen) == PARSERUTILS_OK);
315 
316 	printf("'%.*s' %d '%.*s' %d\n", (int) inlen, in, (int) inlen,
317 			(int) (out - ((uint8_t *) outbuf)),
318 			outbuf, (int) outlen);
319 
320 	inlen += 3;
321 
322 	assert(parserutils__filter_process_chunk(input, &in, &inlen,
323 			&out, &outlen) == PARSERUTILS_OK);
324 
325 	printf("'%.*s' %d '%.*s' %d\n", (int) inlen, in, (int) inlen,
326 			(int) (out - ((uint8_t *) outbuf)),
327 			outbuf, (int) outlen);
328 
329 	assert(parserutils__filter_reset(input) == PARSERUTILS_OK);
330 
331 	assert(memcmp(outbuf, "hell\xe2\x80\xa2o!",
332 			SLEN("hell\xe2\x80\xa2o!")) == 0);
333 
334 
335 	/* Clean up */
336 	parserutils__filter_destroy(input);
337 
338 	printf("PASS\n");
339 
340 	return 0;
341 }
342