1 #include <inttypes.h>
2 #include <stdio.h>
3 #include <stdlib.h>
4 #include <string.h>
5
6 #include <parserutils/parserutils.h>
7
8 #include "utils/utils.h"
9
10 #include "input/filter.h"
11
12 #include "testutils.h"
13
main(int argc,char ** argv)14 int main(int argc, char **argv)
15 {
16 parserutils_filter_optparams params;
17 parserutils_filter *input;
18 uint8_t inbuf[64], outbuf[64];
19 size_t inlen, outlen;
20 const uint8_t *in = inbuf;
21 uint8_t *out = outbuf;
22
23 UNUSED(argc);
24 UNUSED(argv);
25
26 /* Create input filter */
27 assert(parserutils__filter_create("UTF-8", &input) == PARSERUTILS_OK);
28
29 /* Convert filter to UTF-8 encoding */
30 params.encoding.name = "UTF-8";
31 assert(parserutils__filter_setopt(input, PARSERUTILS_FILTER_SET_ENCODING,
32 (parserutils_filter_optparams *) ¶ms) ==
33 PARSERUTILS_OK);
34
35
36 /* Simple case - valid input & output buffer large enough */
37 in = inbuf;
38 out = outbuf;
39 strcpy((char *) inbuf, "hell\xc2\xa0o!");
40 inlen = strlen((const char *) inbuf);
41 outbuf[0] = '\0';
42 outlen = 64;
43
44 assert(parserutils__filter_process_chunk(input, &in, &inlen,
45 &out, &outlen) == PARSERUTILS_OK);
46
47 printf("'%.*s' %d '%.*s' %d\n", (int) inlen, in, (int) inlen,
48 (int) (out - ((uint8_t *) outbuf)),
49 outbuf, (int) outlen);
50
51 assert(parserutils__filter_reset(input) == PARSERUTILS_OK);
52
53 assert(memcmp(outbuf, "hell\xc2\xa0o!",
54 SLEN("hell\xc2\xa0o!")) == 0);
55
56
57 /* Too small an output buffer; no encoding edge cases */
58 in = inbuf;
59 out = outbuf;
60 strcpy((char *) inbuf, "hello!");
61 inlen = strlen((const char *) inbuf);
62 outbuf[0] = '\0';
63 outlen = 5;
64
65 assert(parserutils__filter_process_chunk(input, &in, &inlen,
66 &out, &outlen) == PARSERUTILS_NOMEM);
67
68 printf("'%.*s' %d '%.*s' %d\n", (int) inlen, in, (int) inlen,
69 (int) (out - ((uint8_t *) outbuf)),
70 outbuf, (int) outlen);
71
72 outlen = 64 - 5 + outlen;
73
74 assert(parserutils__filter_process_chunk(input, &in, &inlen,
75 &out, &outlen) == PARSERUTILS_OK);
76
77 printf("'%.*s' %d '%.*s' %d\n", (int) inlen, in, (int) inlen,
78 (int) (out - ((uint8_t *) outbuf)),
79 outbuf, (int) outlen);
80
81 assert(parserutils__filter_reset(input) == PARSERUTILS_OK);
82
83 assert(memcmp(outbuf, "hello!",
84 SLEN("hello!")) == 0);
85
86
87 /* Illegal input sequence; output buffer large enough */
88 in = inbuf;
89 out = outbuf;
90 strcpy((char *) inbuf, "hell\x96o!");
91 inlen = strlen((const char *) inbuf);
92 outbuf[0] = '\0';
93 outlen = 64;
94
95 /* Input does loose decoding, converting to U+FFFD if illegal
96 * input is encountered */
97 assert(parserutils__filter_process_chunk(input, &in, &inlen,
98 &out, &outlen) == PARSERUTILS_OK);
99
100 printf("'%.*s' %d '%.*s' %d\n", (int) inlen, in, (int) inlen,
101 (int) (out - ((uint8_t *) outbuf)),
102 outbuf, (int) outlen);
103
104 assert(parserutils__filter_reset(input) == PARSERUTILS_OK);
105
106 assert(memcmp(outbuf, "hell\xef\xbf\xbdo!",
107 SLEN("hell\xef\xbf\xbdo!")) == 0);
108
109
110 /* Input ends mid-sequence */
111 in = inbuf;
112 out = outbuf;
113 strcpy((char *) inbuf, "hell\xc2\xa0o!");
114 inlen = strlen((const char *) inbuf) - 3;
115 outbuf[0] = '\0';
116 outlen = 64;
117
118 assert(parserutils__filter_process_chunk(input, &in, &inlen,
119 &out, &outlen) == PARSERUTILS_OK);
120
121 printf("'%.*s' %d '%.*s' %d\n", (int) inlen, in, (int) inlen,
122 (int) (out - ((uint8_t *) outbuf)),
123 outbuf, (int) outlen);
124
125 inlen += 3;
126
127 assert(parserutils__filter_process_chunk(input, &in, &inlen,
128 &out, &outlen) == PARSERUTILS_OK);
129
130 printf("'%.*s' %d '%.*s' %d\n", (int) inlen, in, (int) inlen,
131 (int) (out - ((uint8_t *) outbuf)),
132 outbuf, (int) outlen);
133
134 assert(parserutils__filter_reset(input) == PARSERUTILS_OK);
135
136 assert(memcmp(outbuf, "hell\xc2\xa0o!",
137 SLEN("hell\xc2\xa0o!")) == 0);
138
139
140 /* Input ends mid-sequence, but second attempt has too small a
141 * buffer, but large enough to write out the incomplete character. */
142 in = inbuf;
143 out = outbuf;
144 strcpy((char *) inbuf, "hell\xc2\xa0o!");
145 inlen = strlen((const char *) inbuf) - 3;
146 outbuf[0] = '\0';
147 outlen = 64;
148
149 assert(parserutils__filter_process_chunk(input, &in, &inlen,
150 &out, &outlen) == PARSERUTILS_OK);
151
152 printf("'%.*s' %d '%.*s' %d\n", (int) inlen, in, (int) inlen,
153 (int) (out - ((uint8_t *) outbuf)),
154 outbuf, (int) outlen);
155
156 inlen += 3;
157 outlen = 3;
158
159 assert(parserutils__filter_process_chunk(input, &in, &inlen,
160 &out, &outlen) == PARSERUTILS_NOMEM);
161
162 printf("'%.*s' %d '%.*s' %d\n", (int) inlen, in, (int) inlen,
163 (int) (out - ((uint8_t *) outbuf)),
164 outbuf, (int) outlen);
165
166 outlen = 64 - 7;
167
168 assert(parserutils__filter_process_chunk(input, &in, &inlen,
169 &out, &outlen) == PARSERUTILS_OK);
170
171 printf("'%.*s' %d '%.*s' %d\n", (int) inlen, in, (int) inlen,
172 (int) (out - ((uint8_t *) outbuf)),
173 outbuf, (int) outlen);
174
175 assert(parserutils__filter_reset(input) == PARSERUTILS_OK);
176
177 assert(memcmp(outbuf, "hell\xc2\xa0o!",
178 SLEN("hell\xc2\xa0o!")) == 0);
179
180
181 /* Input ends mid-sequence, but second attempt has too small a
182 * buffer, not large enough to write out the incomplete character. */
183 in = inbuf;
184 out = outbuf;
185 strcpy((char *) inbuf, "hell\xc2\xa0o!");
186 inlen = strlen((const char *) inbuf) - 3;
187 outbuf[0] = '\0';
188 outlen = 64;
189
190 assert(parserutils__filter_process_chunk(input, &in, &inlen,
191 &out, &outlen) == PARSERUTILS_OK);
192
193 printf("'%.*s' %d '%.*s' %d\n", (int) inlen, in, (int) inlen,
194 (int) (out - ((uint8_t *) outbuf)),
195 outbuf, (int) outlen);
196
197 inlen += 3;
198 outlen = 1;
199
200 assert(parserutils__filter_process_chunk(input, &in, &inlen,
201 &out, &outlen) == PARSERUTILS_NOMEM);
202
203 printf("'%.*s' %d '%.*s' %d\n", (int) inlen, in, (int) inlen,
204 (int) (out - ((uint8_t *) outbuf)),
205 outbuf, (int) outlen);
206
207 outlen = 60;
208
209 assert(parserutils__filter_process_chunk(input, &in, &inlen,
210 &out, &outlen) == PARSERUTILS_OK);
211
212 printf("'%.*s' %d '%.*s' %d\n", (int) inlen, in, (int) inlen,
213 (int) (out - ((uint8_t *) outbuf)),
214 outbuf, (int) outlen);
215
216 assert(parserutils__filter_reset(input) == PARSERUTILS_OK);
217
218 assert(memcmp(outbuf, "hell\xc2\xa0o!",
219 SLEN("hell\xc2\xa0o!")) == 0);
220
221
222 /* Input ends mid-sequence, but second attempt contains
223 * invalid character */
224 in = inbuf;
225 out = outbuf;
226 strcpy((char *) inbuf, "hell\xc2\xc2o!");
227 inlen = strlen((const char *) inbuf) - 3;
228 outbuf[0] = '\0';
229 outlen = 64;
230
231 assert(parserutils__filter_process_chunk(input, &in, &inlen,
232 &out, &outlen) == PARSERUTILS_OK);
233
234 printf("'%.*s' %d '%.*s' %d\n", (int) inlen, in, (int) inlen,
235 (int) (out - ((uint8_t *) outbuf)),
236 outbuf, (int) outlen);
237
238 inlen += 3;
239
240 /* Input does loose decoding, converting to U+FFFD if illegal
241 * input is encountered */
242 assert(parserutils__filter_process_chunk(input, &in, &inlen,
243 &out, &outlen) == PARSERUTILS_OK);
244
245 printf("'%.*s' %d '%.*s' %d\n", (int) inlen, in, (int) inlen,
246 (int) (out - ((uint8_t *) outbuf)),
247 outbuf, (int) outlen);
248
249 assert(parserutils__filter_reset(input) == PARSERUTILS_OK);
250
251 assert(memcmp(outbuf, "hell\xef\xbf\xbd\xef\xbf\xbdo!",
252 SLEN("hell\xef\xbf\xbd\xef\xbf\xbdo!")) == 0);
253
254
255 /* Input ends mid-sequence, but second attempt contains another
256 * incomplete character */
257 in = inbuf;
258 out = outbuf;
259 strcpy((char *) inbuf, "hell\xc2\xa0\xc2\xa1o!");
260 inlen = strlen((const char *) inbuf) - 5;
261 outbuf[0] = '\0';
262 outlen = 64;
263
264 assert(parserutils__filter_process_chunk(input, &in, &inlen,
265 &out, &outlen) == PARSERUTILS_OK);
266
267 printf("'%.*s' %d '%.*s' %d\n", (int) inlen, in, (int) inlen,
268 (int) (out - ((uint8_t *) outbuf)),
269 outbuf, (int) outlen);
270
271 inlen += 2;
272
273 assert(parserutils__filter_process_chunk(input, &in, &inlen,
274 &out, &outlen) == PARSERUTILS_OK);
275
276 printf("'%.*s' %d '%.*s' %d\n", (int) inlen, in, (int) inlen,
277 (int) (out - ((uint8_t *) outbuf)),
278 outbuf, (int) outlen);
279
280 inlen += 3;
281
282 assert(parserutils__filter_process_chunk(input, &in, &inlen,
283 &out, &outlen) == PARSERUTILS_OK);
284
285 printf("'%.*s' %d '%.*s' %d\n", (int) inlen, in, (int) inlen,
286 (int) (out - ((uint8_t *) outbuf)),
287 outbuf, (int) outlen);
288
289 assert(parserutils__filter_reset(input) == PARSERUTILS_OK);
290
291 assert(memcmp(outbuf, "hell\xc2\xa0\xc2\xa1o!",
292 SLEN("hell\xc2\xa0\xc2\xa1o!")) == 0);
293
294
295 /* Input ends mid-sequence, but second attempt contains insufficient
296 * data to complete the incomplete character */
297 in = inbuf;
298 out = outbuf;
299 strcpy((char *) inbuf, "hell\xe2\x80\xa2o!");
300 inlen = strlen((const char *) inbuf) - 4;
301 outbuf[0] = '\0';
302 outlen = 64;
303
304 assert(parserutils__filter_process_chunk(input, &in, &inlen,
305 &out, &outlen) == PARSERUTILS_OK);
306
307 printf("'%.*s' %d '%.*s' %d\n", (int) inlen, in, (int) inlen,
308 (int) (out - ((uint8_t *) outbuf)),
309 outbuf, (int) outlen);
310
311 inlen += 1;
312
313 assert(parserutils__filter_process_chunk(input, &in, &inlen,
314 &out, &outlen) == PARSERUTILS_OK);
315
316 printf("'%.*s' %d '%.*s' %d\n", (int) inlen, in, (int) inlen,
317 (int) (out - ((uint8_t *) outbuf)),
318 outbuf, (int) outlen);
319
320 inlen += 3;
321
322 assert(parserutils__filter_process_chunk(input, &in, &inlen,
323 &out, &outlen) == PARSERUTILS_OK);
324
325 printf("'%.*s' %d '%.*s' %d\n", (int) inlen, in, (int) inlen,
326 (int) (out - ((uint8_t *) outbuf)),
327 outbuf, (int) outlen);
328
329 assert(parserutils__filter_reset(input) == PARSERUTILS_OK);
330
331 assert(memcmp(outbuf, "hell\xe2\x80\xa2o!",
332 SLEN("hell\xe2\x80\xa2o!")) == 0);
333
334
335 /* Clean up */
336 parserutils__filter_destroy(input);
337
338 printf("PASS\n");
339
340 return 0;
341 }
342