1 /* Tests in the "basic" test case for the Expat test suite
2 __ __ _
3 ___\ \/ /_ __ __ _| |_
4 / _ \\ /| '_ \ / _` | __|
5 | __// \| |_) | (_| | |_
6 \___/_/\_\ .__/ \__,_|\__|
7 |_| XML parser
8
9 Copyright (c) 2001-2006 Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
10 Copyright (c) 2003 Greg Stein <gstein@users.sourceforge.net>
11 Copyright (c) 2005-2007 Steven Solie <steven@solie.ca>
12 Copyright (c) 2005-2012 Karl Waclawek <karl@waclawek.net>
13 Copyright (c) 2016-2024 Sebastian Pipping <sebastian@pipping.org>
14 Copyright (c) 2017-2022 Rhodri James <rhodri@wildebeest.org.uk>
15 Copyright (c) 2017 Joe Orton <jorton@redhat.com>
16 Copyright (c) 2017 José Gutiérrez de la Concha <jose@zeroc.com>
17 Copyright (c) 2018 Marco Maggi <marco.maggi-ipsu@poste.it>
18 Copyright (c) 2019 David Loffredo <loffredo@steptools.com>
19 Copyright (c) 2020 Tim Gates <tim.gates@iress.com>
20 Copyright (c) 2021 Donghee Na <donghee.na@python.org>
21 Copyright (c) 2023-2024 Sony Corporation / Snild Dolkow <snild@sony.com>
22 Licensed under the MIT license:
23
24 Permission is hereby granted, free of charge, to any person obtaining
25 a copy of this software and associated documentation files (the
26 "Software"), to deal in the Software without restriction, including
27 without limitation the rights to use, copy, modify, merge, publish,
28 distribute, sublicense, and/or sell copies of the Software, and to permit
29 persons to whom the Software is furnished to do so, subject to the
30 following conditions:
31
32 The above copyright notice and this permission notice shall be included
33 in all copies or substantial portions of the Software.
34
35 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
36 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
37 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
38 NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
39 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
40 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
41 USE OR OTHER DEALINGS IN THE SOFTWARE.
42 */
43
44 #if defined(NDEBUG)
45 # undef NDEBUG /* because test suite relies on assert(...) at the moment */
46 #endif
47
48 #include <assert.h>
49
50 #include <stdio.h>
51 #include <string.h>
52 #include <time.h>
53
54 #if ! defined(__cplusplus)
55 # include <stdbool.h>
56 #endif
57
58 #include "expat_config.h"
59
60 #include "expat.h"
61 #include "internal.h"
62 #include "minicheck.h"
63 #include "structdata.h"
64 #include "common.h"
65 #include "dummy.h"
66 #include "handlers.h"
67 #include "siphash.h"
68 #include "basic_tests.h"
69
70 static void
basic_setup(void)71 basic_setup(void) {
72 g_parser = XML_ParserCreate(NULL);
73 if (g_parser == NULL)
74 fail("Parser not created.");
75 }
76
77 /*
78 * Character & encoding tests.
79 */
80
START_TEST(test_nul_byte)81 START_TEST(test_nul_byte) {
82 char text[] = "<doc>\0</doc>";
83
84 /* test that a NUL byte (in US-ASCII data) is an error */
85 if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE)
86 == XML_STATUS_OK)
87 fail("Parser did not report error on NUL-byte.");
88 if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN)
89 xml_failure(g_parser);
90 }
91 END_TEST
92
START_TEST(test_u0000_char)93 START_TEST(test_u0000_char) {
94 /* test that a NUL byte (in US-ASCII data) is an error */
95 expect_failure("<doc>�</doc>", XML_ERROR_BAD_CHAR_REF,
96 "Parser did not report error on NUL-byte.");
97 }
98 END_TEST
99
START_TEST(test_siphash_self)100 START_TEST(test_siphash_self) {
101 if (! sip24_valid())
102 fail("SipHash self-test failed");
103 }
104 END_TEST
105
START_TEST(test_siphash_spec)106 START_TEST(test_siphash_spec) {
107 /* https://131002.net/siphash/siphash.pdf (page 19, "Test values") */
108 const char message[] = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09"
109 "\x0a\x0b\x0c\x0d\x0e";
110 const size_t len = sizeof(message) - 1;
111 const uint64_t expected = SIP_ULL(0xa129ca61U, 0x49be45e5U);
112 struct siphash state;
113 struct sipkey key;
114
115 sip_tokey(&key, "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09"
116 "\x0a\x0b\x0c\x0d\x0e\x0f");
117 sip24_init(&state, &key);
118
119 /* Cover spread across calls */
120 sip24_update(&state, message, 4);
121 sip24_update(&state, message + 4, len - 4);
122
123 /* Cover null length */
124 sip24_update(&state, message, 0);
125
126 if (sip24_final(&state) != expected)
127 fail("sip24_final failed spec test\n");
128
129 /* Cover wrapper */
130 if (siphash24(message, len, &key) != expected)
131 fail("siphash24 failed spec test\n");
132 }
133 END_TEST
134
START_TEST(test_bom_utf8)135 START_TEST(test_bom_utf8) {
136 /* This test is really just making sure we don't core on a UTF-8 BOM. */
137 const char *text = "\357\273\277<e/>";
138
139 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
140 == XML_STATUS_ERROR)
141 xml_failure(g_parser);
142 }
143 END_TEST
144
START_TEST(test_bom_utf16_be)145 START_TEST(test_bom_utf16_be) {
146 char text[] = "\376\377\0<\0e\0/\0>";
147
148 if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE)
149 == XML_STATUS_ERROR)
150 xml_failure(g_parser);
151 }
152 END_TEST
153
START_TEST(test_bom_utf16_le)154 START_TEST(test_bom_utf16_le) {
155 char text[] = "\377\376<\0e\0/\0>\0";
156
157 if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE)
158 == XML_STATUS_ERROR)
159 xml_failure(g_parser);
160 }
161 END_TEST
162
START_TEST(test_nobom_utf16_le)163 START_TEST(test_nobom_utf16_le) {
164 char text[] = " \0<\0e\0/\0>\0";
165
166 if (g_chunkSize == 1) {
167 // TODO: with just the first byte, we can't tell the difference between
168 // UTF-16-LE and UTF-8. Avoid the failure for now.
169 return;
170 }
171
172 if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE)
173 == XML_STATUS_ERROR)
174 xml_failure(g_parser);
175 }
176 END_TEST
177
START_TEST(test_hash_collision)178 START_TEST(test_hash_collision) {
179 /* For full coverage of the lookup routine, we need to ensure a
180 * hash collision even though we can only tell that we have one
181 * through breakpoint debugging or coverage statistics. The
182 * following will cause a hash collision on machines with a 64-bit
183 * long type; others will have to experiment. The full coverage
184 * tests invoked from qa.sh usually provide a hash collision, but
185 * not always. This is an attempt to provide insurance.
186 */
187 #define COLLIDING_HASH_SALT (unsigned long)SIP_ULL(0xffffffffU, 0xff99fc90U)
188 const char *text
189 = "<doc>\n"
190 "<a1/><a2/><a3/><a4/><a5/><a6/><a7/><a8/>\n"
191 "<b1></b1><b2 attr='foo'>This is a foo</b2><b3></b3><b4></b4>\n"
192 "<b5></b5><b6></b6><b7></b7><b8></b8>\n"
193 "<c1/><c2/><c3/><c4/><c5/><c6/><c7/><c8/>\n"
194 "<d1/><d2/><d3/><d4/><d5/><d6/><d7/>\n"
195 "<d8>This triggers the table growth and collides with b2</d8>\n"
196 "</doc>\n";
197
198 XML_SetHashSalt(g_parser, COLLIDING_HASH_SALT);
199 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
200 == XML_STATUS_ERROR)
201 xml_failure(g_parser);
202 }
203 END_TEST
204 #undef COLLIDING_HASH_SALT
205
206 /* Regression test for SF bug #491986. */
START_TEST(test_danish_latin1)207 START_TEST(test_danish_latin1) {
208 const char *text = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
209 "<e>J\xF8rgen \xE6\xF8\xE5\xC6\xD8\xC5</e>";
210 #ifdef XML_UNICODE
211 const XML_Char *expected
212 = XCS("J\x00f8rgen \x00e6\x00f8\x00e5\x00c6\x00d8\x00c5");
213 #else
214 const XML_Char *expected
215 = XCS("J\xC3\xB8rgen \xC3\xA6\xC3\xB8\xC3\xA5\xC3\x86\xC3\x98\xC3\x85");
216 #endif
217 run_character_check(text, expected);
218 }
219 END_TEST
220
221 /* Regression test for SF bug #514281. */
START_TEST(test_french_charref_hexidecimal)222 START_TEST(test_french_charref_hexidecimal) {
223 const char *text = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
224 "<doc>éèàçêÈ</doc>";
225 #ifdef XML_UNICODE
226 const XML_Char *expected = XCS("\x00e9\x00e8\x00e0\x00e7\x00ea\x00c8");
227 #else
228 const XML_Char *expected
229 = XCS("\xC3\xA9\xC3\xA8\xC3\xA0\xC3\xA7\xC3\xAA\xC3\x88");
230 #endif
231 run_character_check(text, expected);
232 }
233 END_TEST
234
START_TEST(test_french_charref_decimal)235 START_TEST(test_french_charref_decimal) {
236 const char *text = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
237 "<doc>éèàçêÈ</doc>";
238 #ifdef XML_UNICODE
239 const XML_Char *expected = XCS("\x00e9\x00e8\x00e0\x00e7\x00ea\x00c8");
240 #else
241 const XML_Char *expected
242 = XCS("\xC3\xA9\xC3\xA8\xC3\xA0\xC3\xA7\xC3\xAA\xC3\x88");
243 #endif
244 run_character_check(text, expected);
245 }
246 END_TEST
247
START_TEST(test_french_latin1)248 START_TEST(test_french_latin1) {
249 const char *text = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
250 "<doc>\xE9\xE8\xE0\xE7\xEa\xC8</doc>";
251 #ifdef XML_UNICODE
252 const XML_Char *expected = XCS("\x00e9\x00e8\x00e0\x00e7\x00ea\x00c8");
253 #else
254 const XML_Char *expected
255 = XCS("\xC3\xA9\xC3\xA8\xC3\xA0\xC3\xA7\xC3\xAA\xC3\x88");
256 #endif
257 run_character_check(text, expected);
258 }
259 END_TEST
260
START_TEST(test_french_utf8)261 START_TEST(test_french_utf8) {
262 const char *text = "<?xml version='1.0' encoding='utf-8'?>\n"
263 "<doc>\xC3\xA9</doc>";
264 #ifdef XML_UNICODE
265 const XML_Char *expected = XCS("\x00e9");
266 #else
267 const XML_Char *expected = XCS("\xC3\xA9");
268 #endif
269 run_character_check(text, expected);
270 }
271 END_TEST
272
273 /* Regression test for SF bug #600479.
274 XXX There should be a test that exercises all legal XML Unicode
275 characters as PCDATA and attribute value content, and XML Name
276 characters as part of element and attribute names.
277 */
START_TEST(test_utf8_false_rejection)278 START_TEST(test_utf8_false_rejection) {
279 const char *text = "<doc>\xEF\xBA\xBF</doc>";
280 #ifdef XML_UNICODE
281 const XML_Char *expected = XCS("\xfebf");
282 #else
283 const XML_Char *expected = XCS("\xEF\xBA\xBF");
284 #endif
285 run_character_check(text, expected);
286 }
287 END_TEST
288
289 /* Regression test for SF bug #477667.
290 This test assures that any 8-bit character followed by a 7-bit
291 character will not be mistakenly interpreted as a valid UTF-8
292 sequence.
293 */
START_TEST(test_illegal_utf8)294 START_TEST(test_illegal_utf8) {
295 char text[100];
296 int i;
297
298 for (i = 128; i <= 255; ++i) {
299 snprintf(text, sizeof(text), "<e>%ccd</e>", i);
300 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
301 == XML_STATUS_OK) {
302 snprintf(text, sizeof(text),
303 "expected token error for '%c' (ordinal %d) in UTF-8 text", i,
304 i);
305 fail(text);
306 } else if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN)
307 xml_failure(g_parser);
308 /* Reset the parser since we use the same parser repeatedly. */
309 XML_ParserReset(g_parser, NULL);
310 }
311 }
312 END_TEST
313
314 /* Examples, not masks: */
315 #define UTF8_LEAD_1 "\x7f" /* 0b01111111 */
316 #define UTF8_LEAD_2 "\xdf" /* 0b11011111 */
317 #define UTF8_LEAD_3 "\xef" /* 0b11101111 */
318 #define UTF8_LEAD_4 "\xf7" /* 0b11110111 */
319 #define UTF8_FOLLOW "\xbf" /* 0b10111111 */
320
START_TEST(test_utf8_auto_align)321 START_TEST(test_utf8_auto_align) {
322 struct TestCase {
323 ptrdiff_t expectedMovementInChars;
324 const char *input;
325 };
326
327 struct TestCase cases[] = {
328 {00, ""},
329
330 {00, UTF8_LEAD_1},
331
332 {-1, UTF8_LEAD_2},
333 {00, UTF8_LEAD_2 UTF8_FOLLOW},
334
335 {-1, UTF8_LEAD_3},
336 {-2, UTF8_LEAD_3 UTF8_FOLLOW},
337 {00, UTF8_LEAD_3 UTF8_FOLLOW UTF8_FOLLOW},
338
339 {-1, UTF8_LEAD_4},
340 {-2, UTF8_LEAD_4 UTF8_FOLLOW},
341 {-3, UTF8_LEAD_4 UTF8_FOLLOW UTF8_FOLLOW},
342 {00, UTF8_LEAD_4 UTF8_FOLLOW UTF8_FOLLOW UTF8_FOLLOW},
343 };
344
345 size_t i = 0;
346 bool success = true;
347 for (; i < sizeof(cases) / sizeof(*cases); i++) {
348 const char *fromLim = cases[i].input + strlen(cases[i].input);
349 const char *const fromLimInitially = fromLim;
350 ptrdiff_t actualMovementInChars;
351
352 _INTERNAL_trim_to_complete_utf8_characters(cases[i].input, &fromLim);
353
354 actualMovementInChars = (fromLim - fromLimInitially);
355 if (actualMovementInChars != cases[i].expectedMovementInChars) {
356 size_t j = 0;
357 success = false;
358 printf("[-] UTF-8 case %2u: Expected movement by %2d chars"
359 ", actually moved by %2d chars: \"",
360 (unsigned)(i + 1), (int)cases[i].expectedMovementInChars,
361 (int)actualMovementInChars);
362 for (; j < strlen(cases[i].input); j++) {
363 printf("\\x%02x", (unsigned char)cases[i].input[j]);
364 }
365 printf("\"\n");
366 }
367 }
368
369 if (! success) {
370 fail("UTF-8 auto-alignment is not bullet-proof\n");
371 }
372 }
373 END_TEST
374
START_TEST(test_utf16)375 START_TEST(test_utf16) {
376 /* <?xml version="1.0" encoding="UTF-16"?>
377 * <doc a='123'>some {A} text</doc>
378 *
379 * where {A} is U+FF21, FULLWIDTH LATIN CAPITAL LETTER A
380 */
381 char text[]
382 = "\000<\000?\000x\000m\000\154\000 \000v\000e\000r\000s\000i\000o"
383 "\000n\000=\000'\0001\000.\000\060\000'\000 \000e\000n\000c\000o"
384 "\000d\000i\000n\000g\000=\000'\000U\000T\000F\000-\0001\000\066"
385 "\000'\000?\000>\000\n"
386 "\000<\000d\000o\000c\000 \000a\000=\000'\0001\0002\0003\000'\000>"
387 "\000s\000o\000m\000e\000 \xff\x21\000 \000t\000e\000x\000t\000"
388 "<\000/\000d\000o\000c\000>";
389 #ifdef XML_UNICODE
390 const XML_Char *expected = XCS("some \xff21 text");
391 #else
392 const XML_Char *expected = XCS("some \357\274\241 text");
393 #endif
394 CharData storage;
395
396 CharData_Init(&storage);
397 XML_SetUserData(g_parser, &storage);
398 XML_SetCharacterDataHandler(g_parser, accumulate_characters);
399 if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE)
400 == XML_STATUS_ERROR)
401 xml_failure(g_parser);
402 CharData_CheckXMLChars(&storage, expected);
403 }
404 END_TEST
405
START_TEST(test_utf16_le_epilog_newline)406 START_TEST(test_utf16_le_epilog_newline) {
407 unsigned int first_chunk_bytes = 17;
408 char text[] = "\xFF\xFE" /* BOM */
409 "<\000e\000/\000>\000" /* document element */
410 "\r\000\n\000\r\000\n\000"; /* epilog */
411
412 if (first_chunk_bytes >= sizeof(text) - 1)
413 fail("bad value of first_chunk_bytes");
414 if (_XML_Parse_SINGLE_BYTES(g_parser, text, first_chunk_bytes, XML_FALSE)
415 == XML_STATUS_ERROR)
416 xml_failure(g_parser);
417 else {
418 enum XML_Status rc;
419 rc = _XML_Parse_SINGLE_BYTES(g_parser, text + first_chunk_bytes,
420 sizeof(text) - first_chunk_bytes - 1,
421 XML_TRUE);
422 if (rc == XML_STATUS_ERROR)
423 xml_failure(g_parser);
424 }
425 }
426 END_TEST
427
428 /* Test that an outright lie in the encoding is faulted */
START_TEST(test_not_utf16)429 START_TEST(test_not_utf16) {
430 const char *text = "<?xml version='1.0' encoding='utf-16'?>"
431 "<doc>Hi</doc>";
432
433 /* Use a handler to provoke the appropriate code paths */
434 XML_SetXmlDeclHandler(g_parser, dummy_xdecl_handler);
435 expect_failure(text, XML_ERROR_INCORRECT_ENCODING,
436 "UTF-16 declared in UTF-8 not faulted");
437 }
438 END_TEST
439
440 /* Test that an unknown encoding is rejected */
START_TEST(test_bad_encoding)441 START_TEST(test_bad_encoding) {
442 const char *text = "<doc>Hi</doc>";
443
444 if (! XML_SetEncoding(g_parser, XCS("unknown-encoding")))
445 fail("XML_SetEncoding failed");
446 expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
447 "Unknown encoding not faulted");
448 }
449 END_TEST
450
451 /* Regression test for SF bug #481609, #774028. */
START_TEST(test_latin1_umlauts)452 START_TEST(test_latin1_umlauts) {
453 const char *text
454 = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
455 "<e a='\xE4 \xF6 \xFC ä ö ü ä ö ü >'\n"
456 " >\xE4 \xF6 \xFC ä ö ü ä ö ü ></e>";
457 #ifdef XML_UNICODE
458 /* Expected results in UTF-16 */
459 const XML_Char *expected = XCS("\x00e4 \x00f6 \x00fc ")
460 XCS("\x00e4 \x00f6 \x00fc ") XCS("\x00e4 \x00f6 \x00fc >");
461 #else
462 /* Expected results in UTF-8 */
463 const XML_Char *expected = XCS("\xC3\xA4 \xC3\xB6 \xC3\xBC ")
464 XCS("\xC3\xA4 \xC3\xB6 \xC3\xBC ") XCS("\xC3\xA4 \xC3\xB6 \xC3\xBC >");
465 #endif
466
467 run_character_check(text, expected);
468 XML_ParserReset(g_parser, NULL);
469 run_attribute_check(text, expected);
470 /* Repeat with a default handler */
471 XML_ParserReset(g_parser, NULL);
472 XML_SetDefaultHandler(g_parser, dummy_default_handler);
473 run_character_check(text, expected);
474 XML_ParserReset(g_parser, NULL);
475 XML_SetDefaultHandler(g_parser, dummy_default_handler);
476 run_attribute_check(text, expected);
477 }
478 END_TEST
479
480 /* Test that an element name with a 4-byte UTF-8 character is rejected */
START_TEST(test_long_utf8_character)481 START_TEST(test_long_utf8_character) {
482 const char *text
483 = "<?xml version='1.0' encoding='utf-8'?>\n"
484 /* 0xf0 0x90 0x80 0x80 = U+10000, the first Linear B character */
485 "<do\xf0\x90\x80\x80/>";
486 expect_failure(text, XML_ERROR_INVALID_TOKEN,
487 "4-byte UTF-8 character in element name not faulted");
488 }
489 END_TEST
490
491 /* Test that a long latin-1 attribute (too long to convert in one go)
492 * is correctly converted
493 */
START_TEST(test_long_latin1_attribute)494 START_TEST(test_long_latin1_attribute) {
495 const char *text
496 = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
497 "<doc att='"
498 /* 64 characters per line */
499 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
500 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
501 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
502 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
503 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
504 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
505 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
506 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
507 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
508 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
509 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
510 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
511 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
512 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
513 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
514 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNO"
515 /* Last character splits across a buffer boundary */
516 "\xe4'>\n</doc>";
517
518 const XML_Char *expected =
519 /* 64 characters per line */
520 /* clang-format off */
521 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
522 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
523 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
524 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
525 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
526 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
527 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
528 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
529 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
530 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
531 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
532 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
533 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
534 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
535 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
536 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNO")
537 /* clang-format on */
538 #ifdef XML_UNICODE
539 XCS("\x00e4");
540 #else
541 XCS("\xc3\xa4");
542 #endif
543
544 run_attribute_check(text, expected);
545 }
546 END_TEST
547
548 /* Test that a long ASCII attribute (too long to convert in one go)
549 * is correctly converted
550 */
START_TEST(test_long_ascii_attribute)551 START_TEST(test_long_ascii_attribute) {
552 const char *text
553 = "<?xml version='1.0' encoding='us-ascii'?>\n"
554 "<doc att='"
555 /* 64 characters per line */
556 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
557 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
558 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
559 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
560 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
561 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
562 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
563 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
564 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
565 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
566 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
567 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
568 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
569 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
570 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
571 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
572 "01234'>\n</doc>";
573 const XML_Char *expected =
574 /* 64 characters per line */
575 /* clang-format off */
576 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
577 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
578 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
579 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
580 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
581 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
582 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
583 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
584 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
585 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
586 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
587 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
588 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
589 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
590 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
591 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
592 XCS("01234");
593 /* clang-format on */
594
595 run_attribute_check(text, expected);
596 }
597 END_TEST
598
599 /* Regression test #1 for SF bug #653180. */
START_TEST(test_line_number_after_parse)600 START_TEST(test_line_number_after_parse) {
601 const char *text = "<tag>\n"
602 "\n"
603 "\n</tag>";
604 XML_Size lineno;
605
606 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
607 == XML_STATUS_ERROR)
608 xml_failure(g_parser);
609 lineno = XML_GetCurrentLineNumber(g_parser);
610 if (lineno != 4) {
611 char buffer[100];
612 snprintf(buffer, sizeof(buffer),
613 "expected 4 lines, saw %" XML_FMT_INT_MOD "u", lineno);
614 fail(buffer);
615 }
616 }
617 END_TEST
618
619 /* Regression test #2 for SF bug #653180. */
START_TEST(test_column_number_after_parse)620 START_TEST(test_column_number_after_parse) {
621 const char *text = "<tag></tag>";
622 XML_Size colno;
623
624 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
625 == XML_STATUS_ERROR)
626 xml_failure(g_parser);
627 colno = XML_GetCurrentColumnNumber(g_parser);
628 if (colno != 11) {
629 char buffer[100];
630 snprintf(buffer, sizeof(buffer),
631 "expected 11 columns, saw %" XML_FMT_INT_MOD "u", colno);
632 fail(buffer);
633 }
634 }
635 END_TEST
636
637 /* Regression test #3 for SF bug #653180. */
START_TEST(test_line_and_column_numbers_inside_handlers)638 START_TEST(test_line_and_column_numbers_inside_handlers) {
639 const char *text = "<a>\n" /* Unix end-of-line */
640 " <b>\r\n" /* Windows end-of-line */
641 " <c/>\r" /* Mac OS end-of-line */
642 " </b>\n"
643 " <d>\n"
644 " <f/>\n"
645 " </d>\n"
646 "</a>";
647 const StructDataEntry expected[]
648 = {{XCS("a"), 0, 1, STRUCT_START_TAG}, {XCS("b"), 2, 2, STRUCT_START_TAG},
649 {XCS("c"), 4, 3, STRUCT_START_TAG}, {XCS("c"), 8, 3, STRUCT_END_TAG},
650 {XCS("b"), 2, 4, STRUCT_END_TAG}, {XCS("d"), 2, 5, STRUCT_START_TAG},
651 {XCS("f"), 4, 6, STRUCT_START_TAG}, {XCS("f"), 8, 6, STRUCT_END_TAG},
652 {XCS("d"), 2, 7, STRUCT_END_TAG}, {XCS("a"), 0, 8, STRUCT_END_TAG}};
653 const int expected_count = sizeof(expected) / sizeof(StructDataEntry);
654 StructData storage;
655
656 StructData_Init(&storage);
657 XML_SetUserData(g_parser, &storage);
658 XML_SetStartElementHandler(g_parser, start_element_event_handler2);
659 XML_SetEndElementHandler(g_parser, end_element_event_handler2);
660 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
661 == XML_STATUS_ERROR)
662 xml_failure(g_parser);
663
664 StructData_CheckItems(&storage, expected, expected_count);
665 StructData_Dispose(&storage);
666 }
667 END_TEST
668
669 /* Regression test #4 for SF bug #653180. */
START_TEST(test_line_number_after_error)670 START_TEST(test_line_number_after_error) {
671 const char *text = "<a>\n"
672 " <b>\n"
673 " </a>"; /* missing </b> */
674 XML_Size lineno;
675 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
676 != XML_STATUS_ERROR)
677 fail("Expected a parse error");
678
679 lineno = XML_GetCurrentLineNumber(g_parser);
680 if (lineno != 3) {
681 char buffer[100];
682 snprintf(buffer, sizeof(buffer),
683 "expected 3 lines, saw %" XML_FMT_INT_MOD "u", lineno);
684 fail(buffer);
685 }
686 }
687 END_TEST
688
689 /* Regression test #5 for SF bug #653180. */
START_TEST(test_column_number_after_error)690 START_TEST(test_column_number_after_error) {
691 const char *text = "<a>\n"
692 " <b>\n"
693 " </a>"; /* missing </b> */
694 XML_Size colno;
695 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
696 != XML_STATUS_ERROR)
697 fail("Expected a parse error");
698
699 colno = XML_GetCurrentColumnNumber(g_parser);
700 if (colno != 4) {
701 char buffer[100];
702 snprintf(buffer, sizeof(buffer),
703 "expected 4 columns, saw %" XML_FMT_INT_MOD "u", colno);
704 fail(buffer);
705 }
706 }
707 END_TEST
708
709 /* Regression test for SF bug #478332. */
START_TEST(test_really_long_lines)710 START_TEST(test_really_long_lines) {
711 /* This parses an input line longer than INIT_DATA_BUF_SIZE
712 characters long (defined to be 1024 in xmlparse.c). We take a
713 really cheesy approach to building the input buffer, because
714 this avoids writing bugs in buffer-filling code.
715 */
716 const char *text
717 = "<e>"
718 /* 64 chars */
719 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
720 /* until we have at least 1024 characters on the line: */
721 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
722 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
723 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
724 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
725 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
726 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
727 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
728 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
729 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
730 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
731 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
732 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
733 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
734 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
735 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
736 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
737 "</e>";
738 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
739 == XML_STATUS_ERROR)
740 xml_failure(g_parser);
741 }
742 END_TEST
743
744 /* Test cdata processing across a buffer boundary */
START_TEST(test_really_long_encoded_lines)745 START_TEST(test_really_long_encoded_lines) {
746 /* As above, except that we want to provoke an output buffer
747 * overflow with a non-trivial encoding. For this we need to pass
748 * the whole cdata in one go, not byte-by-byte.
749 */
750 void *buffer;
751 const char *text
752 = "<?xml version='1.0' encoding='iso-8859-1'?>"
753 "<e>"
754 /* 64 chars */
755 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
756 /* until we have at least 1024 characters on the line: */
757 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
758 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
759 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
760 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
761 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
762 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
763 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
764 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
765 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
766 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
767 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
768 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
769 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
770 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
771 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
772 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
773 "</e>";
774 int parse_len = (int)strlen(text);
775
776 /* Need a cdata handler to provoke the code path we want to test */
777 XML_SetCharacterDataHandler(g_parser, dummy_cdata_handler);
778 buffer = XML_GetBuffer(g_parser, parse_len);
779 if (buffer == NULL)
780 fail("Could not allocate parse buffer");
781 assert(buffer != NULL);
782 memcpy(buffer, text, parse_len);
783 if (XML_ParseBuffer(g_parser, parse_len, XML_TRUE) == XML_STATUS_ERROR)
784 xml_failure(g_parser);
785 }
786 END_TEST
787
788 /*
789 * Element event tests.
790 */
791
START_TEST(test_end_element_events)792 START_TEST(test_end_element_events) {
793 const char *text = "<a><b><c/></b><d><f/></d></a>";
794 const XML_Char *expected = XCS("/c/b/f/d/a");
795 CharData storage;
796
797 CharData_Init(&storage);
798 XML_SetUserData(g_parser, &storage);
799 XML_SetEndElementHandler(g_parser, end_element_event_handler);
800 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
801 == XML_STATUS_ERROR)
802 xml_failure(g_parser);
803 CharData_CheckXMLChars(&storage, expected);
804 }
805 END_TEST
806
807 /*
808 * Attribute tests.
809 */
810
811 /* Helper used by the following tests; this checks any "attr" and "refs"
812 attributes to make sure whitespace has been normalized.
813
814 Return true if whitespace has been normalized in a string, using
815 the rules for attribute value normalization. The 'is_cdata' flag
816 is needed since CDATA attributes don't need to have multiple
817 whitespace characters collapsed to a single space, while other
818 attribute data types do. (Section 3.3.3 of the recommendation.)
819 */
820 static int
is_whitespace_normalized(const XML_Char * s,int is_cdata)821 is_whitespace_normalized(const XML_Char *s, int is_cdata) {
822 int blanks = 0;
823 int at_start = 1;
824 while (*s) {
825 if (*s == XCS(' '))
826 ++blanks;
827 else if (*s == XCS('\t') || *s == XCS('\n') || *s == XCS('\r'))
828 return 0;
829 else {
830 if (at_start) {
831 at_start = 0;
832 if (blanks && ! is_cdata)
833 /* illegal leading blanks */
834 return 0;
835 } else if (blanks > 1 && ! is_cdata)
836 return 0;
837 blanks = 0;
838 }
839 ++s;
840 }
841 if (blanks && ! is_cdata)
842 return 0;
843 return 1;
844 }
845
846 /* Check the attribute whitespace checker: */
START_TEST(test_helper_is_whitespace_normalized)847 START_TEST(test_helper_is_whitespace_normalized) {
848 assert(is_whitespace_normalized(XCS("abc"), 0));
849 assert(is_whitespace_normalized(XCS("abc"), 1));
850 assert(is_whitespace_normalized(XCS("abc def ghi"), 0));
851 assert(is_whitespace_normalized(XCS("abc def ghi"), 1));
852 assert(! is_whitespace_normalized(XCS(" abc def ghi"), 0));
853 assert(is_whitespace_normalized(XCS(" abc def ghi"), 1));
854 assert(! is_whitespace_normalized(XCS("abc def ghi"), 0));
855 assert(is_whitespace_normalized(XCS("abc def ghi"), 1));
856 assert(! is_whitespace_normalized(XCS("abc def ghi "), 0));
857 assert(is_whitespace_normalized(XCS("abc def ghi "), 1));
858 assert(! is_whitespace_normalized(XCS(" "), 0));
859 assert(is_whitespace_normalized(XCS(" "), 1));
860 assert(! is_whitespace_normalized(XCS("\t"), 0));
861 assert(! is_whitespace_normalized(XCS("\t"), 1));
862 assert(! is_whitespace_normalized(XCS("\n"), 0));
863 assert(! is_whitespace_normalized(XCS("\n"), 1));
864 assert(! is_whitespace_normalized(XCS("\r"), 0));
865 assert(! is_whitespace_normalized(XCS("\r"), 1));
866 assert(! is_whitespace_normalized(XCS("abc\t def"), 1));
867 }
868 END_TEST
869
870 static void XMLCALL
check_attr_contains_normalized_whitespace(void * userData,const XML_Char * name,const XML_Char ** atts)871 check_attr_contains_normalized_whitespace(void *userData, const XML_Char *name,
872 const XML_Char **atts) {
873 int i;
874 UNUSED_P(userData);
875 UNUSED_P(name);
876 for (i = 0; atts[i] != NULL; i += 2) {
877 const XML_Char *attrname = atts[i];
878 const XML_Char *value = atts[i + 1];
879 if (xcstrcmp(XCS("attr"), attrname) == 0
880 || xcstrcmp(XCS("ents"), attrname) == 0
881 || xcstrcmp(XCS("refs"), attrname) == 0) {
882 if (! is_whitespace_normalized(value, 0)) {
883 char buffer[256];
884 snprintf(buffer, sizeof(buffer),
885 "attribute value not normalized: %" XML_FMT_STR
886 "='%" XML_FMT_STR "'",
887 attrname, value);
888 fail(buffer);
889 }
890 }
891 }
892 }
893
START_TEST(test_attr_whitespace_normalization)894 START_TEST(test_attr_whitespace_normalization) {
895 const char *text
896 = "<!DOCTYPE doc [\n"
897 " <!ATTLIST doc\n"
898 " attr NMTOKENS #REQUIRED\n"
899 " ents ENTITIES #REQUIRED\n"
900 " refs IDREFS #REQUIRED>\n"
901 "]>\n"
902 "<doc attr=' a b c\t\td\te\t' refs=' id-1 \t id-2\t\t' \n"
903 " ents=' ent-1 \t\r\n"
904 " ent-2 ' >\n"
905 " <e id='id-1'/>\n"
906 " <e id='id-2'/>\n"
907 "</doc>";
908
909 XML_SetStartElementHandler(g_parser,
910 check_attr_contains_normalized_whitespace);
911 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
912 == XML_STATUS_ERROR)
913 xml_failure(g_parser);
914 }
915 END_TEST
916
917 /*
918 * XML declaration tests.
919 */
920
START_TEST(test_xmldecl_misplaced)921 START_TEST(test_xmldecl_misplaced) {
922 expect_failure("\n"
923 "<?xml version='1.0'?>\n"
924 "<a/>",
925 XML_ERROR_MISPLACED_XML_PI,
926 "failed to report misplaced XML declaration");
927 }
928 END_TEST
929
START_TEST(test_xmldecl_invalid)930 START_TEST(test_xmldecl_invalid) {
931 expect_failure("<?xml version='1.0' \xc3\xa7?>\n<doc/>", XML_ERROR_XML_DECL,
932 "Failed to report invalid XML declaration");
933 }
934 END_TEST
935
START_TEST(test_xmldecl_missing_attr)936 START_TEST(test_xmldecl_missing_attr) {
937 expect_failure("<?xml ='1.0'?>\n<doc/>\n", XML_ERROR_XML_DECL,
938 "Failed to report missing XML declaration attribute");
939 }
940 END_TEST
941
START_TEST(test_xmldecl_missing_value)942 START_TEST(test_xmldecl_missing_value) {
943 expect_failure("<?xml version='1.0' encoding='us-ascii' standalone?>\n"
944 "<doc/>",
945 XML_ERROR_XML_DECL,
946 "Failed to report missing attribute value");
947 }
948 END_TEST
949
950 /* Regression test for SF bug #584832. */
START_TEST(test_unknown_encoding_internal_entity)951 START_TEST(test_unknown_encoding_internal_entity) {
952 const char *text = "<?xml version='1.0' encoding='unsupported-encoding'?>\n"
953 "<!DOCTYPE test [<!ENTITY foo 'bar'>]>\n"
954 "<test a='&foo;'/>";
955
956 XML_SetUnknownEncodingHandler(g_parser, UnknownEncodingHandler, NULL);
957 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
958 == XML_STATUS_ERROR)
959 xml_failure(g_parser);
960 }
961 END_TEST
962
963 /* Test unrecognised encoding handler */
START_TEST(test_unrecognised_encoding_internal_entity)964 START_TEST(test_unrecognised_encoding_internal_entity) {
965 const char *text = "<?xml version='1.0' encoding='unsupported-encoding'?>\n"
966 "<!DOCTYPE test [<!ENTITY foo 'bar'>]>\n"
967 "<test a='&foo;'/>";
968
969 XML_SetUnknownEncodingHandler(g_parser, UnrecognisedEncodingHandler, NULL);
970 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
971 != XML_STATUS_ERROR)
972 fail("Unrecognised encoding not rejected");
973 }
974 END_TEST
975
976 /* Regression test for SF bug #620106. */
START_TEST(test_ext_entity_set_encoding)977 START_TEST(test_ext_entity_set_encoding) {
978 const char *text = "<!DOCTYPE doc [\n"
979 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
980 "]>\n"
981 "<doc>&en;</doc>";
982 ExtTest test_data
983 = {/* This text says it's an unsupported encoding, but it's really
984 UTF-8, which we tell Expat using XML_SetEncoding().
985 */
986 "<?xml encoding='iso-8859-3'?>\xC3\xA9", XCS("utf-8"), NULL};
987 #ifdef XML_UNICODE
988 const XML_Char *expected = XCS("\x00e9");
989 #else
990 const XML_Char *expected = XCS("\xc3\xa9");
991 #endif
992
993 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
994 run_ext_character_check(text, &test_data, expected);
995 }
996 END_TEST
997
998 /* Test external entities with no handler */
START_TEST(test_ext_entity_no_handler)999 START_TEST(test_ext_entity_no_handler) {
1000 const char *text = "<!DOCTYPE doc [\n"
1001 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
1002 "]>\n"
1003 "<doc>&en;</doc>";
1004
1005 XML_SetDefaultHandler(g_parser, dummy_default_handler);
1006 run_character_check(text, XCS(""));
1007 }
1008 END_TEST
1009
1010 /* Test UTF-8 BOM is accepted */
START_TEST(test_ext_entity_set_bom)1011 START_TEST(test_ext_entity_set_bom) {
1012 const char *text = "<!DOCTYPE doc [\n"
1013 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
1014 "]>\n"
1015 "<doc>&en;</doc>";
1016 ExtTest test_data = {"\xEF\xBB\xBF" /* BOM */
1017 "<?xml encoding='iso-8859-3'?>"
1018 "\xC3\xA9",
1019 XCS("utf-8"), NULL};
1020 #ifdef XML_UNICODE
1021 const XML_Char *expected = XCS("\x00e9");
1022 #else
1023 const XML_Char *expected = XCS("\xc3\xa9");
1024 #endif
1025
1026 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1027 run_ext_character_check(text, &test_data, expected);
1028 }
1029 END_TEST
1030
1031 /* Test that bad encodings are faulted */
START_TEST(test_ext_entity_bad_encoding)1032 START_TEST(test_ext_entity_bad_encoding) {
1033 const char *text = "<!DOCTYPE doc [\n"
1034 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
1035 "]>\n"
1036 "<doc>&en;</doc>";
1037 ExtFaults fault
1038 = {"<?xml encoding='iso-8859-3'?>u", "Unsupported encoding not faulted",
1039 XCS("unknown"), XML_ERROR_UNKNOWN_ENCODING};
1040
1041 XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
1042 XML_SetUserData(g_parser, &fault);
1043 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
1044 "Bad encoding should not have been accepted");
1045 }
1046 END_TEST
1047
1048 /* Try handing an invalid encoding to an external entity parser */
START_TEST(test_ext_entity_bad_encoding_2)1049 START_TEST(test_ext_entity_bad_encoding_2) {
1050 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
1051 "<!DOCTYPE doc SYSTEM 'foo'>\n"
1052 "<doc>&entity;</doc>";
1053 ExtFaults fault
1054 = {"<!ELEMENT doc (#PCDATA)*>", "Unknown encoding not faulted",
1055 XCS("unknown-encoding"), XML_ERROR_UNKNOWN_ENCODING};
1056
1057 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
1058 XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
1059 XML_SetUserData(g_parser, &fault);
1060 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
1061 "Bad encoding not faulted in external entity handler");
1062 }
1063 END_TEST
1064
1065 /* Test that no error is reported for unknown entities if we don't
1066 read an external subset. This was fixed in Expat 1.95.5.
1067 */
START_TEST(test_wfc_undeclared_entity_unread_external_subset)1068 START_TEST(test_wfc_undeclared_entity_unread_external_subset) {
1069 const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n"
1070 "<doc>&entity;</doc>";
1071
1072 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1073 == XML_STATUS_ERROR)
1074 xml_failure(g_parser);
1075 }
1076 END_TEST
1077
1078 /* Test that an error is reported for unknown entities if we don't
1079 have an external subset.
1080 */
START_TEST(test_wfc_undeclared_entity_no_external_subset)1081 START_TEST(test_wfc_undeclared_entity_no_external_subset) {
1082 expect_failure("<doc>&entity;</doc>", XML_ERROR_UNDEFINED_ENTITY,
1083 "Parser did not report undefined entity w/out a DTD.");
1084 }
1085 END_TEST
1086
1087 /* Test that an error is reported for unknown entities if we don't
1088 read an external subset, but have been declared standalone.
1089 */
START_TEST(test_wfc_undeclared_entity_standalone)1090 START_TEST(test_wfc_undeclared_entity_standalone) {
1091 const char *text
1092 = "<?xml version='1.0' encoding='us-ascii' standalone='yes'?>\n"
1093 "<!DOCTYPE doc SYSTEM 'foo'>\n"
1094 "<doc>&entity;</doc>";
1095
1096 expect_failure(text, XML_ERROR_UNDEFINED_ENTITY,
1097 "Parser did not report undefined entity (standalone).");
1098 }
1099 END_TEST
1100
1101 /* Test that an error is reported for unknown entities if we have read
1102 an external subset, and standalone is true.
1103 */
START_TEST(test_wfc_undeclared_entity_with_external_subset_standalone)1104 START_TEST(test_wfc_undeclared_entity_with_external_subset_standalone) {
1105 const char *text
1106 = "<?xml version='1.0' encoding='us-ascii' standalone='yes'?>\n"
1107 "<!DOCTYPE doc SYSTEM 'foo'>\n"
1108 "<doc>&entity;</doc>";
1109 ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
1110
1111 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
1112 XML_SetUserData(g_parser, &test_data);
1113 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1114 expect_failure(text, XML_ERROR_UNDEFINED_ENTITY,
1115 "Parser did not report undefined entity (external DTD).");
1116 }
1117 END_TEST
1118
/* Test that external entity handling is not done for a standalone
 * document if the parsing flag is set to UNLESS_STANDALONE
 */
START_TEST(test_entity_with_external_subset_unless_standalone)1122 START_TEST(test_entity_with_external_subset_unless_standalone) {
1123 const char *text
1124 = "<?xml version='1.0' encoding='us-ascii' standalone='yes'?>\n"
1125 "<!DOCTYPE doc SYSTEM 'foo'>\n"
1126 "<doc>&entity;</doc>";
1127 ExtTest test_data = {"<!ENTITY entity 'bar'>", NULL, NULL};
1128
1129 XML_SetParamEntityParsing(g_parser,
1130 XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE);
1131 XML_SetUserData(g_parser, &test_data);
1132 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1133 expect_failure(text, XML_ERROR_UNDEFINED_ENTITY,
1134 "Parser did not report undefined entity");
1135 }
1136 END_TEST
1137
1138 /* Test that no error is reported for unknown entities if we have read
1139 an external subset, and standalone is false.
1140 */
START_TEST(test_wfc_undeclared_entity_with_external_subset)1141 START_TEST(test_wfc_undeclared_entity_with_external_subset) {
1142 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
1143 "<!DOCTYPE doc SYSTEM 'foo'>\n"
1144 "<doc>&entity;</doc>";
1145 ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
1146
1147 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
1148 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1149 run_ext_character_check(text, &test_data, XCS(""));
1150 }
1151 END_TEST
1152
1153 /* Test that an error is reported if our NotStandalone handler fails */
START_TEST(test_not_standalone_handler_reject)1154 START_TEST(test_not_standalone_handler_reject) {
1155 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
1156 "<!DOCTYPE doc SYSTEM 'foo'>\n"
1157 "<doc>&entity;</doc>";
1158 ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
1159
1160 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
1161 XML_SetUserData(g_parser, &test_data);
1162 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1163 XML_SetNotStandaloneHandler(g_parser, reject_not_standalone_handler);
1164 expect_failure(text, XML_ERROR_NOT_STANDALONE,
1165 "NotStandalone handler failed to reject");
1166
1167 /* Try again but without external entity handling */
1168 XML_ParserReset(g_parser, NULL);
1169 XML_SetNotStandaloneHandler(g_parser, reject_not_standalone_handler);
1170 expect_failure(text, XML_ERROR_NOT_STANDALONE,
1171 "NotStandalone handler failed to reject");
1172 }
1173 END_TEST
1174
1175 /* Test that no error is reported if our NotStandalone handler succeeds */
START_TEST(test_not_standalone_handler_accept)1176 START_TEST(test_not_standalone_handler_accept) {
1177 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
1178 "<!DOCTYPE doc SYSTEM 'foo'>\n"
1179 "<doc>&entity;</doc>";
1180 ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
1181
1182 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
1183 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1184 XML_SetNotStandaloneHandler(g_parser, accept_not_standalone_handler);
1185 run_ext_character_check(text, &test_data, XCS(""));
1186
1187 /* Repeat without the external entity handler */
1188 XML_ParserReset(g_parser, NULL);
1189 XML_SetNotStandaloneHandler(g_parser, accept_not_standalone_handler);
1190 run_character_check(text, XCS(""));
1191 }
1192 END_TEST
1193
START_TEST(test_wfc_no_recursive_entity_refs)1194 START_TEST(test_wfc_no_recursive_entity_refs) {
1195 const char *text = "<!DOCTYPE doc [\n"
1196 " <!ENTITY entity '&entity;'>\n"
1197 "]>\n"
1198 "<doc>&entity;</doc>";
1199
1200 expect_failure(text, XML_ERROR_RECURSIVE_ENTITY_REF,
1201 "Parser did not report recursive entity reference.");
1202 }
1203 END_TEST
1204
1205 /* Test incomplete external entities are faulted */
START_TEST(test_ext_entity_invalid_parse)1206 START_TEST(test_ext_entity_invalid_parse) {
1207 const char *text = "<!DOCTYPE doc [\n"
1208 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
1209 "]>\n"
1210 "<doc>&en;</doc>";
1211 const ExtFaults faults[]
1212 = {{"<", "Incomplete element declaration not faulted", NULL,
1213 XML_ERROR_UNCLOSED_TOKEN},
1214 {"<\xe2\x82", /* First two bytes of a three-byte char */
1215 "Incomplete character not faulted", NULL, XML_ERROR_PARTIAL_CHAR},
1216 {"<tag>\xe2\x82", "Incomplete character in CDATA not faulted", NULL,
1217 XML_ERROR_PARTIAL_CHAR},
1218 {NULL, NULL, NULL, XML_ERROR_NONE}};
1219 const ExtFaults *fault = faults;
1220
1221 for (; fault->parse_text != NULL; fault++) {
1222 set_subtest("\"%s\"", fault->parse_text);
1223 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
1224 XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
1225 XML_SetUserData(g_parser, (void *)fault);
1226 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
1227 "Parser did not report external entity error");
1228 XML_ParserReset(g_parser, NULL);
1229 }
1230 }
1231 END_TEST
1232
1233 /* Regression test for SF bug #483514. */
START_TEST(test_dtd_default_handling)1234 START_TEST(test_dtd_default_handling) {
1235 const char *text = "<!DOCTYPE doc [\n"
1236 "<!ENTITY e SYSTEM 'http://example.org/e'>\n"
1237 "<!NOTATION n SYSTEM 'http://example.org/n'>\n"
1238 "<!ELEMENT doc EMPTY>\n"
1239 "<!ATTLIST doc a CDATA #IMPLIED>\n"
1240 "<?pi in dtd?>\n"
1241 "<!--comment in dtd-->\n"
1242 "]><doc/>";
1243
1244 XML_SetDefaultHandler(g_parser, accumulate_characters);
1245 XML_SetStartDoctypeDeclHandler(g_parser, dummy_start_doctype_handler);
1246 XML_SetEndDoctypeDeclHandler(g_parser, dummy_end_doctype_handler);
1247 XML_SetEntityDeclHandler(g_parser, dummy_entity_decl_handler);
1248 XML_SetNotationDeclHandler(g_parser, dummy_notation_decl_handler);
1249 XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
1250 XML_SetAttlistDeclHandler(g_parser, dummy_attlist_decl_handler);
1251 XML_SetProcessingInstructionHandler(g_parser, dummy_pi_handler);
1252 XML_SetCommentHandler(g_parser, dummy_comment_handler);
1253 XML_SetStartCdataSectionHandler(g_parser, dummy_start_cdata_handler);
1254 XML_SetEndCdataSectionHandler(g_parser, dummy_end_cdata_handler);
1255 run_character_check(text, XCS("\n\n\n\n\n\n\n<doc/>"));
1256 }
1257 END_TEST
1258
1259 /* Test handling of attribute declarations */
START_TEST(test_dtd_attr_handling)1260 START_TEST(test_dtd_attr_handling) {
1261 const char *prolog = "<!DOCTYPE doc [\n"
1262 "<!ELEMENT doc EMPTY>\n";
1263 AttTest attr_data[]
1264 = {{"<!ATTLIST doc a ( one | two | three ) #REQUIRED>\n"
1265 "]>"
1266 "<doc a='two'/>",
1267 XCS("doc"), XCS("a"),
1268 XCS("(one|two|three)"), /* Extraneous spaces will be removed */
1269 NULL, XML_TRUE},
1270 {"<!NOTATION foo SYSTEM 'http://example.org/foo'>\n"
1271 "<!ATTLIST doc a NOTATION (foo) #IMPLIED>\n"
1272 "]>"
1273 "<doc/>",
1274 XCS("doc"), XCS("a"), XCS("NOTATION(foo)"), NULL, XML_FALSE},
1275 {"<!ATTLIST doc a NOTATION (foo) 'bar'>\n"
1276 "]>"
1277 "<doc/>",
1278 XCS("doc"), XCS("a"), XCS("NOTATION(foo)"), XCS("bar"), XML_FALSE},
1279 {"<!ATTLIST doc a CDATA '\xdb\xb2'>\n"
1280 "]>"
1281 "<doc/>",
1282 XCS("doc"), XCS("a"), XCS("CDATA"),
1283 #ifdef XML_UNICODE
1284 XCS("\x06f2"),
1285 #else
1286 XCS("\xdb\xb2"),
1287 #endif
1288 XML_FALSE},
1289 {NULL, NULL, NULL, NULL, NULL, XML_FALSE}};
1290 AttTest *test;
1291
1292 for (test = attr_data; test->definition != NULL; test++) {
1293 set_subtest("%s", test->definition);
1294 XML_SetAttlistDeclHandler(g_parser, verify_attlist_decl_handler);
1295 XML_SetUserData(g_parser, test);
1296 if (_XML_Parse_SINGLE_BYTES(g_parser, prolog, (int)strlen(prolog),
1297 XML_FALSE)
1298 == XML_STATUS_ERROR)
1299 xml_failure(g_parser);
1300 if (_XML_Parse_SINGLE_BYTES(g_parser, test->definition,
1301 (int)strlen(test->definition), XML_TRUE)
1302 == XML_STATUS_ERROR)
1303 xml_failure(g_parser);
1304 XML_ParserReset(g_parser, NULL);
1305 }
1306 }
1307 END_TEST
1308
/* See related SF bug #673791.
   When namespace processing is enabled, setting the namespace URI for
   a prefix to the empty string is not allowed; this test ensures that
   it *is* allowed when namespace processing is not enabled.
   (See Namespaces in XML, section 2.)
*/
START_TEST(test_empty_ns_without_namespaces)1315 START_TEST(test_empty_ns_without_namespaces) {
1316 const char *text = "<doc xmlns:prefix='http://example.org/'>\n"
1317 " <e xmlns:prefix=''/>\n"
1318 "</doc>";
1319
1320 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1321 == XML_STATUS_ERROR)
1322 xml_failure(g_parser);
1323 }
1324 END_TEST
1325
1326 /* Regression test for SF bug #824420.
1327 Checks that an xmlns:prefix attribute set in an attribute's default
1328 value isn't misinterpreted.
1329 */
START_TEST(test_ns_in_attribute_default_without_namespaces)1330 START_TEST(test_ns_in_attribute_default_without_namespaces) {
1331 const char *text = "<!DOCTYPE e:element [\n"
1332 " <!ATTLIST e:element\n"
1333 " xmlns:e CDATA 'http://example.org/'>\n"
1334 " ]>\n"
1335 "<e:element/>";
1336
1337 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1338 == XML_STATUS_ERROR)
1339 xml_failure(g_parser);
1340 }
1341 END_TEST
1342
/* Regression test for SF bug #1515266: missing check of stopped
   parser in doContent() 'for' loop. */
START_TEST(test_stop_parser_between_char_data_calls)1345 START_TEST(test_stop_parser_between_char_data_calls) {
1346 /* The sample data must be big enough that there are two calls to
1347 the character data handler from within the inner "for" loop of
1348 the XML_TOK_DATA_CHARS case in doContent(), and the character
1349 handler must stop the parser and clear the character data
1350 handler.
1351 */
1352 const char *text = long_character_data_text;
1353
1354 XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
1355 g_resumable = XML_FALSE;
1356 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1357 != XML_STATUS_ERROR)
1358 xml_failure(g_parser);
1359 if (XML_GetErrorCode(g_parser) != XML_ERROR_ABORTED)
1360 xml_failure(g_parser);
1361 }
1362 END_TEST
1363
/* Regression test for SF bug #1515266: missing check of stopped
   parser in doContent() 'for' loop. */
START_TEST(test_suspend_parser_between_char_data_calls)1366 START_TEST(test_suspend_parser_between_char_data_calls) {
1367 /* The sample data must be big enough that there are two calls to
1368 the character data handler from within the inner "for" loop of
1369 the XML_TOK_DATA_CHARS case in doContent(), and the character
1370 handler must stop the parser and clear the character data
1371 handler.
1372 */
1373 const char *text = long_character_data_text;
1374
1375 XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
1376 g_resumable = XML_TRUE;
1377 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1378 != XML_STATUS_SUSPENDED)
1379 xml_failure(g_parser);
1380 if (XML_GetErrorCode(g_parser) != XML_ERROR_NONE)
1381 xml_failure(g_parser);
1382 /* Try parsing directly */
1383 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1384 != XML_STATUS_ERROR)
1385 fail("Attempt to continue parse while suspended not faulted");
1386 if (XML_GetErrorCode(g_parser) != XML_ERROR_SUSPENDED)
1387 fail("Suspended parse not faulted with correct error");
1388 }
1389 END_TEST
1390
1391 /* Test repeated calls to XML_StopParser are handled correctly */
START_TEST(test_repeated_stop_parser_between_char_data_calls)1392 START_TEST(test_repeated_stop_parser_between_char_data_calls) {
1393 const char *text = long_character_data_text;
1394
1395 XML_SetCharacterDataHandler(g_parser, parser_stop_character_handler);
1396 g_resumable = XML_FALSE;
1397 g_abortable = XML_FALSE;
1398 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1399 != XML_STATUS_ERROR)
1400 fail("Failed to double-stop parser");
1401
1402 XML_ParserReset(g_parser, NULL);
1403 XML_SetCharacterDataHandler(g_parser, parser_stop_character_handler);
1404 g_resumable = XML_TRUE;
1405 g_abortable = XML_FALSE;
1406 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1407 != XML_STATUS_SUSPENDED)
1408 fail("Failed to double-suspend parser");
1409
1410 XML_ParserReset(g_parser, NULL);
1411 XML_SetCharacterDataHandler(g_parser, parser_stop_character_handler);
1412 g_resumable = XML_TRUE;
1413 g_abortable = XML_TRUE;
1414 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1415 != XML_STATUS_ERROR)
1416 fail("Failed to suspend-abort parser");
1417 }
1418 END_TEST
1419
START_TEST(test_good_cdata_ascii)1420 START_TEST(test_good_cdata_ascii) {
1421 const char *text = "<a><![CDATA[<greeting>Hello, world!</greeting>]]></a>";
1422 const XML_Char *expected = XCS("<greeting>Hello, world!</greeting>");
1423
1424 CharData storage;
1425 CharData_Init(&storage);
1426 XML_SetUserData(g_parser, &storage);
1427 XML_SetCharacterDataHandler(g_parser, accumulate_characters);
1428 /* Add start and end handlers for coverage */
1429 XML_SetStartCdataSectionHandler(g_parser, dummy_start_cdata_handler);
1430 XML_SetEndCdataSectionHandler(g_parser, dummy_end_cdata_handler);
1431
1432 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1433 == XML_STATUS_ERROR)
1434 xml_failure(g_parser);
1435 CharData_CheckXMLChars(&storage, expected);
1436
1437 /* Try again, this time with a default handler */
1438 XML_ParserReset(g_parser, NULL);
1439 CharData_Init(&storage);
1440 XML_SetUserData(g_parser, &storage);
1441 XML_SetCharacterDataHandler(g_parser, accumulate_characters);
1442 XML_SetDefaultHandler(g_parser, dummy_default_handler);
1443
1444 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1445 == XML_STATUS_ERROR)
1446 xml_failure(g_parser);
1447 CharData_CheckXMLChars(&storage, expected);
1448 }
1449 END_TEST
1450
START_TEST(test_good_cdata_utf16)1451 START_TEST(test_good_cdata_utf16) {
1452 /* Test data is:
1453 * <?xml version='1.0' encoding='utf-16'?>
1454 * <a><![CDATA[hello]]></a>
1455 */
1456 const char text[]
1457 = "\0<\0?\0x\0m\0l\0"
1458 " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
1459 " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0"
1460 "1\0"
1461 "6\0'"
1462 "\0?\0>\0\n"
1463 "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0[\0h\0e\0l\0l\0o\0]\0]\0>\0<\0/\0a\0>";
1464 const XML_Char *expected = XCS("hello");
1465
1466 CharData storage;
1467 CharData_Init(&storage);
1468 XML_SetUserData(g_parser, &storage);
1469 XML_SetCharacterDataHandler(g_parser, accumulate_characters);
1470
1471 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
1472 == XML_STATUS_ERROR)
1473 xml_failure(g_parser);
1474 CharData_CheckXMLChars(&storage, expected);
1475 }
1476 END_TEST
1477
START_TEST(test_good_cdata_utf16_le)1478 START_TEST(test_good_cdata_utf16_le) {
1479 /* Test data is:
1480 * <?xml version='1.0' encoding='utf-16'?>
1481 * <a><![CDATA[hello]]></a>
1482 */
1483 const char text[]
1484 = "<\0?\0x\0m\0l\0"
1485 " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
1486 " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0"
1487 "1\0"
1488 "6\0'"
1489 "\0?\0>\0\n"
1490 "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0[\0h\0e\0l\0l\0o\0]\0]\0>\0<\0/\0a\0>\0";
1491 const XML_Char *expected = XCS("hello");
1492
1493 CharData storage;
1494 CharData_Init(&storage);
1495 XML_SetUserData(g_parser, &storage);
1496 XML_SetCharacterDataHandler(g_parser, accumulate_characters);
1497
1498 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
1499 == XML_STATUS_ERROR)
1500 xml_failure(g_parser);
1501 CharData_CheckXMLChars(&storage, expected);
1502 }
1503 END_TEST
1504
1505 /* Test UTF16 conversion of a long cdata string */
1506
1507 /* 16 characters: handy macro to reduce visual clutter */
/* The letters A..P, each preceded by a NUL byte: 32 bytes in total */
#define A_TO_P_IN_UTF16                                                        \
  "\0A\0B\0C\0D\0E\0F\0G\0H"                                                   \
  "\0I\0J\0K\0L\0M\0N\0O\0P"
1509
START_TEST(test_long_cdata_utf16)1510 START_TEST(test_long_cdata_utf16) {
1511 /* Test data is:
1512 * <?xlm version='1.0' encoding='utf-16'?>
1513 * <a><![CDATA[
1514 * ABCDEFGHIJKLMNOP
1515 * ]]></a>
1516 */
1517 const char text[]
1518 = "\0<\0?\0x\0m\0l\0 "
1519 "\0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0 "
1520 "\0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0\x31\0\x36\0'\0?\0>"
1521 "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0["
1522 /* 64 characters per line */
1523 /* clang-format off */
1524 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1525 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1526 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1527 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1528 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1529 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1530 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1531 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1532 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1533 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1534 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1535 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1536 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1537 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1538 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1539 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1540 A_TO_P_IN_UTF16
1541 /* clang-format on */
1542 "\0]\0]\0>\0<\0/\0a\0>";
1543 const XML_Char *expected =
1544 /* clang-format off */
1545 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1546 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1547 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1548 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1549 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1550 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1551 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1552 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1553 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1554 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1555 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1556 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1557 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1558 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1559 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1560 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1561 XCS("ABCDEFGHIJKLMNOP");
1562 /* clang-format on */
1563 CharData storage;
1564 void *buffer;
1565
1566 CharData_Init(&storage);
1567 XML_SetUserData(g_parser, &storage);
1568 XML_SetCharacterDataHandler(g_parser, accumulate_characters);
1569 buffer = XML_GetBuffer(g_parser, sizeof(text) - 1);
1570 if (buffer == NULL)
1571 fail("Could not allocate parse buffer");
1572 assert(buffer != NULL);
1573 memcpy(buffer, text, sizeof(text) - 1);
1574 if (XML_ParseBuffer(g_parser, sizeof(text) - 1, XML_TRUE) == XML_STATUS_ERROR)
1575 xml_failure(g_parser);
1576 CharData_CheckXMLChars(&storage, expected);
1577 }
1578 END_TEST
1579
1580 /* Test handling of multiple unit UTF-16 characters */
START_TEST(test_multichar_cdata_utf16)1581 START_TEST(test_multichar_cdata_utf16) {
1582 /* Test data is:
1583 * <?xml version='1.0' encoding='utf-16'?>
1584 * <a><![CDATA[{MINIM}{CROTCHET}]]></a>
1585 *
1586 * where {MINIM} is U+1d15e (a minim or half-note)
1587 * UTF-16: 0xd834 0xdd5e
1588 * UTF-8: 0xf0 0x9d 0x85 0x9e
1589 * and {CROTCHET} is U+1d15f (a crotchet or quarter-note)
1590 * UTF-16: 0xd834 0xdd5f
1591 * UTF-8: 0xf0 0x9d 0x85 0x9f
1592 */
1593 const char text[] = "\0<\0?\0x\0m\0l\0"
1594 " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
1595 " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0"
1596 "1\0"
1597 "6\0'"
1598 "\0?\0>\0\n"
1599 "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0["
1600 "\xd8\x34\xdd\x5e\xd8\x34\xdd\x5f"
1601 "\0]\0]\0>\0<\0/\0a\0>";
1602 #ifdef XML_UNICODE
1603 const XML_Char *expected = XCS("\xd834\xdd5e\xd834\xdd5f");
1604 #else
1605 const XML_Char *expected = XCS("\xf0\x9d\x85\x9e\xf0\x9d\x85\x9f");
1606 #endif
1607 CharData storage;
1608
1609 CharData_Init(&storage);
1610 XML_SetUserData(g_parser, &storage);
1611 XML_SetCharacterDataHandler(g_parser, accumulate_characters);
1612
1613 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
1614 == XML_STATUS_ERROR)
1615 xml_failure(g_parser);
1616 CharData_CheckXMLChars(&storage, expected);
1617 }
1618 END_TEST
1619
/* Test that a reversed UTF-16 surrogate pair in a CDATA section is rejected */
START_TEST(test_utf16_bad_surrogate_pair)1621 START_TEST(test_utf16_bad_surrogate_pair) {
1622 /* Test data is:
1623 * <?xml version='1.0' encoding='utf-16'?>
1624 * <a><![CDATA[{BADLINB}]]></a>
1625 *
1626 * where {BADLINB} is U+10000 (the first Linear B character)
1627 * with the UTF-16 surrogate pair in the wrong order, i.e.
1628 * 0xdc00 0xd800
1629 */
1630 const char text[] = "\0<\0?\0x\0m\0l\0"
1631 " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
1632 " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0"
1633 "1\0"
1634 "6\0'"
1635 "\0?\0>\0\n"
1636 "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0["
1637 "\xdc\x00\xd8\x00"
1638 "\0]\0]\0>\0<\0/\0a\0>";
1639
1640 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
1641 != XML_STATUS_ERROR)
1642 fail("Reversed UTF-16 surrogate pair not faulted");
1643 if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN)
1644 xml_failure(g_parser);
1645 }
1646 END_TEST
1647
START_TEST(test_bad_cdata)1648 START_TEST(test_bad_cdata) {
1649 struct CaseData {
1650 const char *text;
1651 enum XML_Error expectedError;
1652 };
1653
1654 struct CaseData cases[]
1655 = {{"<a><", XML_ERROR_UNCLOSED_TOKEN},
1656 {"<a><!", XML_ERROR_UNCLOSED_TOKEN},
1657 {"<a><![", XML_ERROR_UNCLOSED_TOKEN},
1658 {"<a><![C", XML_ERROR_UNCLOSED_TOKEN},
1659 {"<a><![CD", XML_ERROR_UNCLOSED_TOKEN},
1660 {"<a><![CDA", XML_ERROR_UNCLOSED_TOKEN},
1661 {"<a><![CDAT", XML_ERROR_UNCLOSED_TOKEN},
1662 {"<a><![CDATA", XML_ERROR_UNCLOSED_TOKEN},
1663
1664 {"<a><![CDATA[", XML_ERROR_UNCLOSED_CDATA_SECTION},
1665 {"<a><![CDATA[]", XML_ERROR_UNCLOSED_CDATA_SECTION},
1666 {"<a><![CDATA[]]", XML_ERROR_UNCLOSED_CDATA_SECTION},
1667
1668 {"<a><!<a/>", XML_ERROR_INVALID_TOKEN},
1669 {"<a><![<a/>", XML_ERROR_UNCLOSED_TOKEN}, /* ?! */
1670 {"<a><![C<a/>", XML_ERROR_UNCLOSED_TOKEN}, /* ?! */
1671 {"<a><![CD<a/>", XML_ERROR_INVALID_TOKEN},
1672 {"<a><![CDA<a/>", XML_ERROR_INVALID_TOKEN},
1673 {"<a><![CDAT<a/>", XML_ERROR_INVALID_TOKEN},
1674 {"<a><![CDATA<a/>", XML_ERROR_INVALID_TOKEN},
1675
1676 {"<a><![CDATA[<a/>", XML_ERROR_UNCLOSED_CDATA_SECTION},
1677 {"<a><![CDATA[]<a/>", XML_ERROR_UNCLOSED_CDATA_SECTION},
1678 {"<a><![CDATA[]]<a/>", XML_ERROR_UNCLOSED_CDATA_SECTION}};
1679
1680 size_t i = 0;
1681 for (; i < sizeof(cases) / sizeof(struct CaseData); i++) {
1682 set_subtest("%s", cases[i].text);
1683 const enum XML_Status actualStatus = _XML_Parse_SINGLE_BYTES(
1684 g_parser, cases[i].text, (int)strlen(cases[i].text), XML_TRUE);
1685 const enum XML_Error actualError = XML_GetErrorCode(g_parser);
1686
1687 assert(actualStatus == XML_STATUS_ERROR);
1688
1689 if (actualError != cases[i].expectedError) {
1690 char message[100];
1691 snprintf(message, sizeof(message),
1692 "Expected error %d but got error %d for case %u: \"%s\"\n",
1693 cases[i].expectedError, actualError, (unsigned int)i + 1,
1694 cases[i].text);
1695 fail(message);
1696 }
1697
1698 XML_ParserReset(g_parser, NULL);
1699 }
1700 }
1701 END_TEST
1702
1703 /* Test failures in UTF-16 CDATA */
START_TEST(test_bad_cdata_utf16)1704 START_TEST(test_bad_cdata_utf16) {
1705 struct CaseData {
1706 size_t text_bytes;
1707 const char *text;
1708 enum XML_Error expected_error;
1709 };
1710
1711 const char prolog[] = "\0<\0?\0x\0m\0l\0"
1712 " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
1713 " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0"
1714 "1\0"
1715 "6\0'"
1716 "\0?\0>\0\n"
1717 "\0<\0a\0>";
1718 struct CaseData cases[] = {
1719 {1, "\0", XML_ERROR_UNCLOSED_TOKEN},
1720 {2, "\0<", XML_ERROR_UNCLOSED_TOKEN},
1721 {3, "\0<\0", XML_ERROR_UNCLOSED_TOKEN},
1722 {4, "\0<\0!", XML_ERROR_UNCLOSED_TOKEN},
1723 {5, "\0<\0!\0", XML_ERROR_UNCLOSED_TOKEN},
1724 {6, "\0<\0!\0[", XML_ERROR_UNCLOSED_TOKEN},
1725 {7, "\0<\0!\0[\0", XML_ERROR_UNCLOSED_TOKEN},
1726 {8, "\0<\0!\0[\0C", XML_ERROR_UNCLOSED_TOKEN},
1727 {9, "\0<\0!\0[\0C\0", XML_ERROR_UNCLOSED_TOKEN},
1728 {10, "\0<\0!\0[\0C\0D", XML_ERROR_UNCLOSED_TOKEN},
1729 {11, "\0<\0!\0[\0C\0D\0", XML_ERROR_UNCLOSED_TOKEN},
1730 {12, "\0<\0!\0[\0C\0D\0A", XML_ERROR_UNCLOSED_TOKEN},
1731 {13, "\0<\0!\0[\0C\0D\0A\0", XML_ERROR_UNCLOSED_TOKEN},
1732 {14, "\0<\0!\0[\0C\0D\0A\0T", XML_ERROR_UNCLOSED_TOKEN},
1733 {15, "\0<\0!\0[\0C\0D\0A\0T\0", XML_ERROR_UNCLOSED_TOKEN},
1734 {16, "\0<\0!\0[\0C\0D\0A\0T\0A", XML_ERROR_UNCLOSED_TOKEN},
1735 {17, "\0<\0!\0[\0C\0D\0A\0T\0A\0", XML_ERROR_UNCLOSED_TOKEN},
1736 {18, "\0<\0!\0[\0C\0D\0A\0T\0A\0[", XML_ERROR_UNCLOSED_CDATA_SECTION},
1737 {19, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0", XML_ERROR_UNCLOSED_CDATA_SECTION},
1738 {20, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z", XML_ERROR_UNCLOSED_CDATA_SECTION},
1739 /* Now add a four-byte UTF-16 character */
1740 {21, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8",
1741 XML_ERROR_UNCLOSED_CDATA_SECTION},
1742 {22, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8\x34", XML_ERROR_PARTIAL_CHAR},
1743 {23, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8\x34\xdd",
1744 XML_ERROR_PARTIAL_CHAR},
1745 {24, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8\x34\xdd\x5e",
1746 XML_ERROR_UNCLOSED_CDATA_SECTION}};
1747 size_t i;
1748
1749 for (i = 0; i < sizeof(cases) / sizeof(struct CaseData); i++) {
1750 set_subtest("case %lu", (long unsigned)(i + 1));
1751 enum XML_Status actual_status;
1752 enum XML_Error actual_error;
1753
1754 if (_XML_Parse_SINGLE_BYTES(g_parser, prolog, (int)sizeof(prolog) - 1,
1755 XML_FALSE)
1756 == XML_STATUS_ERROR)
1757 xml_failure(g_parser);
1758 actual_status = _XML_Parse_SINGLE_BYTES(g_parser, cases[i].text,
1759 (int)cases[i].text_bytes, XML_TRUE);
1760 assert(actual_status == XML_STATUS_ERROR);
1761 actual_error = XML_GetErrorCode(g_parser);
1762 if (actual_error != cases[i].expected_error) {
1763 char message[1024];
1764
1765 snprintf(message, sizeof(message),
1766 "Expected error %d (%" XML_FMT_STR "), got %d (%" XML_FMT_STR
1767 ") for case %lu\n",
1768 cases[i].expected_error,
1769 XML_ErrorString(cases[i].expected_error), actual_error,
1770 XML_ErrorString(actual_error), (long unsigned)(i + 1));
1771 fail(message);
1772 }
1773 XML_ParserReset(g_parser, NULL);
1774 }
1775 }
1776 END_TEST
1777
1778 /* Test stopping the parser in cdata handler */
START_TEST(test_stop_parser_between_cdata_calls)1779 START_TEST(test_stop_parser_between_cdata_calls) {
1780 const char *text = long_cdata_text;
1781
1782 XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
1783 g_resumable = XML_FALSE;
1784 expect_failure(text, XML_ERROR_ABORTED, "Parse not aborted in CDATA handler");
1785 }
1786 END_TEST
1787
1788 /* Test suspending the parser in cdata handler */
START_TEST(test_suspend_parser_between_cdata_calls)1789 START_TEST(test_suspend_parser_between_cdata_calls) {
1790 const char *text = long_cdata_text;
1791 enum XML_Status result;
1792
1793 XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
1794 g_resumable = XML_TRUE;
1795 result = _XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE);
1796 if (result != XML_STATUS_SUSPENDED) {
1797 if (result == XML_STATUS_ERROR)
1798 xml_failure(g_parser);
1799 fail("Parse not suspended in CDATA handler");
1800 }
1801 if (XML_GetErrorCode(g_parser) != XML_ERROR_NONE)
1802 xml_failure(g_parser);
1803 }
1804 END_TEST
1805
1806 /* Test memory allocation functions */
START_TEST(test_memory_allocation)1807 START_TEST(test_memory_allocation) {
1808 char *buffer = (char *)XML_MemMalloc(g_parser, 256);
1809 char *p;
1810
1811 if (buffer == NULL) {
1812 fail("Allocation failed");
1813 } else {
1814 /* Try writing to memory; some OSes try to cheat! */
1815 buffer[0] = 'T';
1816 buffer[1] = 'E';
1817 buffer[2] = 'S';
1818 buffer[3] = 'T';
1819 buffer[4] = '\0';
1820 if (strcmp(buffer, "TEST") != 0) {
1821 fail("Memory not writable");
1822 } else {
1823 p = (char *)XML_MemRealloc(g_parser, buffer, 512);
1824 if (p == NULL) {
1825 fail("Reallocation failed");
1826 } else {
1827 /* Write again, just to be sure */
1828 buffer = p;
1829 buffer[0] = 'V';
1830 if (strcmp(buffer, "VEST") != 0) {
1831 fail("Reallocated memory not writable");
1832 }
1833 }
1834 }
1835 XML_MemFree(g_parser, buffer);
1836 }
1837 }
1838 END_TEST
1839
1840 /* Test XML_DefaultCurrent() passes handling on correctly */
START_TEST(test_default_current)1841 START_TEST(test_default_current) {
1842 const char *text = "<doc>hell]</doc>";
1843 const char *entity_text = "<!DOCTYPE doc [\n"
1844 "<!ENTITY entity '%'>\n"
1845 "]>\n"
1846 "<doc>&entity;</doc>";
1847
1848 set_subtest("with defaulting");
1849 {
1850 struct handler_record_list storage;
1851 storage.count = 0;
1852 XML_SetDefaultHandler(g_parser, record_default_handler);
1853 XML_SetCharacterDataHandler(g_parser, record_cdata_handler);
1854 XML_SetUserData(g_parser, &storage);
1855 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1856 == XML_STATUS_ERROR)
1857 xml_failure(g_parser);
1858 int i = 0;
1859 assert_record_handler_called(&storage, i++, "record_default_handler", 5);
1860 // we should have gotten one or more cdata callbacks, totaling 5 chars
1861 int cdata_len_remaining = 5;
1862 while (cdata_len_remaining > 0) {
1863 const struct handler_record_entry *c_entry
1864 = handler_record_get(&storage, i++);
1865 assert_true(strcmp(c_entry->name, "record_cdata_handler") == 0);
1866 assert_true(c_entry->arg > 0);
1867 assert_true(c_entry->arg <= cdata_len_remaining);
1868 cdata_len_remaining -= c_entry->arg;
1869 // default handler must follow, with the exact same len argument.
1870 assert_record_handler_called(&storage, i++, "record_default_handler",
1871 c_entry->arg);
1872 }
1873 assert_record_handler_called(&storage, i++, "record_default_handler", 6);
1874 assert_true(storage.count == i);
1875 }
1876
1877 /* Again, without the defaulting */
1878 set_subtest("no defaulting");
1879 {
1880 struct handler_record_list storage;
1881 storage.count = 0;
1882 XML_ParserReset(g_parser, NULL);
1883 XML_SetDefaultHandler(g_parser, record_default_handler);
1884 XML_SetCharacterDataHandler(g_parser, record_cdata_nodefault_handler);
1885 XML_SetUserData(g_parser, &storage);
1886 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1887 == XML_STATUS_ERROR)
1888 xml_failure(g_parser);
1889 int i = 0;
1890 assert_record_handler_called(&storage, i++, "record_default_handler", 5);
1891 // we should have gotten one or more cdata callbacks, totaling 5 chars
1892 int cdata_len_remaining = 5;
1893 while (cdata_len_remaining > 0) {
1894 const struct handler_record_entry *c_entry
1895 = handler_record_get(&storage, i++);
1896 assert_true(strcmp(c_entry->name, "record_cdata_nodefault_handler") == 0);
1897 assert_true(c_entry->arg > 0);
1898 assert_true(c_entry->arg <= cdata_len_remaining);
1899 cdata_len_remaining -= c_entry->arg;
1900 }
1901 assert_record_handler_called(&storage, i++, "record_default_handler", 6);
1902 assert_true(storage.count == i);
1903 }
1904
1905 /* Now with an internal entity to complicate matters */
1906 set_subtest("with internal entity");
1907 {
1908 struct handler_record_list storage;
1909 storage.count = 0;
1910 XML_ParserReset(g_parser, NULL);
1911 XML_SetDefaultHandler(g_parser, record_default_handler);
1912 XML_SetCharacterDataHandler(g_parser, record_cdata_handler);
1913 XML_SetUserData(g_parser, &storage);
1914 if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text),
1915 XML_TRUE)
1916 == XML_STATUS_ERROR)
1917 xml_failure(g_parser);
1918 /* The default handler suppresses the entity */
1919 assert_record_handler_called(&storage, 0, "record_default_handler", 9);
1920 assert_record_handler_called(&storage, 1, "record_default_handler", 1);
1921 assert_record_handler_called(&storage, 2, "record_default_handler", 3);
1922 assert_record_handler_called(&storage, 3, "record_default_handler", 1);
1923 assert_record_handler_called(&storage, 4, "record_default_handler", 1);
1924 assert_record_handler_called(&storage, 5, "record_default_handler", 1);
1925 assert_record_handler_called(&storage, 6, "record_default_handler", 8);
1926 assert_record_handler_called(&storage, 7, "record_default_handler", 1);
1927 assert_record_handler_called(&storage, 8, "record_default_handler", 6);
1928 assert_record_handler_called(&storage, 9, "record_default_handler", 1);
1929 assert_record_handler_called(&storage, 10, "record_default_handler", 7);
1930 assert_record_handler_called(&storage, 11, "record_default_handler", 1);
1931 assert_record_handler_called(&storage, 12, "record_default_handler", 1);
1932 assert_record_handler_called(&storage, 13, "record_default_handler", 1);
1933 assert_record_handler_called(&storage, 14, "record_default_handler", 1);
1934 assert_record_handler_called(&storage, 15, "record_default_handler", 1);
1935 assert_record_handler_called(&storage, 16, "record_default_handler", 5);
1936 assert_record_handler_called(&storage, 17, "record_default_handler", 8);
1937 assert_record_handler_called(&storage, 18, "record_default_handler", 6);
1938 assert_true(storage.count == 19);
1939 }
1940
1941 /* Again, with a skip handler */
1942 set_subtest("with skip handler");
1943 {
1944 struct handler_record_list storage;
1945 storage.count = 0;
1946 XML_ParserReset(g_parser, NULL);
1947 XML_SetDefaultHandler(g_parser, record_default_handler);
1948 XML_SetCharacterDataHandler(g_parser, record_cdata_handler);
1949 XML_SetSkippedEntityHandler(g_parser, record_skip_handler);
1950 XML_SetUserData(g_parser, &storage);
1951 if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text),
1952 XML_TRUE)
1953 == XML_STATUS_ERROR)
1954 xml_failure(g_parser);
1955 /* The default handler suppresses the entity */
1956 assert_record_handler_called(&storage, 0, "record_default_handler", 9);
1957 assert_record_handler_called(&storage, 1, "record_default_handler", 1);
1958 assert_record_handler_called(&storage, 2, "record_default_handler", 3);
1959 assert_record_handler_called(&storage, 3, "record_default_handler", 1);
1960 assert_record_handler_called(&storage, 4, "record_default_handler", 1);
1961 assert_record_handler_called(&storage, 5, "record_default_handler", 1);
1962 assert_record_handler_called(&storage, 6, "record_default_handler", 8);
1963 assert_record_handler_called(&storage, 7, "record_default_handler", 1);
1964 assert_record_handler_called(&storage, 8, "record_default_handler", 6);
1965 assert_record_handler_called(&storage, 9, "record_default_handler", 1);
1966 assert_record_handler_called(&storage, 10, "record_default_handler", 7);
1967 assert_record_handler_called(&storage, 11, "record_default_handler", 1);
1968 assert_record_handler_called(&storage, 12, "record_default_handler", 1);
1969 assert_record_handler_called(&storage, 13, "record_default_handler", 1);
1970 assert_record_handler_called(&storage, 14, "record_default_handler", 1);
1971 assert_record_handler_called(&storage, 15, "record_default_handler", 1);
1972 assert_record_handler_called(&storage, 16, "record_default_handler", 5);
1973 assert_record_handler_called(&storage, 17, "record_skip_handler", 0);
1974 assert_record_handler_called(&storage, 18, "record_default_handler", 6);
1975 assert_true(storage.count == 19);
1976 }
1977
1978 /* This time, allow the entity through */
1979 set_subtest("allow entity");
1980 {
1981 struct handler_record_list storage;
1982 storage.count = 0;
1983 XML_ParserReset(g_parser, NULL);
1984 XML_SetDefaultHandlerExpand(g_parser, record_default_handler);
1985 XML_SetCharacterDataHandler(g_parser, record_cdata_handler);
1986 XML_SetUserData(g_parser, &storage);
1987 if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text),
1988 XML_TRUE)
1989 == XML_STATUS_ERROR)
1990 xml_failure(g_parser);
1991 assert_record_handler_called(&storage, 0, "record_default_handler", 9);
1992 assert_record_handler_called(&storage, 1, "record_default_handler", 1);
1993 assert_record_handler_called(&storage, 2, "record_default_handler", 3);
1994 assert_record_handler_called(&storage, 3, "record_default_handler", 1);
1995 assert_record_handler_called(&storage, 4, "record_default_handler", 1);
1996 assert_record_handler_called(&storage, 5, "record_default_handler", 1);
1997 assert_record_handler_called(&storage, 6, "record_default_handler", 8);
1998 assert_record_handler_called(&storage, 7, "record_default_handler", 1);
1999 assert_record_handler_called(&storage, 8, "record_default_handler", 6);
2000 assert_record_handler_called(&storage, 9, "record_default_handler", 1);
2001 assert_record_handler_called(&storage, 10, "record_default_handler", 7);
2002 assert_record_handler_called(&storage, 11, "record_default_handler", 1);
2003 assert_record_handler_called(&storage, 12, "record_default_handler", 1);
2004 assert_record_handler_called(&storage, 13, "record_default_handler", 1);
2005 assert_record_handler_called(&storage, 14, "record_default_handler", 1);
2006 assert_record_handler_called(&storage, 15, "record_default_handler", 1);
2007 assert_record_handler_called(&storage, 16, "record_default_handler", 5);
2008 assert_record_handler_called(&storage, 17, "record_cdata_handler", 1);
2009 assert_record_handler_called(&storage, 18, "record_default_handler", 1);
2010 assert_record_handler_called(&storage, 19, "record_default_handler", 6);
2011 assert_true(storage.count == 20);
2012 }
2013
2014 /* Finally, without passing the cdata to the default handler */
2015 set_subtest("not passing cdata");
2016 {
2017 struct handler_record_list storage;
2018 storage.count = 0;
2019 XML_ParserReset(g_parser, NULL);
2020 XML_SetDefaultHandlerExpand(g_parser, record_default_handler);
2021 XML_SetCharacterDataHandler(g_parser, record_cdata_nodefault_handler);
2022 XML_SetUserData(g_parser, &storage);
2023 if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text),
2024 XML_TRUE)
2025 == XML_STATUS_ERROR)
2026 xml_failure(g_parser);
2027 assert_record_handler_called(&storage, 0, "record_default_handler", 9);
2028 assert_record_handler_called(&storage, 1, "record_default_handler", 1);
2029 assert_record_handler_called(&storage, 2, "record_default_handler", 3);
2030 assert_record_handler_called(&storage, 3, "record_default_handler", 1);
2031 assert_record_handler_called(&storage, 4, "record_default_handler", 1);
2032 assert_record_handler_called(&storage, 5, "record_default_handler", 1);
2033 assert_record_handler_called(&storage, 6, "record_default_handler", 8);
2034 assert_record_handler_called(&storage, 7, "record_default_handler", 1);
2035 assert_record_handler_called(&storage, 8, "record_default_handler", 6);
2036 assert_record_handler_called(&storage, 9, "record_default_handler", 1);
2037 assert_record_handler_called(&storage, 10, "record_default_handler", 7);
2038 assert_record_handler_called(&storage, 11, "record_default_handler", 1);
2039 assert_record_handler_called(&storage, 12, "record_default_handler", 1);
2040 assert_record_handler_called(&storage, 13, "record_default_handler", 1);
2041 assert_record_handler_called(&storage, 14, "record_default_handler", 1);
2042 assert_record_handler_called(&storage, 15, "record_default_handler", 1);
2043 assert_record_handler_called(&storage, 16, "record_default_handler", 5);
2044 assert_record_handler_called(&storage, 17, "record_cdata_nodefault_handler",
2045 1);
2046 assert_record_handler_called(&storage, 18, "record_default_handler", 6);
2047 assert_true(storage.count == 19);
2048 }
2049 }
2050 END_TEST
2051
2052 /* Test DTD element parsing code paths */
START_TEST(test_dtd_elements)2053 START_TEST(test_dtd_elements) {
2054 const char *text = "<!DOCTYPE doc [\n"
2055 "<!ELEMENT doc (chapter)>\n"
2056 "<!ELEMENT chapter (#PCDATA)>\n"
2057 "]>\n"
2058 "<doc><chapter>Wombats are go</chapter></doc>";
2059
2060 XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
2061 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2062 == XML_STATUS_ERROR)
2063 xml_failure(g_parser);
2064 }
2065 END_TEST
2066
2067 static void XMLCALL
element_decl_check_model(void * userData,const XML_Char * name,XML_Content * model)2068 element_decl_check_model(void *userData, const XML_Char *name,
2069 XML_Content *model) {
2070 UNUSED_P(userData);
2071 uint32_t errorFlags = 0;
2072
2073 /* Expected model array structure is this:
2074 * [0] (type 6, quant 0)
2075 * [1] (type 5, quant 0)
2076 * [3] (type 4, quant 0, name "bar")
2077 * [4] (type 4, quant 0, name "foo")
2078 * [5] (type 4, quant 3, name "xyz")
2079 * [2] (type 4, quant 2, name "zebra")
2080 */
2081 errorFlags |= ((xcstrcmp(name, XCS("junk")) == 0) ? 0 : (1u << 0));
2082 errorFlags |= ((model != NULL) ? 0 : (1u << 1));
2083
2084 if (model != NULL) {
2085 errorFlags |= ((model[0].type == XML_CTYPE_SEQ) ? 0 : (1u << 2));
2086 errorFlags |= ((model[0].quant == XML_CQUANT_NONE) ? 0 : (1u << 3));
2087 errorFlags |= ((model[0].numchildren == 2) ? 0 : (1u << 4));
2088 errorFlags |= ((model[0].children == &model[1]) ? 0 : (1u << 5));
2089 errorFlags |= ((model[0].name == NULL) ? 0 : (1u << 6));
2090
2091 errorFlags |= ((model[1].type == XML_CTYPE_CHOICE) ? 0 : (1u << 7));
2092 errorFlags |= ((model[1].quant == XML_CQUANT_NONE) ? 0 : (1u << 8));
2093 errorFlags |= ((model[1].numchildren == 3) ? 0 : (1u << 9));
2094 errorFlags |= ((model[1].children == &model[3]) ? 0 : (1u << 10));
2095 errorFlags |= ((model[1].name == NULL) ? 0 : (1u << 11));
2096
2097 errorFlags |= ((model[2].type == XML_CTYPE_NAME) ? 0 : (1u << 12));
2098 errorFlags |= ((model[2].quant == XML_CQUANT_REP) ? 0 : (1u << 13));
2099 errorFlags |= ((model[2].numchildren == 0) ? 0 : (1u << 14));
2100 errorFlags |= ((model[2].children == NULL) ? 0 : (1u << 15));
2101 errorFlags
2102 |= ((xcstrcmp(model[2].name, XCS("zebra")) == 0) ? 0 : (1u << 16));
2103
2104 errorFlags |= ((model[3].type == XML_CTYPE_NAME) ? 0 : (1u << 17));
2105 errorFlags |= ((model[3].quant == XML_CQUANT_NONE) ? 0 : (1u << 18));
2106 errorFlags |= ((model[3].numchildren == 0) ? 0 : (1u << 19));
2107 errorFlags |= ((model[3].children == NULL) ? 0 : (1u << 20));
2108 errorFlags |= ((xcstrcmp(model[3].name, XCS("bar")) == 0) ? 0 : (1u << 21));
2109
2110 errorFlags |= ((model[4].type == XML_CTYPE_NAME) ? 0 : (1u << 22));
2111 errorFlags |= ((model[4].quant == XML_CQUANT_NONE) ? 0 : (1u << 23));
2112 errorFlags |= ((model[4].numchildren == 0) ? 0 : (1u << 24));
2113 errorFlags |= ((model[4].children == NULL) ? 0 : (1u << 25));
2114 errorFlags |= ((xcstrcmp(model[4].name, XCS("foo")) == 0) ? 0 : (1u << 26));
2115
2116 errorFlags |= ((model[5].type == XML_CTYPE_NAME) ? 0 : (1u << 27));
2117 errorFlags |= ((model[5].quant == XML_CQUANT_PLUS) ? 0 : (1u << 28));
2118 errorFlags |= ((model[5].numchildren == 0) ? 0 : (1u << 29));
2119 errorFlags |= ((model[5].children == NULL) ? 0 : (1u << 30));
2120 errorFlags |= ((xcstrcmp(model[5].name, XCS("xyz")) == 0) ? 0 : (1u << 31));
2121 }
2122
2123 XML_SetUserData(g_parser, (void *)(uintptr_t)errorFlags);
2124 XML_FreeContentModel(g_parser, model);
2125 }
2126
START_TEST(test_dtd_elements_nesting)2127 START_TEST(test_dtd_elements_nesting) {
2128 // Payload inspired by a test in Perl's XML::Parser
2129 const char *text = "<!DOCTYPE foo [\n"
2130 "<!ELEMENT junk ((bar|foo|xyz+), zebra*)>\n"
2131 "]>\n"
2132 "<foo/>";
2133
2134 XML_SetUserData(g_parser, (void *)(uintptr_t)-1);
2135
2136 XML_SetElementDeclHandler(g_parser, element_decl_check_model);
2137 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2138 == XML_STATUS_ERROR)
2139 xml_failure(g_parser);
2140
2141 if ((uint32_t)(uintptr_t)XML_GetUserData(g_parser) != 0)
2142 fail("Element declaration model regression detected");
2143 }
2144 END_TEST
2145
2146 /* Test foreign DTD handling */
START_TEST(test_set_foreign_dtd)2147 START_TEST(test_set_foreign_dtd) {
2148 const char *text1 = "<?xml version='1.0' encoding='us-ascii'?>\n";
2149 const char *text2 = "<doc>&entity;</doc>";
2150 ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
2151
2152 /* Check hash salt is passed through too */
2153 XML_SetHashSalt(g_parser, 0x12345678);
2154 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2155 XML_SetUserData(g_parser, &test_data);
2156 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
2157 /* Add a default handler to exercise more code paths */
2158 XML_SetDefaultHandler(g_parser, dummy_default_handler);
2159 if (XML_UseForeignDTD(g_parser, XML_TRUE) != XML_ERROR_NONE)
2160 fail("Could not set foreign DTD");
2161 if (_XML_Parse_SINGLE_BYTES(g_parser, text1, (int)strlen(text1), XML_FALSE)
2162 == XML_STATUS_ERROR)
2163 xml_failure(g_parser);
2164
2165 /* Ensure that trying to set the DTD after parsing has started
2166 * is faulted, even if it's the same setting.
2167 */
2168 if (XML_UseForeignDTD(g_parser, XML_TRUE)
2169 != XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING)
2170 fail("Failed to reject late foreign DTD setting");
2171 /* Ditto for the hash salt */
2172 if (XML_SetHashSalt(g_parser, 0x23456789))
2173 fail("Failed to reject late hash salt change");
2174
2175 /* Now finish the parse */
2176 if (_XML_Parse_SINGLE_BYTES(g_parser, text2, (int)strlen(text2), XML_TRUE)
2177 == XML_STATUS_ERROR)
2178 xml_failure(g_parser);
2179 }
2180 END_TEST
2181
2182 /* Test foreign DTD handling with a failing NotStandalone handler */
START_TEST(test_foreign_dtd_not_standalone)2183 START_TEST(test_foreign_dtd_not_standalone) {
2184 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2185 "<doc>&entity;</doc>";
2186 ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
2187
2188 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2189 XML_SetUserData(g_parser, &test_data);
2190 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
2191 XML_SetNotStandaloneHandler(g_parser, reject_not_standalone_handler);
2192 if (XML_UseForeignDTD(g_parser, XML_TRUE) != XML_ERROR_NONE)
2193 fail("Could not set foreign DTD");
2194 expect_failure(text, XML_ERROR_NOT_STANDALONE,
2195 "NotStandalonehandler failed to reject");
2196 }
2197 END_TEST
2198
2199 /* Test invalid character in a foreign DTD is faulted */
START_TEST(test_invalid_foreign_dtd)2200 START_TEST(test_invalid_foreign_dtd) {
2201 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2202 "<doc>&entity;</doc>";
2203 ExtFaults test_data
2204 = {"$", "Dollar not faulted", NULL, XML_ERROR_INVALID_TOKEN};
2205
2206 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2207 XML_SetUserData(g_parser, &test_data);
2208 XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
2209 XML_UseForeignDTD(g_parser, XML_TRUE);
2210 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
2211 "Bad DTD should not have been accepted");
2212 }
2213 END_TEST
2214
2215 /* Test foreign DTD use with a doctype */
START_TEST(test_foreign_dtd_with_doctype)2216 START_TEST(test_foreign_dtd_with_doctype) {
2217 const char *text1 = "<?xml version='1.0' encoding='us-ascii'?>\n"
2218 "<!DOCTYPE doc [<!ENTITY entity 'hello world'>]>\n";
2219 const char *text2 = "<doc>&entity;</doc>";
2220 ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
2221
2222 /* Check hash salt is passed through too */
2223 XML_SetHashSalt(g_parser, 0x12345678);
2224 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2225 XML_SetUserData(g_parser, &test_data);
2226 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
2227 /* Add a default handler to exercise more code paths */
2228 XML_SetDefaultHandler(g_parser, dummy_default_handler);
2229 if (XML_UseForeignDTD(g_parser, XML_TRUE) != XML_ERROR_NONE)
2230 fail("Could not set foreign DTD");
2231 if (_XML_Parse_SINGLE_BYTES(g_parser, text1, (int)strlen(text1), XML_FALSE)
2232 == XML_STATUS_ERROR)
2233 xml_failure(g_parser);
2234
2235 /* Ensure that trying to set the DTD after parsing has started
2236 * is faulted, even if it's the same setting.
2237 */
2238 if (XML_UseForeignDTD(g_parser, XML_TRUE)
2239 != XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING)
2240 fail("Failed to reject late foreign DTD setting");
2241 /* Ditto for the hash salt */
2242 if (XML_SetHashSalt(g_parser, 0x23456789))
2243 fail("Failed to reject late hash salt change");
2244
2245 /* Now finish the parse */
2246 if (_XML_Parse_SINGLE_BYTES(g_parser, text2, (int)strlen(text2), XML_TRUE)
2247 == XML_STATUS_ERROR)
2248 xml_failure(g_parser);
2249 }
2250 END_TEST
2251
2252 /* Test XML_UseForeignDTD with no external subset present */
START_TEST(test_foreign_dtd_without_external_subset)2253 START_TEST(test_foreign_dtd_without_external_subset) {
2254 const char *text = "<!DOCTYPE doc [<!ENTITY foo 'bar'>]>\n"
2255 "<doc>&foo;</doc>";
2256
2257 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2258 XML_SetUserData(g_parser, NULL);
2259 XML_SetExternalEntityRefHandler(g_parser, external_entity_null_loader);
2260 XML_UseForeignDTD(g_parser, XML_TRUE);
2261 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2262 == XML_STATUS_ERROR)
2263 xml_failure(g_parser);
2264 }
2265 END_TEST
2266
START_TEST(test_empty_foreign_dtd)2267 START_TEST(test_empty_foreign_dtd) {
2268 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2269 "<doc>&entity;</doc>";
2270
2271 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2272 XML_SetExternalEntityRefHandler(g_parser, external_entity_null_loader);
2273 XML_UseForeignDTD(g_parser, XML_TRUE);
2274 expect_failure(text, XML_ERROR_UNDEFINED_ENTITY,
2275 "Undefined entity not faulted");
2276 }
2277 END_TEST
2278
2279 /* Test XML Base is set and unset appropriately */
START_TEST(test_set_base)2280 START_TEST(test_set_base) {
2281 const XML_Char *old_base;
2282 const XML_Char *new_base = XCS("/local/file/name.xml");
2283
2284 old_base = XML_GetBase(g_parser);
2285 if (XML_SetBase(g_parser, new_base) != XML_STATUS_OK)
2286 fail("Unable to set base");
2287 if (xcstrcmp(XML_GetBase(g_parser), new_base) != 0)
2288 fail("Base setting not correct");
2289 if (XML_SetBase(g_parser, NULL) != XML_STATUS_OK)
2290 fail("Unable to NULL base");
2291 if (XML_GetBase(g_parser) != NULL)
2292 fail("Base setting not nulled");
2293 XML_SetBase(g_parser, old_base);
2294 }
2295 END_TEST
2296
2297 /* Test attribute counts, indexing, etc */
START_TEST(test_attributes)2298 START_TEST(test_attributes) {
2299 const char *text = "<!DOCTYPE doc [\n"
2300 "<!ELEMENT doc (tag)>\n"
2301 "<!ATTLIST doc id ID #REQUIRED>\n"
2302 "]>"
2303 "<doc a='1' id='one' b='2'>"
2304 "<tag c='3'/>"
2305 "</doc>";
2306 AttrInfo doc_info[] = {{XCS("a"), XCS("1")},
2307 {XCS("b"), XCS("2")},
2308 {XCS("id"), XCS("one")},
2309 {NULL, NULL}};
2310 AttrInfo tag_info[] = {{XCS("c"), XCS("3")}, {NULL, NULL}};
2311 ElementInfo info[] = {{XCS("doc"), 3, XCS("id"), NULL},
2312 {XCS("tag"), 1, NULL, NULL},
2313 {NULL, 0, NULL, NULL}};
2314 info[0].attributes = doc_info;
2315 info[1].attributes = tag_info;
2316
2317 XML_SetStartElementHandler(g_parser, counting_start_element_handler);
2318 XML_SetUserData(g_parser, info);
2319 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2320 == XML_STATUS_ERROR)
2321 xml_failure(g_parser);
2322 }
2323 END_TEST
2324
2325 /* Test reset works correctly in the middle of processing an internal
2326 * entity. Exercises some obscure code in XML_ParserReset().
2327 */
START_TEST(test_reset_in_entity)2328 START_TEST(test_reset_in_entity) {
2329 const char *text = "<!DOCTYPE doc [\n"
2330 "<!ENTITY wombat 'wom'>\n"
2331 "<!ENTITY entity 'hi &wom; there'>\n"
2332 "]>\n"
2333 "<doc>&entity;</doc>";
2334 XML_ParsingStatus status;
2335
2336 g_resumable = XML_TRUE;
2337 XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
2338 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2339 == XML_STATUS_ERROR)
2340 xml_failure(g_parser);
2341 XML_GetParsingStatus(g_parser, &status);
2342 if (status.parsing != XML_SUSPENDED)
2343 fail("Parsing status not SUSPENDED");
2344 XML_ParserReset(g_parser, NULL);
2345 XML_GetParsingStatus(g_parser, &status);
2346 if (status.parsing != XML_INITIALIZED)
2347 fail("Parsing status doesn't reset to INITIALIZED");
2348 }
2349 END_TEST
2350
2351 /* Test that resume correctly passes through parse errors */
START_TEST(test_resume_invalid_parse)2352 START_TEST(test_resume_invalid_parse) {
2353 const char *text = "<doc>Hello</doc"; /* Missing closing wedge */
2354
2355 g_resumable = XML_TRUE;
2356 XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
2357 if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
2358 == XML_STATUS_ERROR)
2359 xml_failure(g_parser);
2360 if (XML_ResumeParser(g_parser) == XML_STATUS_OK)
2361 fail("Resumed invalid parse not faulted");
2362 if (XML_GetErrorCode(g_parser) != XML_ERROR_UNCLOSED_TOKEN)
2363 fail("Invalid parse not correctly faulted");
2364 }
2365 END_TEST
2366
2367 /* Test that re-suspended parses are correctly passed through */
START_TEST(test_resume_resuspended)2368 START_TEST(test_resume_resuspended) {
2369 const char *text = "<doc>Hello<meep/>world</doc>";
2370
2371 g_resumable = XML_TRUE;
2372 XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
2373 if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
2374 == XML_STATUS_ERROR)
2375 xml_failure(g_parser);
2376 g_resumable = XML_TRUE;
2377 XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
2378 if (XML_ResumeParser(g_parser) != XML_STATUS_SUSPENDED)
2379 fail("Resumption not suspended");
2380 /* This one should succeed and finish up */
2381 if (XML_ResumeParser(g_parser) != XML_STATUS_OK)
2382 xml_failure(g_parser);
2383 }
2384 END_TEST
2385
2386 /* Test that CDATA shows up correctly through a default handler */
START_TEST(test_cdata_default)2387 START_TEST(test_cdata_default) {
2388 const char *text = "<doc><![CDATA[Hello\nworld]]></doc>";
2389 const XML_Char *expected = XCS("<doc><![CDATA[Hello\nworld]]></doc>");
2390 CharData storage;
2391
2392 CharData_Init(&storage);
2393 XML_SetUserData(g_parser, &storage);
2394 XML_SetDefaultHandler(g_parser, accumulate_characters);
2395
2396 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2397 == XML_STATUS_ERROR)
2398 xml_failure(g_parser);
2399 CharData_CheckXMLChars(&storage, expected);
2400 }
2401 END_TEST
2402
2403 /* Test resetting a subordinate parser does exactly nothing */
START_TEST(test_subordinate_reset)2404 START_TEST(test_subordinate_reset) {
2405 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2406 "<!DOCTYPE doc SYSTEM 'foo'>\n"
2407 "<doc>&entity;</doc>";
2408
2409 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2410 XML_SetExternalEntityRefHandler(g_parser, external_entity_resetter);
2411 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2412 == XML_STATUS_ERROR)
2413 xml_failure(g_parser);
2414 }
2415 END_TEST
2416
2417 /* Test suspending a subordinate parser */
START_TEST(test_subordinate_suspend)2418 START_TEST(test_subordinate_suspend) {
2419 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2420 "<!DOCTYPE doc SYSTEM 'foo'>\n"
2421 "<doc>&entity;</doc>";
2422
2423 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2424 XML_SetExternalEntityRefHandler(g_parser, external_entity_suspender);
2425 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2426 == XML_STATUS_ERROR)
2427 xml_failure(g_parser);
2428 }
2429 END_TEST
2430
2431 /* Test suspending a subordinate parser from an XML declaration */
2432 /* Increases code coverage of the tests */
2433
START_TEST(test_subordinate_xdecl_suspend)2434 START_TEST(test_subordinate_xdecl_suspend) {
2435 const char *text
2436 = "<!DOCTYPE doc [\n"
2437 " <!ENTITY entity SYSTEM 'http://example.org/dummy.ent'>\n"
2438 "]>\n"
2439 "<doc>&entity;</doc>";
2440
2441 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2442 XML_SetExternalEntityRefHandler(g_parser, external_entity_suspend_xmldecl);
2443 g_resumable = XML_TRUE;
2444 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2445 == XML_STATUS_ERROR)
2446 xml_failure(g_parser);
2447 }
2448 END_TEST
2449
START_TEST(test_subordinate_xdecl_abort)2450 START_TEST(test_subordinate_xdecl_abort) {
2451 const char *text
2452 = "<!DOCTYPE doc [\n"
2453 " <!ENTITY entity SYSTEM 'http://example.org/dummy.ent'>\n"
2454 "]>\n"
2455 "<doc>&entity;</doc>";
2456
2457 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2458 XML_SetExternalEntityRefHandler(g_parser, external_entity_suspend_xmldecl);
2459 g_resumable = XML_FALSE;
2460 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2461 == XML_STATUS_ERROR)
2462 xml_failure(g_parser);
2463 }
2464 END_TEST
2465
2466 /* Test external entity fault handling with suspension */
START_TEST(test_ext_entity_invalid_suspended_parse)2467 START_TEST(test_ext_entity_invalid_suspended_parse) {
2468 const char *text = "<!DOCTYPE doc [\n"
2469 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
2470 "]>\n"
2471 "<doc>&en;</doc>";
2472 ExtFaults faults[]
2473 = {{"<?xml version='1.0' encoding='us-ascii'?><",
2474 "Incomplete element declaration not faulted", NULL,
2475 XML_ERROR_UNCLOSED_TOKEN},
2476 {/* First two bytes of a three-byte char */
2477 "<?xml version='1.0' encoding='utf-8'?>\xe2\x82",
2478 "Incomplete character not faulted", NULL, XML_ERROR_PARTIAL_CHAR},
2479 {NULL, NULL, NULL, XML_ERROR_NONE}};
2480 ExtFaults *fault;
2481
2482 for (fault = &faults[0]; fault->parse_text != NULL; fault++) {
2483 set_subtest("%s", fault->parse_text);
2484 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2485 XML_SetExternalEntityRefHandler(g_parser,
2486 external_entity_suspending_faulter);
2487 XML_SetUserData(g_parser, fault);
2488 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
2489 "Parser did not report external entity error");
2490 XML_ParserReset(g_parser, NULL);
2491 }
2492 }
2493 END_TEST
2494
2495 /* Test setting an explicit encoding */
START_TEST(test_explicit_encoding)2496 START_TEST(test_explicit_encoding) {
2497 const char *text1 = "<doc>Hello ";
2498 const char *text2 = " World</doc>";
2499
2500 /* Just check that we can set the encoding to NULL before starting */
2501 if (XML_SetEncoding(g_parser, NULL) != XML_STATUS_OK)
2502 fail("Failed to initialise encoding to NULL");
2503 /* Say we are UTF-8 */
2504 if (XML_SetEncoding(g_parser, XCS("utf-8")) != XML_STATUS_OK)
2505 fail("Failed to set explicit encoding");
2506 if (_XML_Parse_SINGLE_BYTES(g_parser, text1, (int)strlen(text1), XML_FALSE)
2507 == XML_STATUS_ERROR)
2508 xml_failure(g_parser);
2509 /* Try to switch encodings mid-parse */
2510 if (XML_SetEncoding(g_parser, XCS("us-ascii")) != XML_STATUS_ERROR)
2511 fail("Allowed encoding change");
2512 if (_XML_Parse_SINGLE_BYTES(g_parser, text2, (int)strlen(text2), XML_TRUE)
2513 == XML_STATUS_ERROR)
2514 xml_failure(g_parser);
2515 /* Try now the parse is over */
2516 if (XML_SetEncoding(g_parser, NULL) != XML_STATUS_OK)
2517 fail("Failed to unset encoding");
2518 }
2519 END_TEST
2520
2521 /* Test handling of trailing CR (rather than newline) */
START_TEST(test_trailing_cr)2522 START_TEST(test_trailing_cr) {
2523 const char *text = "<doc>\r";
2524 int found_cr;
2525
2526 /* Try with a character handler, for code coverage */
2527 XML_SetCharacterDataHandler(g_parser, cr_cdata_handler);
2528 XML_SetUserData(g_parser, &found_cr);
2529 found_cr = 0;
2530 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2531 == XML_STATUS_OK)
2532 fail("Failed to fault unclosed doc");
2533 if (found_cr == 0)
2534 fail("Did not catch the carriage return");
2535 XML_ParserReset(g_parser, NULL);
2536
2537 /* Now with a default handler instead */
2538 XML_SetDefaultHandler(g_parser, cr_cdata_handler);
2539 XML_SetUserData(g_parser, &found_cr);
2540 found_cr = 0;
2541 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2542 == XML_STATUS_OK)
2543 fail("Failed to fault unclosed doc");
2544 if (found_cr == 0)
2545 fail("Did not catch default carriage return");
2546 }
2547 END_TEST
2548
2549 /* Test trailing CR in an external entity parse */
START_TEST(test_ext_entity_trailing_cr)2550 START_TEST(test_ext_entity_trailing_cr) {
2551 const char *text = "<!DOCTYPE doc [\n"
2552 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
2553 "]>\n"
2554 "<doc>&en;</doc>";
2555 int found_cr;
2556
2557 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2558 XML_SetExternalEntityRefHandler(g_parser, external_entity_cr_catcher);
2559 XML_SetUserData(g_parser, &found_cr);
2560 found_cr = 0;
2561 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2562 != XML_STATUS_OK)
2563 xml_failure(g_parser);
2564 if (found_cr == 0)
2565 fail("No carriage return found");
2566 XML_ParserReset(g_parser, NULL);
2567
2568 /* Try again with a different trailing CR */
2569 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2570 XML_SetExternalEntityRefHandler(g_parser, external_entity_bad_cr_catcher);
2571 XML_SetUserData(g_parser, &found_cr);
2572 found_cr = 0;
2573 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2574 != XML_STATUS_OK)
2575 xml_failure(g_parser);
2576 if (found_cr == 0)
2577 fail("No carriage return found");
2578 }
2579 END_TEST
2580
2581 /* Test handling of trailing square bracket */
START_TEST(test_trailing_rsqb)2582 START_TEST(test_trailing_rsqb) {
2583 const char *text8 = "<doc>]";
2584 const char text16[] = "\xFF\xFE<\000d\000o\000c\000>\000]\000";
2585 int found_rsqb;
2586 int text8_len = (int)strlen(text8);
2587
2588 XML_SetCharacterDataHandler(g_parser, rsqb_handler);
2589 XML_SetUserData(g_parser, &found_rsqb);
2590 found_rsqb = 0;
2591 if (_XML_Parse_SINGLE_BYTES(g_parser, text8, text8_len, XML_TRUE)
2592 == XML_STATUS_OK)
2593 fail("Failed to fault unclosed doc");
2594 if (found_rsqb == 0)
2595 fail("Did not catch the right square bracket");
2596
2597 /* Try again with a different encoding */
2598 XML_ParserReset(g_parser, NULL);
2599 XML_SetCharacterDataHandler(g_parser, rsqb_handler);
2600 XML_SetUserData(g_parser, &found_rsqb);
2601 found_rsqb = 0;
2602 if (_XML_Parse_SINGLE_BYTES(g_parser, text16, (int)sizeof(text16) - 1,
2603 XML_TRUE)
2604 == XML_STATUS_OK)
2605 fail("Failed to fault unclosed doc");
2606 if (found_rsqb == 0)
2607 fail("Did not catch the right square bracket");
2608
2609 /* And finally with a default handler */
2610 XML_ParserReset(g_parser, NULL);
2611 XML_SetDefaultHandler(g_parser, rsqb_handler);
2612 XML_SetUserData(g_parser, &found_rsqb);
2613 found_rsqb = 0;
2614 if (_XML_Parse_SINGLE_BYTES(g_parser, text16, (int)sizeof(text16) - 1,
2615 XML_TRUE)
2616 == XML_STATUS_OK)
2617 fail("Failed to fault unclosed doc");
2618 if (found_rsqb == 0)
2619 fail("Did not catch the right square bracket");
2620 }
2621 END_TEST
2622
2623 /* Test trailing right square bracket in an external entity parse */
START_TEST(test_ext_entity_trailing_rsqb)2624 START_TEST(test_ext_entity_trailing_rsqb) {
2625 const char *text = "<!DOCTYPE doc [\n"
2626 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
2627 "]>\n"
2628 "<doc>&en;</doc>";
2629 int found_rsqb;
2630
2631 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2632 XML_SetExternalEntityRefHandler(g_parser, external_entity_rsqb_catcher);
2633 XML_SetUserData(g_parser, &found_rsqb);
2634 found_rsqb = 0;
2635 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2636 != XML_STATUS_OK)
2637 xml_failure(g_parser);
2638 if (found_rsqb == 0)
2639 fail("No right square bracket found");
2640 }
2641 END_TEST
2642
2643 /* Test CDATA handling in an external entity */
START_TEST(test_ext_entity_good_cdata)2644 START_TEST(test_ext_entity_good_cdata) {
2645 const char *text = "<!DOCTYPE doc [\n"
2646 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
2647 "]>\n"
2648 "<doc>&en;</doc>";
2649
2650 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2651 XML_SetExternalEntityRefHandler(g_parser, external_entity_good_cdata_ascii);
2652 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2653 != XML_STATUS_OK)
2654 xml_failure(g_parser);
2655 }
2656 END_TEST
2657
2658 /* Test user parameter settings */
START_TEST(test_user_parameters)2659 START_TEST(test_user_parameters) {
2660 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2661 "<!-- Primary parse -->\n"
2662 "<!DOCTYPE doc SYSTEM 'foo'>\n"
2663 "<doc>&entity;";
2664 const char *epilog = "<!-- Back to primary parser -->\n"
2665 "</doc>";
2666
2667 g_comment_count = 0;
2668 g_skip_count = 0;
2669 g_xdecl_count = 0;
2670 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2671 XML_SetXmlDeclHandler(g_parser, xml_decl_handler);
2672 XML_SetExternalEntityRefHandler(g_parser, external_entity_param_checker);
2673 XML_SetCommentHandler(g_parser, data_check_comment_handler);
2674 XML_SetSkippedEntityHandler(g_parser, param_check_skip_handler);
2675 XML_UseParserAsHandlerArg(g_parser);
2676 XML_SetUserData(g_parser, (void *)1);
2677 g_handler_data = g_parser;
2678 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE)
2679 == XML_STATUS_ERROR)
2680 xml_failure(g_parser);
2681 /* Ensure we can't change policy mid-parse */
2682 if (XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_NEVER))
2683 fail("Changed param entity parsing policy while parsing");
2684 if (_XML_Parse_SINGLE_BYTES(g_parser, epilog, (int)strlen(epilog), XML_TRUE)
2685 == XML_STATUS_ERROR)
2686 xml_failure(g_parser);
2687 if (g_comment_count != 3)
2688 fail("Comment handler not invoked enough times");
2689 if (g_skip_count != 1)
2690 fail("Skip handler not invoked enough times");
2691 if (g_xdecl_count != 1)
2692 fail("XML declaration handler not invoked");
2693 }
2694 END_TEST
2695
2696 /* Test that an explicit external entity handler argument replaces
2697 * the parser as the first argument.
2698 *
2699 * We do not call the first parameter to the external entity handler
2700 * 'parser' for once, since the first time the handler is called it
2701 * will actually be a text string. We need to be able to access the
2702 * global 'parser' variable to create our external entity parser from,
2703 * since there are code paths we need to ensure get executed.
2704 */
START_TEST(test_ext_entity_ref_parameter)2705 START_TEST(test_ext_entity_ref_parameter) {
2706 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2707 "<!DOCTYPE doc SYSTEM 'foo'>\n"
2708 "<doc>&entity;</doc>";
2709
2710 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2711 XML_SetExternalEntityRefHandler(g_parser, external_entity_ref_param_checker);
2712 /* Set a handler arg that is not NULL and not parser (which is
2713 * what NULL would cause to be passed.
2714 */
2715 XML_SetExternalEntityRefHandlerArg(g_parser, (void *)text);
2716 g_handler_data = text;
2717 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2718 == XML_STATUS_ERROR)
2719 xml_failure(g_parser);
2720
2721 /* Now try again with unset args */
2722 XML_ParserReset(g_parser, NULL);
2723 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2724 XML_SetExternalEntityRefHandler(g_parser, external_entity_ref_param_checker);
2725 XML_SetExternalEntityRefHandlerArg(g_parser, NULL);
2726 g_handler_data = g_parser;
2727 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2728 == XML_STATUS_ERROR)
2729 xml_failure(g_parser);
2730 }
2731 END_TEST
2732
2733 /* Test the parsing of an empty string */
START_TEST(test_empty_parse)2734 START_TEST(test_empty_parse) {
2735 const char *text = "<doc></doc>";
2736 const char *partial = "<doc>";
2737
2738 if (XML_Parse(g_parser, NULL, 0, XML_FALSE) == XML_STATUS_ERROR)
2739 fail("Parsing empty string faulted");
2740 if (XML_Parse(g_parser, NULL, 0, XML_TRUE) != XML_STATUS_ERROR)
2741 fail("Parsing final empty string not faulted");
2742 if (XML_GetErrorCode(g_parser) != XML_ERROR_NO_ELEMENTS)
2743 fail("Parsing final empty string faulted for wrong reason");
2744
2745 /* Now try with valid text before the empty end */
2746 XML_ParserReset(g_parser, NULL);
2747 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE)
2748 == XML_STATUS_ERROR)
2749 xml_failure(g_parser);
2750 if (XML_Parse(g_parser, NULL, 0, XML_TRUE) == XML_STATUS_ERROR)
2751 fail("Parsing final empty string faulted");
2752
2753 /* Now try with invalid text before the empty end */
2754 XML_ParserReset(g_parser, NULL);
2755 if (_XML_Parse_SINGLE_BYTES(g_parser, partial, (int)strlen(partial),
2756 XML_FALSE)
2757 == XML_STATUS_ERROR)
2758 xml_failure(g_parser);
2759 if (XML_Parse(g_parser, NULL, 0, XML_TRUE) != XML_STATUS_ERROR)
2760 fail("Parsing final incomplete empty string not faulted");
2761 }
2762 END_TEST
2763
2764 /* Test odd corners of the XML_GetBuffer interface */
2765 static enum XML_Status
get_feature(enum XML_FeatureEnum feature_id,long * presult)2766 get_feature(enum XML_FeatureEnum feature_id, long *presult) {
2767 const XML_Feature *feature = XML_GetFeatureList();
2768
2769 if (feature == NULL)
2770 return XML_STATUS_ERROR;
2771 for (; feature->feature != XML_FEATURE_END; feature++) {
2772 if (feature->feature == feature_id) {
2773 *presult = feature->value;
2774 return XML_STATUS_OK;
2775 }
2776 }
2777 return XML_STATUS_ERROR;
2778 }
2779
2780 /* Test odd corners of the XML_GetBuffer interface */
START_TEST(test_get_buffer_1)2781 START_TEST(test_get_buffer_1) {
2782 const char *text = get_buffer_test_text;
2783 void *buffer;
2784 long context_bytes;
2785
2786 /* Attempt to allocate a negative length buffer */
2787 if (XML_GetBuffer(g_parser, -12) != NULL)
2788 fail("Negative length buffer not failed");
2789
2790 /* Now get a small buffer and extend it past valid length */
2791 buffer = XML_GetBuffer(g_parser, 1536);
2792 if (buffer == NULL)
2793 fail("1.5K buffer failed");
2794 assert(buffer != NULL);
2795 memcpy(buffer, text, strlen(text));
2796 if (XML_ParseBuffer(g_parser, (int)strlen(text), XML_FALSE)
2797 == XML_STATUS_ERROR)
2798 xml_failure(g_parser);
2799 if (XML_GetBuffer(g_parser, INT_MAX) != NULL)
2800 fail("INT_MAX buffer not failed");
2801
2802 /* Now try extending it a more reasonable but still too large
2803 * amount. The allocator in XML_GetBuffer() doubles the buffer
2804 * size until it exceeds the requested amount or INT_MAX. If it
2805 * exceeds INT_MAX, it rejects the request, so we want a request
2806 * between INT_MAX and INT_MAX/2. A gap of 1K seems comfortable,
2807 * with an extra byte just to ensure that the request is off any
2808 * boundary. The request will be inflated internally by
2809 * XML_CONTEXT_BYTES (if >=1), so we subtract that from our
2810 * request.
2811 */
2812 if (get_feature(XML_FEATURE_CONTEXT_BYTES, &context_bytes) != XML_STATUS_OK)
2813 context_bytes = 0;
2814 if (XML_GetBuffer(g_parser, INT_MAX - (context_bytes + 1025)) != NULL)
2815 fail("INT_MAX- buffer not failed");
2816
2817 /* Now try extending it a carefully crafted amount */
2818 if (XML_GetBuffer(g_parser, 1000) == NULL)
2819 fail("1000 buffer failed");
2820 }
2821 END_TEST
2822
2823 /* Test more corners of the XML_GetBuffer interface */
START_TEST(test_get_buffer_2)2824 START_TEST(test_get_buffer_2) {
2825 const char *text = get_buffer_test_text;
2826 void *buffer;
2827
2828 /* Now get a decent buffer */
2829 buffer = XML_GetBuffer(g_parser, 1536);
2830 if (buffer == NULL)
2831 fail("1.5K buffer failed");
2832 assert(buffer != NULL);
2833 memcpy(buffer, text, strlen(text));
2834 if (XML_ParseBuffer(g_parser, (int)strlen(text), XML_FALSE)
2835 == XML_STATUS_ERROR)
2836 xml_failure(g_parser);
2837
2838 /* Extend it, to catch a different code path */
2839 if (XML_GetBuffer(g_parser, 1024) == NULL)
2840 fail("1024 buffer failed");
2841 }
2842 END_TEST
2843
/* Test for signed integer overflow CVE-2022-23852.
 * Only built when XML_CONTEXT_BYTES > 0; uses a private parser rather
 * than g_parser.
 */
#if XML_CONTEXT_BYTES > 0
START_TEST(test_get_buffer_3_overflow) {
  const char *const text = "\n";
  const int expectedKeepValue = (int)strlen(text);
  XML_Parser parser = XML_ParserCreate(NULL);
  enum XML_Status rc;

  assert(parser != NULL);

  // After this call, variable "keep" in XML_GetBuffer will
  // have value expectedKeepValue
  rc = _XML_Parse_SINGLE_BYTES(parser, text, expectedKeepValue,
                               XML_FALSE /* isFinal */);
  if (rc == XML_STATUS_ERROR) {
    xml_failure(parser);
  }

  // The request below is sized relative to INT_MAX and the kept bytes;
  // the parser is required to refuse it.
  assert(expectedKeepValue > 0);
  if (XML_GetBuffer(parser, INT_MAX - expectedKeepValue + 1) != NULL) {
    fail("enlarging buffer not failed");
  }

  XML_ParserFree(parser);
}
END_TEST
#endif // XML_CONTEXT_BYTES > 0
2868
START_TEST(test_buffer_can_grow_to_max)2869 START_TEST(test_buffer_can_grow_to_max) {
2870 const char *const prefixes[] = {
2871 "",
2872 "<",
2873 "<x a='",
2874 "<doc><x a='",
2875 "<document><x a='",
2876 "<averylongelementnamesuchthatitwillhopefullystretchacrossmultiplelinesand"
2877 "lookprettyridiculousitsalsoveryhardtoreadandifyouredoingitihavetowonderif"
2878 "youreallydonthaveanythingbettertodoofcourseiguessicouldveputsomethingbadin"
2879 "herebutipromisethatididntheybtwhowgreatarespacesandpunctuationforhelping"
2880 "withreadabilityprettygreatithinkanywaysthisisprobablylongenoughbye><x a='"};
2881 const int num_prefixes = sizeof(prefixes) / sizeof(prefixes[0]);
2882 int maxbuf = INT_MAX / 2 + (INT_MAX & 1); // round up without overflow
2883 #if defined(__MINGW32__) && ! defined(__MINGW64__)
2884 // workaround for mingw/wine32 on GitHub CI not being able to reach 1GiB
2885 // Can we make a big allocation?
2886 void *big = malloc(maxbuf);
2887 if (! big) {
2888 // The big allocation failed. Let's be a little lenient.
2889 maxbuf = maxbuf / 2;
2890 }
2891 free(big);
2892 #endif
2893
2894 for (int i = 0; i < num_prefixes; ++i) {
2895 set_subtest("\"%s\"", prefixes[i]);
2896 XML_Parser parser = XML_ParserCreate(NULL);
2897 const int prefix_len = (int)strlen(prefixes[i]);
2898 const enum XML_Status s
2899 = _XML_Parse_SINGLE_BYTES(parser, prefixes[i], prefix_len, XML_FALSE);
2900 if (s != XML_STATUS_OK)
2901 xml_failure(parser);
2902
2903 // XML_CONTEXT_BYTES of the prefix may remain in the buffer;
2904 // subtracting the whole prefix is easiest, and close enough.
2905 assert_true(XML_GetBuffer(parser, maxbuf - prefix_len) != NULL);
2906 // The limit should be consistent; no prefix should allow us to
2907 // reach above the max buffer size.
2908 assert_true(XML_GetBuffer(parser, maxbuf + 1) == NULL);
2909 XML_ParserFree(parser);
2910 }
2911 }
2912 END_TEST
2913
START_TEST(test_getbuffer_allocates_on_zero_len)2914 START_TEST(test_getbuffer_allocates_on_zero_len) {
2915 for (int first_len = 1; first_len >= 0; first_len--) {
2916 set_subtest("with len=%d first", first_len);
2917 XML_Parser parser = XML_ParserCreate(NULL);
2918 assert_true(parser != NULL);
2919 assert_true(XML_GetBuffer(parser, first_len) != NULL);
2920 assert_true(XML_GetBuffer(parser, 0) != NULL);
2921 if (XML_ParseBuffer(parser, 0, XML_FALSE) != XML_STATUS_OK)
2922 xml_failure(parser);
2923 XML_ParserFree(parser);
2924 }
2925 }
2926 END_TEST
2927
2928 /* Test position information macros */
START_TEST(test_byte_info_at_end)2929 START_TEST(test_byte_info_at_end) {
2930 const char *text = "<doc></doc>";
2931
2932 if (XML_GetCurrentByteIndex(g_parser) != -1
2933 || XML_GetCurrentByteCount(g_parser) != 0)
2934 fail("Byte index/count incorrect at start of parse");
2935 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2936 == XML_STATUS_ERROR)
2937 xml_failure(g_parser);
2938 /* At end, the count will be zero and the index the end of string */
2939 if (XML_GetCurrentByteCount(g_parser) != 0)
2940 fail("Terminal byte count incorrect");
2941 if (XML_GetCurrentByteIndex(g_parser) != (XML_Index)strlen(text))
2942 fail("Terminal byte index incorrect");
2943 }
2944 END_TEST
2945
2946 /* Test position information from errors */
2947 #define PRE_ERROR_STR "<doc></"
2948 #define POST_ERROR_STR "wombat></doc>"
START_TEST(test_byte_info_at_error)2949 START_TEST(test_byte_info_at_error) {
2950 const char *text = PRE_ERROR_STR POST_ERROR_STR;
2951
2952 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2953 == XML_STATUS_OK)
2954 fail("Syntax error not faulted");
2955 if (XML_GetCurrentByteCount(g_parser) != 0)
2956 fail("Error byte count incorrect");
2957 if (XML_GetCurrentByteIndex(g_parser) != strlen(PRE_ERROR_STR))
2958 fail("Error byte index incorrect");
2959 }
2960 END_TEST
2961 #undef PRE_ERROR_STR
2962 #undef POST_ERROR_STR
2963
2964 /* Test position information in handler */
2965 #define START_ELEMENT "<e>"
2966 #define CDATA_TEXT "Hello"
2967 #define END_ELEMENT "</e>"
START_TEST(test_byte_info_at_cdata)2968 START_TEST(test_byte_info_at_cdata) {
2969 const char *text = START_ELEMENT CDATA_TEXT END_ELEMENT;
2970 int offset, size;
2971 ByteTestData data;
2972
2973 /* Check initial context is empty */
2974 if (XML_GetInputContext(g_parser, &offset, &size) != NULL)
2975 fail("Unexpected context at start of parse");
2976
2977 data.start_element_len = (int)strlen(START_ELEMENT);
2978 data.cdata_len = (int)strlen(CDATA_TEXT);
2979 data.total_string_len = (int)strlen(text);
2980 XML_SetCharacterDataHandler(g_parser, byte_character_handler);
2981 XML_SetUserData(g_parser, &data);
2982 if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_OK)
2983 xml_failure(g_parser);
2984 }
2985 END_TEST
2986 #undef START_ELEMENT
2987 #undef CDATA_TEXT
2988 #undef END_ELEMENT
2989
2990 /* Test predefined entities are correctly recognised */
START_TEST(test_predefined_entities)2991 START_TEST(test_predefined_entities) {
2992 const char *text = "<doc><>&"'</doc>";
2993 const XML_Char *expected = XCS("<doc><>&"'</doc>");
2994 const XML_Char *result = XCS("<>&\"'");
2995 CharData storage;
2996
2997 XML_SetDefaultHandler(g_parser, accumulate_characters);
2998 /* run_character_check uses XML_SetCharacterDataHandler(), which
2999 * unfortunately heads off a code path that we need to exercise.
3000 */
3001 CharData_Init(&storage);
3002 XML_SetUserData(g_parser, &storage);
3003 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3004 == XML_STATUS_ERROR)
3005 xml_failure(g_parser);
3006 /* The default handler doesn't translate the entities */
3007 CharData_CheckXMLChars(&storage, expected);
3008
3009 /* Now try again and check the translation */
3010 XML_ParserReset(g_parser, NULL);
3011 run_character_check(text, result);
3012 }
3013 END_TEST
3014
3015 /* Regression test that an invalid tag in an external parameter
3016 * reference in an external DTD is correctly faulted.
3017 *
3018 * Only a few specific tags are legal in DTDs ignoring comments and
3019 * processing instructions, all of which begin with an exclamation
3020 * mark. "<el/>" is not one of them, so the parser should raise an
3021 * error on encountering it.
3022 */
START_TEST(test_invalid_tag_in_dtd)3023 START_TEST(test_invalid_tag_in_dtd) {
3024 const char *text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n"
3025 "<doc></doc>\n";
3026
3027 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3028 XML_SetExternalEntityRefHandler(g_parser, external_entity_param);
3029 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
3030 "Invalid tag IN DTD external param not rejected");
3031 }
3032 END_TEST
3033
3034 /* Test entities not quite the predefined ones are not mis-recognised */
START_TEST(test_not_predefined_entities)3035 START_TEST(test_not_predefined_entities) {
3036 const char *text[] = {"<doc>&pt;</doc>", "<doc>&amo;</doc>",
3037 "<doc>&quid;</doc>", "<doc>&apod;</doc>", NULL};
3038 int i = 0;
3039
3040 while (text[i] != NULL) {
3041 expect_failure(text[i], XML_ERROR_UNDEFINED_ENTITY,
3042 "Undefined entity not rejected");
3043 XML_ParserReset(g_parser, NULL);
3044 i++;
3045 }
3046 }
3047 END_TEST
3048
3049 /* Test conditional inclusion (IGNORE) */
START_TEST(test_ignore_section)3050 START_TEST(test_ignore_section) {
3051 const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n"
3052 "<doc><e>&entity;</e></doc>";
3053 const XML_Char *expected
3054 = XCS("<![IGNORE[<!ELEMENT e (#PCDATA)*>]]>\n&entity;");
3055 CharData storage;
3056
3057 CharData_Init(&storage);
3058 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3059 XML_SetUserData(g_parser, &storage);
3060 XML_SetExternalEntityRefHandler(g_parser, external_entity_load_ignore);
3061 XML_SetDefaultHandler(g_parser, accumulate_characters);
3062 XML_SetStartDoctypeDeclHandler(g_parser, dummy_start_doctype_handler);
3063 XML_SetEndDoctypeDeclHandler(g_parser, dummy_end_doctype_handler);
3064 XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
3065 XML_SetStartElementHandler(g_parser, dummy_start_element);
3066 XML_SetEndElementHandler(g_parser, dummy_end_element);
3067 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3068 == XML_STATUS_ERROR)
3069 xml_failure(g_parser);
3070 CharData_CheckXMLChars(&storage, expected);
3071 }
3072 END_TEST
3073
START_TEST(test_ignore_section_utf16)3074 START_TEST(test_ignore_section_utf16) {
3075 const char text[] =
3076 /* <!DOCTYPE d SYSTEM 's'> */
3077 "<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 "
3078 "\0S\0Y\0S\0T\0E\0M\0 \0'\0s\0'\0>\0\n\0"
3079 /* <d><e>&en;</e></d> */
3080 "<\0d\0>\0<\0e\0>\0&\0e\0n\0;\0<\0/\0e\0>\0<\0/\0d\0>\0";
3081 const XML_Char *expected = XCS("<![IGNORE[<!ELEMENT e (#PCDATA)*>]]>\n&en;");
3082 CharData storage;
3083
3084 CharData_Init(&storage);
3085 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3086 XML_SetUserData(g_parser, &storage);
3087 XML_SetExternalEntityRefHandler(g_parser, external_entity_load_ignore_utf16);
3088 XML_SetDefaultHandler(g_parser, accumulate_characters);
3089 XML_SetStartDoctypeDeclHandler(g_parser, dummy_start_doctype_handler);
3090 XML_SetEndDoctypeDeclHandler(g_parser, dummy_end_doctype_handler);
3091 XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
3092 XML_SetStartElementHandler(g_parser, dummy_start_element);
3093 XML_SetEndElementHandler(g_parser, dummy_end_element);
3094 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
3095 == XML_STATUS_ERROR)
3096 xml_failure(g_parser);
3097 CharData_CheckXMLChars(&storage, expected);
3098 }
3099 END_TEST
3100
START_TEST(test_ignore_section_utf16_be)3101 START_TEST(test_ignore_section_utf16_be) {
3102 const char text[] =
3103 /* <!DOCTYPE d SYSTEM 's'> */
3104 "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 "
3105 "\0S\0Y\0S\0T\0E\0M\0 \0'\0s\0'\0>\0\n"
3106 /* <d><e>&en;</e></d> */
3107 "\0<\0d\0>\0<\0e\0>\0&\0e\0n\0;\0<\0/\0e\0>\0<\0/\0d\0>";
3108 const XML_Char *expected = XCS("<![IGNORE[<!ELEMENT e (#PCDATA)*>]]>\n&en;");
3109 CharData storage;
3110
3111 CharData_Init(&storage);
3112 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3113 XML_SetUserData(g_parser, &storage);
3114 XML_SetExternalEntityRefHandler(g_parser,
3115 external_entity_load_ignore_utf16_be);
3116 XML_SetDefaultHandler(g_parser, accumulate_characters);
3117 XML_SetStartDoctypeDeclHandler(g_parser, dummy_start_doctype_handler);
3118 XML_SetEndDoctypeDeclHandler(g_parser, dummy_end_doctype_handler);
3119 XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
3120 XML_SetStartElementHandler(g_parser, dummy_start_element);
3121 XML_SetEndElementHandler(g_parser, dummy_end_element);
3122 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
3123 == XML_STATUS_ERROR)
3124 xml_failure(g_parser);
3125 CharData_CheckXMLChars(&storage, expected);
3126 }
3127 END_TEST
3128
3129 /* Test mis-formatted conditional exclusion */
START_TEST(test_bad_ignore_section)3130 START_TEST(test_bad_ignore_section) {
3131 const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n"
3132 "<doc><e>&entity;</e></doc>";
3133 ExtFaults faults[]
3134 = {{"<![IGNORE[<!ELEM", "Broken-off declaration not faulted", NULL,
3135 XML_ERROR_SYNTAX},
3136 {"<![IGNORE[\x01]]>", "Invalid XML character not faulted", NULL,
3137 XML_ERROR_INVALID_TOKEN},
3138 {/* FIrst two bytes of a three-byte char */
3139 "<![IGNORE[\xe2\x82", "Partial XML character not faulted", NULL,
3140 XML_ERROR_PARTIAL_CHAR},
3141 {NULL, NULL, NULL, XML_ERROR_NONE}};
3142 ExtFaults *fault;
3143
3144 for (fault = &faults[0]; fault->parse_text != NULL; fault++) {
3145 set_subtest("%s", fault->parse_text);
3146 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3147 XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
3148 XML_SetUserData(g_parser, fault);
3149 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
3150 "Incomplete IGNORE section not failed");
3151 XML_ParserReset(g_parser, NULL);
3152 }
3153 }
3154 END_TEST
3155
/* State shared between test_external_bom_consumed and its external entity
 * handler, external_bom_checker; it travels as the parser's user data.
 */
struct bom_testdata {
  /* Text that the handler parses for the '004-2.ent' entity */
  const char *external;
  /* Number of leading bytes of `external` fed in the first, non-final call */
  int split;
  /* Set to XML_TRUE by the handler when it processes '004-2.ent' */
  XML_Bool nested_callback_happened;
};
3161
3162 static int XMLCALL
external_bom_checker(XML_Parser parser,const XML_Char * context,const XML_Char * base,const XML_Char * systemId,const XML_Char * publicId)3163 external_bom_checker(XML_Parser parser, const XML_Char *context,
3164 const XML_Char *base, const XML_Char *systemId,
3165 const XML_Char *publicId) {
3166 const char *text;
3167 UNUSED_P(base);
3168 UNUSED_P(systemId);
3169 UNUSED_P(publicId);
3170
3171 XML_Parser ext_parser = XML_ExternalEntityParserCreate(parser, context, NULL);
3172 if (ext_parser == NULL)
3173 fail("Could not create external entity parser");
3174
3175 if (! xcstrcmp(systemId, XCS("004-2.ent"))) {
3176 struct bom_testdata *const testdata
3177 = (struct bom_testdata *)XML_GetUserData(parser);
3178 const char *const external = testdata->external;
3179 const int split = testdata->split;
3180 testdata->nested_callback_happened = XML_TRUE;
3181
3182 if (_XML_Parse_SINGLE_BYTES(ext_parser, external, split, XML_FALSE)
3183 != XML_STATUS_OK) {
3184 xml_failure(ext_parser);
3185 }
3186 text = external + split; // the parse below will continue where we left off.
3187 } else if (! xcstrcmp(systemId, XCS("004-1.ent"))) {
3188 text = "<!ELEMENT doc EMPTY>\n"
3189 "<!ENTITY % e1 SYSTEM '004-2.ent'>\n"
3190 "<!ENTITY % e2 '%e1;'>\n";
3191 } else {
3192 fail("unknown systemId");
3193 }
3194
3195 if (_XML_Parse_SINGLE_BYTES(ext_parser, text, (int)strlen(text), XML_TRUE)
3196 != XML_STATUS_OK)
3197 xml_failure(ext_parser);
3198
3199 XML_ParserFree(ext_parser);
3200 return XML_STATUS_OK;
3201 }
3202
3203 /* regression test: BOM should be consumed when followed by a partial token. */
START_TEST(test_external_bom_consumed)3204 START_TEST(test_external_bom_consumed) {
3205 const char *const text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n"
3206 "<doc></doc>\n";
3207 const char *const external = "\xEF\xBB\xBF<!ATTLIST doc a1 CDATA 'value'>";
3208 const int len = (int)strlen(external);
3209 for (int split = 0; split <= len; ++split) {
3210 set_subtest("split at byte %d", split);
3211
3212 struct bom_testdata testdata;
3213 testdata.external = external;
3214 testdata.split = split;
3215 testdata.nested_callback_happened = XML_FALSE;
3216
3217 XML_Parser parser = XML_ParserCreate(NULL);
3218 if (parser == NULL) {
3219 fail("Couldn't create parser");
3220 }
3221 XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3222 XML_SetExternalEntityRefHandler(parser, external_bom_checker);
3223 XML_SetUserData(parser, &testdata);
3224 if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
3225 == XML_STATUS_ERROR)
3226 xml_failure(parser);
3227 if (! testdata.nested_callback_happened) {
3228 fail("ref handler not called");
3229 }
3230 XML_ParserFree(parser);
3231 }
3232 }
3233 END_TEST
3234
3235 /* Test recursive parsing */
START_TEST(test_external_entity_values)3236 START_TEST(test_external_entity_values) {
3237 const char *text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n"
3238 "<doc></doc>\n";
3239 ExtFaults data_004_2[] = {
3240 {"<!ATTLIST doc a1 CDATA 'value'>", NULL, NULL, XML_ERROR_NONE},
3241 {"<!ATTLIST $doc a1 CDATA 'value'>", "Invalid token not faulted", NULL,
3242 XML_ERROR_INVALID_TOKEN},
3243 {"'wombat", "Unterminated string not faulted", NULL,
3244 XML_ERROR_UNCLOSED_TOKEN},
3245 {"\xe2\x82", "Partial UTF-8 character not faulted", NULL,
3246 XML_ERROR_PARTIAL_CHAR},
3247 {"<?xml version='1.0' encoding='utf-8'?>\n", NULL, NULL, XML_ERROR_NONE},
3248 {"<?xml?>", "Malformed XML declaration not faulted", NULL,
3249 XML_ERROR_XML_DECL},
3250 {/* UTF-8 BOM */
3251 "\xEF\xBB\xBF<!ATTLIST doc a1 CDATA 'value'>", NULL, NULL,
3252 XML_ERROR_NONE},
3253 {"<?xml version='1.0' encoding='utf-8'?>\n$",
3254 "Invalid token after text declaration not faulted", NULL,
3255 XML_ERROR_INVALID_TOKEN},
3256 {"<?xml version='1.0' encoding='utf-8'?>\n'wombat",
3257 "Unterminated string after text decl not faulted", NULL,
3258 XML_ERROR_UNCLOSED_TOKEN},
3259 {"<?xml version='1.0' encoding='utf-8'?>\n\xe2\x82",
3260 "Partial UTF-8 character after text decl not faulted", NULL,
3261 XML_ERROR_PARTIAL_CHAR},
3262 {"%e1;", "Recursive parameter entity not faulted", NULL,
3263 XML_ERROR_RECURSIVE_ENTITY_REF},
3264 {NULL, NULL, NULL, XML_ERROR_NONE}};
3265 int i;
3266
3267 for (i = 0; data_004_2[i].parse_text != NULL; i++) {
3268 set_subtest("%s", data_004_2[i].parse_text);
3269 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3270 XML_SetExternalEntityRefHandler(g_parser, external_entity_valuer);
3271 XML_SetUserData(g_parser, &data_004_2[i]);
3272 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3273 == XML_STATUS_ERROR)
3274 xml_failure(g_parser);
3275 XML_ParserReset(g_parser, NULL);
3276 }
3277 }
3278 END_TEST
3279
3280 /* Test the recursive parse interacts with a not standalone handler */
START_TEST(test_ext_entity_not_standalone)3281 START_TEST(test_ext_entity_not_standalone) {
3282 const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n"
3283 "<doc></doc>";
3284
3285 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3286 XML_SetExternalEntityRefHandler(g_parser, external_entity_not_standalone);
3287 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
3288 "Standalone rejection not caught");
3289 }
3290 END_TEST
3291
START_TEST(test_ext_entity_value_abort)3292 START_TEST(test_ext_entity_value_abort) {
3293 const char *text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n"
3294 "<doc></doc>\n";
3295
3296 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3297 XML_SetExternalEntityRefHandler(g_parser, external_entity_value_aborter);
3298 g_resumable = XML_FALSE;
3299 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3300 == XML_STATUS_ERROR)
3301 xml_failure(g_parser);
3302 }
3303 END_TEST
3304
START_TEST(test_bad_public_doctype)3305 START_TEST(test_bad_public_doctype) {
3306 const char *text = "<?xml version='1.0' encoding='utf-8'?>\n"
3307 "<!DOCTYPE doc PUBLIC '{BadName}' 'test'>\n"
3308 "<doc></doc>";
3309
3310 /* Setting a handler provokes a particular code path */
3311 XML_SetDoctypeDeclHandler(g_parser, dummy_start_doctype_handler,
3312 dummy_end_doctype_handler);
3313 expect_failure(text, XML_ERROR_PUBLICID, "Bad Public ID not failed");
3314 }
3315 END_TEST
3316
3317 /* Test based on ibm/valid/P32/ibm32v04.xml */
START_TEST(test_attribute_enum_value)3318 START_TEST(test_attribute_enum_value) {
3319 const char *text = "<?xml version='1.0' standalone='no'?>\n"
3320 "<!DOCTYPE animal SYSTEM 'test.dtd'>\n"
3321 "<animal>This is a \n <a/> \n\nyellow tiger</animal>";
3322 ExtTest dtd_data
3323 = {"<!ELEMENT animal (#PCDATA|a)*>\n"
3324 "<!ELEMENT a EMPTY>\n"
3325 "<!ATTLIST animal xml:space (default|preserve) 'preserve'>",
3326 NULL, NULL};
3327 const XML_Char *expected = XCS("This is a \n \n\nyellow tiger");
3328
3329 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
3330 XML_SetUserData(g_parser, &dtd_data);
3331 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3332 /* An attribute list handler provokes a different code path */
3333 XML_SetAttlistDeclHandler(g_parser, dummy_attlist_decl_handler);
3334 run_ext_character_check(text, &dtd_data, expected);
3335 }
3336 END_TEST
3337
3338 /* Slightly bizarrely, the library seems to silently ignore entity
3339 * definitions for predefined entities, even when they are wrong. The
3340 * language of the XML 1.0 spec is somewhat unhelpful as to what ought
3341 * to happen, so this is currently treated as acceptable.
3342 */
START_TEST(test_predefined_entity_redefinition)3343 START_TEST(test_predefined_entity_redefinition) {
3344 const char *text = "<!DOCTYPE doc [\n"
3345 "<!ENTITY apos 'foo'>\n"
3346 "]>\n"
3347 "<doc>'</doc>";
3348 run_character_check(text, XCS("'"));
3349 }
3350 END_TEST
3351
3352 /* Test that the parser stops processing the DTD after an unresolved
3353 * parameter entity is encountered.
3354 */
START_TEST(test_dtd_stop_processing)3355 START_TEST(test_dtd_stop_processing) {
3356 const char *text = "<!DOCTYPE doc [\n"
3357 "%foo;\n"
3358 "<!ENTITY bar 'bas'>\n"
3359 "]><doc/>";
3360
3361 XML_SetEntityDeclHandler(g_parser, dummy_entity_decl_handler);
3362 init_dummy_handlers();
3363 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3364 == XML_STATUS_ERROR)
3365 xml_failure(g_parser);
3366 if (get_dummy_handler_flags() != 0)
3367 fail("DTD processing still going after undefined PE");
3368 }
3369 END_TEST
3370
3371 /* Test public notations with no system ID */
START_TEST(test_public_notation_no_sysid)3372 START_TEST(test_public_notation_no_sysid) {
3373 const char *text = "<!DOCTYPE doc [\n"
3374 "<!NOTATION note PUBLIC 'foo'>\n"
3375 "<!ELEMENT doc EMPTY>\n"
3376 "]>\n<doc/>";
3377
3378 init_dummy_handlers();
3379 XML_SetNotationDeclHandler(g_parser, dummy_notation_decl_handler);
3380 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3381 == XML_STATUS_ERROR)
3382 xml_failure(g_parser);
3383 if (get_dummy_handler_flags() != DUMMY_NOTATION_DECL_HANDLER_FLAG)
3384 fail("Notation declaration handler not called");
3385 }
3386 END_TEST
3387
START_TEST(test_nested_groups)3388 START_TEST(test_nested_groups) {
3389 const char *text
3390 = "<!DOCTYPE doc [\n"
3391 "<!ELEMENT doc "
3392 /* Sixteen elements per line */
3393 "(e,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,"
3394 "(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?"
3395 "))))))))))))))))))))))))))))))))>\n"
3396 "<!ELEMENT e EMPTY>"
3397 "]>\n"
3398 "<doc><e/></doc>";
3399 CharData storage;
3400
3401 CharData_Init(&storage);
3402 XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
3403 XML_SetStartElementHandler(g_parser, record_element_start_handler);
3404 XML_SetUserData(g_parser, &storage);
3405 init_dummy_handlers();
3406 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3407 == XML_STATUS_ERROR)
3408 xml_failure(g_parser);
3409 CharData_CheckXMLChars(&storage, XCS("doce"));
3410 if (get_dummy_handler_flags() != DUMMY_ELEMENT_DECL_HANDLER_FLAG)
3411 fail("Element handler not fired");
3412 }
3413 END_TEST
3414
START_TEST(test_group_choice)3415 START_TEST(test_group_choice) {
3416 const char *text = "<!DOCTYPE doc [\n"
3417 "<!ELEMENT doc (a|b|c)+>\n"
3418 "<!ELEMENT a EMPTY>\n"
3419 "<!ELEMENT b (#PCDATA)>\n"
3420 "<!ELEMENT c ANY>\n"
3421 "]>\n"
3422 "<doc>\n"
3423 "<a/>\n"
3424 "<b attr='foo'>This is a foo</b>\n"
3425 "<c></c>\n"
3426 "</doc>\n";
3427
3428 XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
3429 init_dummy_handlers();
3430 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3431 == XML_STATUS_ERROR)
3432 xml_failure(g_parser);
3433 if (get_dummy_handler_flags() != DUMMY_ELEMENT_DECL_HANDLER_FLAG)
3434 fail("Element handler flag not raised");
3435 }
3436 END_TEST
3437
START_TEST(test_standalone_parameter_entity)3438 START_TEST(test_standalone_parameter_entity) {
3439 const char *text = "<?xml version='1.0' standalone='yes'?>\n"
3440 "<!DOCTYPE doc SYSTEM 'http://example.org/' [\n"
3441 "<!ENTITY % entity '<!ELEMENT doc (#PCDATA)>'>\n"
3442 "%entity;\n"
3443 "]>\n"
3444 "<doc></doc>";
3445 char dtd_data[] = "<!ENTITY % e1 'foo'>\n";
3446
3447 XML_SetUserData(g_parser, dtd_data);
3448 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3449 XML_SetExternalEntityRefHandler(g_parser, external_entity_public);
3450 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3451 == XML_STATUS_ERROR)
3452 xml_failure(g_parser);
3453 }
3454 END_TEST
3455
3456 /* Test skipping of parameter entity in an external DTD */
3457 /* Derived from ibm/invalid/P69/ibm69i01.xml */
START_TEST(test_skipped_parameter_entity)3458 START_TEST(test_skipped_parameter_entity) {
3459 const char *text = "<?xml version='1.0'?>\n"
3460 "<!DOCTYPE root SYSTEM 'http://example.org/dtd.ent' [\n"
3461 "<!ELEMENT root (#PCDATA|a)* >\n"
3462 "]>\n"
3463 "<root></root>";
3464 ExtTest dtd_data = {"%pe2;", NULL, NULL};
3465
3466 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
3467 XML_SetUserData(g_parser, &dtd_data);
3468 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3469 XML_SetSkippedEntityHandler(g_parser, dummy_skip_handler);
3470 init_dummy_handlers();
3471 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3472 == XML_STATUS_ERROR)
3473 xml_failure(g_parser);
3474 if (get_dummy_handler_flags() != DUMMY_SKIP_HANDLER_FLAG)
3475 fail("Skip handler not executed");
3476 }
3477 END_TEST
3478
3479 /* Test recursive parameter entity definition rejected in external DTD */
START_TEST(test_recursive_external_parameter_entity)3480 START_TEST(test_recursive_external_parameter_entity) {
3481 const char *text = "<?xml version='1.0'?>\n"
3482 "<!DOCTYPE root SYSTEM 'http://example.org/dtd.ent' [\n"
3483 "<!ELEMENT root (#PCDATA|a)* >\n"
3484 "]>\n"
3485 "<root></root>";
3486 ExtFaults dtd_data = {"<!ENTITY % pe2 '%pe2;'>\n%pe2;",
3487 "Recursive external parameter entity not faulted", NULL,
3488 XML_ERROR_RECURSIVE_ENTITY_REF};
3489
3490 XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
3491 XML_SetUserData(g_parser, &dtd_data);
3492 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3493 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
3494 "Recursive external parameter not spotted");
3495 }
3496 END_TEST
3497
3498 /* Test undefined parameter entity in external entity handler */
START_TEST(test_undefined_ext_entity_in_external_dtd)3499 START_TEST(test_undefined_ext_entity_in_external_dtd) {
3500 const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n"
3501 "<doc></doc>\n";
3502
3503 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3504 XML_SetExternalEntityRefHandler(g_parser, external_entity_devaluer);
3505 XML_SetUserData(g_parser, NULL);
3506 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3507 == XML_STATUS_ERROR)
3508 xml_failure(g_parser);
3509
3510 /* Now repeat without the external entity ref handler invoking
3511 * another copy of itself.
3512 */
3513 XML_ParserReset(g_parser, NULL);
3514 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3515 XML_SetExternalEntityRefHandler(g_parser, external_entity_devaluer);
3516 XML_SetUserData(g_parser, g_parser); /* Any non-NULL value will do */
3517 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3518 == XML_STATUS_ERROR)
3519 xml_failure(g_parser);
3520 }
3521 END_TEST
3522
3523 /* Test suspending the parse on receiving an XML declaration works */
START_TEST(test_suspend_xdecl)3524 START_TEST(test_suspend_xdecl) {
3525 const char *text = long_character_data_text;
3526
3527 XML_SetXmlDeclHandler(g_parser, entity_suspending_xdecl_handler);
3528 XML_SetUserData(g_parser, g_parser);
3529 g_resumable = XML_TRUE;
3530 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3531 != XML_STATUS_SUSPENDED)
3532 xml_failure(g_parser);
3533 if (XML_GetErrorCode(g_parser) != XML_ERROR_NONE)
3534 xml_failure(g_parser);
3535 /* Attempt to start a new parse while suspended */
3536 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3537 != XML_STATUS_ERROR)
3538 fail("Attempt to parse while suspended not faulted");
3539 if (XML_GetErrorCode(g_parser) != XML_ERROR_SUSPENDED)
3540 fail("Suspended parse not faulted with correct error");
3541 }
3542 END_TEST
3543
3544 /* Test aborting the parse in an epilog works */
START_TEST(test_abort_epilog)3545 START_TEST(test_abort_epilog) {
3546 const char *text = "<doc></doc>\n\r\n";
3547 XML_Char trigger_char = XCS('\r');
3548
3549 XML_SetDefaultHandler(g_parser, selective_aborting_default_handler);
3550 XML_SetUserData(g_parser, &trigger_char);
3551 g_resumable = XML_FALSE;
3552 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3553 != XML_STATUS_ERROR)
3554 fail("Abort not triggered");
3555 if (XML_GetErrorCode(g_parser) != XML_ERROR_ABORTED)
3556 xml_failure(g_parser);
3557 }
3558 END_TEST
3559
3560 /* Test a different code path for abort in the epilog */
START_TEST(test_abort_epilog_2)3561 START_TEST(test_abort_epilog_2) {
3562 const char *text = "<doc></doc>\n";
3563 XML_Char trigger_char = XCS('\n');
3564
3565 XML_SetDefaultHandler(g_parser, selective_aborting_default_handler);
3566 XML_SetUserData(g_parser, &trigger_char);
3567 g_resumable = XML_FALSE;
3568 expect_failure(text, XML_ERROR_ABORTED, "Abort not triggered");
3569 }
3570 END_TEST
3571
3572 /* Test suspension from the epilog */
START_TEST(test_suspend_epilog)3573 START_TEST(test_suspend_epilog) {
3574 const char *text = "<doc></doc>\n";
3575 XML_Char trigger_char = XCS('\n');
3576
3577 XML_SetDefaultHandler(g_parser, selective_aborting_default_handler);
3578 XML_SetUserData(g_parser, &trigger_char);
3579 g_resumable = XML_TRUE;
3580 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3581 != XML_STATUS_SUSPENDED)
3582 xml_failure(g_parser);
3583 }
3584 END_TEST
3585
START_TEST(test_suspend_in_sole_empty_tag)3586 START_TEST(test_suspend_in_sole_empty_tag) {
3587 const char *text = "<doc/>";
3588 enum XML_Status rc;
3589
3590 XML_SetEndElementHandler(g_parser, suspending_end_handler);
3591 XML_SetUserData(g_parser, g_parser);
3592 rc = _XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE);
3593 if (rc == XML_STATUS_ERROR)
3594 xml_failure(g_parser);
3595 else if (rc != XML_STATUS_SUSPENDED)
3596 fail("Suspend not triggered");
3597 rc = XML_ResumeParser(g_parser);
3598 if (rc == XML_STATUS_ERROR)
3599 xml_failure(g_parser);
3600 else if (rc != XML_STATUS_OK)
3601 fail("Resume failed");
3602 }
3603 END_TEST
3604
START_TEST(test_unfinished_epilog)3605 START_TEST(test_unfinished_epilog) {
3606 const char *text = "<doc></doc><";
3607
3608 expect_failure(text, XML_ERROR_UNCLOSED_TOKEN,
3609 "Incomplete epilog entry not faulted");
3610 }
3611 END_TEST
3612
START_TEST(test_partial_char_in_epilog)3613 START_TEST(test_partial_char_in_epilog) {
3614 const char *text = "<doc></doc>\xe2\x82";
3615
3616 /* First check that no fault is raised if the parse is not finished */
3617 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE)
3618 == XML_STATUS_ERROR)
3619 xml_failure(g_parser);
3620 /* Now check that it is faulted once we finish */
3621 if (XML_ParseBuffer(g_parser, 0, XML_TRUE) != XML_STATUS_ERROR)
3622 fail("Partial character in epilog not faulted");
3623 if (XML_GetErrorCode(g_parser) != XML_ERROR_PARTIAL_CHAR)
3624 xml_failure(g_parser);
3625 }
3626 END_TEST
3627
3628 /* Test resuming a parse suspended in entity substitution */
START_TEST(test_suspend_resume_internal_entity)3629 START_TEST(test_suspend_resume_internal_entity) {
3630 const char *text
3631 = "<!DOCTYPE doc [\n"
3632 "<!ENTITY foo '<suspend>Hi<suspend>Ho</suspend></suspend>'>\n"
3633 "]>\n"
3634 "<doc>&foo;</doc>\n";
3635 const XML_Char *expected1 = XCS("Hi");
3636 const XML_Char *expected2 = XCS("HiHo");
3637 CharData storage;
3638
3639 CharData_Init(&storage);
3640 XML_SetStartElementHandler(g_parser, start_element_suspender);
3641 XML_SetCharacterDataHandler(g_parser, accumulate_characters);
3642 XML_SetUserData(g_parser, &storage);
3643 // can't use SINGLE_BYTES here, because it'll return early on suspension, and
3644 // we won't know exactly how much input we actually managed to give Expat.
3645 if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
3646 != XML_STATUS_SUSPENDED)
3647 xml_failure(g_parser);
3648 CharData_CheckXMLChars(&storage, XCS(""));
3649 if (XML_ResumeParser(g_parser) != XML_STATUS_SUSPENDED)
3650 xml_failure(g_parser);
3651 CharData_CheckXMLChars(&storage, expected1);
3652 if (XML_ResumeParser(g_parser) != XML_STATUS_OK)
3653 xml_failure(g_parser);
3654 CharData_CheckXMLChars(&storage, expected2);
3655 }
3656 END_TEST
3657
START_TEST(test_suspend_resume_internal_entity_issue_629)3658 START_TEST(test_suspend_resume_internal_entity_issue_629) {
3659 const char *const text
3660 = "<!DOCTYPE a [<!ENTITY e '<!--COMMENT-->a'>]><a>&e;<b>\n"
3661 "<"
3662 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3663 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3664 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3665 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3666 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3667 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3668 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3669 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3670 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3671 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3672 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3673 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3674 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3675 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3676 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3677 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3678 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3679 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3680 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3681 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3682 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3683 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3684 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3685 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3686 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3687 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3688 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3689 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3690 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3691 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3692 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3693 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3694 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3695 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3696 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3697 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3698 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3699 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3700 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3701 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3702 "/>"
3703 "</b></a>";
3704 const size_t firstChunkSizeBytes = 54;
3705
3706 XML_Parser parser = XML_ParserCreate(NULL);
3707 XML_SetUserData(parser, parser);
3708 XML_SetCommentHandler(parser, suspending_comment_handler);
3709
3710 if (XML_Parse(parser, text, (int)firstChunkSizeBytes, XML_FALSE)
3711 != XML_STATUS_SUSPENDED)
3712 xml_failure(parser);
3713 if (XML_ResumeParser(parser) != XML_STATUS_OK)
3714 xml_failure(parser);
3715 if (_XML_Parse_SINGLE_BYTES(parser, text + firstChunkSizeBytes,
3716 (int)(strlen(text) - firstChunkSizeBytes),
3717 XML_TRUE)
3718 != XML_STATUS_OK)
3719 xml_failure(parser);
3720 XML_ParserFree(parser);
3721 }
3722 END_TEST
3723
3724 /* Test syntax error is caught at parse resumption */
START_TEST(test_resume_entity_with_syntax_error)3725 START_TEST(test_resume_entity_with_syntax_error) {
3726 const char *text = "<!DOCTYPE doc [\n"
3727 "<!ENTITY foo '<suspend>Hi</wombat>'>\n"
3728 "]>\n"
3729 "<doc>&foo;</doc>\n";
3730
3731 XML_SetStartElementHandler(g_parser, start_element_suspender);
3732 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3733 != XML_STATUS_SUSPENDED)
3734 xml_failure(g_parser);
3735 if (XML_ResumeParser(g_parser) != XML_STATUS_ERROR)
3736 fail("Syntax error in entity not faulted");
3737 if (XML_GetErrorCode(g_parser) != XML_ERROR_TAG_MISMATCH)
3738 xml_failure(g_parser);
3739 }
3740 END_TEST
3741
3742 /* Test suspending and resuming in a parameter entity substitution */
START_TEST(test_suspend_resume_parameter_entity)3743 START_TEST(test_suspend_resume_parameter_entity) {
3744 const char *text = "<!DOCTYPE doc [\n"
3745 "<!ENTITY % foo '<!ELEMENT doc (#PCDATA)*>'>\n"
3746 "%foo;\n"
3747 "]>\n"
3748 "<doc>Hello, world</doc>";
3749 const XML_Char *expected = XCS("Hello, world");
3750 CharData storage;
3751
3752 CharData_Init(&storage);
3753 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3754 XML_SetElementDeclHandler(g_parser, element_decl_suspender);
3755 XML_SetCharacterDataHandler(g_parser, accumulate_characters);
3756 XML_SetUserData(g_parser, &storage);
3757 if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
3758 != XML_STATUS_SUSPENDED)
3759 xml_failure(g_parser);
3760 CharData_CheckXMLChars(&storage, XCS(""));
3761 if (XML_ResumeParser(g_parser) != XML_STATUS_OK)
3762 xml_failure(g_parser);
3763 CharData_CheckXMLChars(&storage, expected);
3764 }
3765 END_TEST
3766
3767 /* Test attempting to use parser after an error is faulted */
START_TEST(test_restart_on_error)3768 START_TEST(test_restart_on_error) {
3769 const char *text = "<$doc><doc></doc>";
3770
3771 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3772 != XML_STATUS_ERROR)
3773 fail("Invalid tag name not faulted");
3774 if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN)
3775 xml_failure(g_parser);
3776 if (XML_Parse(g_parser, NULL, 0, XML_TRUE) != XML_STATUS_ERROR)
3777 fail("Restarting invalid parse not faulted");
3778 if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN)
3779 xml_failure(g_parser);
3780 }
3781 END_TEST
3782
3783 /* Test that angle brackets in an attribute default value are faulted */
START_TEST(test_reject_lt_in_attribute_value)3784 START_TEST(test_reject_lt_in_attribute_value) {
3785 const char *text = "<!DOCTYPE doc [<!ATTLIST doc a CDATA '<bar>'>]>\n"
3786 "<doc></doc>";
3787
3788 expect_failure(text, XML_ERROR_INVALID_TOKEN,
3789 "Bad attribute default not faulted");
3790 }
3791 END_TEST
3792
START_TEST(test_reject_unfinished_param_in_att_value)3793 START_TEST(test_reject_unfinished_param_in_att_value) {
3794 const char *text = "<!DOCTYPE doc [<!ATTLIST doc a CDATA '&foo'>]>\n"
3795 "<doc></doc>";
3796
3797 expect_failure(text, XML_ERROR_INVALID_TOKEN,
3798 "Bad attribute default not faulted");
3799 }
3800 END_TEST
3801
START_TEST(test_trailing_cr_in_att_value)3802 START_TEST(test_trailing_cr_in_att_value) {
3803 const char *text = "<doc a='value\r'/>";
3804
3805 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3806 == XML_STATUS_ERROR)
3807 xml_failure(g_parser);
3808 }
3809 END_TEST
3810
3811 /* Try parsing a general entity within a parameter entity in a
3812 * standalone internal DTD. Covers a corner case in the parser.
3813 */
START_TEST(test_standalone_internal_entity)3814 START_TEST(test_standalone_internal_entity) {
3815 const char *text = "<?xml version='1.0' standalone='yes' ?>\n"
3816 "<!DOCTYPE doc [\n"
3817 " <!ELEMENT doc (#PCDATA)>\n"
3818 " <!ENTITY % pe '<!ATTLIST doc att2 CDATA \"≥\">'>\n"
3819 " <!ENTITY ge 'AttDefaultValue'>\n"
3820 " %pe;\n"
3821 "]>\n"
3822 "<doc att2='any'/>";
3823
3824 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3825 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3826 == XML_STATUS_ERROR)
3827 xml_failure(g_parser);
3828 }
3829 END_TEST
3830
3831 /* Test that a reference to an unknown external entity is skipped */
START_TEST(test_skipped_external_entity)3832 START_TEST(test_skipped_external_entity) {
3833 const char *text = "<!DOCTYPE doc SYSTEM 'http://example.org/'>\n"
3834 "<doc></doc>\n";
3835 ExtTest test_data = {"<!ELEMENT doc EMPTY>\n"
3836 "<!ENTITY % e2 '%e1;'>\n",
3837 NULL, NULL};
3838
3839 XML_SetUserData(g_parser, &test_data);
3840 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3841 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
3842 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3843 == XML_STATUS_ERROR)
3844 xml_failure(g_parser);
3845 }
3846 END_TEST
3847
3848 /* Test a different form of unknown external entity */
START_TEST(test_skipped_null_loaded_ext_entity)3849 START_TEST(test_skipped_null_loaded_ext_entity) {
3850 const char *text = "<!DOCTYPE doc SYSTEM 'http://example.org/one.ent'>\n"
3851 "<doc />";
3852 ExtHdlrData test_data
3853 = {"<!ENTITY % pe1 SYSTEM 'http://example.org/two.ent'>\n"
3854 "<!ENTITY % pe2 '%pe1;'>\n"
3855 "%pe2;\n",
3856 external_entity_null_loader};
3857
3858 XML_SetUserData(g_parser, &test_data);
3859 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3860 XML_SetExternalEntityRefHandler(g_parser, external_entity_oneshot_loader);
3861 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3862 == XML_STATUS_ERROR)
3863 xml_failure(g_parser);
3864 }
3865 END_TEST
3866
START_TEST(test_skipped_unloaded_ext_entity)3867 START_TEST(test_skipped_unloaded_ext_entity) {
3868 const char *text = "<!DOCTYPE doc SYSTEM 'http://example.org/one.ent'>\n"
3869 "<doc />";
3870 ExtHdlrData test_data
3871 = {"<!ENTITY % pe1 SYSTEM 'http://example.org/two.ent'>\n"
3872 "<!ENTITY % pe2 '%pe1;'>\n"
3873 "%pe2;\n",
3874 NULL};
3875
3876 XML_SetUserData(g_parser, &test_data);
3877 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3878 XML_SetExternalEntityRefHandler(g_parser, external_entity_oneshot_loader);
3879 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3880 == XML_STATUS_ERROR)
3881 xml_failure(g_parser);
3882 }
3883 END_TEST
3884
3885 /* Test that a parameter entity value ending with a carriage return
3886 * has it translated internally into a newline.
3887 */
START_TEST(test_param_entity_with_trailing_cr)3888 START_TEST(test_param_entity_with_trailing_cr) {
3889 #define PARAM_ENTITY_NAME "pe"
3890 #define PARAM_ENTITY_CORE_VALUE "<!ATTLIST doc att CDATA \"default\">"
3891 const char *text = "<!DOCTYPE doc SYSTEM 'http://example.org/'>\n"
3892 "<doc/>";
3893 ExtTest test_data
3894 = {"<!ENTITY % " PARAM_ENTITY_NAME " '" PARAM_ENTITY_CORE_VALUE "\r'>\n"
3895 "%" PARAM_ENTITY_NAME ";\n",
3896 NULL, NULL};
3897
3898 XML_SetUserData(g_parser, &test_data);
3899 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3900 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
3901 XML_SetEntityDeclHandler(g_parser, param_entity_match_handler);
3902 param_entity_match_init(XCS(PARAM_ENTITY_NAME),
3903 XCS(PARAM_ENTITY_CORE_VALUE) XCS("\n"));
3904 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3905 == XML_STATUS_ERROR)
3906 xml_failure(g_parser);
3907 int entity_match_flag = get_param_entity_match_flag();
3908 if (entity_match_flag == ENTITY_MATCH_FAIL)
3909 fail("Parameter entity CR->NEWLINE conversion failed");
3910 else if (entity_match_flag == ENTITY_MATCH_NOT_FOUND)
3911 fail("Parameter entity not parsed");
3912 }
3913 #undef PARAM_ENTITY_NAME
3914 #undef PARAM_ENTITY_CORE_VALUE
3915 END_TEST
3916
START_TEST(test_invalid_character_entity)3917 START_TEST(test_invalid_character_entity) {
3918 const char *text = "<!DOCTYPE doc [\n"
3919 " <!ENTITY entity '�'>\n"
3920 "]>\n"
3921 "<doc>&entity;</doc>";
3922
3923 expect_failure(text, XML_ERROR_BAD_CHAR_REF,
3924 "Out of range character reference not faulted");
3925 }
3926 END_TEST
3927
START_TEST(test_invalid_character_entity_2)3928 START_TEST(test_invalid_character_entity_2) {
3929 const char *text = "<!DOCTYPE doc [\n"
3930 " <!ENTITY entity '&#xg0;'>\n"
3931 "]>\n"
3932 "<doc>&entity;</doc>";
3933
3934 expect_failure(text, XML_ERROR_INVALID_TOKEN,
3935 "Out of range character reference not faulted");
3936 }
3937 END_TEST
3938
START_TEST(test_invalid_character_entity_3)3939 START_TEST(test_invalid_character_entity_3) {
3940 const char text[] =
3941 /* <!DOCTYPE doc [\n */
3942 "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0o\0c\0 \0[\0\n"
3943 /* U+0E04 = KHO KHWAI
3944 * U+0E08 = CHO CHAN */
3945 /* <!ENTITY entity '&\u0e04\u0e08;'>\n */
3946 "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0e\0n\0t\0i\0t\0y\0 "
3947 "\0'\0&\x0e\x04\x0e\x08\0;\0'\0>\0\n"
3948 /* ]>\n */
3949 "\0]\0>\0\n"
3950 /* <doc>&entity;</doc> */
3951 "\0<\0d\0o\0c\0>\0&\0e\0n\0t\0i\0t\0y\0;\0<\0/\0d\0o\0c\0>";
3952
3953 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
3954 != XML_STATUS_ERROR)
3955 fail("Invalid start of entity name not faulted");
3956 if (XML_GetErrorCode(g_parser) != XML_ERROR_UNDEFINED_ENTITY)
3957 xml_failure(g_parser);
3958 }
3959 END_TEST
3960
START_TEST(test_invalid_character_entity_4)3961 START_TEST(test_invalid_character_entity_4) {
3962 const char *text = "<!DOCTYPE doc [\n"
3963 " <!ENTITY entity '�'>\n" /* = � */
3964 "]>\n"
3965 "<doc>&entity;</doc>";
3966
3967 expect_failure(text, XML_ERROR_BAD_CHAR_REF,
3968 "Out of range character reference not faulted");
3969 }
3970 END_TEST
3971
3972 /* Test that processing instructions are picked up by a default handler */
START_TEST(test_pi_handled_in_default)3973 START_TEST(test_pi_handled_in_default) {
3974 const char *text = "<?test processing instruction?>\n<doc/>";
3975 const XML_Char *expected = XCS("<?test processing instruction?>\n<doc/>");
3976 CharData storage;
3977
3978 CharData_Init(&storage);
3979 XML_SetDefaultHandler(g_parser, accumulate_characters);
3980 XML_SetUserData(g_parser, &storage);
3981 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3982 == XML_STATUS_ERROR)
3983 xml_failure(g_parser);
3984 CharData_CheckXMLChars(&storage, expected);
3985 }
3986 END_TEST
3987
3988 /* Test that comments are picked up by a default handler */
START_TEST(test_comment_handled_in_default)3989 START_TEST(test_comment_handled_in_default) {
3990 const char *text = "<!-- This is a comment -->\n<doc/>";
3991 const XML_Char *expected = XCS("<!-- This is a comment -->\n<doc/>");
3992 CharData storage;
3993
3994 CharData_Init(&storage);
3995 XML_SetDefaultHandler(g_parser, accumulate_characters);
3996 XML_SetUserData(g_parser, &storage);
3997 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3998 == XML_STATUS_ERROR)
3999 xml_failure(g_parser);
4000 CharData_CheckXMLChars(&storage, expected);
4001 }
4002 END_TEST
4003
4004 /* Test PIs that look almost but not quite like XML declarations */
START_TEST(test_pi_yml)4005 START_TEST(test_pi_yml) {
4006 const char *text = "<?yml something like data?><doc/>";
4007 const XML_Char *expected = XCS("yml: something like data\n");
4008 CharData storage;
4009
4010 CharData_Init(&storage);
4011 XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters);
4012 XML_SetUserData(g_parser, &storage);
4013 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4014 == XML_STATUS_ERROR)
4015 xml_failure(g_parser);
4016 CharData_CheckXMLChars(&storage, expected);
4017 }
4018 END_TEST
4019
START_TEST(test_pi_xnl)4020 START_TEST(test_pi_xnl) {
4021 const char *text = "<?xnl nothing like data?><doc/>";
4022 const XML_Char *expected = XCS("xnl: nothing like data\n");
4023 CharData storage;
4024
4025 CharData_Init(&storage);
4026 XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters);
4027 XML_SetUserData(g_parser, &storage);
4028 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4029 == XML_STATUS_ERROR)
4030 xml_failure(g_parser);
4031 CharData_CheckXMLChars(&storage, expected);
4032 }
4033 END_TEST
4034
START_TEST(test_pi_xmm)4035 START_TEST(test_pi_xmm) {
4036 const char *text = "<?xmm everything like data?><doc/>";
4037 const XML_Char *expected = XCS("xmm: everything like data\n");
4038 CharData storage;
4039
4040 CharData_Init(&storage);
4041 XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters);
4042 XML_SetUserData(g_parser, &storage);
4043 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4044 == XML_STATUS_ERROR)
4045 xml_failure(g_parser);
4046 CharData_CheckXMLChars(&storage, expected);
4047 }
4048 END_TEST
4049
START_TEST(test_utf16_pi)4050 START_TEST(test_utf16_pi) {
4051 const char text[] =
4052 /* <?{KHO KHWAI}{CHO CHAN}?>
4053 * where {KHO KHWAI} = U+0E04
4054 * and {CHO CHAN} = U+0E08
4055 */
4056 "<\0?\0\x04\x0e\x08\x0e?\0>\0"
4057 /* <q/> */
4058 "<\0q\0/\0>\0";
4059 #ifdef XML_UNICODE
4060 const XML_Char *expected = XCS("\x0e04\x0e08: \n");
4061 #else
4062 const XML_Char *expected = XCS("\xe0\xb8\x84\xe0\xb8\x88: \n");
4063 #endif
4064 CharData storage;
4065
4066 CharData_Init(&storage);
4067 XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters);
4068 XML_SetUserData(g_parser, &storage);
4069 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4070 == XML_STATUS_ERROR)
4071 xml_failure(g_parser);
4072 CharData_CheckXMLChars(&storage, expected);
4073 }
4074 END_TEST
4075
START_TEST(test_utf16_be_pi)4076 START_TEST(test_utf16_be_pi) {
4077 const char text[] =
4078 /* <?{KHO KHWAI}{CHO CHAN}?>
4079 * where {KHO KHWAI} = U+0E04
4080 * and {CHO CHAN} = U+0E08
4081 */
4082 "\0<\0?\x0e\x04\x0e\x08\0?\0>"
4083 /* <q/> */
4084 "\0<\0q\0/\0>";
4085 #ifdef XML_UNICODE
4086 const XML_Char *expected = XCS("\x0e04\x0e08: \n");
4087 #else
4088 const XML_Char *expected = XCS("\xe0\xb8\x84\xe0\xb8\x88: \n");
4089 #endif
4090 CharData storage;
4091
4092 CharData_Init(&storage);
4093 XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters);
4094 XML_SetUserData(g_parser, &storage);
4095 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4096 == XML_STATUS_ERROR)
4097 xml_failure(g_parser);
4098 CharData_CheckXMLChars(&storage, expected);
4099 }
4100 END_TEST
4101
4102 /* Test that comments can be picked up and translated */
START_TEST(test_utf16_be_comment)4103 START_TEST(test_utf16_be_comment) {
4104 const char text[] =
4105 /* <!-- Comment A --> */
4106 "\0<\0!\0-\0-\0 \0C\0o\0m\0m\0e\0n\0t\0 \0A\0 \0-\0-\0>\0\n"
4107 /* <doc/> */
4108 "\0<\0d\0o\0c\0/\0>";
4109 const XML_Char *expected = XCS(" Comment A ");
4110 CharData storage;
4111
4112 CharData_Init(&storage);
4113 XML_SetCommentHandler(g_parser, accumulate_comment);
4114 XML_SetUserData(g_parser, &storage);
4115 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4116 == XML_STATUS_ERROR)
4117 xml_failure(g_parser);
4118 CharData_CheckXMLChars(&storage, expected);
4119 }
4120 END_TEST
4121
START_TEST(test_utf16_le_comment)4122 START_TEST(test_utf16_le_comment) {
4123 const char text[] =
4124 /* <!-- Comment B --> */
4125 "<\0!\0-\0-\0 \0C\0o\0m\0m\0e\0n\0t\0 \0B\0 \0-\0-\0>\0\n\0"
4126 /* <doc/> */
4127 "<\0d\0o\0c\0/\0>\0";
4128 const XML_Char *expected = XCS(" Comment B ");
4129 CharData storage;
4130
4131 CharData_Init(&storage);
4132 XML_SetCommentHandler(g_parser, accumulate_comment);
4133 XML_SetUserData(g_parser, &storage);
4134 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4135 == XML_STATUS_ERROR)
4136 xml_failure(g_parser);
4137 CharData_CheckXMLChars(&storage, expected);
4138 }
4139 END_TEST
4140
4141 /* Test that the unknown encoding handler with map entries that expect
4142 * conversion but no conversion function is faulted
4143 */
START_TEST(test_missing_encoding_conversion_fn)4144 START_TEST(test_missing_encoding_conversion_fn) {
4145 const char *text = "<?xml version='1.0' encoding='no-conv'?>\n"
4146 "<doc>\x81</doc>";
4147
4148 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4149 /* MiscEncodingHandler sets up an encoding with every top-bit-set
4150 * character introducing a two-byte sequence. For this, it
4151 * requires a convert function. The above function call doesn't
4152 * pass one through, so when BadEncodingHandler actually gets
4153 * called it should supply an invalid encoding.
4154 */
4155 expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
4156 "Encoding with missing convert() not faulted");
4157 }
4158 END_TEST
4159
START_TEST(test_failing_encoding_conversion_fn)4160 START_TEST(test_failing_encoding_conversion_fn) {
4161 const char *text = "<?xml version='1.0' encoding='failing-conv'?>\n"
4162 "<doc>\x81</doc>";
4163
4164 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4165 /* BadEncodingHandler sets up an encoding with every top-bit-set
4166 * character introducing a two-byte sequence. For this, it
4167 * requires a convert function. The above function call passes
4168 * one that insists all possible sequences are invalid anyway.
4169 */
4170 expect_failure(text, XML_ERROR_INVALID_TOKEN,
4171 "Encoding with failing convert() not faulted");
4172 }
4173 END_TEST
4174
4175 /* Test unknown encoding conversions */
START_TEST(test_unknown_encoding_success)4176 START_TEST(test_unknown_encoding_success) {
4177 const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4178 /* Equivalent to <eoc>Hello, world</eoc> */
4179 "<\x81\x64\x80oc>Hello, world</\x81\x64\x80oc>";
4180
4181 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4182 run_character_check(text, XCS("Hello, world"));
4183 }
4184 END_TEST
4185
4186 /* Test bad name character in unknown encoding */
START_TEST(test_unknown_encoding_bad_name)4187 START_TEST(test_unknown_encoding_bad_name) {
4188 const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4189 "<\xff\x64oc>Hello, world</\xff\x64oc>";
4190
4191 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4192 expect_failure(text, XML_ERROR_INVALID_TOKEN,
4193 "Bad name start in unknown encoding not faulted");
4194 }
4195 END_TEST
4196
4197 /* Test bad mid-name character in unknown encoding */
START_TEST(test_unknown_encoding_bad_name_2)4198 START_TEST(test_unknown_encoding_bad_name_2) {
4199 const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4200 "<d\xffoc>Hello, world</d\xffoc>";
4201
4202 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4203 expect_failure(text, XML_ERROR_INVALID_TOKEN,
4204 "Bad name in unknown encoding not faulted");
4205 }
4206 END_TEST
4207
4208 /* Test element name that is long enough to fill the conversion buffer
4209 * in an unknown encoding, finishing with an encoded character.
4210 */
START_TEST(test_unknown_encoding_long_name_1)4211 START_TEST(test_unknown_encoding_long_name_1) {
4212 const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4213 "<abcdefghabcdefghabcdefghijkl\x80m\x80n\x80o\x80p>"
4214 "Hi"
4215 "</abcdefghabcdefghabcdefghijkl\x80m\x80n\x80o\x80p>";
4216 const XML_Char *expected = XCS("abcdefghabcdefghabcdefghijklmnop");
4217 CharData storage;
4218
4219 CharData_Init(&storage);
4220 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4221 XML_SetStartElementHandler(g_parser, record_element_start_handler);
4222 XML_SetUserData(g_parser, &storage);
4223 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4224 == XML_STATUS_ERROR)
4225 xml_failure(g_parser);
4226 CharData_CheckXMLChars(&storage, expected);
4227 }
4228 END_TEST
4229
4230 /* Test element name that is long enough to fill the conversion buffer
 * in an unknown encoding, finishing with a simple character.
4232 */
START_TEST(test_unknown_encoding_long_name_2)4233 START_TEST(test_unknown_encoding_long_name_2) {
4234 const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4235 "<abcdefghabcdefghabcdefghijklmnop>"
4236 "Hi"
4237 "</abcdefghabcdefghabcdefghijklmnop>";
4238 const XML_Char *expected = XCS("abcdefghabcdefghabcdefghijklmnop");
4239 CharData storage;
4240
4241 CharData_Init(&storage);
4242 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4243 XML_SetStartElementHandler(g_parser, record_element_start_handler);
4244 XML_SetUserData(g_parser, &storage);
4245 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4246 == XML_STATUS_ERROR)
4247 xml_failure(g_parser);
4248 CharData_CheckXMLChars(&storage, expected);
4249 }
4250 END_TEST
4251
START_TEST(test_invalid_unknown_encoding)4252 START_TEST(test_invalid_unknown_encoding) {
4253 const char *text = "<?xml version='1.0' encoding='invalid-9'?>\n"
4254 "<doc>Hello world</doc>";
4255
4256 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4257 expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
4258 "Invalid unknown encoding not faulted");
4259 }
4260 END_TEST
4261
START_TEST(test_unknown_ascii_encoding_ok)4262 START_TEST(test_unknown_ascii_encoding_ok) {
4263 const char *text = "<?xml version='1.0' encoding='ascii-like'?>\n"
4264 "<doc>Hello, world</doc>";
4265
4266 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4267 run_character_check(text, XCS("Hello, world"));
4268 }
4269 END_TEST
4270
START_TEST(test_unknown_ascii_encoding_fail)4271 START_TEST(test_unknown_ascii_encoding_fail) {
4272 const char *text = "<?xml version='1.0' encoding='ascii-like'?>\n"
4273 "<doc>Hello, \x80 world</doc>";
4274
4275 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4276 expect_failure(text, XML_ERROR_INVALID_TOKEN,
4277 "Invalid character not faulted");
4278 }
4279 END_TEST
4280
START_TEST(test_unknown_encoding_invalid_length)4281 START_TEST(test_unknown_encoding_invalid_length) {
4282 const char *text = "<?xml version='1.0' encoding='invalid-len'?>\n"
4283 "<doc>Hello, world</doc>";
4284
4285 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4286 expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
4287 "Invalid unknown encoding not faulted");
4288 }
4289 END_TEST
4290
START_TEST(test_unknown_encoding_invalid_topbit)4291 START_TEST(test_unknown_encoding_invalid_topbit) {
4292 const char *text = "<?xml version='1.0' encoding='invalid-a'?>\n"
4293 "<doc>Hello, world</doc>";
4294
4295 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4296 expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
4297 "Invalid unknown encoding not faulted");
4298 }
4299 END_TEST
4300
START_TEST(test_unknown_encoding_invalid_surrogate)4301 START_TEST(test_unknown_encoding_invalid_surrogate) {
4302 const char *text = "<?xml version='1.0' encoding='invalid-surrogate'?>\n"
4303 "<doc>Hello, \x82 world</doc>";
4304
4305 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4306 expect_failure(text, XML_ERROR_INVALID_TOKEN,
4307 "Invalid unknown encoding not faulted");
4308 }
4309 END_TEST
4310
START_TEST(test_unknown_encoding_invalid_high)4311 START_TEST(test_unknown_encoding_invalid_high) {
4312 const char *text = "<?xml version='1.0' encoding='invalid-high'?>\n"
4313 "<doc>Hello, world</doc>";
4314
4315 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4316 expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
4317 "Invalid unknown encoding not faulted");
4318 }
4319 END_TEST
4320
START_TEST(test_unknown_encoding_invalid_attr_value)4321 START_TEST(test_unknown_encoding_invalid_attr_value) {
4322 const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4323 "<doc attr='\xff\x30'/>";
4324
4325 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4326 expect_failure(text, XML_ERROR_INVALID_TOKEN,
4327 "Invalid attribute valid not faulted");
4328 }
4329 END_TEST
4330
4331 /* Test an external entity parser set to use latin-1 detects UTF-16
4332 * BOMs correctly.
4333 */
4334 /* Test that UTF-16 BOM does not select UTF-16 given explicit encoding */
START_TEST(test_ext_entity_latin1_utf16le_bom)4335 START_TEST(test_ext_entity_latin1_utf16le_bom) {
4336 const char *text = "<!DOCTYPE doc [\n"
4337 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4338 "]>\n"
4339 "<doc>&en;</doc>";
4340 ExtTest2 test_data
4341 = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */
4342 /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn,
4343 * 0x4c = L and 0x20 is a space
4344 */
4345 "\xff\xfe\x4c\x20", 4, XCS("iso-8859-1"), NULL};
4346 #ifdef XML_UNICODE
4347 const XML_Char *expected = XCS("\x00ff\x00feL ");
4348 #else
4349 /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */
4350 const XML_Char *expected = XCS("\xc3\xbf\xc3\xbeL ");
4351 #endif
4352 CharData storage;
4353
4354 CharData_Init(&storage);
4355 test_data.storage = &storage;
4356 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4357 XML_SetUserData(g_parser, &test_data);
4358 XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4359 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4360 == XML_STATUS_ERROR)
4361 xml_failure(g_parser);
4362 CharData_CheckXMLChars(&storage, expected);
4363 }
4364 END_TEST
4365
START_TEST(test_ext_entity_latin1_utf16be_bom)4366 START_TEST(test_ext_entity_latin1_utf16be_bom) {
4367 const char *text = "<!DOCTYPE doc [\n"
4368 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4369 "]>\n"
4370 "<doc>&en;</doc>";
4371 ExtTest2 test_data
4372 = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */
4373 /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn,
4374 * 0x4c = L and 0x20 is a space
4375 */
4376 "\xfe\xff\x20\x4c", 4, XCS("iso-8859-1"), NULL};
4377 #ifdef XML_UNICODE
4378 const XML_Char *expected = XCS("\x00fe\x00ff L");
4379 #else
4380 /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */
4381 const XML_Char *expected = XCS("\xc3\xbe\xc3\xbf L");
4382 #endif
4383 CharData storage;
4384
4385 CharData_Init(&storage);
4386 test_data.storage = &storage;
4387 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4388 XML_SetUserData(g_parser, &test_data);
4389 XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4390 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4391 == XML_STATUS_ERROR)
4392 xml_failure(g_parser);
4393 CharData_CheckXMLChars(&storage, expected);
4394 }
4395 END_TEST
4396
4397 /* Parsing the full buffer rather than a byte at a time makes a
4398 * difference to the encoding scanning code, so repeat the above tests
4399 * without breaking them down by byte.
4400 */
START_TEST(test_ext_entity_latin1_utf16le_bom2)4401 START_TEST(test_ext_entity_latin1_utf16le_bom2) {
4402 const char *text = "<!DOCTYPE doc [\n"
4403 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4404 "]>\n"
4405 "<doc>&en;</doc>";
4406 ExtTest2 test_data
4407 = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */
4408 /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn,
4409 * 0x4c = L and 0x20 is a space
4410 */
4411 "\xff\xfe\x4c\x20", 4, XCS("iso-8859-1"), NULL};
4412 #ifdef XML_UNICODE
4413 const XML_Char *expected = XCS("\x00ff\x00feL ");
4414 #else
4415 /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */
4416 const XML_Char *expected = XCS("\xc3\xbf\xc3\xbeL ");
4417 #endif
4418 CharData storage;
4419
4420 CharData_Init(&storage);
4421 test_data.storage = &storage;
4422 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4423 XML_SetUserData(g_parser, &test_data);
4424 XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4425 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4426 == XML_STATUS_ERROR)
4427 xml_failure(g_parser);
4428 CharData_CheckXMLChars(&storage, expected);
4429 }
4430 END_TEST
4431
START_TEST(test_ext_entity_latin1_utf16be_bom2)4432 START_TEST(test_ext_entity_latin1_utf16be_bom2) {
4433 const char *text = "<!DOCTYPE doc [\n"
4434 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4435 "]>\n"
4436 "<doc>&en;</doc>";
4437 ExtTest2 test_data
4438 = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */
4439 /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn,
4440 * 0x4c = L and 0x20 is a space
4441 */
4442 "\xfe\xff\x20\x4c", 4, XCS("iso-8859-1"), NULL};
4443 #ifdef XML_UNICODE
4444 const XML_Char *expected = XCS("\x00fe\x00ff L");
4445 #else
4446 /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */
4447 const XML_Char *expected = "\xc3\xbe\xc3\xbf L";
4448 #endif
4449 CharData storage;
4450
4451 CharData_Init(&storage);
4452 test_data.storage = &storage;
4453 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4454 XML_SetUserData(g_parser, &test_data);
4455 XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4456 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4457 == XML_STATUS_ERROR)
4458 xml_failure(g_parser);
4459 CharData_CheckXMLChars(&storage, expected);
4460 }
4461 END_TEST
4462
4463 /* Test little-endian UTF-16 given an explicit big-endian encoding */
START_TEST(test_ext_entity_utf16_be)4464 START_TEST(test_ext_entity_utf16_be) {
4465 const char *text = "<!DOCTYPE doc [\n"
4466 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4467 "]>\n"
4468 "<doc>&en;</doc>";
4469 ExtTest2 test_data = {"<\0e\0/\0>\0", 8, XCS("utf-16be"), NULL};
4470 #ifdef XML_UNICODE
4471 const XML_Char *expected = XCS("\x3c00\x6500\x2f00\x3e00");
4472 #else
4473 const XML_Char *expected = XCS("\xe3\xb0\x80" /* U+3C00 */
4474 "\xe6\x94\x80" /* U+6500 */
4475 "\xe2\xbc\x80" /* U+2F00 */
4476 "\xe3\xb8\x80"); /* U+3E00 */
4477 #endif
4478 CharData storage;
4479
4480 CharData_Init(&storage);
4481 test_data.storage = &storage;
4482 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4483 XML_SetUserData(g_parser, &test_data);
4484 XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4485 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4486 == XML_STATUS_ERROR)
4487 xml_failure(g_parser);
4488 CharData_CheckXMLChars(&storage, expected);
4489 }
4490 END_TEST
4491
4492 /* Test big-endian UTF-16 given an explicit little-endian encoding */
START_TEST(test_ext_entity_utf16_le)4493 START_TEST(test_ext_entity_utf16_le) {
4494 const char *text = "<!DOCTYPE doc [\n"
4495 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4496 "]>\n"
4497 "<doc>&en;</doc>";
4498 ExtTest2 test_data = {"\0<\0e\0/\0>", 8, XCS("utf-16le"), NULL};
4499 #ifdef XML_UNICODE
4500 const XML_Char *expected = XCS("\x3c00\x6500\x2f00\x3e00");
4501 #else
4502 const XML_Char *expected = XCS("\xe3\xb0\x80" /* U+3C00 */
4503 "\xe6\x94\x80" /* U+6500 */
4504 "\xe2\xbc\x80" /* U+2F00 */
4505 "\xe3\xb8\x80"); /* U+3E00 */
4506 #endif
4507 CharData storage;
4508
4509 CharData_Init(&storage);
4510 test_data.storage = &storage;
4511 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4512 XML_SetUserData(g_parser, &test_data);
4513 XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4514 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4515 == XML_STATUS_ERROR)
4516 xml_failure(g_parser);
4517 CharData_CheckXMLChars(&storage, expected);
4518 }
4519 END_TEST
4520
4521 /* Test little-endian UTF-16 given no explicit encoding.
4522 * The existing default encoding (UTF-8) is assumed to hold without a
4523 * BOM to contradict it, so the entity value will in fact provoke an
4524 * error because 0x00 is not a valid XML character. We parse the
4525 * whole buffer in one go rather than feeding it in byte by byte to
4526 * exercise different code paths in the initial scanning routines.
4527 */
START_TEST(test_ext_entity_utf16_unknown)4528 START_TEST(test_ext_entity_utf16_unknown) {
4529 const char *text = "<!DOCTYPE doc [\n"
4530 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4531 "]>\n"
4532 "<doc>&en;</doc>";
4533 ExtFaults2 test_data
4534 = {"a\0b\0c\0", 6, "Invalid character in entity not faulted", NULL,
4535 XML_ERROR_INVALID_TOKEN};
4536
4537 XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter2);
4538 XML_SetUserData(g_parser, &test_data);
4539 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
4540 "Invalid character should not have been accepted");
4541 }
4542 END_TEST
4543
4544 /* Test not-quite-UTF-8 BOM (0xEF 0xBB 0xBF) */
START_TEST(test_ext_entity_utf8_non_bom)4545 START_TEST(test_ext_entity_utf8_non_bom) {
4546 const char *text = "<!DOCTYPE doc [\n"
4547 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4548 "]>\n"
4549 "<doc>&en;</doc>";
4550 ExtTest2 test_data
4551 = {"\xef\xbb\x80", /* Arabic letter DAD medial form, U+FEC0 */
4552 3, NULL, NULL};
4553 #ifdef XML_UNICODE
4554 const XML_Char *expected = XCS("\xfec0");
4555 #else
4556 const XML_Char *expected = XCS("\xef\xbb\x80");
4557 #endif
4558 CharData storage;
4559
4560 CharData_Init(&storage);
4561 test_data.storage = &storage;
4562 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4563 XML_SetUserData(g_parser, &test_data);
4564 XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4565 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4566 == XML_STATUS_ERROR)
4567 xml_failure(g_parser);
4568 CharData_CheckXMLChars(&storage, expected);
4569 }
4570 END_TEST
4571
4572 /* Test that UTF-8 in a CDATA section is correctly passed through */
START_TEST(test_utf8_in_cdata_section)4573 START_TEST(test_utf8_in_cdata_section) {
4574 const char *text = "<doc><![CDATA[one \xc3\xa9 two]]></doc>";
4575 #ifdef XML_UNICODE
4576 const XML_Char *expected = XCS("one \x00e9 two");
4577 #else
4578 const XML_Char *expected = XCS("one \xc3\xa9 two");
4579 #endif
4580
4581 run_character_check(text, expected);
4582 }
4583 END_TEST
4584
/* Test that UTF-8 next to a lone ']' in a CDATA section is passed through */
START_TEST(test_utf8_in_cdata_section_2)4586 START_TEST(test_utf8_in_cdata_section_2) {
4587 const char *text = "<doc><![CDATA[\xc3\xa9]\xc3\xa9two]]></doc>";
4588 #ifdef XML_UNICODE
4589 const XML_Char *expected = XCS("\x00e9]\x00e9two");
4590 #else
4591 const XML_Char *expected = XCS("\xc3\xa9]\xc3\xa9two");
4592 #endif
4593
4594 run_character_check(text, expected);
4595 }
4596 END_TEST
4597
START_TEST(test_utf8_in_start_tags)4598 START_TEST(test_utf8_in_start_tags) {
4599 struct test_case {
4600 bool goodName;
4601 bool goodNameStart;
4602 const char *tagName;
4603 };
4604
4605 // The idea with the tests below is this:
4606 // We want to cover 1-, 2- and 3-byte sequences, 4-byte sequences
4607 // go to isNever and are hence not a concern.
4608 //
4609 // We start with a character that is a valid name character
4610 // (or even name-start character, see XML 1.0r4 spec) and then we flip
4611 // single bits at places where (1) the result leaves the UTF-8 encoding space
4612 // and (2) we stay in the same n-byte sequence family.
4613 //
4614 // The flipped bits are highlighted in angle brackets in comments,
4615 // e.g. "[<1>011 1001]" means we had [0011 1001] but we now flipped
4616 // the most significant bit to 1 to leave UTF-8 encoding space.
4617 struct test_case cases[] = {
4618 // 1-byte UTF-8: [0xxx xxxx]
4619 {true, true, "\x3A"}, // [0011 1010] = ASCII colon ':'
4620 {false, false, "\xBA"}, // [<1>011 1010]
4621 {true, false, "\x39"}, // [0011 1001] = ASCII nine '9'
4622 {false, false, "\xB9"}, // [<1>011 1001]
4623
4624 // 2-byte UTF-8: [110x xxxx] [10xx xxxx]
4625 {true, true, "\xDB\xA5"}, // [1101 1011] [1010 0101] =
4626 // Arabic small waw U+06E5
4627 {false, false, "\x9B\xA5"}, // [1<0>01 1011] [1010 0101]
4628 {false, false, "\xDB\x25"}, // [1101 1011] [<0>010 0101]
4629 {false, false, "\xDB\xE5"}, // [1101 1011] [1<1>10 0101]
4630 {true, false, "\xCC\x81"}, // [1100 1100] [1000 0001] =
4631 // combining char U+0301
4632 {false, false, "\x8C\x81"}, // [1<0>00 1100] [1000 0001]
4633 {false, false, "\xCC\x01"}, // [1100 1100] [<0>000 0001]
4634 {false, false, "\xCC\xC1"}, // [1100 1100] [1<1>00 0001]
4635
4636 // 3-byte UTF-8: [1110 xxxx] [10xx xxxx] [10xxxxxx]
4637 {true, true, "\xE0\xA4\x85"}, // [1110 0000] [1010 0100] [1000 0101] =
4638 // Devanagari Letter A U+0905
4639 {false, false, "\xA0\xA4\x85"}, // [1<0>10 0000] [1010 0100] [1000 0101]
4640 {false, false, "\xE0\x24\x85"}, // [1110 0000] [<0>010 0100] [1000 0101]
4641 {false, false, "\xE0\xE4\x85"}, // [1110 0000] [1<1>10 0100] [1000 0101]
4642 {false, false, "\xE0\xA4\x05"}, // [1110 0000] [1010 0100] [<0>000 0101]
4643 {false, false, "\xE0\xA4\xC5"}, // [1110 0000] [1010 0100] [1<1>00 0101]
4644 {true, false, "\xE0\xA4\x81"}, // [1110 0000] [1010 0100] [1000 0001] =
4645 // combining char U+0901
4646 {false, false, "\xA0\xA4\x81"}, // [1<0>10 0000] [1010 0100] [1000 0001]
4647 {false, false, "\xE0\x24\x81"}, // [1110 0000] [<0>010 0100] [1000 0001]
4648 {false, false, "\xE0\xE4\x81"}, // [1110 0000] [1<1>10 0100] [1000 0001]
4649 {false, false, "\xE0\xA4\x01"}, // [1110 0000] [1010 0100] [<0>000 0001]
4650 {false, false, "\xE0\xA4\xC1"}, // [1110 0000] [1010 0100] [1<1>00 0001]
4651 };
4652 const bool atNameStart[] = {true, false};
4653
4654 size_t i = 0;
4655 char doc[1024];
4656 size_t failCount = 0;
4657
4658 // we need all the bytes to be parsed, but we don't want the errors that can
4659 // trigger on isFinal=XML_TRUE, so we skip the test if the heuristic is on.
4660 if (g_reparseDeferralEnabledDefault) {
4661 return;
4662 }
4663
4664 for (; i < sizeof(cases) / sizeof(cases[0]); i++) {
4665 size_t j = 0;
4666 for (; j < sizeof(atNameStart) / sizeof(atNameStart[0]); j++) {
4667 const bool expectedSuccess
4668 = atNameStart[j] ? cases[i].goodNameStart : cases[i].goodName;
4669 snprintf(doc, sizeof(doc), "<%s%s><!--", atNameStart[j] ? "" : "a",
4670 cases[i].tagName);
4671 XML_Parser parser = XML_ParserCreate(NULL);
4672
4673 const enum XML_Status status = _XML_Parse_SINGLE_BYTES(
4674 parser, doc, (int)strlen(doc), /*isFinal=*/XML_FALSE);
4675
4676 bool success = true;
4677 if ((status == XML_STATUS_OK) != expectedSuccess) {
4678 success = false;
4679 }
4680 if ((status == XML_STATUS_ERROR)
4681 && (XML_GetErrorCode(parser) != XML_ERROR_INVALID_TOKEN)) {
4682 success = false;
4683 }
4684
4685 if (! success) {
4686 fprintf(
4687 stderr,
4688 "FAIL case %2u (%sat name start, %u-byte sequence, error code %d)\n",
4689 (unsigned)i + 1u, atNameStart[j] ? " " : "not ",
4690 (unsigned)strlen(cases[i].tagName), XML_GetErrorCode(parser));
4691 failCount++;
4692 }
4693
4694 XML_ParserFree(parser);
4695 }
4696 }
4697
4698 if (failCount > 0) {
4699 fail("UTF-8 regression detected");
4700 }
4701 }
4702 END_TEST
4703
4704 /* Test trailing spaces in elements are accepted */
START_TEST(test_trailing_spaces_in_elements)4705 START_TEST(test_trailing_spaces_in_elements) {
4706 const char *text = "<doc >Hi</doc >";
4707 const XML_Char *expected = XCS("doc/doc");
4708 CharData storage;
4709
4710 CharData_Init(&storage);
4711 XML_SetElementHandler(g_parser, record_element_start_handler,
4712 record_element_end_handler);
4713 XML_SetUserData(g_parser, &storage);
4714 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4715 == XML_STATUS_ERROR)
4716 xml_failure(g_parser);
4717 CharData_CheckXMLChars(&storage, expected);
4718 }
4719 END_TEST
4720
START_TEST(test_utf16_attribute)4721 START_TEST(test_utf16_attribute) {
4722 const char text[] =
4723 /* <d {KHO KHWAI}{CHO CHAN}='a'/>
4724 * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8
4725 * and {CHO CHAN} = U+0E08 = 0xe0 0xb8 0x88 in UTF-8
4726 */
4727 "<\0d\0 \0\x04\x0e\x08\x0e=\0'\0a\0'\0/\0>\0";
4728 const XML_Char *expected = XCS("a");
4729 CharData storage;
4730
4731 CharData_Init(&storage);
4732 XML_SetStartElementHandler(g_parser, accumulate_attribute);
4733 XML_SetUserData(g_parser, &storage);
4734 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4735 == XML_STATUS_ERROR)
4736 xml_failure(g_parser);
4737 CharData_CheckXMLChars(&storage, expected);
4738 }
4739 END_TEST
4740
START_TEST(test_utf16_second_attr)4741 START_TEST(test_utf16_second_attr) {
4742 /* <d a='1' {KHO KHWAI}{CHO CHAN}='2'/>
4743 * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8
4744 * and {CHO CHAN} = U+0E08 = 0xe0 0xb8 0x88 in UTF-8
4745 */
4746 const char text[] = "<\0d\0 \0a\0=\0'\0\x31\0'\0 \0"
4747 "\x04\x0e\x08\x0e=\0'\0\x32\0'\0/\0>\0";
4748 const XML_Char *expected = XCS("1");
4749 CharData storage;
4750
4751 CharData_Init(&storage);
4752 XML_SetStartElementHandler(g_parser, accumulate_attribute);
4753 XML_SetUserData(g_parser, &storage);
4754 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4755 == XML_STATUS_ERROR)
4756 xml_failure(g_parser);
4757 CharData_CheckXMLChars(&storage, expected);
4758 }
4759 END_TEST
4760
START_TEST(test_attr_after_solidus)4761 START_TEST(test_attr_after_solidus) {
4762 const char *text = "<doc attr1='a' / attr2='b'>";
4763
4764 expect_failure(text, XML_ERROR_INVALID_TOKEN, "Misplaced / not faulted");
4765 }
4766 END_TEST
4767
START_TEST(test_utf16_pe)4768 START_TEST(test_utf16_pe) {
4769 /* <!DOCTYPE doc [
4770 * <!ENTITY % {KHO KHWAI}{CHO CHAN} '<!ELEMENT doc (#PCDATA)>'>
4771 * %{KHO KHWAI}{CHO CHAN};
4772 * ]>
4773 * <doc></doc>
4774 *
4775 * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8
4776 * and {CHO CHAN} = U+0E08 = 0xe0 0xb8 0x88 in UTF-8
4777 */
4778 const char text[] = "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0o\0c\0 \0[\0\n"
4779 "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0%\0 \x0e\x04\x0e\x08\0 "
4780 "\0'\0<\0!\0E\0L\0E\0M\0E\0N\0T\0 "
4781 "\0d\0o\0c\0 \0(\0#\0P\0C\0D\0A\0T\0A\0)\0>\0'\0>\0\n"
4782 "\0%\x0e\x04\x0e\x08\0;\0\n"
4783 "\0]\0>\0\n"
4784 "\0<\0d\0o\0c\0>\0<\0/\0d\0o\0c\0>";
4785 #ifdef XML_UNICODE
4786 const XML_Char *expected = XCS("\x0e04\x0e08=<!ELEMENT doc (#PCDATA)>\n");
4787 #else
4788 const XML_Char *expected
4789 = XCS("\xe0\xb8\x84\xe0\xb8\x88=<!ELEMENT doc (#PCDATA)>\n");
4790 #endif
4791 CharData storage;
4792
4793 CharData_Init(&storage);
4794 XML_SetUserData(g_parser, &storage);
4795 XML_SetEntityDeclHandler(g_parser, accumulate_entity_decl);
4796 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4797 == XML_STATUS_ERROR)
4798 xml_failure(g_parser);
4799 CharData_CheckXMLChars(&storage, expected);
4800 }
4801 END_TEST
4802
/* Test that invalid attribute description keywords are rejected */
START_TEST(test_bad_attr_desc_keyword)4804 START_TEST(test_bad_attr_desc_keyword) {
4805 const char *text = "<!DOCTYPE doc [\n"
4806 " <!ATTLIST doc attr CDATA #!IMPLIED>\n"
4807 "]>\n"
4808 "<doc />";
4809
4810 expect_failure(text, XML_ERROR_INVALID_TOKEN,
4811 "Bad keyword !IMPLIED not faulted");
4812 }
4813 END_TEST
4814
/* Test that an invalid attribute description keyword consisting of
 * UTF-16 characters whose top bytes are non-zero is correctly
 * faulted
 */
START_TEST(test_bad_attr_desc_keyword_utf16)4819 START_TEST(test_bad_attr_desc_keyword_utf16) {
4820 /* <!DOCTYPE d [
4821 * <!ATTLIST d a CDATA #{KHO KHWAI}{CHO CHAN}>
4822 * ]><d/>
4823 *
4824 * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8
4825 * and {CHO CHAN} = U+0E08 = 0xe0 0xb8 0x88 in UTF-8
4826 */
4827 const char text[]
4828 = "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 \0[\0\n"
4829 "\0<\0!\0A\0T\0T\0L\0I\0S\0T\0 \0d\0 \0a\0 \0C\0D\0A\0T\0A\0 "
4830 "\0#\x0e\x04\x0e\x08\0>\0\n"
4831 "\0]\0>\0<\0d\0/\0>";
4832
4833 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4834 != XML_STATUS_ERROR)
4835 fail("Invalid UTF16 attribute keyword not faulted");
4836 if (XML_GetErrorCode(g_parser) != XML_ERROR_SYNTAX)
4837 xml_failure(g_parser);
4838 }
4839 END_TEST
4840
4841 /* Test that invalid syntax in a <!DOCTYPE> is rejected. Do this
4842 * using prefix-encoding (see above) to trigger specific code paths
4843 */
START_TEST(test_bad_doctype)4844 START_TEST(test_bad_doctype) {
4845 const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4846 "<!DOCTYPE doc [ \x80\x44 ]><doc/>";
4847
4848 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4849 expect_failure(text, XML_ERROR_SYNTAX,
4850 "Invalid bytes in DOCTYPE not faulted");
4851 }
4852 END_TEST
4853
START_TEST(test_bad_doctype_utf8)4854 START_TEST(test_bad_doctype_utf8) {
4855 const char *text = "<!DOCTYPE \xDB\x25"
4856 "doc><doc/>"; // [1101 1011] [<0>010 0101]
4857 expect_failure(text, XML_ERROR_INVALID_TOKEN,
4858 "Invalid UTF-8 in DOCTYPE not faulted");
4859 }
4860 END_TEST
4861
START_TEST(test_bad_doctype_utf16)4862 START_TEST(test_bad_doctype_utf16) {
4863 const char text[] =
4864 /* <!DOCTYPE doc [ \x06f2 ]><doc/>
4865 *
4866 * U+06F2 = EXTENDED ARABIC-INDIC DIGIT TWO, a valid number
4867 * (name character) but not a valid letter (name start character)
4868 */
4869 "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0o\0c\0 \0[\0 "
4870 "\x06\xf2"
4871 "\0 \0]\0>\0<\0d\0o\0c\0/\0>";
4872
4873 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4874 != XML_STATUS_ERROR)
4875 fail("Invalid bytes in DOCTYPE not faulted");
4876 if (XML_GetErrorCode(g_parser) != XML_ERROR_SYNTAX)
4877 xml_failure(g_parser);
4878 }
4879 END_TEST
4880
START_TEST(test_bad_doctype_plus)4881 START_TEST(test_bad_doctype_plus) {
4882 const char *text = "<!DOCTYPE 1+ [ <!ENTITY foo 'bar'> ]>\n"
4883 "<1+>&foo;</1+>";
4884
4885 expect_failure(text, XML_ERROR_INVALID_TOKEN,
4886 "'+' in document name not faulted");
4887 }
4888 END_TEST
4889
START_TEST(test_bad_doctype_star)4890 START_TEST(test_bad_doctype_star) {
4891 const char *text = "<!DOCTYPE 1* [ <!ENTITY foo 'bar'> ]>\n"
4892 "<1*>&foo;</1*>";
4893
4894 expect_failure(text, XML_ERROR_INVALID_TOKEN,
4895 "'*' in document name not faulted");
4896 }
4897 END_TEST
4898
START_TEST(test_bad_doctype_query)4899 START_TEST(test_bad_doctype_query) {
4900 const char *text = "<!DOCTYPE 1? [ <!ENTITY foo 'bar'> ]>\n"
4901 "<1?>&foo;</1?>";
4902
4903 expect_failure(text, XML_ERROR_INVALID_TOKEN,
4904 "'?' in document name not faulted");
4905 }
4906 END_TEST
4907
START_TEST(test_unknown_encoding_bad_ignore)4908 START_TEST(test_unknown_encoding_bad_ignore) {
4909 const char *text = "<?xml version='1.0' encoding='prefix-conv'?>"
4910 "<!DOCTYPE doc SYSTEM 'foo'>"
4911 "<doc><e>&entity;</e></doc>";
4912 ExtFaults fault = {"<![IGNORE[<!ELEMENT \xffG (#PCDATA)*>]]>",
4913 "Invalid character not faulted", XCS("prefix-conv"),
4914 XML_ERROR_INVALID_TOKEN};
4915
4916 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4917 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
4918 XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
4919 XML_SetUserData(g_parser, &fault);
4920 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
4921 "Bad IGNORE section with unknown encoding not failed");
4922 }
4923 END_TEST
4924
START_TEST(test_entity_in_utf16_be_attr)4925 START_TEST(test_entity_in_utf16_be_attr) {
4926 const char text[] =
4927 /* <e a='ä ä'></e> */
4928 "\0<\0e\0 \0a\0=\0'\0&\0#\0\x32\0\x32\0\x38\0;\0 "
4929 "\0&\0#\0x\0\x30\0\x30\0E\0\x34\0;\0'\0>\0<\0/\0e\0>";
4930 #ifdef XML_UNICODE
4931 const XML_Char *expected = XCS("\x00e4 \x00e4");
4932 #else
4933 const XML_Char *expected = XCS("\xc3\xa4 \xc3\xa4");
4934 #endif
4935 CharData storage;
4936
4937 CharData_Init(&storage);
4938 XML_SetUserData(g_parser, &storage);
4939 XML_SetStartElementHandler(g_parser, accumulate_attribute);
4940 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4941 == XML_STATUS_ERROR)
4942 xml_failure(g_parser);
4943 CharData_CheckXMLChars(&storage, expected);
4944 }
4945 END_TEST
4946
START_TEST(test_entity_in_utf16_le_attr)4947 START_TEST(test_entity_in_utf16_le_attr) {
4948 const char text[] =
4949 /* <e a='ä ä'></e> */
4950 "<\0e\0 \0a\0=\0'\0&\0#\0\x32\0\x32\0\x38\0;\0 \0"
4951 "&\0#\0x\0\x30\0\x30\0E\0\x34\0;\0'\0>\0<\0/\0e\0>\0";
4952 #ifdef XML_UNICODE
4953 const XML_Char *expected = XCS("\x00e4 \x00e4");
4954 #else
4955 const XML_Char *expected = XCS("\xc3\xa4 \xc3\xa4");
4956 #endif
4957 CharData storage;
4958
4959 CharData_Init(&storage);
4960 XML_SetUserData(g_parser, &storage);
4961 XML_SetStartElementHandler(g_parser, accumulate_attribute);
4962 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4963 == XML_STATUS_ERROR)
4964 xml_failure(g_parser);
4965 CharData_CheckXMLChars(&storage, expected);
4966 }
4967 END_TEST
4968
START_TEST(test_entity_public_utf16_be)4969 START_TEST(test_entity_public_utf16_be) {
4970 const char text[] =
4971 /* <!DOCTYPE d [ */
4972 "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 \0[\0\n"
4973 /* <!ENTITY % e PUBLIC 'foo' 'bar.ent'> */
4974 "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0%\0 \0e\0 \0P\0U\0B\0L\0I\0C\0 "
4975 "\0'\0f\0o\0o\0'\0 \0'\0b\0a\0r\0.\0e\0n\0t\0'\0>\0\n"
4976 /* %e; */
4977 "\0%\0e\0;\0\n"
4978 /* ]> */
4979 "\0]\0>\0\n"
4980 /* <d>&j;</d> */
4981 "\0<\0d\0>\0&\0j\0;\0<\0/\0d\0>";
4982 ExtTest2 test_data
4983 = {/* <!ENTITY j 'baz'> */
4984 "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0j\0 \0'\0b\0a\0z\0'\0>", 34, NULL, NULL};
4985 const XML_Char *expected = XCS("baz");
4986 CharData storage;
4987
4988 CharData_Init(&storage);
4989 test_data.storage = &storage;
4990 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
4991 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4992 XML_SetUserData(g_parser, &test_data);
4993 XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4994 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4995 == XML_STATUS_ERROR)
4996 xml_failure(g_parser);
4997 CharData_CheckXMLChars(&storage, expected);
4998 }
4999 END_TEST
5000
START_TEST(test_entity_public_utf16_le)5001 START_TEST(test_entity_public_utf16_le) {
5002 const char text[] =
5003 /* <!DOCTYPE d [ */
5004 "<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 \0[\0\n\0"
5005 /* <!ENTITY % e PUBLIC 'foo' 'bar.ent'> */
5006 "<\0!\0E\0N\0T\0I\0T\0Y\0 \0%\0 \0e\0 \0P\0U\0B\0L\0I\0C\0 \0"
5007 "'\0f\0o\0o\0'\0 \0'\0b\0a\0r\0.\0e\0n\0t\0'\0>\0\n\0"
5008 /* %e; */
5009 "%\0e\0;\0\n\0"
5010 /* ]> */
5011 "]\0>\0\n\0"
5012 /* <d>&j;</d> */
5013 "<\0d\0>\0&\0j\0;\0<\0/\0d\0>\0";
5014 ExtTest2 test_data
5015 = {/* <!ENTITY j 'baz'> */
5016 "<\0!\0E\0N\0T\0I\0T\0Y\0 \0j\0 \0'\0b\0a\0z\0'\0>\0", 34, NULL, NULL};
5017 const XML_Char *expected = XCS("baz");
5018 CharData storage;
5019
5020 CharData_Init(&storage);
5021 test_data.storage = &storage;
5022 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
5023 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
5024 XML_SetUserData(g_parser, &test_data);
5025 XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
5026 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
5027 == XML_STATUS_ERROR)
5028 xml_failure(g_parser);
5029 CharData_CheckXMLChars(&storage, expected);
5030 }
5031 END_TEST
5032
5033 /* Test that a doctype with neither an internal nor external subset is
5034 * faulted
5035 */
START_TEST(test_short_doctype)5036 START_TEST(test_short_doctype) {
5037 const char *text = "<!DOCTYPE doc></doc>";
5038 expect_failure(text, XML_ERROR_INVALID_TOKEN,
5039 "DOCTYPE without subset not rejected");
5040 }
5041 END_TEST
5042
START_TEST(test_short_doctype_2)5043 START_TEST(test_short_doctype_2) {
5044 const char *text = "<!DOCTYPE doc PUBLIC></doc>";
5045 expect_failure(text, XML_ERROR_SYNTAX,
5046 "DOCTYPE without Public ID not rejected");
5047 }
5048 END_TEST
5049
START_TEST(test_short_doctype_3)5050 START_TEST(test_short_doctype_3) {
5051 const char *text = "<!DOCTYPE doc SYSTEM></doc>";
5052 expect_failure(text, XML_ERROR_SYNTAX,
5053 "DOCTYPE without System ID not rejected");
5054 }
5055 END_TEST
5056
START_TEST(test_long_doctype)5057 START_TEST(test_long_doctype) {
5058 const char *text = "<!DOCTYPE doc PUBLIC 'foo' 'bar' 'baz'></doc>";
5059 expect_failure(text, XML_ERROR_SYNTAX, "DOCTYPE with extra ID not rejected");
5060 }
5061 END_TEST
5062
START_TEST(test_bad_entity)5063 START_TEST(test_bad_entity) {
5064 const char *text = "<!DOCTYPE doc [\n"
5065 " <!ENTITY foo PUBLIC>\n"
5066 "]>\n"
5067 "<doc/>";
5068 expect_failure(text, XML_ERROR_SYNTAX,
5069 "ENTITY without Public ID is not rejected");
5070 }
5071 END_TEST
5072
5073 /* Test unquoted value is faulted */
START_TEST(test_bad_entity_2)5074 START_TEST(test_bad_entity_2) {
5075 const char *text = "<!DOCTYPE doc [\n"
5076 " <!ENTITY % foo bar>\n"
5077 "]>\n"
5078 "<doc/>";
5079 expect_failure(text, XML_ERROR_SYNTAX,
5080 "ENTITY without Public ID is not rejected");
5081 }
5082 END_TEST
5083
START_TEST(test_bad_entity_3)5084 START_TEST(test_bad_entity_3) {
5085 const char *text = "<!DOCTYPE doc [\n"
5086 " <!ENTITY % foo PUBLIC>\n"
5087 "]>\n"
5088 "<doc/>";
5089 expect_failure(text, XML_ERROR_SYNTAX,
5090 "Parameter ENTITY without Public ID is not rejected");
5091 }
5092 END_TEST
5093
START_TEST(test_bad_entity_4)5094 START_TEST(test_bad_entity_4) {
5095 const char *text = "<!DOCTYPE doc [\n"
5096 " <!ENTITY % foo SYSTEM>\n"
5097 "]>\n"
5098 "<doc/>";
5099 expect_failure(text, XML_ERROR_SYNTAX,
5100 "Parameter ENTITY without Public ID is not rejected");
5101 }
5102 END_TEST
5103
START_TEST(test_bad_notation)5104 START_TEST(test_bad_notation) {
5105 const char *text = "<!DOCTYPE doc [\n"
5106 " <!NOTATION n SYSTEM>\n"
5107 "]>\n"
5108 "<doc/>";
5109 expect_failure(text, XML_ERROR_SYNTAX,
5110 "Notation without System ID is not rejected");
5111 }
5112 END_TEST
5113
5114 /* Test for issue #11, wrongly suppressed default handler */
START_TEST(test_default_doctype_handler)5115 START_TEST(test_default_doctype_handler) {
5116 const char *text = "<!DOCTYPE doc PUBLIC 'pubname' 'test.dtd' [\n"
5117 " <!ENTITY foo 'bar'>\n"
5118 "]>\n"
5119 "<doc>&foo;</doc>";
5120 DefaultCheck test_data[] = {{XCS("'pubname'"), 9, XML_FALSE},
5121 {XCS("'test.dtd'"), 10, XML_FALSE},
5122 {NULL, 0, XML_FALSE}};
5123 int i;
5124
5125 XML_SetUserData(g_parser, &test_data);
5126 XML_SetDefaultHandler(g_parser, checking_default_handler);
5127 XML_SetEntityDeclHandler(g_parser, dummy_entity_decl_handler);
5128 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
5129 == XML_STATUS_ERROR)
5130 xml_failure(g_parser);
5131 for (i = 0; test_data[i].expected != NULL; i++)
5132 if (! test_data[i].seen)
5133 fail("Default handler not run for public !DOCTYPE");
5134 }
5135 END_TEST
5136
START_TEST(test_empty_element_abort)5137 START_TEST(test_empty_element_abort) {
5138 const char *text = "<abort/>";
5139
5140 XML_SetStartElementHandler(g_parser, start_element_suspender);
5141 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
5142 != XML_STATUS_ERROR)
5143 fail("Expected to error on abort");
5144 }
5145 END_TEST
5146
5147 /* Regression test for GH issue #612: unfinished m_declAttributeType
5148 * allocation in ->m_tempPool can corrupt following allocation.
5149 */
START_TEST(test_pool_integrity_with_unfinished_attr)5150 START_TEST(test_pool_integrity_with_unfinished_attr) {
5151 const char *text = "<?xml version='1.0' encoding='UTF-8'?>\n"
5152 "<!DOCTYPE foo [\n"
5153 "<!ELEMENT foo ANY>\n"
5154 "<!ENTITY % entp SYSTEM \"external.dtd\">\n"
5155 "%entp;\n"
5156 "]>\n"
5157 "<a></a>\n";
5158 const XML_Char *expected = XCS("COMMENT");
5159 CharData storage;
5160
5161 CharData_Init(&storage);
5162 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
5163 XML_SetExternalEntityRefHandler(g_parser, external_entity_unfinished_attlist);
5164 XML_SetAttlistDeclHandler(g_parser, dummy_attlist_decl_handler);
5165 XML_SetCommentHandler(g_parser, accumulate_comment);
5166 XML_SetUserData(g_parser, &storage);
5167 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
5168 == XML_STATUS_ERROR)
5169 xml_failure(g_parser);
5170 CharData_CheckXMLChars(&storage, expected);
5171 }
5172 END_TEST
5173
START_TEST(test_nested_entity_suspend)5174 START_TEST(test_nested_entity_suspend) {
5175 const char *const text = "<!DOCTYPE a [\n"
5176 " <!ENTITY e1 '<!--e1-->'>\n"
5177 " <!ENTITY e2 '<!--e2 head-->&e1;<!--e2 tail-->'>\n"
5178 " <!ENTITY e3 '<!--e3 head-->&e2;<!--e3 tail-->'>\n"
5179 "]>\n"
5180 "<a><!--start-->&e3;<!--end--></a>";
5181 const XML_Char *const expected = XCS("start") XCS("e3 head") XCS("e2 head")
5182 XCS("e1") XCS("e2 tail") XCS("e3 tail") XCS("end");
5183 CharData storage;
5184 CharData_Init(&storage);
5185 XML_Parser parser = XML_ParserCreate(NULL);
5186 ParserPlusStorage parserPlusStorage = {parser, &storage};
5187
5188 XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
5189 XML_SetCommentHandler(parser, accumulate_and_suspend_comment_handler);
5190 XML_SetUserData(parser, &parserPlusStorage);
5191
5192 enum XML_Status status = XML_Parse(parser, text, (int)strlen(text), XML_TRUE);
5193 while (status == XML_STATUS_SUSPENDED) {
5194 status = XML_ResumeParser(parser);
5195 }
5196 if (status != XML_STATUS_OK)
5197 xml_failure(parser);
5198
5199 CharData_CheckXMLChars(&storage, expected);
5200 XML_ParserFree(parser);
5201 }
5202 END_TEST
5203
5204 /* Regression test for quadratic parsing on large tokens */
START_TEST(test_big_tokens_take_linear_time) {
  /* Each case feeds <pre>, then fillcount * fillsize bytes of 'a', then
   * <post> to a fresh parser and times the whole parse with clock().
   * The first case provides the baseline; every later case fails if its
   * runtime divided by the baseline (integer division) reaches
   * max_slowdown.
   */
  const char *const too_slow_failure_message
      = "Compared to the baseline runtime of the first test, this test has a "
        "slowdown of more than <max_slowdown>. "
        "Please keep increasing the value by 1 until it reliably passes the "
        "test on your hardware and open a bug sharing that number with us. "
        "Thanks in advance!";
  const struct {
    const char *pre;
    const char *post;
  } text[] = {
      {"<a>", "</a>"}, // assumed good, used as baseline
      {"<b><![CDATA[ value: ", " ]]></b>"}, // CDATA, performed OK before patch
      {"<c attr='", "'></c>"}, // big attribute, used to be O(N²)
      {"<d><!-- ", " --></d>"}, // long comment, used to be O(N²)
      {"<e><", "/></e>"}, // big elem name, used to be O(N²)
  };
  const int num_cases = sizeof(text) / sizeof(text[0]);
  // For the test we need a <max_slowdown> value that is:
  // (1) big enough that the test passes reliably (avoiding flaky tests), and
  // (2) small enough that the test actually catches regressions.
  const int max_slowdown = 15;
  // filler chunk; fed fillcount times per case (100 * 4096 bytes in total)
  char aaaaaa[4096];
  const int fillsize = (int)sizeof(aaaaaa);
  const int fillcount = 100;

  memset(aaaaaa, 'a', fillsize);

  if (! g_reparseDeferralEnabledDefault) {
    return; // heuristic is disabled; we would get O(n^2) and fail.
  }
#if ! defined(__linux__)
  if (CLOCKS_PER_SEC < 100000) {
    // Skip this test if clock() doesn't have reasonably good resolution.
    // This workaround is primarily targeting Windows and FreeBSD, since
    // XSI requires the value to be 1.000.000 (10x the condition here), and
    // we want to be very sure that at least one platform in CI can catch
    // regressions (through a failing test).
    return;
  }
#endif

  // runtime of case 0; assigned at the end of the first iteration
  clock_t baseline = 0;
  for (int i = 0; i < num_cases; ++i) {
    XML_Parser parser = XML_ParserCreate(NULL);
    assert_true(parser != NULL);
    enum XML_Status status;
    set_subtest("max_slowdown=%d text=\"%saaaaaa%s\"", max_slowdown,
                text[i].pre, text[i].post);
    // the timed region starts here and covers all three parse phases
    const clock_t start = clock();

    // parse the start text
    status = _XML_Parse_SINGLE_BYTES(parser, text[i].pre,
                                     (int)strlen(text[i].pre), XML_FALSE);
    if (status != XML_STATUS_OK) {
      xml_failure(parser);
    }
    // parse lots of 'a', failing the test early if it takes too long
    for (int f = 0; f < fillcount; ++f) {
      status = _XML_Parse_SINGLE_BYTES(parser, aaaaaa, fillsize, XML_FALSE);
      if (status != XML_STATUS_OK) {
        xml_failure(parser);
      }
      // i == 0 means we're still calculating the baseline value
      if (i > 0) {
        const clock_t now = clock();
        const clock_t clocks_so_far = now - start;
        // baseline is non-zero here: it was asserted > 0 when case 0 ended
        const int slowdown = clocks_so_far / baseline;
        if (slowdown >= max_slowdown) {
          fprintf(
              stderr,
              "fill#%d: clocks_so_far=%d baseline=%d slowdown=%d max_slowdown=%d\n",
              f, (int)clocks_so_far, (int)baseline, slowdown, max_slowdown);
          fail(too_slow_failure_message);
        }
      }
    }
    // parse the end text
    status = _XML_Parse_SINGLE_BYTES(parser, text[i].post,
                                     (int)strlen(text[i].post), XML_TRUE);
    if (status != XML_STATUS_OK) {
      xml_failure(parser);
    }

    // how long did it take in total?
    const clock_t end = clock();
    const clock_t taken = end - start;
    if (i == 0) {
      assert_true(taken > 0); // just to make sure we don't div-by-0 later
      baseline = taken;
    }
    // for case 0 this is taken / taken, so the check below cannot trigger
    const int slowdown = taken / baseline;
    if (slowdown >= max_slowdown) {
      fprintf(stderr, "taken=%d baseline=%d slowdown=%d max_slowdown=%d\n",
              (int)taken, (int)baseline, slowdown, max_slowdown);
      fail(too_slow_failure_message);
    }

    XML_ParserFree(parser);
  }
}
END_TEST
5307
START_TEST(test_set_reparse_deferral)5308 START_TEST(test_set_reparse_deferral) {
5309 const char *const pre = "<d>";
5310 const char *const start = "<x attr='";
5311 const char *const end = "'></x>";
5312 char eeeeee[100];
5313 const int fillsize = (int)sizeof(eeeeee);
5314 memset(eeeeee, 'e', fillsize);
5315
5316 for (int enabled = 0; enabled <= 1; enabled += 1) {
5317 set_subtest("deferral=%d", enabled);
5318
5319 XML_Parser parser = XML_ParserCreate(NULL);
5320 assert_true(parser != NULL);
5321 assert_true(XML_SetReparseDeferralEnabled(parser, enabled));
5322 // pre-grow the buffer to avoid reparsing due to almost-fullness
5323 assert_true(XML_GetBuffer(parser, fillsize * 10103) != NULL);
5324
5325 CharData storage;
5326 CharData_Init(&storage);
5327 XML_SetUserData(parser, &storage);
5328 XML_SetStartElementHandler(parser, start_element_event_handler);
5329
5330 enum XML_Status status;
5331 // parse the start text
5332 status = XML_Parse(parser, pre, (int)strlen(pre), XML_FALSE);
5333 if (status != XML_STATUS_OK) {
5334 xml_failure(parser);
5335 }
5336 CharData_CheckXMLChars(&storage, XCS("d")); // first element should be done
5337
5338 // ..and the start of the token
5339 status = XML_Parse(parser, start, (int)strlen(start), XML_FALSE);
5340 if (status != XML_STATUS_OK) {
5341 xml_failure(parser);
5342 }
5343 CharData_CheckXMLChars(&storage, XCS("d")); // still just the first one
5344
5345 // try to parse lots of 'e', but the token isn't finished
5346 for (int c = 0; c < 100; ++c) {
5347 status = XML_Parse(parser, eeeeee, fillsize, XML_FALSE);
5348 if (status != XML_STATUS_OK) {
5349 xml_failure(parser);
5350 }
5351 }
5352 CharData_CheckXMLChars(&storage, XCS("d")); // *still* just the first one
5353
5354 // end the <x> token.
5355 status = XML_Parse(parser, end, (int)strlen(end), XML_FALSE);
5356 if (status != XML_STATUS_OK) {
5357 xml_failure(parser);
5358 }
5359
5360 if (enabled) {
5361 // In general, we may need to push more data to trigger a reparse attempt,
5362 // but in this test, the data is constructed to always require it.
5363 CharData_CheckXMLChars(&storage, XCS("d")); // or the test is incorrect
5364 // 2x the token length should suffice; the +1 covers the start and end.
5365 for (int c = 0; c < 101; ++c) {
5366 status = XML_Parse(parser, eeeeee, fillsize, XML_FALSE);
5367 if (status != XML_STATUS_OK) {
5368 xml_failure(parser);
5369 }
5370 }
5371 }
5372 CharData_CheckXMLChars(&storage, XCS("dx")); // the <x> should be done
5373
5374 XML_ParserFree(parser);
5375 }
5376 }
5377 END_TEST
5378
/* User data for element_decl_counter(). */
struct element_decl_data {
  XML_Parser parser; /* parser handed to XML_FreeContentModel() per call */
  int count;         /* incremented once per element_decl_counter() call */
};
5383
5384 static void
element_decl_counter(void * userData,const XML_Char * name,XML_Content * model)5385 element_decl_counter(void *userData, const XML_Char *name, XML_Content *model) {
5386 UNUSED_P(name);
5387 struct element_decl_data *testdata = (struct element_decl_data *)userData;
5388 testdata->count += 1;
5389 XML_FreeContentModel(testdata->parser, model);
5390 }
5391
5392 static int
external_inherited_parser(XML_Parser p,const XML_Char * context,const XML_Char * base,const XML_Char * systemId,const XML_Char * publicId)5393 external_inherited_parser(XML_Parser p, const XML_Char *context,
5394 const XML_Char *base, const XML_Char *systemId,
5395 const XML_Char *publicId) {
5396 UNUSED_P(base);
5397 UNUSED_P(systemId);
5398 UNUSED_P(publicId);
5399 const char *const pre = "<!ELEMENT document ANY>\n";
5400 const char *const start = "<!ELEMENT ";
5401 const char *const end = " ANY>\n";
5402 const char *const post = "<!ELEMENT xyz ANY>\n";
5403 const int enabled = *(int *)XML_GetUserData(p);
5404 char eeeeee[100];
5405 char spaces[100];
5406 const int fillsize = (int)sizeof(eeeeee);
5407 assert_true(fillsize == (int)sizeof(spaces));
5408 memset(eeeeee, 'e', fillsize);
5409 memset(spaces, ' ', fillsize);
5410
5411 XML_Parser parser = XML_ExternalEntityParserCreate(p, context, NULL);
5412 assert_true(parser != NULL);
5413 // pre-grow the buffer to avoid reparsing due to almost-fullness
5414 assert_true(XML_GetBuffer(parser, fillsize * 10103) != NULL);
5415
5416 struct element_decl_data testdata;
5417 testdata.parser = parser;
5418 testdata.count = 0;
5419 XML_SetUserData(parser, &testdata);
5420 XML_SetElementDeclHandler(parser, element_decl_counter);
5421
5422 enum XML_Status status;
5423 // parse the initial text
5424 status = XML_Parse(parser, pre, (int)strlen(pre), XML_FALSE);
5425 if (status != XML_STATUS_OK) {
5426 xml_failure(parser);
5427 }
5428 assert_true(testdata.count == 1); // first element should be done
5429
5430 // ..and the start of the big token
5431 status = XML_Parse(parser, start, (int)strlen(start), XML_FALSE);
5432 if (status != XML_STATUS_OK) {
5433 xml_failure(parser);
5434 }
5435 assert_true(testdata.count == 1); // still just the first one
5436
5437 // try to parse lots of 'e', but the token isn't finished
5438 for (int c = 0; c < 100; ++c) {
5439 status = XML_Parse(parser, eeeeee, fillsize, XML_FALSE);
5440 if (status != XML_STATUS_OK) {
5441 xml_failure(parser);
5442 }
5443 }
5444 assert_true(testdata.count == 1); // *still* just the first one
5445
5446 // end the big token.
5447 status = XML_Parse(parser, end, (int)strlen(end), XML_FALSE);
5448 if (status != XML_STATUS_OK) {
5449 xml_failure(parser);
5450 }
5451
5452 if (enabled) {
5453 // In general, we may need to push more data to trigger a reparse attempt,
5454 // but in this test, the data is constructed to always require it.
5455 assert_true(testdata.count == 1); // or the test is incorrect
5456 // 2x the token length should suffice; the +1 covers the start and end.
5457 for (int c = 0; c < 101; ++c) {
5458 status = XML_Parse(parser, spaces, fillsize, XML_FALSE);
5459 if (status != XML_STATUS_OK) {
5460 xml_failure(parser);
5461 }
5462 }
5463 }
5464 assert_true(testdata.count == 2); // the big token should be done
5465
5466 // parse the final text
5467 status = XML_Parse(parser, post, (int)strlen(post), XML_TRUE);
5468 if (status != XML_STATUS_OK) {
5469 xml_failure(parser);
5470 }
5471 assert_true(testdata.count == 3); // after isFinal=XML_TRUE, all must be done
5472
5473 XML_ParserFree(parser);
5474 return XML_STATUS_OK;
5475 }
5476
START_TEST(test_reparse_deferral_is_inherited)5477 START_TEST(test_reparse_deferral_is_inherited) {
5478 const char *const text
5479 = "<!DOCTYPE document SYSTEM 'something.ext'><document/>";
5480 for (int enabled = 0; enabled <= 1; ++enabled) {
5481 set_subtest("deferral=%d", enabled);
5482
5483 XML_Parser parser = XML_ParserCreate(NULL);
5484 assert_true(parser != NULL);
5485 XML_SetUserData(parser, (void *)&enabled);
5486 XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
5487 // this handler creates a sub-parser and checks that its deferral behavior
5488 // is what we expected, based on the value of `enabled` (in userdata).
5489 XML_SetExternalEntityRefHandler(parser, external_inherited_parser);
5490 assert_true(XML_SetReparseDeferralEnabled(parser, enabled));
5491 if (XML_Parse(parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_OK)
5492 xml_failure(parser);
5493
5494 XML_ParserFree(parser);
5495 }
5496 }
5497 END_TEST
5498
START_TEST(test_set_reparse_deferral_on_null_parser)5499 START_TEST(test_set_reparse_deferral_on_null_parser) {
5500 assert_true(XML_SetReparseDeferralEnabled(NULL, 0) == XML_FALSE);
5501 assert_true(XML_SetReparseDeferralEnabled(NULL, 1) == XML_FALSE);
5502 assert_true(XML_SetReparseDeferralEnabled(NULL, 10) == XML_FALSE);
5503 assert_true(XML_SetReparseDeferralEnabled(NULL, 100) == XML_FALSE);
5504 assert_true(XML_SetReparseDeferralEnabled(NULL, (XML_Bool)INT_MIN)
5505 == XML_FALSE);
5506 assert_true(XML_SetReparseDeferralEnabled(NULL, (XML_Bool)INT_MAX)
5507 == XML_FALSE);
5508 }
5509 END_TEST
5510
START_TEST(test_set_reparse_deferral_on_the_fly)5511 START_TEST(test_set_reparse_deferral_on_the_fly) {
5512 const char *const pre = "<d><x attr='";
5513 const char *const end = "'></x>";
5514 char iiiiii[100];
5515 const int fillsize = (int)sizeof(iiiiii);
5516 memset(iiiiii, 'i', fillsize);
5517
5518 XML_Parser parser = XML_ParserCreate(NULL);
5519 assert_true(parser != NULL);
5520 assert_true(XML_SetReparseDeferralEnabled(parser, XML_TRUE));
5521
5522 CharData storage;
5523 CharData_Init(&storage);
5524 XML_SetUserData(parser, &storage);
5525 XML_SetStartElementHandler(parser, start_element_event_handler);
5526
5527 enum XML_Status status;
5528 // parse the start text
5529 status = XML_Parse(parser, pre, (int)strlen(pre), XML_FALSE);
5530 if (status != XML_STATUS_OK) {
5531 xml_failure(parser);
5532 }
5533 CharData_CheckXMLChars(&storage, XCS("d")); // first element should be done
5534
5535 // try to parse some 'i', but the token isn't finished
5536 status = XML_Parse(parser, iiiiii, fillsize, XML_FALSE);
5537 if (status != XML_STATUS_OK) {
5538 xml_failure(parser);
5539 }
5540 CharData_CheckXMLChars(&storage, XCS("d")); // *still* just the first one
5541
5542 // end the <x> token.
5543 status = XML_Parse(parser, end, (int)strlen(end), XML_FALSE);
5544 if (status != XML_STATUS_OK) {
5545 xml_failure(parser);
5546 }
5547 CharData_CheckXMLChars(&storage, XCS("d")); // not yet.
5548
5549 // now change the heuristic setting and add *no* data
5550 assert_true(XML_SetReparseDeferralEnabled(parser, XML_FALSE));
5551 // we avoid isFinal=XML_TRUE, because that would force-bypass the heuristic.
5552 status = XML_Parse(parser, "", 0, XML_FALSE);
5553 if (status != XML_STATUS_OK) {
5554 xml_failure(parser);
5555 }
5556 CharData_CheckXMLChars(&storage, XCS("dx"));
5557
5558 XML_ParserFree(parser);
5559 }
5560 END_TEST
5561
START_TEST(test_set_bad_reparse_option)5562 START_TEST(test_set_bad_reparse_option) {
5563 XML_Parser parser = XML_ParserCreate(NULL);
5564 assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 2));
5565 assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 3));
5566 assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 99));
5567 assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 127));
5568 assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 128));
5569 assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 129));
5570 assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 255));
5571 assert_true(XML_TRUE == XML_SetReparseDeferralEnabled(parser, 0));
5572 assert_true(XML_TRUE == XML_SetReparseDeferralEnabled(parser, 1));
5573 XML_ParserFree(parser);
5574 }
5575 END_TEST
5576
/* Allocation statistics maintained by counting_realloc(): the largest single
   size requested so far, and the sum of all requested sizes. */
static size_t g_biggestAlloc = 0;
static size_t g_totalAlloc = 0;

/* realloc() wrapper that records the requested size in the statistics above
   and then forwards the request to realloc().  The size is recorded whether
   or not the underlying allocation succeeds. */
static void *
counting_realloc(void *ptr, size_t size) {
  if (g_biggestAlloc < size) {
    g_biggestAlloc = size; // new high-water mark for a single request
  }
  g_totalAlloc += size;
  return realloc(ptr, size);
}
5588
/* malloc() replacement with allocation accounting: a fresh allocation is
   expressed as counting_realloc() of a NULL pointer, so the request goes
   through the same size bookkeeping. */
static void *
counting_malloc(size_t size) {
  void *const fresh_block = counting_realloc(NULL, size);
  return fresh_block;
}
5593
START_TEST(test_bypass_heuristic_when_close_to_bufsize)5594 START_TEST(test_bypass_heuristic_when_close_to_bufsize) {
5595 if (g_chunkSize != 0) {
5596 // this test does not use SINGLE_BYTES, because it depends on very precise
5597 // buffer fills.
5598 return;
5599 }
5600 if (! g_reparseDeferralEnabledDefault) {
5601 return; // this test is irrelevant when the deferral heuristic is disabled.
5602 }
5603
5604 const int document_length = 65536;
5605 char *const document = (char *)malloc(document_length);
5606
5607 const XML_Memory_Handling_Suite memfuncs = {
5608 counting_malloc,
5609 counting_realloc,
5610 free,
5611 };
5612
5613 const int leading_list[] = {0, 3, 61, 96, 400, 401, 4000, 4010, 4099, -1};
5614 const int bigtoken_list[] = {3000, 4000, 4001, 4096, 4099, 5000, 20000, -1};
5615 const int fillsize_list[] = {131, 256, 399, 400, 401, 1025, 4099, 4321, -1};
5616
5617 for (const int *leading = leading_list; *leading >= 0; leading++) {
5618 for (const int *bigtoken = bigtoken_list; *bigtoken >= 0; bigtoken++) {
5619 for (const int *fillsize = fillsize_list; *fillsize >= 0; fillsize++) {
5620 set_subtest("leading=%d bigtoken=%d fillsize=%d", *leading, *bigtoken,
5621 *fillsize);
5622 // start by checking that the test looks reasonably valid
5623 assert_true(*leading + *bigtoken <= document_length);
5624
5625 // put 'x' everywhere; some will be overwritten by elements.
5626 memset(document, 'x', document_length);
5627 // maybe add an initial tag
5628 if (*leading) {
5629 assert_true(*leading >= 3); // or the test case is invalid
5630 memcpy(document, "<a>", 3);
5631 }
5632 // add the large token
5633 document[*leading + 0] = '<';
5634 document[*leading + 1] = 'b';
5635 memset(&document[*leading + 2], ' ', *bigtoken - 2); // a spacy token
5636 document[*leading + *bigtoken - 1] = '>';
5637
5638 // 1 for 'b', plus 1 or 0 depending on the presence of 'a'
5639 const int expected_elem_total = 1 + (*leading ? 1 : 0);
5640
5641 XML_Parser parser = XML_ParserCreate_MM(NULL, &memfuncs, NULL);
5642 assert_true(parser != NULL);
5643
5644 CharData storage;
5645 CharData_Init(&storage);
5646 XML_SetUserData(parser, &storage);
5647 XML_SetStartElementHandler(parser, start_element_event_handler);
5648
5649 g_biggestAlloc = 0;
5650 g_totalAlloc = 0;
5651 int offset = 0;
5652 // fill data until the big token is covered (but not necessarily parsed)
5653 while (offset < *leading + *bigtoken) {
5654 assert_true(offset + *fillsize <= document_length);
5655 const enum XML_Status status
5656 = XML_Parse(parser, &document[offset], *fillsize, XML_FALSE);
5657 if (status != XML_STATUS_OK) {
5658 xml_failure(parser);
5659 }
5660 offset += *fillsize;
5661 }
5662 // Now, check that we've had a buffer allocation that could fit the
5663 // context bytes and our big token. In order to detect a special case,
5664 // we need to know how many bytes of our big token were included in the
5665 // first push that contained _any_ bytes of the big token:
5666 const int bigtok_first_chunk_bytes = *fillsize - (*leading % *fillsize);
5667 if (bigtok_first_chunk_bytes >= *bigtoken && XML_CONTEXT_BYTES == 0) {
5668 // Special case: we aren't saving any context, and the whole big token
5669 // was covered by a single fill, so Expat may have parsed directly
5670 // from our input pointer, without allocating an internal buffer.
5671 } else if (*leading < XML_CONTEXT_BYTES) {
5672 assert_true(g_biggestAlloc >= *leading + (size_t)*bigtoken);
5673 } else {
5674 assert_true(g_biggestAlloc >= XML_CONTEXT_BYTES + (size_t)*bigtoken);
5675 }
5676 // fill data until the big token is actually parsed
5677 while (storage.count < expected_elem_total) {
5678 const size_t alloc_before = g_totalAlloc;
5679 assert_true(offset + *fillsize <= document_length);
5680 const enum XML_Status status
5681 = XML_Parse(parser, &document[offset], *fillsize, XML_FALSE);
5682 if (status != XML_STATUS_OK) {
5683 xml_failure(parser);
5684 }
5685 offset += *fillsize;
5686 // since all the bytes of the big token are already in the buffer,
5687 // the bufsize ceiling should make us finish its parsing without any
5688 // further buffer allocations. We assume that there will be no other
5689 // large allocations in this test.
5690 assert_true(g_totalAlloc - alloc_before < 4096);
5691 }
5692 // test-the-test: was our alloc even called?
5693 assert_true(g_totalAlloc > 0);
5694 // test-the-test: there shouldn't be any extra start elements
5695 assert_true(storage.count == expected_elem_total);
5696
5697 XML_ParserFree(parser);
5698 }
5699 }
5700 }
5701 free(document);
5702 }
5703 END_TEST
5704
START_TEST(test_varying_buffer_fills)5705 START_TEST(test_varying_buffer_fills) {
5706 const int KiB = 1024;
5707 const int MiB = 1024 * KiB;
5708 const int document_length = 16 * MiB;
5709 const int big = 7654321; // arbitrarily chosen between 4 and 8 MiB
5710
5711 if (g_chunkSize != 0) {
5712 return; // this test is slow, and doesn't use _XML_Parse_SINGLE_BYTES().
5713 }
5714
5715 char *const document = (char *)malloc(document_length);
5716 assert_true(document != NULL);
5717 memset(document, 'x', document_length);
5718 document[0] = '<';
5719 document[1] = 't';
5720 memset(&document[2], ' ', big - 2); // a very spacy token
5721 document[big - 1] = '>';
5722
5723 // Each testcase is a list of buffer fill sizes, terminated by a value < 0.
5724 // When reparse deferral is enabled, the final (negated) value is the expected
5725 // maximum number of bytes scanned in parse attempts.
5726 const int testcases[][30] = {
5727 {8 * MiB, -8 * MiB},
5728 {4 * MiB, 4 * MiB, -12 * MiB}, // try at 4MB, then 8MB = 12 MB total
5729 // zero-size fills shouldn't trigger the bypass
5730 {4 * MiB, 0, 4 * MiB, -12 * MiB},
5731 {4 * MiB, 0, 0, 4 * MiB, -12 * MiB},
5732 {4 * MiB, 0, 1 * MiB, 0, 3 * MiB, -12 * MiB},
5733 // try to hit the buffer ceiling only once (at the end)
5734 {4 * MiB, 2 * MiB, 1 * MiB, 512 * KiB, 256 * KiB, 256 * KiB, -12 * MiB},
5735 // try to hit the same buffer ceiling multiple times
5736 {4 * MiB + 1, 2 * MiB, 1 * MiB, 512 * KiB, -25 * MiB},
5737
5738 // try to hit every ceiling, by always landing 1K shy of the buffer size
5739 {1 * KiB, 2 * KiB, 4 * KiB, 8 * KiB, 16 * KiB, 32 * KiB, 64 * KiB,
5740 128 * KiB, 256 * KiB, 512 * KiB, 1 * MiB, 2 * MiB, 4 * MiB, -16 * MiB},
5741
5742 // try to avoid every ceiling, by always landing 1B past the buffer size
5743 // the normal 2x heuristic threshold still forces parse attempts.
5744 {2 * KiB + 1, // will attempt 2KiB + 1 ==> total 2KiB + 1
5745 2 * KiB, 4 * KiB, // will attempt 8KiB + 1 ==> total 10KiB + 2
5746 8 * KiB, 16 * KiB, // will attempt 32KiB + 1 ==> total 42KiB + 3
5747 32 * KiB, 64 * KiB, // will attempt 128KiB + 1 ==> total 170KiB + 4
5748 128 * KiB, 256 * KiB, // will attempt 512KiB + 1 ==> total 682KiB + 5
5749 512 * KiB, 1 * MiB, // will attempt 2MiB + 1 ==> total 2M + 682K + 6
5750 2 * MiB, 4 * MiB, // will attempt 8MiB + 1 ==> total 10M + 682K + 7
5751 -(10 * MiB + 682 * KiB + 7)},
5752 // try to avoid every ceiling again, except on our last fill.
5753 {2 * KiB + 1, // will attempt 2KiB + 1 ==> total 2KiB + 1
5754 2 * KiB, 4 * KiB, // will attempt 8KiB + 1 ==> total 10KiB + 2
5755 8 * KiB, 16 * KiB, // will attempt 32KiB + 1 ==> total 42KiB + 3
5756 32 * KiB, 64 * KiB, // will attempt 128KiB + 1 ==> total 170KiB + 4
5757 128 * KiB, 256 * KiB, // will attempt 512KiB + 1 ==> total 682KiB + 5
5758 512 * KiB, 1 * MiB, // will attempt 2MiB + 1 ==> total 2M + 682K + 6
5759 2 * MiB, 4 * MiB - 1, // will attempt 8MiB ==> total 10M + 682K + 6
5760 -(10 * MiB + 682 * KiB + 6)},
5761
5762 // try to hit ceilings on the way multiple times
5763 {512 * KiB + 1, 256 * KiB, 128 * KiB, 128 * KiB - 1, // 1 MiB buffer
5764 512 * KiB + 1, 256 * KiB, 128 * KiB, 128 * KiB - 1, // 2 MiB buffer
5765 1 * MiB + 1, 512 * KiB, 256 * KiB, 256 * KiB - 1, // 4 MiB buffer
5766 2 * MiB + 1, 1 * MiB, 512 * KiB, // 8 MiB buffer
5767 // we'll make a parse attempt at every parse call
5768 -(45 * MiB + 12)},
5769 };
5770 const int testcount = sizeof(testcases) / sizeof(testcases[0]);
5771 for (int test_i = 0; test_i < testcount; test_i++) {
5772 const int *fillsize = testcases[test_i];
5773 set_subtest("#%d {%d %d %d %d ...}", test_i, fillsize[0], fillsize[1],
5774 fillsize[2], fillsize[3]);
5775 XML_Parser parser = XML_ParserCreate(NULL);
5776 assert_true(parser != NULL);
5777 g_parseAttempts = 0;
5778
5779 CharData storage;
5780 CharData_Init(&storage);
5781 XML_SetUserData(parser, &storage);
5782 XML_SetStartElementHandler(parser, start_element_event_handler);
5783
5784 int worstcase_bytes = 0; // sum of (buffered bytes at each XML_Parse call)
5785 int scanned_bytes = 0; // sum of (buffered bytes at each actual parse)
5786 int offset = 0;
5787 while (*fillsize >= 0) {
5788 assert_true(offset + *fillsize <= document_length); // or test is invalid
5789 const unsigned attempts_before = g_parseAttempts;
5790 const enum XML_Status status
5791 = XML_Parse(parser, &document[offset], *fillsize, XML_FALSE);
5792 if (status != XML_STATUS_OK) {
5793 xml_failure(parser);
5794 }
5795 offset += *fillsize;
5796 fillsize++;
5797 assert_true(offset <= INT_MAX - worstcase_bytes); // avoid overflow
5798 worstcase_bytes += offset; // we might've tried to parse all pending bytes
5799 if (g_parseAttempts != attempts_before) {
5800 assert_true(g_parseAttempts == attempts_before + 1); // max 1/XML_Parse
5801 assert_true(offset <= INT_MAX - scanned_bytes); // avoid overflow
5802 scanned_bytes += offset; // we *did* try to parse all pending bytes
5803 }
5804 }
5805 assert_true(storage.count == 1); // the big token should've been parsed
5806 assert_true(scanned_bytes > 0); // test-the-test: does our counter work?
5807 if (g_reparseDeferralEnabledDefault) {
5808 // heuristic is enabled; some XML_Parse calls may have deferred reparsing
5809 const int max_bytes_scanned = -*fillsize;
5810 if (scanned_bytes > max_bytes_scanned) {
5811 fprintf(stderr,
5812 "bytes scanned in parse attempts: actual=%d limit=%d \n",
5813 scanned_bytes, max_bytes_scanned);
5814 fail("too many bytes scanned in parse attempts");
5815 }
5816 assert_true(scanned_bytes <= worstcase_bytes);
5817 } else {
5818 // heuristic is disabled; every XML_Parse() will have reparsed
5819 assert_true(scanned_bytes == worstcase_bytes);
5820 }
5821
5822 XML_ParserFree(parser);
5823 }
5824 free(document);
5825 }
5826 END_TEST
5827
/* Creates the "basic tests" test case, adds it to suite s, installs
   basic_setup/basic_teardown as its checked fixture, and registers the tests
   of the case in the order listed below.

   NOTE(review): tests registered through tcase_add_test__ifdef_xml_dtd() and
   tcase_add_test__if_xml_ge() are presumably only added when the matching
   build option (XML_DTD, XML_GE) is active -- confirm against the definitions
   of those helpers, which are not part of this function. */
void
make_basic_test_case(Suite *s) {
  TCase *tc_basic = tcase_create("basic tests");

  suite_add_tcase(s, tc_basic);
  tcase_add_checked_fixture(tc_basic, basic_setup, basic_teardown);

  tcase_add_test(tc_basic, test_nul_byte);
  tcase_add_test(tc_basic, test_u0000_char);
  tcase_add_test(tc_basic, test_siphash_self);
  tcase_add_test(tc_basic, test_siphash_spec);
  tcase_add_test(tc_basic, test_bom_utf8);
  tcase_add_test(tc_basic, test_bom_utf16_be);
  tcase_add_test(tc_basic, test_bom_utf16_le);
  tcase_add_test(tc_basic, test_nobom_utf16_le);
  tcase_add_test(tc_basic, test_hash_collision);
  tcase_add_test(tc_basic, test_illegal_utf8);
  tcase_add_test(tc_basic, test_utf8_auto_align);
  tcase_add_test(tc_basic, test_utf16);
  tcase_add_test(tc_basic, test_utf16_le_epilog_newline);
  tcase_add_test(tc_basic, test_not_utf16);
  tcase_add_test(tc_basic, test_bad_encoding);
  tcase_add_test(tc_basic, test_latin1_umlauts);
  tcase_add_test(tc_basic, test_long_utf8_character);
  tcase_add_test(tc_basic, test_long_latin1_attribute);
  tcase_add_test(tc_basic, test_long_ascii_attribute);
  /* Regression test for SF bug #491986. */
  tcase_add_test(tc_basic, test_danish_latin1);
  /* Regression test for SF bug #514281. */
  tcase_add_test(tc_basic, test_french_charref_hexidecimal);
  tcase_add_test(tc_basic, test_french_charref_decimal);
  tcase_add_test(tc_basic, test_french_latin1);
  tcase_add_test(tc_basic, test_french_utf8);
  tcase_add_test(tc_basic, test_utf8_false_rejection);
  tcase_add_test(tc_basic, test_line_number_after_parse);
  tcase_add_test(tc_basic, test_column_number_after_parse);
  tcase_add_test(tc_basic, test_line_and_column_numbers_inside_handlers);
  tcase_add_test(tc_basic, test_line_number_after_error);
  tcase_add_test(tc_basic, test_column_number_after_error);
  tcase_add_test(tc_basic, test_really_long_lines);
  tcase_add_test(tc_basic, test_really_long_encoded_lines);
  tcase_add_test(tc_basic, test_end_element_events);
  tcase_add_test(tc_basic, test_helper_is_whitespace_normalized);
  tcase_add_test(tc_basic, test_attr_whitespace_normalization);
  tcase_add_test(tc_basic, test_xmldecl_misplaced);
  tcase_add_test(tc_basic, test_xmldecl_invalid);
  tcase_add_test(tc_basic, test_xmldecl_missing_attr);
  tcase_add_test(tc_basic, test_xmldecl_missing_value);
  tcase_add_test__if_xml_ge(tc_basic, test_unknown_encoding_internal_entity);
  tcase_add_test(tc_basic, test_unrecognised_encoding_internal_entity);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_set_encoding);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_no_handler);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_set_bom);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_bad_encoding);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_bad_encoding_2);
  tcase_add_test(tc_basic, test_wfc_undeclared_entity_unread_external_subset);
  tcase_add_test(tc_basic, test_wfc_undeclared_entity_no_external_subset);
  tcase_add_test(tc_basic, test_wfc_undeclared_entity_standalone);
  tcase_add_test(tc_basic,
                 test_wfc_undeclared_entity_with_external_subset_standalone);
  tcase_add_test(tc_basic, test_entity_with_external_subset_unless_standalone);
  tcase_add_test(tc_basic, test_wfc_undeclared_entity_with_external_subset);
  tcase_add_test(tc_basic, test_not_standalone_handler_reject);
  tcase_add_test(tc_basic, test_not_standalone_handler_accept);
  tcase_add_test__if_xml_ge(tc_basic, test_wfc_no_recursive_entity_refs);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_invalid_parse);
  tcase_add_test__if_xml_ge(tc_basic, test_dtd_default_handling);
  tcase_add_test(tc_basic, test_dtd_attr_handling);
  tcase_add_test(tc_basic, test_empty_ns_without_namespaces);
  tcase_add_test(tc_basic, test_ns_in_attribute_default_without_namespaces);
  tcase_add_test(tc_basic, test_stop_parser_between_char_data_calls);
  tcase_add_test(tc_basic, test_suspend_parser_between_char_data_calls);
  tcase_add_test(tc_basic, test_repeated_stop_parser_between_char_data_calls);
  tcase_add_test(tc_basic, test_good_cdata_ascii);
  tcase_add_test(tc_basic, test_good_cdata_utf16);
  tcase_add_test(tc_basic, test_good_cdata_utf16_le);
  tcase_add_test(tc_basic, test_long_cdata_utf16);
  tcase_add_test(tc_basic, test_multichar_cdata_utf16);
  tcase_add_test(tc_basic, test_utf16_bad_surrogate_pair);
  tcase_add_test(tc_basic, test_bad_cdata);
  tcase_add_test(tc_basic, test_bad_cdata_utf16);
  tcase_add_test(tc_basic, test_stop_parser_between_cdata_calls);
  tcase_add_test(tc_basic, test_suspend_parser_between_cdata_calls);
  tcase_add_test(tc_basic, test_memory_allocation);
  tcase_add_test__if_xml_ge(tc_basic, test_default_current);
  tcase_add_test(tc_basic, test_dtd_elements);
  tcase_add_test(tc_basic, test_dtd_elements_nesting);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_set_foreign_dtd);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_foreign_dtd_not_standalone);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_invalid_foreign_dtd);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_foreign_dtd_with_doctype);
  tcase_add_test__ifdef_xml_dtd(tc_basic,
                                test_foreign_dtd_without_external_subset);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_empty_foreign_dtd);
  tcase_add_test(tc_basic, test_set_base);
  tcase_add_test(tc_basic, test_attributes);
  tcase_add_test__if_xml_ge(tc_basic, test_reset_in_entity);
  tcase_add_test(tc_basic, test_resume_invalid_parse);
  tcase_add_test(tc_basic, test_resume_resuspended);
  tcase_add_test(tc_basic, test_cdata_default);
  tcase_add_test(tc_basic, test_subordinate_reset);
  tcase_add_test(tc_basic, test_subordinate_suspend);
  tcase_add_test__if_xml_ge(tc_basic, test_subordinate_xdecl_suspend);
  tcase_add_test__if_xml_ge(tc_basic, test_subordinate_xdecl_abort);
  tcase_add_test__ifdef_xml_dtd(tc_basic,
                                test_ext_entity_invalid_suspended_parse);
  tcase_add_test(tc_basic, test_explicit_encoding);
  tcase_add_test(tc_basic, test_trailing_cr);
  tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_trailing_cr);
  tcase_add_test(tc_basic, test_trailing_rsqb);
  tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_trailing_rsqb);
  tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_good_cdata);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_user_parameters);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_ref_parameter);
  tcase_add_test(tc_basic, test_empty_parse);
  tcase_add_test(tc_basic, test_get_buffer_1);
  tcase_add_test(tc_basic, test_get_buffer_2);
  /* This test is only registered when context bytes are kept
     (XML_CONTEXT_BYTES > 0). */
#if XML_CONTEXT_BYTES > 0
  tcase_add_test(tc_basic, test_get_buffer_3_overflow);
#endif
  tcase_add_test(tc_basic, test_buffer_can_grow_to_max);
  tcase_add_test(tc_basic, test_getbuffer_allocates_on_zero_len);
  tcase_add_test(tc_basic, test_byte_info_at_end);
  tcase_add_test(tc_basic, test_byte_info_at_error);
  tcase_add_test(tc_basic, test_byte_info_at_cdata);
  tcase_add_test(tc_basic, test_predefined_entities);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_invalid_tag_in_dtd);
  tcase_add_test(tc_basic, test_not_predefined_entities);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_ignore_section);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_ignore_section_utf16);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_ignore_section_utf16_be);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_bad_ignore_section);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_external_bom_consumed);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_external_entity_values);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_not_standalone);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_value_abort);
  tcase_add_test(tc_basic, test_bad_public_doctype);
  tcase_add_test(tc_basic, test_attribute_enum_value);
  tcase_add_test(tc_basic, test_predefined_entity_redefinition);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_dtd_stop_processing);
  tcase_add_test(tc_basic, test_public_notation_no_sysid);
  tcase_add_test(tc_basic, test_nested_groups);
  tcase_add_test(tc_basic, test_group_choice);
  tcase_add_test(tc_basic, test_standalone_parameter_entity);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_skipped_parameter_entity);
  tcase_add_test__ifdef_xml_dtd(tc_basic,
                                test_recursive_external_parameter_entity);
  tcase_add_test(tc_basic, test_undefined_ext_entity_in_external_dtd);
  tcase_add_test(tc_basic, test_suspend_xdecl);
  tcase_add_test(tc_basic, test_abort_epilog);
  tcase_add_test(tc_basic, test_abort_epilog_2);
  tcase_add_test(tc_basic, test_suspend_epilog);
  tcase_add_test(tc_basic, test_suspend_in_sole_empty_tag);
  tcase_add_test(tc_basic, test_unfinished_epilog);
  tcase_add_test(tc_basic, test_partial_char_in_epilog);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_suspend_resume_internal_entity);
  tcase_add_test__ifdef_xml_dtd(tc_basic,
                                test_suspend_resume_internal_entity_issue_629);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_resume_entity_with_syntax_error);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_suspend_resume_parameter_entity);
  tcase_add_test(tc_basic, test_restart_on_error);
  tcase_add_test(tc_basic, test_reject_lt_in_attribute_value);
  tcase_add_test(tc_basic, test_reject_unfinished_param_in_att_value);
  tcase_add_test(tc_basic, test_trailing_cr_in_att_value);
  tcase_add_test(tc_basic, test_standalone_internal_entity);
  tcase_add_test(tc_basic, test_skipped_external_entity);
  tcase_add_test(tc_basic, test_skipped_null_loaded_ext_entity);
  tcase_add_test(tc_basic, test_skipped_unloaded_ext_entity);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_param_entity_with_trailing_cr);
  tcase_add_test__if_xml_ge(tc_basic, test_invalid_character_entity);
  tcase_add_test__if_xml_ge(tc_basic, test_invalid_character_entity_2);
  tcase_add_test__if_xml_ge(tc_basic, test_invalid_character_entity_3);
  tcase_add_test__if_xml_ge(tc_basic, test_invalid_character_entity_4);
  tcase_add_test(tc_basic, test_pi_handled_in_default);
  tcase_add_test(tc_basic, test_comment_handled_in_default);
  tcase_add_test(tc_basic, test_pi_yml);
  tcase_add_test(tc_basic, test_pi_xnl);
  tcase_add_test(tc_basic, test_pi_xmm);
  tcase_add_test(tc_basic, test_utf16_pi);
  tcase_add_test(tc_basic, test_utf16_be_pi);
  tcase_add_test(tc_basic, test_utf16_be_comment);
  tcase_add_test(tc_basic, test_utf16_le_comment);
  tcase_add_test(tc_basic, test_missing_encoding_conversion_fn);
  tcase_add_test(tc_basic, test_failing_encoding_conversion_fn);
  tcase_add_test(tc_basic, test_unknown_encoding_success);
  tcase_add_test(tc_basic, test_unknown_encoding_bad_name);
  tcase_add_test(tc_basic, test_unknown_encoding_bad_name_2);
  tcase_add_test(tc_basic, test_unknown_encoding_long_name_1);
  tcase_add_test(tc_basic, test_unknown_encoding_long_name_2);
  tcase_add_test(tc_basic, test_invalid_unknown_encoding);
  tcase_add_test(tc_basic, test_unknown_ascii_encoding_ok);
  tcase_add_test(tc_basic, test_unknown_ascii_encoding_fail);
  tcase_add_test(tc_basic, test_unknown_encoding_invalid_length);
  tcase_add_test(tc_basic, test_unknown_encoding_invalid_topbit);
  tcase_add_test(tc_basic, test_unknown_encoding_invalid_surrogate);
  tcase_add_test(tc_basic, test_unknown_encoding_invalid_high);
  tcase_add_test(tc_basic, test_unknown_encoding_invalid_attr_value);
  tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_latin1_utf16le_bom);
  tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_latin1_utf16be_bom);
  tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_latin1_utf16le_bom2);
  tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_latin1_utf16be_bom2);
  tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_utf16_be);
  tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_utf16_le);
  tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_utf16_unknown);
  tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_utf8_non_bom);
  tcase_add_test(tc_basic, test_utf8_in_cdata_section);
  tcase_add_test(tc_basic, test_utf8_in_cdata_section_2);
  tcase_add_test(tc_basic, test_utf8_in_start_tags);
  tcase_add_test(tc_basic, test_trailing_spaces_in_elements);
  tcase_add_test(tc_basic, test_utf16_attribute);
  tcase_add_test(tc_basic, test_utf16_second_attr);
  tcase_add_test(tc_basic, test_attr_after_solidus);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_utf16_pe);
  tcase_add_test(tc_basic, test_bad_attr_desc_keyword);
  tcase_add_test(tc_basic, test_bad_attr_desc_keyword_utf16);
  tcase_add_test(tc_basic, test_bad_doctype);
  tcase_add_test(tc_basic, test_bad_doctype_utf8);
  tcase_add_test(tc_basic, test_bad_doctype_utf16);
  tcase_add_test(tc_basic, test_bad_doctype_plus);
  tcase_add_test(tc_basic, test_bad_doctype_star);
  tcase_add_test(tc_basic, test_bad_doctype_query);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_unknown_encoding_bad_ignore);
  tcase_add_test(tc_basic, test_entity_in_utf16_be_attr);
  tcase_add_test(tc_basic, test_entity_in_utf16_le_attr);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_entity_public_utf16_be);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_entity_public_utf16_le);
  tcase_add_test(tc_basic, test_short_doctype);
  tcase_add_test(tc_basic, test_short_doctype_2);
  tcase_add_test(tc_basic, test_short_doctype_3);
  tcase_add_test(tc_basic, test_long_doctype);
  tcase_add_test(tc_basic, test_bad_entity);
  tcase_add_test(tc_basic, test_bad_entity_2);
  tcase_add_test(tc_basic, test_bad_entity_3);
  tcase_add_test(tc_basic, test_bad_entity_4);
  tcase_add_test(tc_basic, test_bad_notation);
  tcase_add_test(tc_basic, test_default_doctype_handler);
  tcase_add_test(tc_basic, test_empty_element_abort);
  tcase_add_test__ifdef_xml_dtd(tc_basic,
                                test_pool_integrity_with_unfinished_attr);
  tcase_add_test__if_xml_ge(tc_basic, test_nested_entity_suspend);
  /* Large-token handling and the reparse deferral heuristic. */
  tcase_add_test(tc_basic, test_big_tokens_take_linear_time);
  tcase_add_test(tc_basic, test_set_reparse_deferral);
  tcase_add_test(tc_basic, test_reparse_deferral_is_inherited);
  tcase_add_test(tc_basic, test_set_reparse_deferral_on_null_parser);
  tcase_add_test(tc_basic, test_set_reparse_deferral_on_the_fly);
  tcase_add_test(tc_basic, test_set_bad_reparse_option);
  tcase_add_test(tc_basic, test_bypass_heuristic_when_close_to_bufsize);
  tcase_add_test(tc_basic, test_varying_buffer_fills);
}
6077