1 /* Tests in the "basic" test case for the Expat test suite
2 __ __ _
3 ___\ \/ /_ __ __ _| |_
4 / _ \\ /| '_ \ / _` | __|
5 | __// \| |_) | (_| | |_
6 \___/_/\_\ .__/ \__,_|\__|
7 |_| XML parser
8
9 Copyright (c) 2001-2006 Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
10 Copyright (c) 2003 Greg Stein <gstein@users.sourceforge.net>
11 Copyright (c) 2005-2007 Steven Solie <steven@solie.ca>
12 Copyright (c) 2005-2012 Karl Waclawek <karl@waclawek.net>
13 Copyright (c) 2016-2024 Sebastian Pipping <sebastian@pipping.org>
14 Copyright (c) 2017-2022 Rhodri James <rhodri@wildebeest.org.uk>
15 Copyright (c) 2017 Joe Orton <jorton@redhat.com>
16 Copyright (c) 2017 José Gutiérrez de la Concha <jose@zeroc.com>
17 Copyright (c) 2018 Marco Maggi <marco.maggi-ipsu@poste.it>
18 Copyright (c) 2019 David Loffredo <loffredo@steptools.com>
19 Copyright (c) 2020 Tim Gates <tim.gates@iress.com>
20 Copyright (c) 2021 Donghee Na <donghee.na@python.org>
21 Copyright (c) 2023-2024 Sony Corporation / Snild Dolkow <snild@sony.com>
22 Licensed under the MIT license:
23
24 Permission is hereby granted, free of charge, to any person obtaining
25 a copy of this software and associated documentation files (the
26 "Software"), to deal in the Software without restriction, including
27 without limitation the rights to use, copy, modify, merge, publish,
28 distribute, sublicense, and/or sell copies of the Software, and to permit
29 persons to whom the Software is furnished to do so, subject to the
30 following conditions:
31
32 The above copyright notice and this permission notice shall be included
33 in all copies or substantial portions of the Software.
34
35 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
36 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
37 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
38 NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
39 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
40 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
41 USE OR OTHER DEALINGS IN THE SOFTWARE.
42 */
43
44 #if defined(NDEBUG)
45 # undef NDEBUG /* because test suite relies on assert(...) at the moment */
46 #endif
47
48 #include <assert.h>
49
50 #include <stdio.h>
51 #include <string.h>
52 #include <time.h>
53
54 #if ! defined(__cplusplus)
55 # include <stdbool.h>
56 #endif
57
58 #include "expat_config.h"
59
60 #include "expat.h"
61 #include "internal.h"
62 #include "minicheck.h"
63 #include "structdata.h"
64 #include "common.h"
65 #include "dummy.h"
66 #include "handlers.h"
67 #include "siphash.h"
68 #include "basic_tests.h"
69
70 static void
basic_setup(void)71 basic_setup(void) {
72 g_parser = XML_ParserCreate(NULL);
73 if (g_parser == NULL)
74 fail("Parser not created.");
75 }
76
77 /*
78 * Character & encoding tests.
79 */
80
START_TEST(test_nul_byte)81 START_TEST(test_nul_byte) {
82 char text[] = "<doc>\0</doc>";
83
84 /* test that a NUL byte (in US-ASCII data) is an error */
85 if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE)
86 == XML_STATUS_OK)
87 fail("Parser did not report error on NUL-byte.");
88 if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN)
89 xml_failure(g_parser);
90 }
91 END_TEST
92
START_TEST(test_u0000_char)93 START_TEST(test_u0000_char) {
94 /* test that a NUL byte (in US-ASCII data) is an error */
95 expect_failure("<doc>�</doc>", XML_ERROR_BAD_CHAR_REF,
96 "Parser did not report error on NUL-byte.");
97 }
98 END_TEST
99
START_TEST(test_siphash_self)100 START_TEST(test_siphash_self) {
101 if (! sip24_valid())
102 fail("SipHash self-test failed");
103 }
104 END_TEST
105
START_TEST(test_siphash_spec)106 START_TEST(test_siphash_spec) {
107 /* https://131002.net/siphash/siphash.pdf (page 19, "Test values") */
108 const char message[] = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09"
109 "\x0a\x0b\x0c\x0d\x0e";
110 const size_t len = sizeof(message) - 1;
111 const uint64_t expected = SIP_ULL(0xa129ca61U, 0x49be45e5U);
112 struct siphash state;
113 struct sipkey key;
114
115 sip_tokey(&key, "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09"
116 "\x0a\x0b\x0c\x0d\x0e\x0f");
117 sip24_init(&state, &key);
118
119 /* Cover spread across calls */
120 sip24_update(&state, message, 4);
121 sip24_update(&state, message + 4, len - 4);
122
123 /* Cover null length */
124 sip24_update(&state, message, 0);
125
126 if (sip24_final(&state) != expected)
127 fail("sip24_final failed spec test\n");
128
129 /* Cover wrapper */
130 if (siphash24(message, len, &key) != expected)
131 fail("siphash24 failed spec test\n");
132 }
133 END_TEST
134
START_TEST(test_bom_utf8)135 START_TEST(test_bom_utf8) {
136 /* This test is really just making sure we don't core on a UTF-8 BOM. */
137 const char *text = "\357\273\277<e/>";
138
139 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
140 == XML_STATUS_ERROR)
141 xml_failure(g_parser);
142 }
143 END_TEST
144
START_TEST(test_bom_utf16_be)145 START_TEST(test_bom_utf16_be) {
146 char text[] = "\376\377\0<\0e\0/\0>";
147
148 if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE)
149 == XML_STATUS_ERROR)
150 xml_failure(g_parser);
151 }
152 END_TEST
153
START_TEST(test_bom_utf16_le)154 START_TEST(test_bom_utf16_le) {
155 char text[] = "\377\376<\0e\0/\0>\0";
156
157 if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE)
158 == XML_STATUS_ERROR)
159 xml_failure(g_parser);
160 }
161 END_TEST
162
START_TEST(test_nobom_utf16_le)163 START_TEST(test_nobom_utf16_le) {
164 char text[] = " \0<\0e\0/\0>\0";
165
166 if (g_chunkSize == 1) {
167 // TODO: with just the first byte, we can't tell the difference between
168 // UTF-16-LE and UTF-8. Avoid the failure for now.
169 return;
170 }
171
172 if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE)
173 == XML_STATUS_ERROR)
174 xml_failure(g_parser);
175 }
176 END_TEST
177
START_TEST(test_hash_collision)178 START_TEST(test_hash_collision) {
179 /* For full coverage of the lookup routine, we need to ensure a
180 * hash collision even though we can only tell that we have one
181 * through breakpoint debugging or coverage statistics. The
182 * following will cause a hash collision on machines with a 64-bit
183 * long type; others will have to experiment. The full coverage
184 * tests invoked from qa.sh usually provide a hash collision, but
185 * not always. This is an attempt to provide insurance.
186 */
187 #define COLLIDING_HASH_SALT (unsigned long)SIP_ULL(0xffffffffU, 0xff99fc90U)
188 const char *text
189 = "<doc>\n"
190 "<a1/><a2/><a3/><a4/><a5/><a6/><a7/><a8/>\n"
191 "<b1></b1><b2 attr='foo'>This is a foo</b2><b3></b3><b4></b4>\n"
192 "<b5></b5><b6></b6><b7></b7><b8></b8>\n"
193 "<c1/><c2/><c3/><c4/><c5/><c6/><c7/><c8/>\n"
194 "<d1/><d2/><d3/><d4/><d5/><d6/><d7/>\n"
195 "<d8>This triggers the table growth and collides with b2</d8>\n"
196 "</doc>\n";
197
198 XML_SetHashSalt(g_parser, COLLIDING_HASH_SALT);
199 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
200 == XML_STATUS_ERROR)
201 xml_failure(g_parser);
202 }
203 END_TEST
204 #undef COLLIDING_HASH_SALT
205
206 /* Regression test for SF bug #491986. */
START_TEST(test_danish_latin1)207 START_TEST(test_danish_latin1) {
208 const char *text = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
209 "<e>J\xF8rgen \xE6\xF8\xE5\xC6\xD8\xC5</e>";
210 #ifdef XML_UNICODE
211 const XML_Char *expected
212 = XCS("J\x00f8rgen \x00e6\x00f8\x00e5\x00c6\x00d8\x00c5");
213 #else
214 const XML_Char *expected
215 = XCS("J\xC3\xB8rgen \xC3\xA6\xC3\xB8\xC3\xA5\xC3\x86\xC3\x98\xC3\x85");
216 #endif
217 run_character_check(text, expected);
218 }
219 END_TEST
220
221 /* Regression test for SF bug #514281. */
START_TEST(test_french_charref_hexidecimal)222 START_TEST(test_french_charref_hexidecimal) {
223 const char *text = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
224 "<doc>éèàçêÈ</doc>";
225 #ifdef XML_UNICODE
226 const XML_Char *expected = XCS("\x00e9\x00e8\x00e0\x00e7\x00ea\x00c8");
227 #else
228 const XML_Char *expected
229 = XCS("\xC3\xA9\xC3\xA8\xC3\xA0\xC3\xA7\xC3\xAA\xC3\x88");
230 #endif
231 run_character_check(text, expected);
232 }
233 END_TEST
234
START_TEST(test_french_charref_decimal)235 START_TEST(test_french_charref_decimal) {
236 const char *text = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
237 "<doc>éèàçêÈ</doc>";
238 #ifdef XML_UNICODE
239 const XML_Char *expected = XCS("\x00e9\x00e8\x00e0\x00e7\x00ea\x00c8");
240 #else
241 const XML_Char *expected
242 = XCS("\xC3\xA9\xC3\xA8\xC3\xA0\xC3\xA7\xC3\xAA\xC3\x88");
243 #endif
244 run_character_check(text, expected);
245 }
246 END_TEST
247
START_TEST(test_french_latin1)248 START_TEST(test_french_latin1) {
249 const char *text = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
250 "<doc>\xE9\xE8\xE0\xE7\xEa\xC8</doc>";
251 #ifdef XML_UNICODE
252 const XML_Char *expected = XCS("\x00e9\x00e8\x00e0\x00e7\x00ea\x00c8");
253 #else
254 const XML_Char *expected
255 = XCS("\xC3\xA9\xC3\xA8\xC3\xA0\xC3\xA7\xC3\xAA\xC3\x88");
256 #endif
257 run_character_check(text, expected);
258 }
259 END_TEST
260
START_TEST(test_french_utf8)261 START_TEST(test_french_utf8) {
262 const char *text = "<?xml version='1.0' encoding='utf-8'?>\n"
263 "<doc>\xC3\xA9</doc>";
264 #ifdef XML_UNICODE
265 const XML_Char *expected = XCS("\x00e9");
266 #else
267 const XML_Char *expected = XCS("\xC3\xA9");
268 #endif
269 run_character_check(text, expected);
270 }
271 END_TEST
272
273 /* Regression test for SF bug #600479.
274 XXX There should be a test that exercises all legal XML Unicode
275 characters as PCDATA and attribute value content, and XML Name
276 characters as part of element and attribute names.
277 */
START_TEST(test_utf8_false_rejection)278 START_TEST(test_utf8_false_rejection) {
279 const char *text = "<doc>\xEF\xBA\xBF</doc>";
280 #ifdef XML_UNICODE
281 const XML_Char *expected = XCS("\xfebf");
282 #else
283 const XML_Char *expected = XCS("\xEF\xBA\xBF");
284 #endif
285 run_character_check(text, expected);
286 }
287 END_TEST
288
289 /* Regression test for SF bug #477667.
290 This test assures that any 8-bit character followed by a 7-bit
291 character will not be mistakenly interpreted as a valid UTF-8
292 sequence.
293 */
START_TEST(test_illegal_utf8)294 START_TEST(test_illegal_utf8) {
295 char text[100];
296 int i;
297
298 for (i = 128; i <= 255; ++i) {
299 snprintf(text, sizeof(text), "<e>%ccd</e>", i);
300 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
301 == XML_STATUS_OK) {
302 snprintf(text, sizeof(text),
303 "expected token error for '%c' (ordinal %d) in UTF-8 text", i,
304 i);
305 fail(text);
306 } else if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN)
307 xml_failure(g_parser);
308 /* Reset the parser since we use the same parser repeatedly. */
309 XML_ParserReset(g_parser, NULL);
310 }
311 }
312 END_TEST
313
314 /* Examples, not masks: */
315 #define UTF8_LEAD_1 "\x7f" /* 0b01111111 */
316 #define UTF8_LEAD_2 "\xdf" /* 0b11011111 */
317 #define UTF8_LEAD_3 "\xef" /* 0b11101111 */
318 #define UTF8_LEAD_4 "\xf7" /* 0b11110111 */
319 #define UTF8_FOLLOW "\xbf" /* 0b10111111 */
320
START_TEST(test_utf8_auto_align)321 START_TEST(test_utf8_auto_align) {
322 struct TestCase {
323 ptrdiff_t expectedMovementInChars;
324 const char *input;
325 };
326
327 struct TestCase cases[] = {
328 {00, ""},
329
330 {00, UTF8_LEAD_1},
331
332 {-1, UTF8_LEAD_2},
333 {00, UTF8_LEAD_2 UTF8_FOLLOW},
334
335 {-1, UTF8_LEAD_3},
336 {-2, UTF8_LEAD_3 UTF8_FOLLOW},
337 {00, UTF8_LEAD_3 UTF8_FOLLOW UTF8_FOLLOW},
338
339 {-1, UTF8_LEAD_4},
340 {-2, UTF8_LEAD_4 UTF8_FOLLOW},
341 {-3, UTF8_LEAD_4 UTF8_FOLLOW UTF8_FOLLOW},
342 {00, UTF8_LEAD_4 UTF8_FOLLOW UTF8_FOLLOW UTF8_FOLLOW},
343 };
344
345 size_t i = 0;
346 bool success = true;
347 for (; i < sizeof(cases) / sizeof(*cases); i++) {
348 const char *fromLim = cases[i].input + strlen(cases[i].input);
349 const char *const fromLimInitially = fromLim;
350 ptrdiff_t actualMovementInChars;
351
352 _INTERNAL_trim_to_complete_utf8_characters(cases[i].input, &fromLim);
353
354 actualMovementInChars = (fromLim - fromLimInitially);
355 if (actualMovementInChars != cases[i].expectedMovementInChars) {
356 size_t j = 0;
357 success = false;
358 printf("[-] UTF-8 case %2u: Expected movement by %2d chars"
359 ", actually moved by %2d chars: \"",
360 (unsigned)(i + 1), (int)cases[i].expectedMovementInChars,
361 (int)actualMovementInChars);
362 for (; j < strlen(cases[i].input); j++) {
363 printf("\\x%02x", (unsigned char)cases[i].input[j]);
364 }
365 printf("\"\n");
366 }
367 }
368
369 if (! success) {
370 fail("UTF-8 auto-alignment is not bullet-proof\n");
371 }
372 }
373 END_TEST
374
START_TEST(test_utf16)375 START_TEST(test_utf16) {
376 /* <?xml version="1.0" encoding="UTF-16"?>
377 * <doc a='123'>some {A} text</doc>
378 *
379 * where {A} is U+FF21, FULLWIDTH LATIN CAPITAL LETTER A
380 */
381 char text[]
382 = "\000<\000?\000x\000m\000\154\000 \000v\000e\000r\000s\000i\000o"
383 "\000n\000=\000'\0001\000.\000\060\000'\000 \000e\000n\000c\000o"
384 "\000d\000i\000n\000g\000=\000'\000U\000T\000F\000-\0001\000\066"
385 "\000'\000?\000>\000\n"
386 "\000<\000d\000o\000c\000 \000a\000=\000'\0001\0002\0003\000'\000>"
387 "\000s\000o\000m\000e\000 \xff\x21\000 \000t\000e\000x\000t\000"
388 "<\000/\000d\000o\000c\000>";
389 #ifdef XML_UNICODE
390 const XML_Char *expected = XCS("some \xff21 text");
391 #else
392 const XML_Char *expected = XCS("some \357\274\241 text");
393 #endif
394 CharData storage;
395
396 CharData_Init(&storage);
397 XML_SetUserData(g_parser, &storage);
398 XML_SetCharacterDataHandler(g_parser, accumulate_characters);
399 if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE)
400 == XML_STATUS_ERROR)
401 xml_failure(g_parser);
402 CharData_CheckXMLChars(&storage, expected);
403 }
404 END_TEST
405
START_TEST(test_utf16_le_epilog_newline)406 START_TEST(test_utf16_le_epilog_newline) {
407 unsigned int first_chunk_bytes = 17;
408 char text[] = "\xFF\xFE" /* BOM */
409 "<\000e\000/\000>\000" /* document element */
410 "\r\000\n\000\r\000\n\000"; /* epilog */
411
412 if (first_chunk_bytes >= sizeof(text) - 1)
413 fail("bad value of first_chunk_bytes");
414 if (_XML_Parse_SINGLE_BYTES(g_parser, text, first_chunk_bytes, XML_FALSE)
415 == XML_STATUS_ERROR)
416 xml_failure(g_parser);
417 else {
418 enum XML_Status rc;
419 rc = _XML_Parse_SINGLE_BYTES(g_parser, text + first_chunk_bytes,
420 sizeof(text) - first_chunk_bytes - 1,
421 XML_TRUE);
422 if (rc == XML_STATUS_ERROR)
423 xml_failure(g_parser);
424 }
425 }
426 END_TEST
427
428 /* Test that an outright lie in the encoding is faulted */
START_TEST(test_not_utf16)429 START_TEST(test_not_utf16) {
430 const char *text = "<?xml version='1.0' encoding='utf-16'?>"
431 "<doc>Hi</doc>";
432
433 /* Use a handler to provoke the appropriate code paths */
434 XML_SetXmlDeclHandler(g_parser, dummy_xdecl_handler);
435 expect_failure(text, XML_ERROR_INCORRECT_ENCODING,
436 "UTF-16 declared in UTF-8 not faulted");
437 }
438 END_TEST
439
440 /* Test that an unknown encoding is rejected */
START_TEST(test_bad_encoding)441 START_TEST(test_bad_encoding) {
442 const char *text = "<doc>Hi</doc>";
443
444 if (! XML_SetEncoding(g_parser, XCS("unknown-encoding")))
445 fail("XML_SetEncoding failed");
446 expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
447 "Unknown encoding not faulted");
448 }
449 END_TEST
450
451 /* Regression test for SF bug #481609, #774028. */
START_TEST(test_latin1_umlauts)452 START_TEST(test_latin1_umlauts) {
453 const char *text
454 = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
455 "<e a='\xE4 \xF6 \xFC ä ö ü ä ö ü >'\n"
456 " >\xE4 \xF6 \xFC ä ö ü ä ö ü ></e>";
457 #ifdef XML_UNICODE
458 /* Expected results in UTF-16 */
459 const XML_Char *expected = XCS("\x00e4 \x00f6 \x00fc ")
460 XCS("\x00e4 \x00f6 \x00fc ") XCS("\x00e4 \x00f6 \x00fc >");
461 #else
462 /* Expected results in UTF-8 */
463 const XML_Char *expected = XCS("\xC3\xA4 \xC3\xB6 \xC3\xBC ")
464 XCS("\xC3\xA4 \xC3\xB6 \xC3\xBC ") XCS("\xC3\xA4 \xC3\xB6 \xC3\xBC >");
465 #endif
466
467 run_character_check(text, expected);
468 XML_ParserReset(g_parser, NULL);
469 run_attribute_check(text, expected);
470 /* Repeat with a default handler */
471 XML_ParserReset(g_parser, NULL);
472 XML_SetDefaultHandler(g_parser, dummy_default_handler);
473 run_character_check(text, expected);
474 XML_ParserReset(g_parser, NULL);
475 XML_SetDefaultHandler(g_parser, dummy_default_handler);
476 run_attribute_check(text, expected);
477 }
478 END_TEST
479
480 /* Test that an element name with a 4-byte UTF-8 character is rejected */
START_TEST(test_long_utf8_character)481 START_TEST(test_long_utf8_character) {
482 const char *text
483 = "<?xml version='1.0' encoding='utf-8'?>\n"
484 /* 0xf0 0x90 0x80 0x80 = U+10000, the first Linear B character */
485 "<do\xf0\x90\x80\x80/>";
486 expect_failure(text, XML_ERROR_INVALID_TOKEN,
487 "4-byte UTF-8 character in element name not faulted");
488 }
489 END_TEST
490
491 /* Test that a long latin-1 attribute (too long to convert in one go)
492 * is correctly converted
493 */
START_TEST(test_long_latin1_attribute)494 START_TEST(test_long_latin1_attribute) {
495 const char *text
496 = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
497 "<doc att='"
498 /* 64 characters per line */
499 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
500 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
501 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
502 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
503 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
504 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
505 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
506 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
507 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
508 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
509 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
510 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
511 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
512 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
513 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
514 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNO"
515 /* Last character splits across a buffer boundary */
516 "\xe4'>\n</doc>";
517
518 const XML_Char *expected =
519 /* 64 characters per line */
520 /* clang-format off */
521 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
522 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
523 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
524 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
525 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
526 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
527 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
528 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
529 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
530 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
531 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
532 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
533 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
534 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
535 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
536 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNO")
537 /* clang-format on */
538 #ifdef XML_UNICODE
539 XCS("\x00e4");
540 #else
541 XCS("\xc3\xa4");
542 #endif
543
544 run_attribute_check(text, expected);
545 }
546 END_TEST
547
548 /* Test that a long ASCII attribute (too long to convert in one go)
549 * is correctly converted
550 */
START_TEST(test_long_ascii_attribute)551 START_TEST(test_long_ascii_attribute) {
552 const char *text
553 = "<?xml version='1.0' encoding='us-ascii'?>\n"
554 "<doc att='"
555 /* 64 characters per line */
556 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
557 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
558 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
559 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
560 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
561 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
562 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
563 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
564 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
565 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
566 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
567 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
568 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
569 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
570 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
571 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
572 "01234'>\n</doc>";
573 const XML_Char *expected =
574 /* 64 characters per line */
575 /* clang-format off */
576 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
577 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
578 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
579 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
580 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
581 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
582 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
583 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
584 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
585 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
586 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
587 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
588 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
589 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
590 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
591 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
592 XCS("01234");
593 /* clang-format on */
594
595 run_attribute_check(text, expected);
596 }
597 END_TEST
598
599 /* Regression test #1 for SF bug #653180. */
START_TEST(test_line_number_after_parse)600 START_TEST(test_line_number_after_parse) {
601 const char *text = "<tag>\n"
602 "\n"
603 "\n</tag>";
604 XML_Size lineno;
605
606 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
607 == XML_STATUS_ERROR)
608 xml_failure(g_parser);
609 lineno = XML_GetCurrentLineNumber(g_parser);
610 if (lineno != 4) {
611 char buffer[100];
612 snprintf(buffer, sizeof(buffer),
613 "expected 4 lines, saw %" XML_FMT_INT_MOD "u", lineno);
614 fail(buffer);
615 }
616 }
617 END_TEST
618
619 /* Regression test #2 for SF bug #653180. */
START_TEST(test_column_number_after_parse)620 START_TEST(test_column_number_after_parse) {
621 const char *text = "<tag></tag>";
622 XML_Size colno;
623
624 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
625 == XML_STATUS_ERROR)
626 xml_failure(g_parser);
627 colno = XML_GetCurrentColumnNumber(g_parser);
628 if (colno != 11) {
629 char buffer[100];
630 snprintf(buffer, sizeof(buffer),
631 "expected 11 columns, saw %" XML_FMT_INT_MOD "u", colno);
632 fail(buffer);
633 }
634 }
635 END_TEST
636
637 /* Regression test #3 for SF bug #653180. */
START_TEST(test_line_and_column_numbers_inside_handlers)638 START_TEST(test_line_and_column_numbers_inside_handlers) {
639 const char *text = "<a>\n" /* Unix end-of-line */
640 " <b>\r\n" /* Windows end-of-line */
641 " <c/>\r" /* Mac OS end-of-line */
642 " </b>\n"
643 " <d>\n"
644 " <f/>\n"
645 " </d>\n"
646 "</a>";
647 const StructDataEntry expected[]
648 = {{XCS("a"), 0, 1, STRUCT_START_TAG}, {XCS("b"), 2, 2, STRUCT_START_TAG},
649 {XCS("c"), 4, 3, STRUCT_START_TAG}, {XCS("c"), 8, 3, STRUCT_END_TAG},
650 {XCS("b"), 2, 4, STRUCT_END_TAG}, {XCS("d"), 2, 5, STRUCT_START_TAG},
651 {XCS("f"), 4, 6, STRUCT_START_TAG}, {XCS("f"), 8, 6, STRUCT_END_TAG},
652 {XCS("d"), 2, 7, STRUCT_END_TAG}, {XCS("a"), 0, 8, STRUCT_END_TAG}};
653 const int expected_count = sizeof(expected) / sizeof(StructDataEntry);
654 StructData storage;
655
656 StructData_Init(&storage);
657 XML_SetUserData(g_parser, &storage);
658 XML_SetStartElementHandler(g_parser, start_element_event_handler2);
659 XML_SetEndElementHandler(g_parser, end_element_event_handler2);
660 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
661 == XML_STATUS_ERROR)
662 xml_failure(g_parser);
663
664 StructData_CheckItems(&storage, expected, expected_count);
665 StructData_Dispose(&storage);
666 }
667 END_TEST
668
669 /* Regression test #4 for SF bug #653180. */
START_TEST(test_line_number_after_error)670 START_TEST(test_line_number_after_error) {
671 const char *text = "<a>\n"
672 " <b>\n"
673 " </a>"; /* missing </b> */
674 XML_Size lineno;
675 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
676 != XML_STATUS_ERROR)
677 fail("Expected a parse error");
678
679 lineno = XML_GetCurrentLineNumber(g_parser);
680 if (lineno != 3) {
681 char buffer[100];
682 snprintf(buffer, sizeof(buffer),
683 "expected 3 lines, saw %" XML_FMT_INT_MOD "u", lineno);
684 fail(buffer);
685 }
686 }
687 END_TEST
688
689 /* Regression test #5 for SF bug #653180. */
START_TEST(test_column_number_after_error)690 START_TEST(test_column_number_after_error) {
691 const char *text = "<a>\n"
692 " <b>\n"
693 " </a>"; /* missing </b> */
694 XML_Size colno;
695 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
696 != XML_STATUS_ERROR)
697 fail("Expected a parse error");
698
699 colno = XML_GetCurrentColumnNumber(g_parser);
700 if (colno != 4) {
701 char buffer[100];
702 snprintf(buffer, sizeof(buffer),
703 "expected 4 columns, saw %" XML_FMT_INT_MOD "u", colno);
704 fail(buffer);
705 }
706 }
707 END_TEST
708
709 /* Regression test for SF bug #478332. */
START_TEST(test_really_long_lines)710 START_TEST(test_really_long_lines) {
711 /* This parses an input line longer than INIT_DATA_BUF_SIZE
712 characters long (defined to be 1024 in xmlparse.c). We take a
713 really cheesy approach to building the input buffer, because
714 this avoids writing bugs in buffer-filling code.
715 */
716 const char *text
717 = "<e>"
718 /* 64 chars */
719 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
720 /* until we have at least 1024 characters on the line: */
721 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
722 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
723 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
724 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
725 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
726 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
727 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
728 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
729 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
730 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
731 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
732 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
733 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
734 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
735 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
736 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
737 "</e>";
738 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
739 == XML_STATUS_ERROR)
740 xml_failure(g_parser);
741 }
742 END_TEST
743
744 /* Test cdata processing across a buffer boundary */
START_TEST(test_really_long_encoded_lines)745 START_TEST(test_really_long_encoded_lines) {
746 /* As above, except that we want to provoke an output buffer
747 * overflow with a non-trivial encoding. For this we need to pass
748 * the whole cdata in one go, not byte-by-byte.
749 */
750 void *buffer;
751 const char *text
752 = "<?xml version='1.0' encoding='iso-8859-1'?>"
753 "<e>"
754 /* 64 chars */
755 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
756 /* until we have at least 1024 characters on the line: */
757 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
758 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
759 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
760 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
761 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
762 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
763 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
764 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
765 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
766 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
767 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
768 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
769 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
770 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
771 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
772 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
773 "</e>";
774 int parse_len = (int)strlen(text);
775
776 /* Need a cdata handler to provoke the code path we want to test */
777 XML_SetCharacterDataHandler(g_parser, dummy_cdata_handler);
778 buffer = XML_GetBuffer(g_parser, parse_len);
779 if (buffer == NULL)
780 fail("Could not allocate parse buffer");
781 assert(buffer != NULL);
782 memcpy(buffer, text, parse_len);
783 if (XML_ParseBuffer(g_parser, parse_len, XML_TRUE) == XML_STATUS_ERROR)
784 xml_failure(g_parser);
785 }
786 END_TEST
787
788 /*
789 * Element event tests.
790 */
791
START_TEST(test_end_element_events)792 START_TEST(test_end_element_events) {
793 const char *text = "<a><b><c/></b><d><f/></d></a>";
794 const XML_Char *expected = XCS("/c/b/f/d/a");
795 CharData storage;
796
797 CharData_Init(&storage);
798 XML_SetUserData(g_parser, &storage);
799 XML_SetEndElementHandler(g_parser, end_element_event_handler);
800 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
801 == XML_STATUS_ERROR)
802 xml_failure(g_parser);
803 CharData_CheckXMLChars(&storage, expected);
804 }
805 END_TEST
806
807 /*
808 * Attribute tests.
809 */
810
811 /* Helper used by the following tests; this checks any "attr" and "refs"
812 attributes to make sure whitespace has been normalized.
813
814 Return true if whitespace has been normalized in a string, using
815 the rules for attribute value normalization. The 'is_cdata' flag
816 is needed since CDATA attributes don't need to have multiple
817 whitespace characters collapsed to a single space, while other
818 attribute data types do. (Section 3.3.3 of the recommendation.)
819 */
820 static int
is_whitespace_normalized(const XML_Char * s,int is_cdata)821 is_whitespace_normalized(const XML_Char *s, int is_cdata) {
822 int blanks = 0;
823 int at_start = 1;
824 while (*s) {
825 if (*s == XCS(' '))
826 ++blanks;
827 else if (*s == XCS('\t') || *s == XCS('\n') || *s == XCS('\r'))
828 return 0;
829 else {
830 if (at_start) {
831 at_start = 0;
832 if (blanks && ! is_cdata)
833 /* illegal leading blanks */
834 return 0;
835 } else if (blanks > 1 && ! is_cdata)
836 return 0;
837 blanks = 0;
838 }
839 ++s;
840 }
841 if (blanks && ! is_cdata)
842 return 0;
843 return 1;
844 }
845
846 /* Check the attribute whitespace checker: */
START_TEST(test_helper_is_whitespace_normalized)847 START_TEST(test_helper_is_whitespace_normalized) {
848 assert(is_whitespace_normalized(XCS("abc"), 0));
849 assert(is_whitespace_normalized(XCS("abc"), 1));
850 assert(is_whitespace_normalized(XCS("abc def ghi"), 0));
851 assert(is_whitespace_normalized(XCS("abc def ghi"), 1));
852 assert(! is_whitespace_normalized(XCS(" abc def ghi"), 0));
853 assert(is_whitespace_normalized(XCS(" abc def ghi"), 1));
854 assert(! is_whitespace_normalized(XCS("abc def ghi"), 0));
855 assert(is_whitespace_normalized(XCS("abc def ghi"), 1));
856 assert(! is_whitespace_normalized(XCS("abc def ghi "), 0));
857 assert(is_whitespace_normalized(XCS("abc def ghi "), 1));
858 assert(! is_whitespace_normalized(XCS(" "), 0));
859 assert(is_whitespace_normalized(XCS(" "), 1));
860 assert(! is_whitespace_normalized(XCS("\t"), 0));
861 assert(! is_whitespace_normalized(XCS("\t"), 1));
862 assert(! is_whitespace_normalized(XCS("\n"), 0));
863 assert(! is_whitespace_normalized(XCS("\n"), 1));
864 assert(! is_whitespace_normalized(XCS("\r"), 0));
865 assert(! is_whitespace_normalized(XCS("\r"), 1));
866 assert(! is_whitespace_normalized(XCS("abc\t def"), 1));
867 }
868 END_TEST
869
870 static void XMLCALL
check_attr_contains_normalized_whitespace(void * userData,const XML_Char * name,const XML_Char ** atts)871 check_attr_contains_normalized_whitespace(void *userData, const XML_Char *name,
872 const XML_Char **atts) {
873 int i;
874 UNUSED_P(userData);
875 UNUSED_P(name);
876 for (i = 0; atts[i] != NULL; i += 2) {
877 const XML_Char *attrname = atts[i];
878 const XML_Char *value = atts[i + 1];
879 if (xcstrcmp(XCS("attr"), attrname) == 0
880 || xcstrcmp(XCS("ents"), attrname) == 0
881 || xcstrcmp(XCS("refs"), attrname) == 0) {
882 if (! is_whitespace_normalized(value, 0)) {
883 char buffer[256];
884 snprintf(buffer, sizeof(buffer),
885 "attribute value not normalized: %" XML_FMT_STR
886 "='%" XML_FMT_STR "'",
887 attrname, value);
888 fail(buffer);
889 }
890 }
891 }
892 }
893
START_TEST(test_attr_whitespace_normalization)894 START_TEST(test_attr_whitespace_normalization) {
895 const char *text
896 = "<!DOCTYPE doc [\n"
897 " <!ATTLIST doc\n"
898 " attr NMTOKENS #REQUIRED\n"
899 " ents ENTITIES #REQUIRED\n"
900 " refs IDREFS #REQUIRED>\n"
901 "]>\n"
902 "<doc attr=' a b c\t\td\te\t' refs=' id-1 \t id-2\t\t' \n"
903 " ents=' ent-1 \t\r\n"
904 " ent-2 ' >\n"
905 " <e id='id-1'/>\n"
906 " <e id='id-2'/>\n"
907 "</doc>";
908
909 XML_SetStartElementHandler(g_parser,
910 check_attr_contains_normalized_whitespace);
911 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
912 == XML_STATUS_ERROR)
913 xml_failure(g_parser);
914 }
915 END_TEST
916
917 /*
918 * XML declaration tests.
919 */
920
START_TEST(test_xmldecl_misplaced)921 START_TEST(test_xmldecl_misplaced) {
922 expect_failure("\n"
923 "<?xml version='1.0'?>\n"
924 "<a/>",
925 XML_ERROR_MISPLACED_XML_PI,
926 "failed to report misplaced XML declaration");
927 }
928 END_TEST
929
START_TEST(test_xmldecl_invalid)930 START_TEST(test_xmldecl_invalid) {
931 expect_failure("<?xml version='1.0' \xc3\xa7?>\n<doc/>", XML_ERROR_XML_DECL,
932 "Failed to report invalid XML declaration");
933 }
934 END_TEST
935
START_TEST(test_xmldecl_missing_attr)936 START_TEST(test_xmldecl_missing_attr) {
937 expect_failure("<?xml ='1.0'?>\n<doc/>\n", XML_ERROR_XML_DECL,
938 "Failed to report missing XML declaration attribute");
939 }
940 END_TEST
941
START_TEST(test_xmldecl_missing_value)942 START_TEST(test_xmldecl_missing_value) {
943 expect_failure("<?xml version='1.0' encoding='us-ascii' standalone?>\n"
944 "<doc/>",
945 XML_ERROR_XML_DECL,
946 "Failed to report missing attribute value");
947 }
948 END_TEST
949
950 /* Regression test for SF bug #584832. */
START_TEST(test_unknown_encoding_internal_entity)951 START_TEST(test_unknown_encoding_internal_entity) {
952 const char *text = "<?xml version='1.0' encoding='unsupported-encoding'?>\n"
953 "<!DOCTYPE test [<!ENTITY foo 'bar'>]>\n"
954 "<test a='&foo;'/>";
955
956 XML_SetUnknownEncodingHandler(g_parser, UnknownEncodingHandler, NULL);
957 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
958 == XML_STATUS_ERROR)
959 xml_failure(g_parser);
960 }
961 END_TEST
962
963 /* Test unrecognised encoding handler */
START_TEST(test_unrecognised_encoding_internal_entity)964 START_TEST(test_unrecognised_encoding_internal_entity) {
965 const char *text = "<?xml version='1.0' encoding='unsupported-encoding'?>\n"
966 "<!DOCTYPE test [<!ENTITY foo 'bar'>]>\n"
967 "<test a='&foo;'/>";
968
969 XML_SetUnknownEncodingHandler(g_parser, UnrecognisedEncodingHandler, NULL);
970 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
971 != XML_STATUS_ERROR)
972 fail("Unrecognised encoding not rejected");
973 }
974 END_TEST
975
976 /* Regression test for SF bug #620106. */
START_TEST(test_ext_entity_set_encoding)977 START_TEST(test_ext_entity_set_encoding) {
978 const char *text = "<!DOCTYPE doc [\n"
979 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
980 "]>\n"
981 "<doc>&en;</doc>";
982 ExtTest test_data
983 = {/* This text says it's an unsupported encoding, but it's really
984 UTF-8, which we tell Expat using XML_SetEncoding().
985 */
986 "<?xml encoding='iso-8859-3'?>\xC3\xA9", XCS("utf-8"), NULL};
987 #ifdef XML_UNICODE
988 const XML_Char *expected = XCS("\x00e9");
989 #else
990 const XML_Char *expected = XCS("\xc3\xa9");
991 #endif
992
993 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
994 run_ext_character_check(text, &test_data, expected);
995 }
996 END_TEST
997
998 /* Test external entities with no handler */
START_TEST(test_ext_entity_no_handler)999 START_TEST(test_ext_entity_no_handler) {
1000 const char *text = "<!DOCTYPE doc [\n"
1001 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
1002 "]>\n"
1003 "<doc>&en;</doc>";
1004
1005 XML_SetDefaultHandler(g_parser, dummy_default_handler);
1006 run_character_check(text, XCS(""));
1007 }
1008 END_TEST
1009
1010 /* Test UTF-8 BOM is accepted */
START_TEST(test_ext_entity_set_bom)1011 START_TEST(test_ext_entity_set_bom) {
1012 const char *text = "<!DOCTYPE doc [\n"
1013 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
1014 "]>\n"
1015 "<doc>&en;</doc>";
1016 ExtTest test_data = {"\xEF\xBB\xBF" /* BOM */
1017 "<?xml encoding='iso-8859-3'?>"
1018 "\xC3\xA9",
1019 XCS("utf-8"), NULL};
1020 #ifdef XML_UNICODE
1021 const XML_Char *expected = XCS("\x00e9");
1022 #else
1023 const XML_Char *expected = XCS("\xc3\xa9");
1024 #endif
1025
1026 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1027 run_ext_character_check(text, &test_data, expected);
1028 }
1029 END_TEST
1030
1031 /* Test that bad encodings are faulted */
START_TEST(test_ext_entity_bad_encoding)1032 START_TEST(test_ext_entity_bad_encoding) {
1033 const char *text = "<!DOCTYPE doc [\n"
1034 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
1035 "]>\n"
1036 "<doc>&en;</doc>";
1037 ExtFaults fault
1038 = {"<?xml encoding='iso-8859-3'?>u", "Unsupported encoding not faulted",
1039 XCS("unknown"), XML_ERROR_UNKNOWN_ENCODING};
1040
1041 XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
1042 XML_SetUserData(g_parser, &fault);
1043 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
1044 "Bad encoding should not have been accepted");
1045 }
1046 END_TEST
1047
1048 /* Try handing an invalid encoding to an external entity parser */
START_TEST(test_ext_entity_bad_encoding_2)1049 START_TEST(test_ext_entity_bad_encoding_2) {
1050 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
1051 "<!DOCTYPE doc SYSTEM 'foo'>\n"
1052 "<doc>&entity;</doc>";
1053 ExtFaults fault
1054 = {"<!ELEMENT doc (#PCDATA)*>", "Unknown encoding not faulted",
1055 XCS("unknown-encoding"), XML_ERROR_UNKNOWN_ENCODING};
1056
1057 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
1058 XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
1059 XML_SetUserData(g_parser, &fault);
1060 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
1061 "Bad encoding not faulted in external entity handler");
1062 }
1063 END_TEST
1064
1065 /* Test that no error is reported for unknown entities if we don't
1066 read an external subset. This was fixed in Expat 1.95.5.
1067 */
START_TEST(test_wfc_undeclared_entity_unread_external_subset)1068 START_TEST(test_wfc_undeclared_entity_unread_external_subset) {
1069 const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n"
1070 "<doc>&entity;</doc>";
1071
1072 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1073 == XML_STATUS_ERROR)
1074 xml_failure(g_parser);
1075 }
1076 END_TEST
1077
1078 /* Test that an error is reported for unknown entities if we don't
1079 have an external subset.
1080 */
START_TEST(test_wfc_undeclared_entity_no_external_subset)1081 START_TEST(test_wfc_undeclared_entity_no_external_subset) {
1082 expect_failure("<doc>&entity;</doc>", XML_ERROR_UNDEFINED_ENTITY,
1083 "Parser did not report undefined entity w/out a DTD.");
1084 }
1085 END_TEST
1086
1087 /* Test that an error is reported for unknown entities if we don't
1088 read an external subset, but have been declared standalone.
1089 */
START_TEST(test_wfc_undeclared_entity_standalone)1090 START_TEST(test_wfc_undeclared_entity_standalone) {
1091 const char *text
1092 = "<?xml version='1.0' encoding='us-ascii' standalone='yes'?>\n"
1093 "<!DOCTYPE doc SYSTEM 'foo'>\n"
1094 "<doc>&entity;</doc>";
1095
1096 expect_failure(text, XML_ERROR_UNDEFINED_ENTITY,
1097 "Parser did not report undefined entity (standalone).");
1098 }
1099 END_TEST
1100
1101 /* Test that an error is reported for unknown entities if we have read
1102 an external subset, and standalone is true.
1103 */
START_TEST(test_wfc_undeclared_entity_with_external_subset_standalone)1104 START_TEST(test_wfc_undeclared_entity_with_external_subset_standalone) {
1105 const char *text
1106 = "<?xml version='1.0' encoding='us-ascii' standalone='yes'?>\n"
1107 "<!DOCTYPE doc SYSTEM 'foo'>\n"
1108 "<doc>&entity;</doc>";
1109 ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
1110
1111 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
1112 XML_SetUserData(g_parser, &test_data);
1113 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1114 expect_failure(text, XML_ERROR_UNDEFINED_ENTITY,
1115 "Parser did not report undefined entity (external DTD).");
1116 }
1117 END_TEST
1118
1119 /* Test that external entity handling is not done if the parsing flag
1120 * is set to UNLESS_STANDALONE
1121 */
START_TEST(test_entity_with_external_subset_unless_standalone)1122 START_TEST(test_entity_with_external_subset_unless_standalone) {
1123 const char *text
1124 = "<?xml version='1.0' encoding='us-ascii' standalone='yes'?>\n"
1125 "<!DOCTYPE doc SYSTEM 'foo'>\n"
1126 "<doc>&entity;</doc>";
1127 ExtTest test_data = {"<!ENTITY entity 'bar'>", NULL, NULL};
1128
1129 XML_SetParamEntityParsing(g_parser,
1130 XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE);
1131 XML_SetUserData(g_parser, &test_data);
1132 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1133 expect_failure(text, XML_ERROR_UNDEFINED_ENTITY,
1134 "Parser did not report undefined entity");
1135 }
1136 END_TEST
1137
1138 /* Test that no error is reported for unknown entities if we have read
1139 an external subset, and standalone is false.
1140 */
START_TEST(test_wfc_undeclared_entity_with_external_subset)1141 START_TEST(test_wfc_undeclared_entity_with_external_subset) {
1142 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
1143 "<!DOCTYPE doc SYSTEM 'foo'>\n"
1144 "<doc>&entity;</doc>";
1145 ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
1146
1147 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
1148 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1149 run_ext_character_check(text, &test_data, XCS(""));
1150 }
1151 END_TEST
1152
1153 /* Test that an error is reported if our NotStandalone handler fails */
START_TEST(test_not_standalone_handler_reject)1154 START_TEST(test_not_standalone_handler_reject) {
1155 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
1156 "<!DOCTYPE doc SYSTEM 'foo'>\n"
1157 "<doc>&entity;</doc>";
1158 ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
1159
1160 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
1161 XML_SetUserData(g_parser, &test_data);
1162 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1163 XML_SetNotStandaloneHandler(g_parser, reject_not_standalone_handler);
1164 expect_failure(text, XML_ERROR_NOT_STANDALONE,
1165 "NotStandalone handler failed to reject");
1166
1167 /* Try again but without external entity handling */
1168 XML_ParserReset(g_parser, NULL);
1169 XML_SetNotStandaloneHandler(g_parser, reject_not_standalone_handler);
1170 expect_failure(text, XML_ERROR_NOT_STANDALONE,
1171 "NotStandalone handler failed to reject");
1172 }
1173 END_TEST
1174
1175 /* Test that no error is reported if our NotStandalone handler succeeds */
START_TEST(test_not_standalone_handler_accept)1176 START_TEST(test_not_standalone_handler_accept) {
1177 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
1178 "<!DOCTYPE doc SYSTEM 'foo'>\n"
1179 "<doc>&entity;</doc>";
1180 ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
1181
1182 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
1183 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1184 XML_SetNotStandaloneHandler(g_parser, accept_not_standalone_handler);
1185 run_ext_character_check(text, &test_data, XCS(""));
1186
1187 /* Repeat without the external entity handler */
1188 XML_ParserReset(g_parser, NULL);
1189 XML_SetNotStandaloneHandler(g_parser, accept_not_standalone_handler);
1190 run_character_check(text, XCS(""));
1191 }
1192 END_TEST
1193
START_TEST(test_wfc_no_recursive_entity_refs)1194 START_TEST(test_wfc_no_recursive_entity_refs) {
1195 const char *text = "<!DOCTYPE doc [\n"
1196 " <!ENTITY entity '&entity;'>\n"
1197 "]>\n"
1198 "<doc>&entity;</doc>";
1199
1200 expect_failure(text, XML_ERROR_RECURSIVE_ENTITY_REF,
1201 "Parser did not report recursive entity reference.");
1202 }
1203 END_TEST
1204
START_TEST(test_recursive_external_parameter_entity_2)1205 START_TEST(test_recursive_external_parameter_entity_2) {
1206 struct TestCase {
1207 const char *doc;
1208 enum XML_Status expectedStatus;
1209 };
1210
1211 struct TestCase cases[] = {
1212 {"<!ENTITY % p1 '%p1;'>", XML_STATUS_ERROR},
1213 {"<!ENTITY % p1 '%p1;'>"
1214 "<!ENTITY % p1 'first declaration wins'>",
1215 XML_STATUS_ERROR},
1216 {"<!ENTITY % p1 'first declaration wins'>"
1217 "<!ENTITY % p1 '%p1;'>",
1218 XML_STATUS_OK},
1219 {"<!ENTITY % p1 '%p1;'>", XML_STATUS_OK},
1220 };
1221
1222 for (size_t i = 0; i < sizeof(cases) / sizeof(cases[0]); i++) {
1223 const char *const doc = cases[i].doc;
1224 const enum XML_Status expectedStatus = cases[i].expectedStatus;
1225 set_subtest("%s", doc);
1226
1227 XML_Parser parser = XML_ParserCreate(NULL);
1228 assert_true(parser != NULL);
1229
1230 XML_Parser ext_parser = XML_ExternalEntityParserCreate(parser, NULL, NULL);
1231 assert_true(ext_parser != NULL);
1232
1233 const enum XML_Status actualStatus
1234 = _XML_Parse_SINGLE_BYTES(ext_parser, doc, (int)strlen(doc), XML_TRUE);
1235
1236 assert_true(actualStatus == expectedStatus);
1237 if (actualStatus != XML_STATUS_OK) {
1238 assert_true(XML_GetErrorCode(ext_parser)
1239 == XML_ERROR_RECURSIVE_ENTITY_REF);
1240 }
1241
1242 XML_ParserFree(ext_parser);
1243 XML_ParserFree(parser);
1244 }
1245 }
1246 END_TEST
1247
1248 /* Test incomplete external entities are faulted */
START_TEST(test_ext_entity_invalid_parse)1249 START_TEST(test_ext_entity_invalid_parse) {
1250 const char *text = "<!DOCTYPE doc [\n"
1251 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
1252 "]>\n"
1253 "<doc>&en;</doc>";
1254 const ExtFaults faults[]
1255 = {{"<", "Incomplete element declaration not faulted", NULL,
1256 XML_ERROR_UNCLOSED_TOKEN},
1257 {"<\xe2\x82", /* First two bytes of a three-byte char */
1258 "Incomplete character not faulted", NULL, XML_ERROR_PARTIAL_CHAR},
1259 {"<tag>\xe2\x82", "Incomplete character in CDATA not faulted", NULL,
1260 XML_ERROR_PARTIAL_CHAR},
1261 {NULL, NULL, NULL, XML_ERROR_NONE}};
1262 const ExtFaults *fault = faults;
1263
1264 for (; fault->parse_text != NULL; fault++) {
1265 set_subtest("\"%s\"", fault->parse_text);
1266 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
1267 XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
1268 XML_SetUserData(g_parser, (void *)fault);
1269 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
1270 "Parser did not report external entity error");
1271 XML_ParserReset(g_parser, NULL);
1272 }
1273 }
1274 END_TEST
1275
1276 /* Regression test for SF bug #483514. */
START_TEST(test_dtd_default_handling)1277 START_TEST(test_dtd_default_handling) {
1278 const char *text = "<!DOCTYPE doc [\n"
1279 "<!ENTITY e SYSTEM 'http://example.org/e'>\n"
1280 "<!NOTATION n SYSTEM 'http://example.org/n'>\n"
1281 "<!ELEMENT doc EMPTY>\n"
1282 "<!ATTLIST doc a CDATA #IMPLIED>\n"
1283 "<?pi in dtd?>\n"
1284 "<!--comment in dtd-->\n"
1285 "]><doc/>";
1286
1287 XML_SetDefaultHandler(g_parser, accumulate_characters);
1288 XML_SetStartDoctypeDeclHandler(g_parser, dummy_start_doctype_handler);
1289 XML_SetEndDoctypeDeclHandler(g_parser, dummy_end_doctype_handler);
1290 XML_SetEntityDeclHandler(g_parser, dummy_entity_decl_handler);
1291 XML_SetNotationDeclHandler(g_parser, dummy_notation_decl_handler);
1292 XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
1293 XML_SetAttlistDeclHandler(g_parser, dummy_attlist_decl_handler);
1294 XML_SetProcessingInstructionHandler(g_parser, dummy_pi_handler);
1295 XML_SetCommentHandler(g_parser, dummy_comment_handler);
1296 XML_SetStartCdataSectionHandler(g_parser, dummy_start_cdata_handler);
1297 XML_SetEndCdataSectionHandler(g_parser, dummy_end_cdata_handler);
1298 run_character_check(text, XCS("\n\n\n\n\n\n\n<doc/>"));
1299 }
1300 END_TEST
1301
1302 /* Test handling of attribute declarations */
START_TEST(test_dtd_attr_handling)1303 START_TEST(test_dtd_attr_handling) {
1304 const char *prolog = "<!DOCTYPE doc [\n"
1305 "<!ELEMENT doc EMPTY>\n";
1306 AttTest attr_data[]
1307 = {{"<!ATTLIST doc a ( one | two | three ) #REQUIRED>\n"
1308 "]>"
1309 "<doc a='two'/>",
1310 XCS("doc"), XCS("a"),
1311 XCS("(one|two|three)"), /* Extraneous spaces will be removed */
1312 NULL, XML_TRUE},
1313 {"<!NOTATION foo SYSTEM 'http://example.org/foo'>\n"
1314 "<!ATTLIST doc a NOTATION (foo) #IMPLIED>\n"
1315 "]>"
1316 "<doc/>",
1317 XCS("doc"), XCS("a"), XCS("NOTATION(foo)"), NULL, XML_FALSE},
1318 {"<!ATTLIST doc a NOTATION (foo) 'bar'>\n"
1319 "]>"
1320 "<doc/>",
1321 XCS("doc"), XCS("a"), XCS("NOTATION(foo)"), XCS("bar"), XML_FALSE},
1322 {"<!ATTLIST doc a CDATA '\xdb\xb2'>\n"
1323 "]>"
1324 "<doc/>",
1325 XCS("doc"), XCS("a"), XCS("CDATA"),
1326 #ifdef XML_UNICODE
1327 XCS("\x06f2"),
1328 #else
1329 XCS("\xdb\xb2"),
1330 #endif
1331 XML_FALSE},
1332 {NULL, NULL, NULL, NULL, NULL, XML_FALSE}};
1333 AttTest *test;
1334
1335 for (test = attr_data; test->definition != NULL; test++) {
1336 set_subtest("%s", test->definition);
1337 XML_SetAttlistDeclHandler(g_parser, verify_attlist_decl_handler);
1338 XML_SetUserData(g_parser, test);
1339 if (_XML_Parse_SINGLE_BYTES(g_parser, prolog, (int)strlen(prolog),
1340 XML_FALSE)
1341 == XML_STATUS_ERROR)
1342 xml_failure(g_parser);
1343 if (_XML_Parse_SINGLE_BYTES(g_parser, test->definition,
1344 (int)strlen(test->definition), XML_TRUE)
1345 == XML_STATUS_ERROR)
1346 xml_failure(g_parser);
1347 XML_ParserReset(g_parser, NULL);
1348 }
1349 }
1350 END_TEST
1351
1352 /* See related SF bug #673791.
1353 When namespace processing is enabled, setting the namespace URI for
1354 a prefix is not allowed; this test ensures that it *is* allowed
1355 when namespace processing is not enabled.
1356 (See Namespaces in XML, section 2.)
1357 */
START_TEST(test_empty_ns_without_namespaces)1358 START_TEST(test_empty_ns_without_namespaces) {
1359 const char *text = "<doc xmlns:prefix='http://example.org/'>\n"
1360 " <e xmlns:prefix=''/>\n"
1361 "</doc>";
1362
1363 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1364 == XML_STATUS_ERROR)
1365 xml_failure(g_parser);
1366 }
1367 END_TEST
1368
1369 /* Regression test for SF bug #824420.
1370 Checks that an xmlns:prefix attribute set in an attribute's default
1371 value isn't misinterpreted.
1372 */
START_TEST(test_ns_in_attribute_default_without_namespaces)1373 START_TEST(test_ns_in_attribute_default_without_namespaces) {
1374 const char *text = "<!DOCTYPE e:element [\n"
1375 " <!ATTLIST e:element\n"
1376 " xmlns:e CDATA 'http://example.org/'>\n"
1377 " ]>\n"
1378 "<e:element/>";
1379
1380 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1381 == XML_STATUS_ERROR)
1382 xml_failure(g_parser);
1383 }
1384 END_TEST
1385
1386 /* Regression test for SF bug #1515266: missing check of stopped
1387 parser in doContext() 'for' loop. */
START_TEST(test_stop_parser_between_char_data_calls)1388 START_TEST(test_stop_parser_between_char_data_calls) {
1389 /* The sample data must be big enough that there are two calls to
1390 the character data handler from within the inner "for" loop of
1391 the XML_TOK_DATA_CHARS case in doContent(), and the character
1392 handler must stop the parser and clear the character data
1393 handler.
1394 */
1395 const char *text = long_character_data_text;
1396
1397 XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
1398 g_resumable = XML_FALSE;
1399 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1400 != XML_STATUS_ERROR)
1401 xml_failure(g_parser);
1402 if (XML_GetErrorCode(g_parser) != XML_ERROR_ABORTED)
1403 xml_failure(g_parser);
1404 }
1405 END_TEST
1406
1407 /* Regression test for SF bug #1515266: missing check of stopped
1408 parser in doContext() 'for' loop. */
START_TEST(test_suspend_parser_between_char_data_calls)1409 START_TEST(test_suspend_parser_between_char_data_calls) {
1410 /* The sample data must be big enough that there are two calls to
1411 the character data handler from within the inner "for" loop of
1412 the XML_TOK_DATA_CHARS case in doContent(), and the character
1413 handler must stop the parser and clear the character data
1414 handler.
1415 */
1416 const char *text = long_character_data_text;
1417
1418 XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
1419 g_resumable = XML_TRUE;
1420 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1421 != XML_STATUS_SUSPENDED)
1422 xml_failure(g_parser);
1423 if (XML_GetErrorCode(g_parser) != XML_ERROR_NONE)
1424 xml_failure(g_parser);
1425 /* Try parsing directly */
1426 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1427 != XML_STATUS_ERROR)
1428 fail("Attempt to continue parse while suspended not faulted");
1429 if (XML_GetErrorCode(g_parser) != XML_ERROR_SUSPENDED)
1430 fail("Suspended parse not faulted with correct error");
1431 }
1432 END_TEST
1433
1434 /* Test repeated calls to XML_StopParser are handled correctly */
START_TEST(test_repeated_stop_parser_between_char_data_calls)1435 START_TEST(test_repeated_stop_parser_between_char_data_calls) {
1436 const char *text = long_character_data_text;
1437
1438 XML_SetCharacterDataHandler(g_parser, parser_stop_character_handler);
1439 g_resumable = XML_FALSE;
1440 g_abortable = XML_FALSE;
1441 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1442 != XML_STATUS_ERROR)
1443 fail("Failed to double-stop parser");
1444
1445 XML_ParserReset(g_parser, NULL);
1446 XML_SetCharacterDataHandler(g_parser, parser_stop_character_handler);
1447 g_resumable = XML_TRUE;
1448 g_abortable = XML_FALSE;
1449 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1450 != XML_STATUS_SUSPENDED)
1451 fail("Failed to double-suspend parser");
1452
1453 XML_ParserReset(g_parser, NULL);
1454 XML_SetCharacterDataHandler(g_parser, parser_stop_character_handler);
1455 g_resumable = XML_TRUE;
1456 g_abortable = XML_TRUE;
1457 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1458 != XML_STATUS_ERROR)
1459 fail("Failed to suspend-abort parser");
1460 }
1461 END_TEST
1462
START_TEST(test_good_cdata_ascii)1463 START_TEST(test_good_cdata_ascii) {
1464 const char *text = "<a><![CDATA[<greeting>Hello, world!</greeting>]]></a>";
1465 const XML_Char *expected = XCS("<greeting>Hello, world!</greeting>");
1466
1467 CharData storage;
1468 CharData_Init(&storage);
1469 XML_SetUserData(g_parser, &storage);
1470 XML_SetCharacterDataHandler(g_parser, accumulate_characters);
1471 /* Add start and end handlers for coverage */
1472 XML_SetStartCdataSectionHandler(g_parser, dummy_start_cdata_handler);
1473 XML_SetEndCdataSectionHandler(g_parser, dummy_end_cdata_handler);
1474
1475 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1476 == XML_STATUS_ERROR)
1477 xml_failure(g_parser);
1478 CharData_CheckXMLChars(&storage, expected);
1479
1480 /* Try again, this time with a default handler */
1481 XML_ParserReset(g_parser, NULL);
1482 CharData_Init(&storage);
1483 XML_SetUserData(g_parser, &storage);
1484 XML_SetCharacterDataHandler(g_parser, accumulate_characters);
1485 XML_SetDefaultHandler(g_parser, dummy_default_handler);
1486
1487 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1488 == XML_STATUS_ERROR)
1489 xml_failure(g_parser);
1490 CharData_CheckXMLChars(&storage, expected);
1491 }
1492 END_TEST
1493
START_TEST(test_good_cdata_utf16)1494 START_TEST(test_good_cdata_utf16) {
1495 /* Test data is:
1496 * <?xml version='1.0' encoding='utf-16'?>
1497 * <a><![CDATA[hello]]></a>
1498 */
1499 const char text[]
1500 = "\0<\0?\0x\0m\0l\0"
1501 " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
1502 " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0"
1503 "1\0"
1504 "6\0'"
1505 "\0?\0>\0\n"
1506 "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0[\0h\0e\0l\0l\0o\0]\0]\0>\0<\0/\0a\0>";
1507 const XML_Char *expected = XCS("hello");
1508
1509 CharData storage;
1510 CharData_Init(&storage);
1511 XML_SetUserData(g_parser, &storage);
1512 XML_SetCharacterDataHandler(g_parser, accumulate_characters);
1513
1514 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
1515 == XML_STATUS_ERROR)
1516 xml_failure(g_parser);
1517 CharData_CheckXMLChars(&storage, expected);
1518 }
1519 END_TEST
1520
START_TEST(test_good_cdata_utf16_le)1521 START_TEST(test_good_cdata_utf16_le) {
1522 /* Test data is:
1523 * <?xml version='1.0' encoding='utf-16'?>
1524 * <a><![CDATA[hello]]></a>
1525 */
1526 const char text[]
1527 = "<\0?\0x\0m\0l\0"
1528 " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
1529 " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0"
1530 "1\0"
1531 "6\0'"
1532 "\0?\0>\0\n"
1533 "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0[\0h\0e\0l\0l\0o\0]\0]\0>\0<\0/\0a\0>\0";
1534 const XML_Char *expected = XCS("hello");
1535
1536 CharData storage;
1537 CharData_Init(&storage);
1538 XML_SetUserData(g_parser, &storage);
1539 XML_SetCharacterDataHandler(g_parser, accumulate_characters);
1540
1541 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
1542 == XML_STATUS_ERROR)
1543 xml_failure(g_parser);
1544 CharData_CheckXMLChars(&storage, expected);
1545 }
1546 END_TEST
1547
1548 /* Test UTF16 conversion of a long cdata string */
1549
/* 16 characters: handy macro to reduce visual clutter.  Expands to the
   letters 'A' through 'P', each written as a zero byte followed by the
   ASCII byte, i.e. 32 bytes per use. */
#define A_TO_P_IN_UTF16 "\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M\0N\0O\0P"
1552
START_TEST(test_long_cdata_utf16)1553 START_TEST(test_long_cdata_utf16) {
1554 /* Test data is:
1555 * <?xlm version='1.0' encoding='utf-16'?>
1556 * <a><![CDATA[
1557 * ABCDEFGHIJKLMNOP
1558 * ]]></a>
1559 */
1560 const char text[]
1561 = "\0<\0?\0x\0m\0l\0 "
1562 "\0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0 "
1563 "\0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0\x31\0\x36\0'\0?\0>"
1564 "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0["
1565 /* 64 characters per line */
1566 /* clang-format off */
1567 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1568 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1569 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1570 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1571 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1572 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1573 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1574 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1575 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1576 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1577 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1578 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1579 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1580 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1581 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1582 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
1583 A_TO_P_IN_UTF16
1584 /* clang-format on */
1585 "\0]\0]\0>\0<\0/\0a\0>";
1586 const XML_Char *expected =
1587 /* clang-format off */
1588 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1589 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1590 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1591 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1592 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1593 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1594 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1595 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1596 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1597 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1598 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1599 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1600 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1601 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1602 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1603 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1604 XCS("ABCDEFGHIJKLMNOP");
1605 /* clang-format on */
1606 CharData storage;
1607 void *buffer;
1608
1609 CharData_Init(&storage);
1610 XML_SetUserData(g_parser, &storage);
1611 XML_SetCharacterDataHandler(g_parser, accumulate_characters);
1612 buffer = XML_GetBuffer(g_parser, sizeof(text) - 1);
1613 if (buffer == NULL)
1614 fail("Could not allocate parse buffer");
1615 assert(buffer != NULL);
1616 memcpy(buffer, text, sizeof(text) - 1);
1617 if (XML_ParseBuffer(g_parser, sizeof(text) - 1, XML_TRUE) == XML_STATUS_ERROR)
1618 xml_failure(g_parser);
1619 CharData_CheckXMLChars(&storage, expected);
1620 }
1621 END_TEST
1622
1623 /* Test handling of multiple unit UTF-16 characters */
START_TEST(test_multichar_cdata_utf16)1624 START_TEST(test_multichar_cdata_utf16) {
1625 /* Test data is:
1626 * <?xml version='1.0' encoding='utf-16'?>
1627 * <a><![CDATA[{MINIM}{CROTCHET}]]></a>
1628 *
1629 * where {MINIM} is U+1d15e (a minim or half-note)
1630 * UTF-16: 0xd834 0xdd5e
1631 * UTF-8: 0xf0 0x9d 0x85 0x9e
1632 * and {CROTCHET} is U+1d15f (a crotchet or quarter-note)
1633 * UTF-16: 0xd834 0xdd5f
1634 * UTF-8: 0xf0 0x9d 0x85 0x9f
1635 */
1636 const char text[] = "\0<\0?\0x\0m\0l\0"
1637 " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
1638 " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0"
1639 "1\0"
1640 "6\0'"
1641 "\0?\0>\0\n"
1642 "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0["
1643 "\xd8\x34\xdd\x5e\xd8\x34\xdd\x5f"
1644 "\0]\0]\0>\0<\0/\0a\0>";
1645 #ifdef XML_UNICODE
1646 const XML_Char *expected = XCS("\xd834\xdd5e\xd834\xdd5f");
1647 #else
1648 const XML_Char *expected = XCS("\xf0\x9d\x85\x9e\xf0\x9d\x85\x9f");
1649 #endif
1650 CharData storage;
1651
1652 CharData_Init(&storage);
1653 XML_SetUserData(g_parser, &storage);
1654 XML_SetCharacterDataHandler(g_parser, accumulate_characters);
1655
1656 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
1657 == XML_STATUS_ERROR)
1658 xml_failure(g_parser);
1659 CharData_CheckXMLChars(&storage, expected);
1660 }
1661 END_TEST
1662
1663 /* Test that an element name with a UTF-16 surrogate pair is rejected */
START_TEST(test_utf16_bad_surrogate_pair)1664 START_TEST(test_utf16_bad_surrogate_pair) {
1665 /* Test data is:
1666 * <?xml version='1.0' encoding='utf-16'?>
1667 * <a><![CDATA[{BADLINB}]]></a>
1668 *
1669 * where {BADLINB} is U+10000 (the first Linear B character)
1670 * with the UTF-16 surrogate pair in the wrong order, i.e.
1671 * 0xdc00 0xd800
1672 */
1673 const char text[] = "\0<\0?\0x\0m\0l\0"
1674 " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
1675 " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0"
1676 "1\0"
1677 "6\0'"
1678 "\0?\0>\0\n"
1679 "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0["
1680 "\xdc\x00\xd8\x00"
1681 "\0]\0]\0>\0<\0/\0a\0>";
1682
1683 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
1684 != XML_STATUS_ERROR)
1685 fail("Reversed UTF-16 surrogate pair not faulted");
1686 if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN)
1687 xml_failure(g_parser);
1688 }
1689 END_TEST
1690
START_TEST(test_bad_cdata)1691 START_TEST(test_bad_cdata) {
1692 struct CaseData {
1693 const char *text;
1694 enum XML_Error expectedError;
1695 };
1696
1697 struct CaseData cases[]
1698 = {{"<a><", XML_ERROR_UNCLOSED_TOKEN},
1699 {"<a><!", XML_ERROR_UNCLOSED_TOKEN},
1700 {"<a><![", XML_ERROR_UNCLOSED_TOKEN},
1701 {"<a><![C", XML_ERROR_UNCLOSED_TOKEN},
1702 {"<a><![CD", XML_ERROR_UNCLOSED_TOKEN},
1703 {"<a><![CDA", XML_ERROR_UNCLOSED_TOKEN},
1704 {"<a><![CDAT", XML_ERROR_UNCLOSED_TOKEN},
1705 {"<a><![CDATA", XML_ERROR_UNCLOSED_TOKEN},
1706
1707 {"<a><![CDATA[", XML_ERROR_UNCLOSED_CDATA_SECTION},
1708 {"<a><![CDATA[]", XML_ERROR_UNCLOSED_CDATA_SECTION},
1709 {"<a><![CDATA[]]", XML_ERROR_UNCLOSED_CDATA_SECTION},
1710
1711 {"<a><!<a/>", XML_ERROR_INVALID_TOKEN},
1712 {"<a><![<a/>", XML_ERROR_UNCLOSED_TOKEN}, /* ?! */
1713 {"<a><![C<a/>", XML_ERROR_UNCLOSED_TOKEN}, /* ?! */
1714 {"<a><![CD<a/>", XML_ERROR_INVALID_TOKEN},
1715 {"<a><![CDA<a/>", XML_ERROR_INVALID_TOKEN},
1716 {"<a><![CDAT<a/>", XML_ERROR_INVALID_TOKEN},
1717 {"<a><![CDATA<a/>", XML_ERROR_INVALID_TOKEN},
1718
1719 {"<a><![CDATA[<a/>", XML_ERROR_UNCLOSED_CDATA_SECTION},
1720 {"<a><![CDATA[]<a/>", XML_ERROR_UNCLOSED_CDATA_SECTION},
1721 {"<a><![CDATA[]]<a/>", XML_ERROR_UNCLOSED_CDATA_SECTION}};
1722
1723 size_t i = 0;
1724 for (; i < sizeof(cases) / sizeof(struct CaseData); i++) {
1725 set_subtest("%s", cases[i].text);
1726 const enum XML_Status actualStatus = _XML_Parse_SINGLE_BYTES(
1727 g_parser, cases[i].text, (int)strlen(cases[i].text), XML_TRUE);
1728 const enum XML_Error actualError = XML_GetErrorCode(g_parser);
1729
1730 assert(actualStatus == XML_STATUS_ERROR);
1731
1732 if (actualError != cases[i].expectedError) {
1733 char message[100];
1734 snprintf(message, sizeof(message),
1735 "Expected error %d but got error %d for case %u: \"%s\"\n",
1736 cases[i].expectedError, actualError, (unsigned int)i + 1,
1737 cases[i].text);
1738 fail(message);
1739 }
1740
1741 XML_ParserReset(g_parser, NULL);
1742 }
1743 }
1744 END_TEST
1745
1746 /* Test failures in UTF-16 CDATA */
START_TEST(test_bad_cdata_utf16)1747 START_TEST(test_bad_cdata_utf16) {
1748 struct CaseData {
1749 size_t text_bytes;
1750 const char *text;
1751 enum XML_Error expected_error;
1752 };
1753
1754 const char prolog[] = "\0<\0?\0x\0m\0l\0"
1755 " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
1756 " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0"
1757 "1\0"
1758 "6\0'"
1759 "\0?\0>\0\n"
1760 "\0<\0a\0>";
1761 struct CaseData cases[] = {
1762 {1, "\0", XML_ERROR_UNCLOSED_TOKEN},
1763 {2, "\0<", XML_ERROR_UNCLOSED_TOKEN},
1764 {3, "\0<\0", XML_ERROR_UNCLOSED_TOKEN},
1765 {4, "\0<\0!", XML_ERROR_UNCLOSED_TOKEN},
1766 {5, "\0<\0!\0", XML_ERROR_UNCLOSED_TOKEN},
1767 {6, "\0<\0!\0[", XML_ERROR_UNCLOSED_TOKEN},
1768 {7, "\0<\0!\0[\0", XML_ERROR_UNCLOSED_TOKEN},
1769 {8, "\0<\0!\0[\0C", XML_ERROR_UNCLOSED_TOKEN},
1770 {9, "\0<\0!\0[\0C\0", XML_ERROR_UNCLOSED_TOKEN},
1771 {10, "\0<\0!\0[\0C\0D", XML_ERROR_UNCLOSED_TOKEN},
1772 {11, "\0<\0!\0[\0C\0D\0", XML_ERROR_UNCLOSED_TOKEN},
1773 {12, "\0<\0!\0[\0C\0D\0A", XML_ERROR_UNCLOSED_TOKEN},
1774 {13, "\0<\0!\0[\0C\0D\0A\0", XML_ERROR_UNCLOSED_TOKEN},
1775 {14, "\0<\0!\0[\0C\0D\0A\0T", XML_ERROR_UNCLOSED_TOKEN},
1776 {15, "\0<\0!\0[\0C\0D\0A\0T\0", XML_ERROR_UNCLOSED_TOKEN},
1777 {16, "\0<\0!\0[\0C\0D\0A\0T\0A", XML_ERROR_UNCLOSED_TOKEN},
1778 {17, "\0<\0!\0[\0C\0D\0A\0T\0A\0", XML_ERROR_UNCLOSED_TOKEN},
1779 {18, "\0<\0!\0[\0C\0D\0A\0T\0A\0[", XML_ERROR_UNCLOSED_CDATA_SECTION},
1780 {19, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0", XML_ERROR_UNCLOSED_CDATA_SECTION},
1781 {20, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z", XML_ERROR_UNCLOSED_CDATA_SECTION},
1782 /* Now add a four-byte UTF-16 character */
1783 {21, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8",
1784 XML_ERROR_UNCLOSED_CDATA_SECTION},
1785 {22, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8\x34", XML_ERROR_PARTIAL_CHAR},
1786 {23, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8\x34\xdd",
1787 XML_ERROR_PARTIAL_CHAR},
1788 {24, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8\x34\xdd\x5e",
1789 XML_ERROR_UNCLOSED_CDATA_SECTION}};
1790 size_t i;
1791
1792 for (i = 0; i < sizeof(cases) / sizeof(struct CaseData); i++) {
1793 set_subtest("case %lu", (long unsigned)(i + 1));
1794 enum XML_Status actual_status;
1795 enum XML_Error actual_error;
1796
1797 if (_XML_Parse_SINGLE_BYTES(g_parser, prolog, (int)sizeof(prolog) - 1,
1798 XML_FALSE)
1799 == XML_STATUS_ERROR)
1800 xml_failure(g_parser);
1801 actual_status = _XML_Parse_SINGLE_BYTES(g_parser, cases[i].text,
1802 (int)cases[i].text_bytes, XML_TRUE);
1803 assert(actual_status == XML_STATUS_ERROR);
1804 actual_error = XML_GetErrorCode(g_parser);
1805 if (actual_error != cases[i].expected_error) {
1806 char message[1024];
1807
1808 snprintf(message, sizeof(message),
1809 "Expected error %d (%" XML_FMT_STR "), got %d (%" XML_FMT_STR
1810 ") for case %lu\n",
1811 cases[i].expected_error,
1812 XML_ErrorString(cases[i].expected_error), actual_error,
1813 XML_ErrorString(actual_error), (long unsigned)(i + 1));
1814 fail(message);
1815 }
1816 XML_ParserReset(g_parser, NULL);
1817 }
1818 }
1819 END_TEST
1820
1821 /* Test stopping the parser in cdata handler */
START_TEST(test_stop_parser_between_cdata_calls)1822 START_TEST(test_stop_parser_between_cdata_calls) {
1823 const char *text = long_cdata_text;
1824
1825 XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
1826 g_resumable = XML_FALSE;
1827 expect_failure(text, XML_ERROR_ABORTED, "Parse not aborted in CDATA handler");
1828 }
1829 END_TEST
1830
1831 /* Test suspending the parser in cdata handler */
START_TEST(test_suspend_parser_between_cdata_calls)1832 START_TEST(test_suspend_parser_between_cdata_calls) {
1833 const char *text = long_cdata_text;
1834 enum XML_Status result;
1835
1836 XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
1837 g_resumable = XML_TRUE;
1838 result = _XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE);
1839 if (result != XML_STATUS_SUSPENDED) {
1840 if (result == XML_STATUS_ERROR)
1841 xml_failure(g_parser);
1842 fail("Parse not suspended in CDATA handler");
1843 }
1844 if (XML_GetErrorCode(g_parser) != XML_ERROR_NONE)
1845 xml_failure(g_parser);
1846 }
1847 END_TEST
1848
1849 /* Test memory allocation functions */
START_TEST(test_memory_allocation)1850 START_TEST(test_memory_allocation) {
1851 char *buffer = (char *)XML_MemMalloc(g_parser, 256);
1852 char *p;
1853
1854 if (buffer == NULL) {
1855 fail("Allocation failed");
1856 } else {
1857 /* Try writing to memory; some OSes try to cheat! */
1858 buffer[0] = 'T';
1859 buffer[1] = 'E';
1860 buffer[2] = 'S';
1861 buffer[3] = 'T';
1862 buffer[4] = '\0';
1863 if (strcmp(buffer, "TEST") != 0) {
1864 fail("Memory not writable");
1865 } else {
1866 p = (char *)XML_MemRealloc(g_parser, buffer, 512);
1867 if (p == NULL) {
1868 fail("Reallocation failed");
1869 } else {
1870 /* Write again, just to be sure */
1871 buffer = p;
1872 buffer[0] = 'V';
1873 if (strcmp(buffer, "VEST") != 0) {
1874 fail("Reallocated memory not writable");
1875 }
1876 }
1877 }
1878 XML_MemFree(g_parser, buffer);
1879 }
1880 }
1881 END_TEST
1882
1883 /* Test XML_DefaultCurrent() passes handling on correctly */
START_TEST(test_default_current)1884 START_TEST(test_default_current) {
1885 const char *text = "<doc>hell]</doc>";
1886 const char *entity_text = "<!DOCTYPE doc [\n"
1887 "<!ENTITY entity '%'>\n"
1888 "]>\n"
1889 "<doc>&entity;</doc>";
1890
1891 set_subtest("with defaulting");
1892 {
1893 struct handler_record_list storage;
1894 storage.count = 0;
1895 XML_SetDefaultHandler(g_parser, record_default_handler);
1896 XML_SetCharacterDataHandler(g_parser, record_cdata_handler);
1897 XML_SetUserData(g_parser, &storage);
1898 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1899 == XML_STATUS_ERROR)
1900 xml_failure(g_parser);
1901 int i = 0;
1902 assert_record_handler_called(&storage, i++, "record_default_handler", 5);
1903 // we should have gotten one or more cdata callbacks, totaling 5 chars
1904 int cdata_len_remaining = 5;
1905 while (cdata_len_remaining > 0) {
1906 const struct handler_record_entry *c_entry
1907 = handler_record_get(&storage, i++);
1908 assert_true(strcmp(c_entry->name, "record_cdata_handler") == 0);
1909 assert_true(c_entry->arg > 0);
1910 assert_true(c_entry->arg <= cdata_len_remaining);
1911 cdata_len_remaining -= c_entry->arg;
1912 // default handler must follow, with the exact same len argument.
1913 assert_record_handler_called(&storage, i++, "record_default_handler",
1914 c_entry->arg);
1915 }
1916 assert_record_handler_called(&storage, i++, "record_default_handler", 6);
1917 assert_true(storage.count == i);
1918 }
1919
1920 /* Again, without the defaulting */
1921 set_subtest("no defaulting");
1922 {
1923 struct handler_record_list storage;
1924 storage.count = 0;
1925 XML_ParserReset(g_parser, NULL);
1926 XML_SetDefaultHandler(g_parser, record_default_handler);
1927 XML_SetCharacterDataHandler(g_parser, record_cdata_nodefault_handler);
1928 XML_SetUserData(g_parser, &storage);
1929 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1930 == XML_STATUS_ERROR)
1931 xml_failure(g_parser);
1932 int i = 0;
1933 assert_record_handler_called(&storage, i++, "record_default_handler", 5);
1934 // we should have gotten one or more cdata callbacks, totaling 5 chars
1935 int cdata_len_remaining = 5;
1936 while (cdata_len_remaining > 0) {
1937 const struct handler_record_entry *c_entry
1938 = handler_record_get(&storage, i++);
1939 assert_true(strcmp(c_entry->name, "record_cdata_nodefault_handler") == 0);
1940 assert_true(c_entry->arg > 0);
1941 assert_true(c_entry->arg <= cdata_len_remaining);
1942 cdata_len_remaining -= c_entry->arg;
1943 }
1944 assert_record_handler_called(&storage, i++, "record_default_handler", 6);
1945 assert_true(storage.count == i);
1946 }
1947
1948 /* Now with an internal entity to complicate matters */
1949 set_subtest("with internal entity");
1950 {
1951 struct handler_record_list storage;
1952 storage.count = 0;
1953 XML_ParserReset(g_parser, NULL);
1954 XML_SetDefaultHandler(g_parser, record_default_handler);
1955 XML_SetCharacterDataHandler(g_parser, record_cdata_handler);
1956 XML_SetUserData(g_parser, &storage);
1957 if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text),
1958 XML_TRUE)
1959 == XML_STATUS_ERROR)
1960 xml_failure(g_parser);
1961 /* The default handler suppresses the entity */
1962 assert_record_handler_called(&storage, 0, "record_default_handler", 9);
1963 assert_record_handler_called(&storage, 1, "record_default_handler", 1);
1964 assert_record_handler_called(&storage, 2, "record_default_handler", 3);
1965 assert_record_handler_called(&storage, 3, "record_default_handler", 1);
1966 assert_record_handler_called(&storage, 4, "record_default_handler", 1);
1967 assert_record_handler_called(&storage, 5, "record_default_handler", 1);
1968 assert_record_handler_called(&storage, 6, "record_default_handler", 8);
1969 assert_record_handler_called(&storage, 7, "record_default_handler", 1);
1970 assert_record_handler_called(&storage, 8, "record_default_handler", 6);
1971 assert_record_handler_called(&storage, 9, "record_default_handler", 1);
1972 assert_record_handler_called(&storage, 10, "record_default_handler", 7);
1973 assert_record_handler_called(&storage, 11, "record_default_handler", 1);
1974 assert_record_handler_called(&storage, 12, "record_default_handler", 1);
1975 assert_record_handler_called(&storage, 13, "record_default_handler", 1);
1976 assert_record_handler_called(&storage, 14, "record_default_handler", 1);
1977 assert_record_handler_called(&storage, 15, "record_default_handler", 1);
1978 assert_record_handler_called(&storage, 16, "record_default_handler", 5);
1979 assert_record_handler_called(&storage, 17, "record_default_handler", 8);
1980 assert_record_handler_called(&storage, 18, "record_default_handler", 6);
1981 assert_true(storage.count == 19);
1982 }
1983
1984 /* Again, with a skip handler */
1985 set_subtest("with skip handler");
1986 {
1987 struct handler_record_list storage;
1988 storage.count = 0;
1989 XML_ParserReset(g_parser, NULL);
1990 XML_SetDefaultHandler(g_parser, record_default_handler);
1991 XML_SetCharacterDataHandler(g_parser, record_cdata_handler);
1992 XML_SetSkippedEntityHandler(g_parser, record_skip_handler);
1993 XML_SetUserData(g_parser, &storage);
1994 if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text),
1995 XML_TRUE)
1996 == XML_STATUS_ERROR)
1997 xml_failure(g_parser);
1998 /* The default handler suppresses the entity */
1999 assert_record_handler_called(&storage, 0, "record_default_handler", 9);
2000 assert_record_handler_called(&storage, 1, "record_default_handler", 1);
2001 assert_record_handler_called(&storage, 2, "record_default_handler", 3);
2002 assert_record_handler_called(&storage, 3, "record_default_handler", 1);
2003 assert_record_handler_called(&storage, 4, "record_default_handler", 1);
2004 assert_record_handler_called(&storage, 5, "record_default_handler", 1);
2005 assert_record_handler_called(&storage, 6, "record_default_handler", 8);
2006 assert_record_handler_called(&storage, 7, "record_default_handler", 1);
2007 assert_record_handler_called(&storage, 8, "record_default_handler", 6);
2008 assert_record_handler_called(&storage, 9, "record_default_handler", 1);
2009 assert_record_handler_called(&storage, 10, "record_default_handler", 7);
2010 assert_record_handler_called(&storage, 11, "record_default_handler", 1);
2011 assert_record_handler_called(&storage, 12, "record_default_handler", 1);
2012 assert_record_handler_called(&storage, 13, "record_default_handler", 1);
2013 assert_record_handler_called(&storage, 14, "record_default_handler", 1);
2014 assert_record_handler_called(&storage, 15, "record_default_handler", 1);
2015 assert_record_handler_called(&storage, 16, "record_default_handler", 5);
2016 assert_record_handler_called(&storage, 17, "record_skip_handler", 0);
2017 assert_record_handler_called(&storage, 18, "record_default_handler", 6);
2018 assert_true(storage.count == 19);
2019 }
2020
2021 /* This time, allow the entity through */
2022 set_subtest("allow entity");
2023 {
2024 struct handler_record_list storage;
2025 storage.count = 0;
2026 XML_ParserReset(g_parser, NULL);
2027 XML_SetDefaultHandlerExpand(g_parser, record_default_handler);
2028 XML_SetCharacterDataHandler(g_parser, record_cdata_handler);
2029 XML_SetUserData(g_parser, &storage);
2030 if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text),
2031 XML_TRUE)
2032 == XML_STATUS_ERROR)
2033 xml_failure(g_parser);
2034 assert_record_handler_called(&storage, 0, "record_default_handler", 9);
2035 assert_record_handler_called(&storage, 1, "record_default_handler", 1);
2036 assert_record_handler_called(&storage, 2, "record_default_handler", 3);
2037 assert_record_handler_called(&storage, 3, "record_default_handler", 1);
2038 assert_record_handler_called(&storage, 4, "record_default_handler", 1);
2039 assert_record_handler_called(&storage, 5, "record_default_handler", 1);
2040 assert_record_handler_called(&storage, 6, "record_default_handler", 8);
2041 assert_record_handler_called(&storage, 7, "record_default_handler", 1);
2042 assert_record_handler_called(&storage, 8, "record_default_handler", 6);
2043 assert_record_handler_called(&storage, 9, "record_default_handler", 1);
2044 assert_record_handler_called(&storage, 10, "record_default_handler", 7);
2045 assert_record_handler_called(&storage, 11, "record_default_handler", 1);
2046 assert_record_handler_called(&storage, 12, "record_default_handler", 1);
2047 assert_record_handler_called(&storage, 13, "record_default_handler", 1);
2048 assert_record_handler_called(&storage, 14, "record_default_handler", 1);
2049 assert_record_handler_called(&storage, 15, "record_default_handler", 1);
2050 assert_record_handler_called(&storage, 16, "record_default_handler", 5);
2051 assert_record_handler_called(&storage, 17, "record_cdata_handler", 1);
2052 assert_record_handler_called(&storage, 18, "record_default_handler", 1);
2053 assert_record_handler_called(&storage, 19, "record_default_handler", 6);
2054 assert_true(storage.count == 20);
2055 }
2056
2057 /* Finally, without passing the cdata to the default handler */
2058 set_subtest("not passing cdata");
2059 {
2060 struct handler_record_list storage;
2061 storage.count = 0;
2062 XML_ParserReset(g_parser, NULL);
2063 XML_SetDefaultHandlerExpand(g_parser, record_default_handler);
2064 XML_SetCharacterDataHandler(g_parser, record_cdata_nodefault_handler);
2065 XML_SetUserData(g_parser, &storage);
2066 if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text),
2067 XML_TRUE)
2068 == XML_STATUS_ERROR)
2069 xml_failure(g_parser);
2070 assert_record_handler_called(&storage, 0, "record_default_handler", 9);
2071 assert_record_handler_called(&storage, 1, "record_default_handler", 1);
2072 assert_record_handler_called(&storage, 2, "record_default_handler", 3);
2073 assert_record_handler_called(&storage, 3, "record_default_handler", 1);
2074 assert_record_handler_called(&storage, 4, "record_default_handler", 1);
2075 assert_record_handler_called(&storage, 5, "record_default_handler", 1);
2076 assert_record_handler_called(&storage, 6, "record_default_handler", 8);
2077 assert_record_handler_called(&storage, 7, "record_default_handler", 1);
2078 assert_record_handler_called(&storage, 8, "record_default_handler", 6);
2079 assert_record_handler_called(&storage, 9, "record_default_handler", 1);
2080 assert_record_handler_called(&storage, 10, "record_default_handler", 7);
2081 assert_record_handler_called(&storage, 11, "record_default_handler", 1);
2082 assert_record_handler_called(&storage, 12, "record_default_handler", 1);
2083 assert_record_handler_called(&storage, 13, "record_default_handler", 1);
2084 assert_record_handler_called(&storage, 14, "record_default_handler", 1);
2085 assert_record_handler_called(&storage, 15, "record_default_handler", 1);
2086 assert_record_handler_called(&storage, 16, "record_default_handler", 5);
2087 assert_record_handler_called(&storage, 17, "record_cdata_nodefault_handler",
2088 1);
2089 assert_record_handler_called(&storage, 18, "record_default_handler", 6);
2090 assert_true(storage.count == 19);
2091 }
2092 }
2093 END_TEST
2094
2095 /* Test DTD element parsing code paths */
START_TEST(test_dtd_elements)2096 START_TEST(test_dtd_elements) {
2097 const char *text = "<!DOCTYPE doc [\n"
2098 "<!ELEMENT doc (chapter)>\n"
2099 "<!ELEMENT chapter (#PCDATA)>\n"
2100 "]>\n"
2101 "<doc><chapter>Wombats are go</chapter></doc>";
2102
2103 XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
2104 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2105 == XML_STATUS_ERROR)
2106 xml_failure(g_parser);
2107 }
2108 END_TEST
2109
2110 static void XMLCALL
element_decl_check_model(void * userData,const XML_Char * name,XML_Content * model)2111 element_decl_check_model(void *userData, const XML_Char *name,
2112 XML_Content *model) {
2113 UNUSED_P(userData);
2114 uint32_t errorFlags = 0;
2115
2116 /* Expected model array structure is this:
2117 * [0] (type 6, quant 0)
2118 * [1] (type 5, quant 0)
2119 * [3] (type 4, quant 0, name "bar")
2120 * [4] (type 4, quant 0, name "foo")
2121 * [5] (type 4, quant 3, name "xyz")
2122 * [2] (type 4, quant 2, name "zebra")
2123 */
2124 errorFlags |= ((xcstrcmp(name, XCS("junk")) == 0) ? 0 : (1u << 0));
2125 errorFlags |= ((model != NULL) ? 0 : (1u << 1));
2126
2127 if (model != NULL) {
2128 errorFlags |= ((model[0].type == XML_CTYPE_SEQ) ? 0 : (1u << 2));
2129 errorFlags |= ((model[0].quant == XML_CQUANT_NONE) ? 0 : (1u << 3));
2130 errorFlags |= ((model[0].numchildren == 2) ? 0 : (1u << 4));
2131 errorFlags |= ((model[0].children == &model[1]) ? 0 : (1u << 5));
2132 errorFlags |= ((model[0].name == NULL) ? 0 : (1u << 6));
2133
2134 errorFlags |= ((model[1].type == XML_CTYPE_CHOICE) ? 0 : (1u << 7));
2135 errorFlags |= ((model[1].quant == XML_CQUANT_NONE) ? 0 : (1u << 8));
2136 errorFlags |= ((model[1].numchildren == 3) ? 0 : (1u << 9));
2137 errorFlags |= ((model[1].children == &model[3]) ? 0 : (1u << 10));
2138 errorFlags |= ((model[1].name == NULL) ? 0 : (1u << 11));
2139
2140 errorFlags |= ((model[2].type == XML_CTYPE_NAME) ? 0 : (1u << 12));
2141 errorFlags |= ((model[2].quant == XML_CQUANT_REP) ? 0 : (1u << 13));
2142 errorFlags |= ((model[2].numchildren == 0) ? 0 : (1u << 14));
2143 errorFlags |= ((model[2].children == NULL) ? 0 : (1u << 15));
2144 errorFlags
2145 |= ((xcstrcmp(model[2].name, XCS("zebra")) == 0) ? 0 : (1u << 16));
2146
2147 errorFlags |= ((model[3].type == XML_CTYPE_NAME) ? 0 : (1u << 17));
2148 errorFlags |= ((model[3].quant == XML_CQUANT_NONE) ? 0 : (1u << 18));
2149 errorFlags |= ((model[3].numchildren == 0) ? 0 : (1u << 19));
2150 errorFlags |= ((model[3].children == NULL) ? 0 : (1u << 20));
2151 errorFlags |= ((xcstrcmp(model[3].name, XCS("bar")) == 0) ? 0 : (1u << 21));
2152
2153 errorFlags |= ((model[4].type == XML_CTYPE_NAME) ? 0 : (1u << 22));
2154 errorFlags |= ((model[4].quant == XML_CQUANT_NONE) ? 0 : (1u << 23));
2155 errorFlags |= ((model[4].numchildren == 0) ? 0 : (1u << 24));
2156 errorFlags |= ((model[4].children == NULL) ? 0 : (1u << 25));
2157 errorFlags |= ((xcstrcmp(model[4].name, XCS("foo")) == 0) ? 0 : (1u << 26));
2158
2159 errorFlags |= ((model[5].type == XML_CTYPE_NAME) ? 0 : (1u << 27));
2160 errorFlags |= ((model[5].quant == XML_CQUANT_PLUS) ? 0 : (1u << 28));
2161 errorFlags |= ((model[5].numchildren == 0) ? 0 : (1u << 29));
2162 errorFlags |= ((model[5].children == NULL) ? 0 : (1u << 30));
2163 errorFlags |= ((xcstrcmp(model[5].name, XCS("xyz")) == 0) ? 0 : (1u << 31));
2164 }
2165
2166 XML_SetUserData(g_parser, (void *)(uintptr_t)errorFlags);
2167 XML_FreeContentModel(g_parser, model);
2168 }
2169
START_TEST(test_dtd_elements_nesting)2170 START_TEST(test_dtd_elements_nesting) {
2171 // Payload inspired by a test in Perl's XML::Parser
2172 const char *text = "<!DOCTYPE foo [\n"
2173 "<!ELEMENT junk ((bar|foo|xyz+), zebra*)>\n"
2174 "]>\n"
2175 "<foo/>";
2176
2177 XML_SetUserData(g_parser, (void *)(uintptr_t)-1);
2178
2179 XML_SetElementDeclHandler(g_parser, element_decl_check_model);
2180 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2181 == XML_STATUS_ERROR)
2182 xml_failure(g_parser);
2183
2184 if ((uint32_t)(uintptr_t)XML_GetUserData(g_parser) != 0)
2185 fail("Element declaration model regression detected");
2186 }
2187 END_TEST
2188
2189 /* Test foreign DTD handling */
START_TEST(test_set_foreign_dtd)2190 START_TEST(test_set_foreign_dtd) {
2191 const char *text1 = "<?xml version='1.0' encoding='us-ascii'?>\n";
2192 const char *text2 = "<doc>&entity;</doc>";
2193 ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
2194
2195 /* Check hash salt is passed through too */
2196 XML_SetHashSalt(g_parser, 0x12345678);
2197 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2198 XML_SetUserData(g_parser, &test_data);
2199 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
2200 /* Add a default handler to exercise more code paths */
2201 XML_SetDefaultHandler(g_parser, dummy_default_handler);
2202 if (XML_UseForeignDTD(g_parser, XML_TRUE) != XML_ERROR_NONE)
2203 fail("Could not set foreign DTD");
2204 if (_XML_Parse_SINGLE_BYTES(g_parser, text1, (int)strlen(text1), XML_FALSE)
2205 == XML_STATUS_ERROR)
2206 xml_failure(g_parser);
2207
2208 /* Ensure that trying to set the DTD after parsing has started
2209 * is faulted, even if it's the same setting.
2210 */
2211 if (XML_UseForeignDTD(g_parser, XML_TRUE)
2212 != XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING)
2213 fail("Failed to reject late foreign DTD setting");
2214 /* Ditto for the hash salt */
2215 if (XML_SetHashSalt(g_parser, 0x23456789))
2216 fail("Failed to reject late hash salt change");
2217
2218 /* Now finish the parse */
2219 if (_XML_Parse_SINGLE_BYTES(g_parser, text2, (int)strlen(text2), XML_TRUE)
2220 == XML_STATUS_ERROR)
2221 xml_failure(g_parser);
2222 }
2223 END_TEST
2224
2225 /* Test foreign DTD handling with a failing NotStandalone handler */
START_TEST(test_foreign_dtd_not_standalone)2226 START_TEST(test_foreign_dtd_not_standalone) {
2227 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2228 "<doc>&entity;</doc>";
2229 ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
2230
2231 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2232 XML_SetUserData(g_parser, &test_data);
2233 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
2234 XML_SetNotStandaloneHandler(g_parser, reject_not_standalone_handler);
2235 if (XML_UseForeignDTD(g_parser, XML_TRUE) != XML_ERROR_NONE)
2236 fail("Could not set foreign DTD");
2237 expect_failure(text, XML_ERROR_NOT_STANDALONE,
2238 "NotStandalonehandler failed to reject");
2239 }
2240 END_TEST
2241
2242 /* Test invalid character in a foreign DTD is faulted */
START_TEST(test_invalid_foreign_dtd)2243 START_TEST(test_invalid_foreign_dtd) {
2244 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2245 "<doc>&entity;</doc>";
2246 ExtFaults test_data
2247 = {"$", "Dollar not faulted", NULL, XML_ERROR_INVALID_TOKEN};
2248
2249 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2250 XML_SetUserData(g_parser, &test_data);
2251 XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
2252 XML_UseForeignDTD(g_parser, XML_TRUE);
2253 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
2254 "Bad DTD should not have been accepted");
2255 }
2256 END_TEST
2257
2258 /* Test foreign DTD use with a doctype */
START_TEST(test_foreign_dtd_with_doctype)2259 START_TEST(test_foreign_dtd_with_doctype) {
2260 const char *text1 = "<?xml version='1.0' encoding='us-ascii'?>\n"
2261 "<!DOCTYPE doc [<!ENTITY entity 'hello world'>]>\n";
2262 const char *text2 = "<doc>&entity;</doc>";
2263 ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
2264
2265 /* Check hash salt is passed through too */
2266 XML_SetHashSalt(g_parser, 0x12345678);
2267 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2268 XML_SetUserData(g_parser, &test_data);
2269 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
2270 /* Add a default handler to exercise more code paths */
2271 XML_SetDefaultHandler(g_parser, dummy_default_handler);
2272 if (XML_UseForeignDTD(g_parser, XML_TRUE) != XML_ERROR_NONE)
2273 fail("Could not set foreign DTD");
2274 if (_XML_Parse_SINGLE_BYTES(g_parser, text1, (int)strlen(text1), XML_FALSE)
2275 == XML_STATUS_ERROR)
2276 xml_failure(g_parser);
2277
2278 /* Ensure that trying to set the DTD after parsing has started
2279 * is faulted, even if it's the same setting.
2280 */
2281 if (XML_UseForeignDTD(g_parser, XML_TRUE)
2282 != XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING)
2283 fail("Failed to reject late foreign DTD setting");
2284 /* Ditto for the hash salt */
2285 if (XML_SetHashSalt(g_parser, 0x23456789))
2286 fail("Failed to reject late hash salt change");
2287
2288 /* Now finish the parse */
2289 if (_XML_Parse_SINGLE_BYTES(g_parser, text2, (int)strlen(text2), XML_TRUE)
2290 == XML_STATUS_ERROR)
2291 xml_failure(g_parser);
2292 }
2293 END_TEST
2294
2295 /* Test XML_UseForeignDTD with no external subset present */
START_TEST(test_foreign_dtd_without_external_subset)2296 START_TEST(test_foreign_dtd_without_external_subset) {
2297 const char *text = "<!DOCTYPE doc [<!ENTITY foo 'bar'>]>\n"
2298 "<doc>&foo;</doc>";
2299
2300 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2301 XML_SetUserData(g_parser, NULL);
2302 XML_SetExternalEntityRefHandler(g_parser, external_entity_null_loader);
2303 XML_UseForeignDTD(g_parser, XML_TRUE);
2304 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2305 == XML_STATUS_ERROR)
2306 xml_failure(g_parser);
2307 }
2308 END_TEST
2309
START_TEST(test_empty_foreign_dtd)2310 START_TEST(test_empty_foreign_dtd) {
2311 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2312 "<doc>&entity;</doc>";
2313
2314 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2315 XML_SetExternalEntityRefHandler(g_parser, external_entity_null_loader);
2316 XML_UseForeignDTD(g_parser, XML_TRUE);
2317 expect_failure(text, XML_ERROR_UNDEFINED_ENTITY,
2318 "Undefined entity not faulted");
2319 }
2320 END_TEST
2321
2322 /* Test XML Base is set and unset appropriately */
START_TEST(test_set_base)2323 START_TEST(test_set_base) {
2324 const XML_Char *old_base;
2325 const XML_Char *new_base = XCS("/local/file/name.xml");
2326
2327 old_base = XML_GetBase(g_parser);
2328 if (XML_SetBase(g_parser, new_base) != XML_STATUS_OK)
2329 fail("Unable to set base");
2330 if (xcstrcmp(XML_GetBase(g_parser), new_base) != 0)
2331 fail("Base setting not correct");
2332 if (XML_SetBase(g_parser, NULL) != XML_STATUS_OK)
2333 fail("Unable to NULL base");
2334 if (XML_GetBase(g_parser) != NULL)
2335 fail("Base setting not nulled");
2336 XML_SetBase(g_parser, old_base);
2337 }
2338 END_TEST
2339
2340 /* Test attribute counts, indexing, etc */
START_TEST(test_attributes)2341 START_TEST(test_attributes) {
2342 const char *text = "<!DOCTYPE doc [\n"
2343 "<!ELEMENT doc (tag)>\n"
2344 "<!ATTLIST doc id ID #REQUIRED>\n"
2345 "]>"
2346 "<doc a='1' id='one' b='2'>"
2347 "<tag c='3'/>"
2348 "</doc>";
2349 AttrInfo doc_info[] = {{XCS("a"), XCS("1")},
2350 {XCS("b"), XCS("2")},
2351 {XCS("id"), XCS("one")},
2352 {NULL, NULL}};
2353 AttrInfo tag_info[] = {{XCS("c"), XCS("3")}, {NULL, NULL}};
2354 ElementInfo info[] = {{XCS("doc"), 3, XCS("id"), NULL},
2355 {XCS("tag"), 1, NULL, NULL},
2356 {NULL, 0, NULL, NULL}};
2357 info[0].attributes = doc_info;
2358 info[1].attributes = tag_info;
2359
2360 XML_Parser parser = XML_ParserCreate(NULL);
2361 assert_true(parser != NULL);
2362 ParserAndElementInfo parserAndElementInfos = {
2363 parser,
2364 info,
2365 };
2366
2367 XML_SetStartElementHandler(parser, counting_start_element_handler);
2368 XML_SetUserData(parser, &parserAndElementInfos);
2369 if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
2370 == XML_STATUS_ERROR)
2371 xml_failure(parser);
2372
2373 XML_ParserFree(parser);
2374 }
2375 END_TEST
2376
2377 /* Test reset works correctly in the middle of processing an internal
2378 * entity. Exercises some obscure code in XML_ParserReset().
2379 */
START_TEST(test_reset_in_entity)2380 START_TEST(test_reset_in_entity) {
2381 const char *text = "<!DOCTYPE doc [\n"
2382 "<!ENTITY wombat 'wom'>\n"
2383 "<!ENTITY entity 'hi &wom; there'>\n"
2384 "]>\n"
2385 "<doc>&entity;</doc>";
2386 XML_ParsingStatus status;
2387
2388 g_resumable = XML_TRUE;
2389 XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
2390 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2391 == XML_STATUS_ERROR)
2392 xml_failure(g_parser);
2393 XML_GetParsingStatus(g_parser, &status);
2394 if (status.parsing != XML_SUSPENDED)
2395 fail("Parsing status not SUSPENDED");
2396 XML_ParserReset(g_parser, NULL);
2397 XML_GetParsingStatus(g_parser, &status);
2398 if (status.parsing != XML_INITIALIZED)
2399 fail("Parsing status doesn't reset to INITIALIZED");
2400 }
2401 END_TEST
2402
2403 /* Test that resume correctly passes through parse errors */
START_TEST(test_resume_invalid_parse)2404 START_TEST(test_resume_invalid_parse) {
2405 const char *text = "<doc>Hello</doc"; /* Missing closing wedge */
2406
2407 g_resumable = XML_TRUE;
2408 XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
2409 if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
2410 == XML_STATUS_ERROR)
2411 xml_failure(g_parser);
2412 if (XML_ResumeParser(g_parser) == XML_STATUS_OK)
2413 fail("Resumed invalid parse not faulted");
2414 if (XML_GetErrorCode(g_parser) != XML_ERROR_UNCLOSED_TOKEN)
2415 fail("Invalid parse not correctly faulted");
2416 }
2417 END_TEST
2418
2419 /* Test that re-suspended parses are correctly passed through */
START_TEST(test_resume_resuspended)2420 START_TEST(test_resume_resuspended) {
2421 const char *text = "<doc>Hello<meep/>world</doc>";
2422
2423 g_resumable = XML_TRUE;
2424 XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
2425 if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
2426 == XML_STATUS_ERROR)
2427 xml_failure(g_parser);
2428 g_resumable = XML_TRUE;
2429 XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
2430 if (XML_ResumeParser(g_parser) != XML_STATUS_SUSPENDED)
2431 fail("Resumption not suspended");
2432 /* This one should succeed and finish up */
2433 if (XML_ResumeParser(g_parser) != XML_STATUS_OK)
2434 xml_failure(g_parser);
2435 }
2436 END_TEST
2437
2438 /* Test that CDATA shows up correctly through a default handler */
START_TEST(test_cdata_default)2439 START_TEST(test_cdata_default) {
2440 const char *text = "<doc><![CDATA[Hello\nworld]]></doc>";
2441 const XML_Char *expected = XCS("<doc><![CDATA[Hello\nworld]]></doc>");
2442 CharData storage;
2443
2444 CharData_Init(&storage);
2445 XML_SetUserData(g_parser, &storage);
2446 XML_SetDefaultHandler(g_parser, accumulate_characters);
2447
2448 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2449 == XML_STATUS_ERROR)
2450 xml_failure(g_parser);
2451 CharData_CheckXMLChars(&storage, expected);
2452 }
2453 END_TEST
2454
2455 /* Test resetting a subordinate parser does exactly nothing */
START_TEST(test_subordinate_reset)2456 START_TEST(test_subordinate_reset) {
2457 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2458 "<!DOCTYPE doc SYSTEM 'foo'>\n"
2459 "<doc>&entity;</doc>";
2460
2461 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2462 XML_SetExternalEntityRefHandler(g_parser, external_entity_resetter);
2463 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2464 == XML_STATUS_ERROR)
2465 xml_failure(g_parser);
2466 }
2467 END_TEST
2468
2469 /* Test suspending a subordinate parser */
START_TEST(test_subordinate_suspend)2470 START_TEST(test_subordinate_suspend) {
2471 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2472 "<!DOCTYPE doc SYSTEM 'foo'>\n"
2473 "<doc>&entity;</doc>";
2474
2475 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2476 XML_SetExternalEntityRefHandler(g_parser, external_entity_suspender);
2477 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2478 == XML_STATUS_ERROR)
2479 xml_failure(g_parser);
2480 }
2481 END_TEST
2482
2483 /* Test suspending a subordinate parser from an XML declaration */
2484 /* Increases code coverage of the tests */
2485
START_TEST(test_subordinate_xdecl_suspend)2486 START_TEST(test_subordinate_xdecl_suspend) {
2487 const char *text
2488 = "<!DOCTYPE doc [\n"
2489 " <!ENTITY entity SYSTEM 'http://example.org/dummy.ent'>\n"
2490 "]>\n"
2491 "<doc>&entity;</doc>";
2492
2493 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2494 XML_SetExternalEntityRefHandler(g_parser, external_entity_suspend_xmldecl);
2495 g_resumable = XML_TRUE;
2496 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2497 == XML_STATUS_ERROR)
2498 xml_failure(g_parser);
2499 }
2500 END_TEST
2501
START_TEST(test_subordinate_xdecl_abort)2502 START_TEST(test_subordinate_xdecl_abort) {
2503 const char *text
2504 = "<!DOCTYPE doc [\n"
2505 " <!ENTITY entity SYSTEM 'http://example.org/dummy.ent'>\n"
2506 "]>\n"
2507 "<doc>&entity;</doc>";
2508
2509 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2510 XML_SetExternalEntityRefHandler(g_parser, external_entity_suspend_xmldecl);
2511 g_resumable = XML_FALSE;
2512 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2513 == XML_STATUS_ERROR)
2514 xml_failure(g_parser);
2515 }
2516 END_TEST
2517
2518 /* Test external entity fault handling with suspension */
START_TEST(test_ext_entity_invalid_suspended_parse)2519 START_TEST(test_ext_entity_invalid_suspended_parse) {
2520 const char *text = "<!DOCTYPE doc [\n"
2521 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
2522 "]>\n"
2523 "<doc>&en;</doc>";
2524 ExtFaults faults[]
2525 = {{"<?xml version='1.0' encoding='us-ascii'?><",
2526 "Incomplete element declaration not faulted", NULL,
2527 XML_ERROR_UNCLOSED_TOKEN},
2528 {/* First two bytes of a three-byte char */
2529 "<?xml version='1.0' encoding='utf-8'?>\xe2\x82",
2530 "Incomplete character not faulted", NULL, XML_ERROR_PARTIAL_CHAR},
2531 {NULL, NULL, NULL, XML_ERROR_NONE}};
2532 ExtFaults *fault;
2533
2534 for (fault = &faults[0]; fault->parse_text != NULL; fault++) {
2535 set_subtest("%s", fault->parse_text);
2536 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2537 XML_SetExternalEntityRefHandler(g_parser,
2538 external_entity_suspending_faulter);
2539 XML_SetUserData(g_parser, fault);
2540 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
2541 "Parser did not report external entity error");
2542 XML_ParserReset(g_parser, NULL);
2543 }
2544 }
2545 END_TEST
2546
2547 /* Test setting an explicit encoding */
START_TEST(test_explicit_encoding)2548 START_TEST(test_explicit_encoding) {
2549 const char *text1 = "<doc>Hello ";
2550 const char *text2 = " World</doc>";
2551
2552 /* Just check that we can set the encoding to NULL before starting */
2553 if (XML_SetEncoding(g_parser, NULL) != XML_STATUS_OK)
2554 fail("Failed to initialise encoding to NULL");
2555 /* Say we are UTF-8 */
2556 if (XML_SetEncoding(g_parser, XCS("utf-8")) != XML_STATUS_OK)
2557 fail("Failed to set explicit encoding");
2558 if (_XML_Parse_SINGLE_BYTES(g_parser, text1, (int)strlen(text1), XML_FALSE)
2559 == XML_STATUS_ERROR)
2560 xml_failure(g_parser);
2561 /* Try to switch encodings mid-parse */
2562 if (XML_SetEncoding(g_parser, XCS("us-ascii")) != XML_STATUS_ERROR)
2563 fail("Allowed encoding change");
2564 if (_XML_Parse_SINGLE_BYTES(g_parser, text2, (int)strlen(text2), XML_TRUE)
2565 == XML_STATUS_ERROR)
2566 xml_failure(g_parser);
2567 /* Try now the parse is over */
2568 if (XML_SetEncoding(g_parser, NULL) != XML_STATUS_OK)
2569 fail("Failed to unset encoding");
2570 }
2571 END_TEST
2572
2573 /* Test handling of trailing CR (rather than newline) */
START_TEST(test_trailing_cr)2574 START_TEST(test_trailing_cr) {
2575 const char *text = "<doc>\r";
2576 int found_cr;
2577
2578 /* Try with a character handler, for code coverage */
2579 XML_SetCharacterDataHandler(g_parser, cr_cdata_handler);
2580 XML_SetUserData(g_parser, &found_cr);
2581 found_cr = 0;
2582 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2583 == XML_STATUS_OK)
2584 fail("Failed to fault unclosed doc");
2585 if (found_cr == 0)
2586 fail("Did not catch the carriage return");
2587 XML_ParserReset(g_parser, NULL);
2588
2589 /* Now with a default handler instead */
2590 XML_SetDefaultHandler(g_parser, cr_cdata_handler);
2591 XML_SetUserData(g_parser, &found_cr);
2592 found_cr = 0;
2593 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2594 == XML_STATUS_OK)
2595 fail("Failed to fault unclosed doc");
2596 if (found_cr == 0)
2597 fail("Did not catch default carriage return");
2598 }
2599 END_TEST
2600
2601 /* Test trailing CR in an external entity parse */
START_TEST(test_ext_entity_trailing_cr)2602 START_TEST(test_ext_entity_trailing_cr) {
2603 const char *text = "<!DOCTYPE doc [\n"
2604 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
2605 "]>\n"
2606 "<doc>&en;</doc>";
2607 int found_cr;
2608
2609 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2610 XML_SetExternalEntityRefHandler(g_parser, external_entity_cr_catcher);
2611 XML_SetUserData(g_parser, &found_cr);
2612 found_cr = 0;
2613 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2614 != XML_STATUS_OK)
2615 xml_failure(g_parser);
2616 if (found_cr == 0)
2617 fail("No carriage return found");
2618 XML_ParserReset(g_parser, NULL);
2619
2620 /* Try again with a different trailing CR */
2621 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2622 XML_SetExternalEntityRefHandler(g_parser, external_entity_bad_cr_catcher);
2623 XML_SetUserData(g_parser, &found_cr);
2624 found_cr = 0;
2625 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2626 != XML_STATUS_OK)
2627 xml_failure(g_parser);
2628 if (found_cr == 0)
2629 fail("No carriage return found");
2630 }
2631 END_TEST
2632
2633 /* Test handling of trailing square bracket */
START_TEST(test_trailing_rsqb)2634 START_TEST(test_trailing_rsqb) {
2635 const char *text8 = "<doc>]";
2636 const char text16[] = "\xFF\xFE<\000d\000o\000c\000>\000]\000";
2637 int found_rsqb;
2638 int text8_len = (int)strlen(text8);
2639
2640 XML_SetCharacterDataHandler(g_parser, rsqb_handler);
2641 XML_SetUserData(g_parser, &found_rsqb);
2642 found_rsqb = 0;
2643 if (_XML_Parse_SINGLE_BYTES(g_parser, text8, text8_len, XML_TRUE)
2644 == XML_STATUS_OK)
2645 fail("Failed to fault unclosed doc");
2646 if (found_rsqb == 0)
2647 fail("Did not catch the right square bracket");
2648
2649 /* Try again with a different encoding */
2650 XML_ParserReset(g_parser, NULL);
2651 XML_SetCharacterDataHandler(g_parser, rsqb_handler);
2652 XML_SetUserData(g_parser, &found_rsqb);
2653 found_rsqb = 0;
2654 if (_XML_Parse_SINGLE_BYTES(g_parser, text16, (int)sizeof(text16) - 1,
2655 XML_TRUE)
2656 == XML_STATUS_OK)
2657 fail("Failed to fault unclosed doc");
2658 if (found_rsqb == 0)
2659 fail("Did not catch the right square bracket");
2660
2661 /* And finally with a default handler */
2662 XML_ParserReset(g_parser, NULL);
2663 XML_SetDefaultHandler(g_parser, rsqb_handler);
2664 XML_SetUserData(g_parser, &found_rsqb);
2665 found_rsqb = 0;
2666 if (_XML_Parse_SINGLE_BYTES(g_parser, text16, (int)sizeof(text16) - 1,
2667 XML_TRUE)
2668 == XML_STATUS_OK)
2669 fail("Failed to fault unclosed doc");
2670 if (found_rsqb == 0)
2671 fail("Did not catch the right square bracket");
2672 }
2673 END_TEST
2674
2675 /* Test trailing right square bracket in an external entity parse */
START_TEST(test_ext_entity_trailing_rsqb)2676 START_TEST(test_ext_entity_trailing_rsqb) {
2677 const char *text = "<!DOCTYPE doc [\n"
2678 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
2679 "]>\n"
2680 "<doc>&en;</doc>";
2681 int found_rsqb;
2682
2683 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2684 XML_SetExternalEntityRefHandler(g_parser, external_entity_rsqb_catcher);
2685 XML_SetUserData(g_parser, &found_rsqb);
2686 found_rsqb = 0;
2687 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2688 != XML_STATUS_OK)
2689 xml_failure(g_parser);
2690 if (found_rsqb == 0)
2691 fail("No right square bracket found");
2692 }
2693 END_TEST
2694
2695 /* Test CDATA handling in an external entity */
START_TEST(test_ext_entity_good_cdata)2696 START_TEST(test_ext_entity_good_cdata) {
2697 const char *text = "<!DOCTYPE doc [\n"
2698 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
2699 "]>\n"
2700 "<doc>&en;</doc>";
2701
2702 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2703 XML_SetExternalEntityRefHandler(g_parser, external_entity_good_cdata_ascii);
2704 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2705 != XML_STATUS_OK)
2706 xml_failure(g_parser);
2707 }
2708 END_TEST
2709
2710 /* Test user parameter settings */
START_TEST(test_user_parameters)2711 START_TEST(test_user_parameters) {
2712 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2713 "<!-- Primary parse -->\n"
2714 "<!DOCTYPE doc SYSTEM 'foo'>\n"
2715 "<doc>&entity;";
2716 const char *epilog = "<!-- Back to primary parser -->\n"
2717 "</doc>";
2718
2719 g_comment_count = 0;
2720 g_skip_count = 0;
2721 g_xdecl_count = 0;
2722 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2723 XML_SetXmlDeclHandler(g_parser, xml_decl_handler);
2724 XML_SetExternalEntityRefHandler(g_parser, external_entity_param_checker);
2725 XML_SetCommentHandler(g_parser, data_check_comment_handler);
2726 XML_SetSkippedEntityHandler(g_parser, param_check_skip_handler);
2727 XML_UseParserAsHandlerArg(g_parser);
2728 XML_SetUserData(g_parser, (void *)1);
2729 g_handler_data = g_parser;
2730 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE)
2731 == XML_STATUS_ERROR)
2732 xml_failure(g_parser);
2733 /* Ensure we can't change policy mid-parse */
2734 if (XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_NEVER))
2735 fail("Changed param entity parsing policy while parsing");
2736 if (_XML_Parse_SINGLE_BYTES(g_parser, epilog, (int)strlen(epilog), XML_TRUE)
2737 == XML_STATUS_ERROR)
2738 xml_failure(g_parser);
2739 if (g_comment_count != 3)
2740 fail("Comment handler not invoked enough times");
2741 if (g_skip_count != 1)
2742 fail("Skip handler not invoked enough times");
2743 if (g_xdecl_count != 1)
2744 fail("XML declaration handler not invoked");
2745 }
2746 END_TEST
2747
2748 /* Test that an explicit external entity handler argument replaces
2749 * the parser as the first argument.
2750 *
2751 * We do not call the first parameter to the external entity handler
2752 * 'parser' for once, since the first time the handler is called it
2753 * will actually be a text string. We need to be able to access the
2754 * global 'parser' variable to create our external entity parser from,
2755 * since there are code paths we need to ensure get executed.
2756 */
START_TEST(test_ext_entity_ref_parameter)2757 START_TEST(test_ext_entity_ref_parameter) {
2758 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2759 "<!DOCTYPE doc SYSTEM 'foo'>\n"
2760 "<doc>&entity;</doc>";
2761
2762 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2763 XML_SetExternalEntityRefHandler(g_parser, external_entity_ref_param_checker);
2764 /* Set a handler arg that is not NULL and not parser (which is
2765 * what NULL would cause to be passed.
2766 */
2767 XML_SetExternalEntityRefHandlerArg(g_parser, (void *)text);
2768 g_handler_data = text;
2769 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2770 == XML_STATUS_ERROR)
2771 xml_failure(g_parser);
2772
2773 /* Now try again with unset args */
2774 XML_ParserReset(g_parser, NULL);
2775 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2776 XML_SetExternalEntityRefHandler(g_parser, external_entity_ref_param_checker);
2777 XML_SetExternalEntityRefHandlerArg(g_parser, NULL);
2778 g_handler_data = g_parser;
2779 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2780 == XML_STATUS_ERROR)
2781 xml_failure(g_parser);
2782 }
2783 END_TEST
2784
2785 /* Test the parsing of an empty string */
START_TEST(test_empty_parse)2786 START_TEST(test_empty_parse) {
2787 const char *text = "<doc></doc>";
2788 const char *partial = "<doc>";
2789
2790 if (XML_Parse(g_parser, NULL, 0, XML_FALSE) == XML_STATUS_ERROR)
2791 fail("Parsing empty string faulted");
2792 if (XML_Parse(g_parser, NULL, 0, XML_TRUE) != XML_STATUS_ERROR)
2793 fail("Parsing final empty string not faulted");
2794 if (XML_GetErrorCode(g_parser) != XML_ERROR_NO_ELEMENTS)
2795 fail("Parsing final empty string faulted for wrong reason");
2796
2797 /* Now try with valid text before the empty end */
2798 XML_ParserReset(g_parser, NULL);
2799 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE)
2800 == XML_STATUS_ERROR)
2801 xml_failure(g_parser);
2802 if (XML_Parse(g_parser, NULL, 0, XML_TRUE) == XML_STATUS_ERROR)
2803 fail("Parsing final empty string faulted");
2804
2805 /* Now try with invalid text before the empty end */
2806 XML_ParserReset(g_parser, NULL);
2807 if (_XML_Parse_SINGLE_BYTES(g_parser, partial, (int)strlen(partial),
2808 XML_FALSE)
2809 == XML_STATUS_ERROR)
2810 xml_failure(g_parser);
2811 if (XML_Parse(g_parser, NULL, 0, XML_TRUE) != XML_STATUS_ERROR)
2812 fail("Parsing final incomplete empty string not faulted");
2813 }
2814 END_TEST
2815
2816 /* Test XML_Parse for len < 0 */
START_TEST(test_negative_len_parse)2817 START_TEST(test_negative_len_parse) {
2818 const char *const doc = "<root/>";
2819 for (int isFinal = 0; isFinal < 2; isFinal++) {
2820 set_subtest("isFinal=%d", isFinal);
2821
2822 XML_Parser parser = XML_ParserCreate(NULL);
2823
2824 if (XML_GetErrorCode(parser) != XML_ERROR_NONE)
2825 fail("There was not supposed to be any initial parse error.");
2826
2827 const enum XML_Status status = XML_Parse(parser, doc, -1, isFinal);
2828
2829 if (status != XML_STATUS_ERROR)
2830 fail("Negative len was expected to fail the parse but did not.");
2831
2832 if (XML_GetErrorCode(parser) != XML_ERROR_INVALID_ARGUMENT)
2833 fail("Parse error does not match XML_ERROR_INVALID_ARGUMENT.");
2834
2835 XML_ParserFree(parser);
2836 }
2837 }
2838 END_TEST
2839
2840 /* Test XML_ParseBuffer for len < 0 */
START_TEST(test_negative_len_parse_buffer)2841 START_TEST(test_negative_len_parse_buffer) {
2842 const char *const doc = "<root/>";
2843 for (int isFinal = 0; isFinal < 2; isFinal++) {
2844 set_subtest("isFinal=%d", isFinal);
2845
2846 XML_Parser parser = XML_ParserCreate(NULL);
2847
2848 if (XML_GetErrorCode(parser) != XML_ERROR_NONE)
2849 fail("There was not supposed to be any initial parse error.");
2850
2851 void *const buffer = XML_GetBuffer(parser, (int)strlen(doc));
2852
2853 if (buffer == NULL)
2854 fail("XML_GetBuffer failed.");
2855
2856 memcpy(buffer, doc, strlen(doc));
2857
2858 const enum XML_Status status = XML_ParseBuffer(parser, -1, isFinal);
2859
2860 if (status != XML_STATUS_ERROR)
2861 fail("Negative len was expected to fail the parse but did not.");
2862
2863 if (XML_GetErrorCode(parser) != XML_ERROR_INVALID_ARGUMENT)
2864 fail("Parse error does not match XML_ERROR_INVALID_ARGUMENT.");
2865
2866 XML_ParserFree(parser);
2867 }
2868 }
2869 END_TEST
2870
2871 /* Test odd corners of the XML_GetBuffer interface */
2872 static enum XML_Status
get_feature(enum XML_FeatureEnum feature_id,long * presult)2873 get_feature(enum XML_FeatureEnum feature_id, long *presult) {
2874 const XML_Feature *feature = XML_GetFeatureList();
2875
2876 if (feature == NULL)
2877 return XML_STATUS_ERROR;
2878 for (; feature->feature != XML_FEATURE_END; feature++) {
2879 if (feature->feature == feature_id) {
2880 *presult = feature->value;
2881 return XML_STATUS_OK;
2882 }
2883 }
2884 return XML_STATUS_ERROR;
2885 }
2886
2887 /* Test odd corners of the XML_GetBuffer interface */
START_TEST(test_get_buffer_1)2888 START_TEST(test_get_buffer_1) {
2889 const char *text = get_buffer_test_text;
2890 void *buffer;
2891 long context_bytes;
2892
2893 /* Attempt to allocate a negative length buffer */
2894 if (XML_GetBuffer(g_parser, -12) != NULL)
2895 fail("Negative length buffer not failed");
2896
2897 /* Now get a small buffer and extend it past valid length */
2898 buffer = XML_GetBuffer(g_parser, 1536);
2899 if (buffer == NULL)
2900 fail("1.5K buffer failed");
2901 assert(buffer != NULL);
2902 memcpy(buffer, text, strlen(text));
2903 if (XML_ParseBuffer(g_parser, (int)strlen(text), XML_FALSE)
2904 == XML_STATUS_ERROR)
2905 xml_failure(g_parser);
2906 if (XML_GetBuffer(g_parser, INT_MAX) != NULL)
2907 fail("INT_MAX buffer not failed");
2908
2909 /* Now try extending it a more reasonable but still too large
2910 * amount. The allocator in XML_GetBuffer() doubles the buffer
2911 * size until it exceeds the requested amount or INT_MAX. If it
2912 * exceeds INT_MAX, it rejects the request, so we want a request
2913 * between INT_MAX and INT_MAX/2. A gap of 1K seems comfortable,
2914 * with an extra byte just to ensure that the request is off any
2915 * boundary. The request will be inflated internally by
2916 * XML_CONTEXT_BYTES (if >=1), so we subtract that from our
2917 * request.
2918 */
2919 if (get_feature(XML_FEATURE_CONTEXT_BYTES, &context_bytes) != XML_STATUS_OK)
2920 context_bytes = 0;
2921 if (XML_GetBuffer(g_parser, INT_MAX - (context_bytes + 1025)) != NULL)
2922 fail("INT_MAX- buffer not failed");
2923
2924 /* Now try extending it a carefully crafted amount */
2925 if (XML_GetBuffer(g_parser, 1000) == NULL)
2926 fail("1000 buffer failed");
2927 }
2928 END_TEST
2929
2930 /* Test more corners of the XML_GetBuffer interface */
START_TEST(test_get_buffer_2)2931 START_TEST(test_get_buffer_2) {
2932 const char *text = get_buffer_test_text;
2933 void *buffer;
2934
2935 /* Now get a decent buffer */
2936 buffer = XML_GetBuffer(g_parser, 1536);
2937 if (buffer == NULL)
2938 fail("1.5K buffer failed");
2939 assert(buffer != NULL);
2940 memcpy(buffer, text, strlen(text));
2941 if (XML_ParseBuffer(g_parser, (int)strlen(text), XML_FALSE)
2942 == XML_STATUS_ERROR)
2943 xml_failure(g_parser);
2944
2945 /* Extend it, to catch a different code path */
2946 if (XML_GetBuffer(g_parser, 1024) == NULL)
2947 fail("1024 buffer failed");
2948 }
2949 END_TEST
2950
/* Test for signed integer overflow CVE-2022-23852.
 *
 * Only built when XML_CONTEXT_BYTES > 0.  After one byte has been
 * parsed, a request of INT_MAX - keep + 1 bytes must be rejected by
 * XML_GetBuffer.
 */
#if XML_CONTEXT_BYTES > 0
START_TEST(test_get_buffer_3_overflow) {
  XML_Parser parser = XML_ParserCreate(NULL);
  assert(parser != NULL);

  const char *const input = "\n";
  const int input_len = (int)strlen(input);
  const int expectedKeepValue = input_len;

  // After this call, variable "keep" in XML_GetBuffer will
  // have value expectedKeepValue
  if (_XML_Parse_SINGLE_BYTES(parser, input, input_len,
                              XML_FALSE /* isFinal */)
      == XML_STATUS_ERROR) {
    xml_failure(parser);
  }

  assert(expectedKeepValue > 0);
  if (XML_GetBuffer(parser, INT_MAX - expectedKeepValue + 1) != NULL) {
    fail("enlarging buffer not failed");
  }

  XML_ParserFree(parser);
}
END_TEST
#endif // XML_CONTEXT_BYTES > 0
2975
START_TEST(test_buffer_can_grow_to_max)2976 START_TEST(test_buffer_can_grow_to_max) {
2977 const char *const prefixes[] = {
2978 "",
2979 "<",
2980 "<x a='",
2981 "<doc><x a='",
2982 "<document><x a='",
2983 "<averylongelementnamesuchthatitwillhopefullystretchacrossmultiplelinesand"
2984 "lookprettyridiculousitsalsoveryhardtoreadandifyouredoingitihavetowonderif"
2985 "youreallydonthaveanythingbettertodoofcourseiguessicouldveputsomethingbadin"
2986 "herebutipromisethatididntheybtwhowgreatarespacesandpunctuationforhelping"
2987 "withreadabilityprettygreatithinkanywaysthisisprobablylongenoughbye><x a='"};
2988 const int num_prefixes = sizeof(prefixes) / sizeof(prefixes[0]);
2989 int maxbuf = INT_MAX / 2 + (INT_MAX & 1); // round up without overflow
2990 #if defined(__MINGW32__) && ! defined(__MINGW64__)
2991 // workaround for mingw/wine32 on GitHub CI not being able to reach 1GiB
2992 // Can we make a big allocation?
2993 void *big = malloc(maxbuf);
2994 if (! big) {
2995 // The big allocation failed. Let's be a little lenient.
2996 maxbuf = maxbuf / 2;
2997 }
2998 free(big);
2999 #endif
3000
3001 for (int i = 0; i < num_prefixes; ++i) {
3002 set_subtest("\"%s\"", prefixes[i]);
3003 XML_Parser parser = XML_ParserCreate(NULL);
3004 const int prefix_len = (int)strlen(prefixes[i]);
3005 const enum XML_Status s
3006 = _XML_Parse_SINGLE_BYTES(parser, prefixes[i], prefix_len, XML_FALSE);
3007 if (s != XML_STATUS_OK)
3008 xml_failure(parser);
3009
3010 // XML_CONTEXT_BYTES of the prefix may remain in the buffer;
3011 // subtracting the whole prefix is easiest, and close enough.
3012 assert_true(XML_GetBuffer(parser, maxbuf - prefix_len) != NULL);
3013 // The limit should be consistent; no prefix should allow us to
3014 // reach above the max buffer size.
3015 assert_true(XML_GetBuffer(parser, maxbuf + 1) == NULL);
3016 XML_ParserFree(parser);
3017 }
3018 }
3019 END_TEST
3020
START_TEST(test_getbuffer_allocates_on_zero_len)3021 START_TEST(test_getbuffer_allocates_on_zero_len) {
3022 for (int first_len = 1; first_len >= 0; first_len--) {
3023 set_subtest("with len=%d first", first_len);
3024 XML_Parser parser = XML_ParserCreate(NULL);
3025 assert_true(parser != NULL);
3026 assert_true(XML_GetBuffer(parser, first_len) != NULL);
3027 assert_true(XML_GetBuffer(parser, 0) != NULL);
3028 if (XML_ParseBuffer(parser, 0, XML_FALSE) != XML_STATUS_OK)
3029 xml_failure(parser);
3030 XML_ParserFree(parser);
3031 }
3032 }
3033 END_TEST
3034
3035 /* Test position information macros */
START_TEST(test_byte_info_at_end)3036 START_TEST(test_byte_info_at_end) {
3037 const char *text = "<doc></doc>";
3038
3039 if (XML_GetCurrentByteIndex(g_parser) != -1
3040 || XML_GetCurrentByteCount(g_parser) != 0)
3041 fail("Byte index/count incorrect at start of parse");
3042 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3043 == XML_STATUS_ERROR)
3044 xml_failure(g_parser);
3045 /* At end, the count will be zero and the index the end of string */
3046 if (XML_GetCurrentByteCount(g_parser) != 0)
3047 fail("Terminal byte count incorrect");
3048 if (XML_GetCurrentByteIndex(g_parser) != (XML_Index)strlen(text))
3049 fail("Terminal byte index incorrect");
3050 }
3051 END_TEST
3052
3053 /* Test position information from errors */
3054 #define PRE_ERROR_STR "<doc></"
3055 #define POST_ERROR_STR "wombat></doc>"
START_TEST(test_byte_info_at_error)3056 START_TEST(test_byte_info_at_error) {
3057 const char *text = PRE_ERROR_STR POST_ERROR_STR;
3058
3059 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3060 == XML_STATUS_OK)
3061 fail("Syntax error not faulted");
3062 if (XML_GetCurrentByteCount(g_parser) != 0)
3063 fail("Error byte count incorrect");
3064 if (XML_GetCurrentByteIndex(g_parser) != strlen(PRE_ERROR_STR))
3065 fail("Error byte index incorrect");
3066 }
3067 END_TEST
3068 #undef PRE_ERROR_STR
3069 #undef POST_ERROR_STR
3070
3071 /* Test position information in handler */
3072 #define START_ELEMENT "<e>"
3073 #define CDATA_TEXT "Hello"
3074 #define END_ELEMENT "</e>"
START_TEST(test_byte_info_at_cdata)3075 START_TEST(test_byte_info_at_cdata) {
3076 const char *text = START_ELEMENT CDATA_TEXT END_ELEMENT;
3077 int offset, size;
3078 ByteTestData data;
3079
3080 /* Check initial context is empty */
3081 if (XML_GetInputContext(g_parser, &offset, &size) != NULL)
3082 fail("Unexpected context at start of parse");
3083
3084 data.start_element_len = (int)strlen(START_ELEMENT);
3085 data.cdata_len = (int)strlen(CDATA_TEXT);
3086 data.total_string_len = (int)strlen(text);
3087 XML_SetCharacterDataHandler(g_parser, byte_character_handler);
3088 XML_SetUserData(g_parser, &data);
3089 if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_OK)
3090 xml_failure(g_parser);
3091 }
3092 END_TEST
3093 #undef START_ELEMENT
3094 #undef CDATA_TEXT
3095 #undef END_ELEMENT
3096
3097 /* Test predefined entities are correctly recognised */
START_TEST(test_predefined_entities)3098 START_TEST(test_predefined_entities) {
3099 const char *text = "<doc><>&"'</doc>";
3100 const XML_Char *expected = XCS("<doc><>&"'</doc>");
3101 const XML_Char *result = XCS("<>&\"'");
3102 CharData storage;
3103
3104 XML_SetDefaultHandler(g_parser, accumulate_characters);
3105 /* run_character_check uses XML_SetCharacterDataHandler(), which
3106 * unfortunately heads off a code path that we need to exercise.
3107 */
3108 CharData_Init(&storage);
3109 XML_SetUserData(g_parser, &storage);
3110 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3111 == XML_STATUS_ERROR)
3112 xml_failure(g_parser);
3113 /* The default handler doesn't translate the entities */
3114 CharData_CheckXMLChars(&storage, expected);
3115
3116 /* Now try again and check the translation */
3117 XML_ParserReset(g_parser, NULL);
3118 run_character_check(text, result);
3119 }
3120 END_TEST
3121
3122 /* Regression test that an invalid tag in an external parameter
3123 * reference in an external DTD is correctly faulted.
3124 *
3125 * Only a few specific tags are legal in DTDs ignoring comments and
3126 * processing instructions, all of which begin with an exclamation
3127 * mark. "<el/>" is not one of them, so the parser should raise an
3128 * error on encountering it.
3129 */
START_TEST(test_invalid_tag_in_dtd)3130 START_TEST(test_invalid_tag_in_dtd) {
3131 const char *text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n"
3132 "<doc></doc>\n";
3133
3134 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3135 XML_SetExternalEntityRefHandler(g_parser, external_entity_param);
3136 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
3137 "Invalid tag IN DTD external param not rejected");
3138 }
3139 END_TEST
3140
3141 /* Test entities not quite the predefined ones are not mis-recognised */
START_TEST(test_not_predefined_entities)3142 START_TEST(test_not_predefined_entities) {
3143 const char *text[] = {"<doc>&pt;</doc>", "<doc>&amo;</doc>",
3144 "<doc>&quid;</doc>", "<doc>&apod;</doc>", NULL};
3145 int i = 0;
3146
3147 while (text[i] != NULL) {
3148 expect_failure(text[i], XML_ERROR_UNDEFINED_ENTITY,
3149 "Undefined entity not rejected");
3150 XML_ParserReset(g_parser, NULL);
3151 i++;
3152 }
3153 }
3154 END_TEST
3155
3156 /* Test conditional inclusion (IGNORE) */
START_TEST(test_ignore_section)3157 START_TEST(test_ignore_section) {
3158 const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n"
3159 "<doc><e>&entity;</e></doc>";
3160 const XML_Char *expected
3161 = XCS("<![IGNORE[<!ELEMENT e (#PCDATA)*>]]>\n&entity;");
3162 CharData storage;
3163
3164 CharData_Init(&storage);
3165 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3166 XML_SetUserData(g_parser, &storage);
3167 XML_SetExternalEntityRefHandler(g_parser, external_entity_load_ignore);
3168 XML_SetDefaultHandler(g_parser, accumulate_characters);
3169 XML_SetStartDoctypeDeclHandler(g_parser, dummy_start_doctype_handler);
3170 XML_SetEndDoctypeDeclHandler(g_parser, dummy_end_doctype_handler);
3171 XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
3172 XML_SetStartElementHandler(g_parser, dummy_start_element);
3173 XML_SetEndElementHandler(g_parser, dummy_end_element);
3174 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3175 == XML_STATUS_ERROR)
3176 xml_failure(g_parser);
3177 CharData_CheckXMLChars(&storage, expected);
3178 }
3179 END_TEST
3180
START_TEST(test_ignore_section_utf16)3181 START_TEST(test_ignore_section_utf16) {
3182 const char text[] =
3183 /* <!DOCTYPE d SYSTEM 's'> */
3184 "<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 "
3185 "\0S\0Y\0S\0T\0E\0M\0 \0'\0s\0'\0>\0\n\0"
3186 /* <d><e>&en;</e></d> */
3187 "<\0d\0>\0<\0e\0>\0&\0e\0n\0;\0<\0/\0e\0>\0<\0/\0d\0>\0";
3188 const XML_Char *expected = XCS("<![IGNORE[<!ELEMENT e (#PCDATA)*>]]>\n&en;");
3189 CharData storage;
3190
3191 CharData_Init(&storage);
3192 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3193 XML_SetUserData(g_parser, &storage);
3194 XML_SetExternalEntityRefHandler(g_parser, external_entity_load_ignore_utf16);
3195 XML_SetDefaultHandler(g_parser, accumulate_characters);
3196 XML_SetStartDoctypeDeclHandler(g_parser, dummy_start_doctype_handler);
3197 XML_SetEndDoctypeDeclHandler(g_parser, dummy_end_doctype_handler);
3198 XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
3199 XML_SetStartElementHandler(g_parser, dummy_start_element);
3200 XML_SetEndElementHandler(g_parser, dummy_end_element);
3201 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
3202 == XML_STATUS_ERROR)
3203 xml_failure(g_parser);
3204 CharData_CheckXMLChars(&storage, expected);
3205 }
3206 END_TEST
3207
START_TEST(test_ignore_section_utf16_be)3208 START_TEST(test_ignore_section_utf16_be) {
3209 const char text[] =
3210 /* <!DOCTYPE d SYSTEM 's'> */
3211 "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 "
3212 "\0S\0Y\0S\0T\0E\0M\0 \0'\0s\0'\0>\0\n"
3213 /* <d><e>&en;</e></d> */
3214 "\0<\0d\0>\0<\0e\0>\0&\0e\0n\0;\0<\0/\0e\0>\0<\0/\0d\0>";
3215 const XML_Char *expected = XCS("<![IGNORE[<!ELEMENT e (#PCDATA)*>]]>\n&en;");
3216 CharData storage;
3217
3218 CharData_Init(&storage);
3219 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3220 XML_SetUserData(g_parser, &storage);
3221 XML_SetExternalEntityRefHandler(g_parser,
3222 external_entity_load_ignore_utf16_be);
3223 XML_SetDefaultHandler(g_parser, accumulate_characters);
3224 XML_SetStartDoctypeDeclHandler(g_parser, dummy_start_doctype_handler);
3225 XML_SetEndDoctypeDeclHandler(g_parser, dummy_end_doctype_handler);
3226 XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
3227 XML_SetStartElementHandler(g_parser, dummy_start_element);
3228 XML_SetEndElementHandler(g_parser, dummy_end_element);
3229 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
3230 == XML_STATUS_ERROR)
3231 xml_failure(g_parser);
3232 CharData_CheckXMLChars(&storage, expected);
3233 }
3234 END_TEST
3235
3236 /* Test mis-formatted conditional exclusion */
START_TEST(test_bad_ignore_section)3237 START_TEST(test_bad_ignore_section) {
3238 const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n"
3239 "<doc><e>&entity;</e></doc>";
3240 ExtFaults faults[]
3241 = {{"<![IGNORE[<!ELEM", "Broken-off declaration not faulted", NULL,
3242 XML_ERROR_SYNTAX},
3243 {"<![IGNORE[\x01]]>", "Invalid XML character not faulted", NULL,
3244 XML_ERROR_INVALID_TOKEN},
3245 {/* FIrst two bytes of a three-byte char */
3246 "<![IGNORE[\xe2\x82", "Partial XML character not faulted", NULL,
3247 XML_ERROR_PARTIAL_CHAR},
3248 {NULL, NULL, NULL, XML_ERROR_NONE}};
3249 ExtFaults *fault;
3250
3251 for (fault = &faults[0]; fault->parse_text != NULL; fault++) {
3252 set_subtest("%s", fault->parse_text);
3253 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3254 XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
3255 XML_SetUserData(g_parser, fault);
3256 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
3257 "Incomplete IGNORE section not failed");
3258 XML_ParserReset(g_parser, NULL);
3259 }
3260 }
3261 END_TEST
3262
3263 struct bom_testdata {
3264 const char *external;
3265 int split;
3266 XML_Bool nested_callback_happened;
3267 };
3268
3269 static int XMLCALL
external_bom_checker(XML_Parser parser,const XML_Char * context,const XML_Char * base,const XML_Char * systemId,const XML_Char * publicId)3270 external_bom_checker(XML_Parser parser, const XML_Char *context,
3271 const XML_Char *base, const XML_Char *systemId,
3272 const XML_Char *publicId) {
3273 const char *text;
3274 UNUSED_P(base);
3275 UNUSED_P(systemId);
3276 UNUSED_P(publicId);
3277
3278 XML_Parser ext_parser = XML_ExternalEntityParserCreate(parser, context, NULL);
3279 if (ext_parser == NULL)
3280 fail("Could not create external entity parser");
3281
3282 if (! xcstrcmp(systemId, XCS("004-2.ent"))) {
3283 struct bom_testdata *const testdata
3284 = (struct bom_testdata *)XML_GetUserData(parser);
3285 const char *const external = testdata->external;
3286 const int split = testdata->split;
3287 testdata->nested_callback_happened = XML_TRUE;
3288
3289 if (_XML_Parse_SINGLE_BYTES(ext_parser, external, split, XML_FALSE)
3290 != XML_STATUS_OK) {
3291 xml_failure(ext_parser);
3292 }
3293 text = external + split; // the parse below will continue where we left off.
3294 } else if (! xcstrcmp(systemId, XCS("004-1.ent"))) {
3295 text = "<!ELEMENT doc EMPTY>\n"
3296 "<!ENTITY % e1 SYSTEM '004-2.ent'>\n"
3297 "<!ENTITY % e2 '%e1;'>\n";
3298 } else {
3299 fail("unknown systemId");
3300 }
3301
3302 if (_XML_Parse_SINGLE_BYTES(ext_parser, text, (int)strlen(text), XML_TRUE)
3303 != XML_STATUS_OK)
3304 xml_failure(ext_parser);
3305
3306 XML_ParserFree(ext_parser);
3307 return XML_STATUS_OK;
3308 }
3309
3310 /* regression test: BOM should be consumed when followed by a partial token. */
START_TEST(test_external_bom_consumed)3311 START_TEST(test_external_bom_consumed) {
3312 const char *const text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n"
3313 "<doc></doc>\n";
3314 const char *const external = "\xEF\xBB\xBF<!ATTLIST doc a1 CDATA 'value'>";
3315 const int len = (int)strlen(external);
3316 for (int split = 0; split <= len; ++split) {
3317 set_subtest("split at byte %d", split);
3318
3319 struct bom_testdata testdata;
3320 testdata.external = external;
3321 testdata.split = split;
3322 testdata.nested_callback_happened = XML_FALSE;
3323
3324 XML_Parser parser = XML_ParserCreate(NULL);
3325 if (parser == NULL) {
3326 fail("Couldn't create parser");
3327 }
3328 XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3329 XML_SetExternalEntityRefHandler(parser, external_bom_checker);
3330 XML_SetUserData(parser, &testdata);
3331 if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
3332 == XML_STATUS_ERROR)
3333 xml_failure(parser);
3334 if (! testdata.nested_callback_happened) {
3335 fail("ref handler not called");
3336 }
3337 XML_ParserFree(parser);
3338 }
3339 }
3340 END_TEST
3341
3342 /* Test recursive parsing */
START_TEST(test_external_entity_values)3343 START_TEST(test_external_entity_values) {
3344 const char *text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n"
3345 "<doc></doc>\n";
3346 ExtFaults data_004_2[] = {
3347 {"<!ATTLIST doc a1 CDATA 'value'>", NULL, NULL, XML_ERROR_NONE},
3348 {"<!ATTLIST $doc a1 CDATA 'value'>", "Invalid token not faulted", NULL,
3349 XML_ERROR_INVALID_TOKEN},
3350 {"'wombat", "Unterminated string not faulted", NULL,
3351 XML_ERROR_UNCLOSED_TOKEN},
3352 {"\xe2\x82", "Partial UTF-8 character not faulted", NULL,
3353 XML_ERROR_PARTIAL_CHAR},
3354 {"<?xml version='1.0' encoding='utf-8'?>\n", NULL, NULL, XML_ERROR_NONE},
3355 {"<?xml?>", "Malformed XML declaration not faulted", NULL,
3356 XML_ERROR_XML_DECL},
3357 {/* UTF-8 BOM */
3358 "\xEF\xBB\xBF<!ATTLIST doc a1 CDATA 'value'>", NULL, NULL,
3359 XML_ERROR_NONE},
3360 {"<?xml version='1.0' encoding='utf-8'?>\n$",
3361 "Invalid token after text declaration not faulted", NULL,
3362 XML_ERROR_INVALID_TOKEN},
3363 {"<?xml version='1.0' encoding='utf-8'?>\n'wombat",
3364 "Unterminated string after text decl not faulted", NULL,
3365 XML_ERROR_UNCLOSED_TOKEN},
3366 {"<?xml version='1.0' encoding='utf-8'?>\n\xe2\x82",
3367 "Partial UTF-8 character after text decl not faulted", NULL,
3368 XML_ERROR_PARTIAL_CHAR},
3369 {"%e1;", "Recursive parameter entity not faulted", NULL,
3370 XML_ERROR_RECURSIVE_ENTITY_REF},
3371 {NULL, NULL, NULL, XML_ERROR_NONE}};
3372 int i;
3373
3374 for (i = 0; data_004_2[i].parse_text != NULL; i++) {
3375 set_subtest("%s", data_004_2[i].parse_text);
3376 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3377 XML_SetExternalEntityRefHandler(g_parser, external_entity_valuer);
3378 XML_SetUserData(g_parser, &data_004_2[i]);
3379 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3380 == XML_STATUS_ERROR)
3381 xml_failure(g_parser);
3382 XML_ParserReset(g_parser, NULL);
3383 }
3384 }
3385 END_TEST
3386
3387 /* Test the recursive parse interacts with a not standalone handler */
START_TEST(test_ext_entity_not_standalone)3388 START_TEST(test_ext_entity_not_standalone) {
3389 const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n"
3390 "<doc></doc>";
3391
3392 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3393 XML_SetExternalEntityRefHandler(g_parser, external_entity_not_standalone);
3394 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
3395 "Standalone rejection not caught");
3396 }
3397 END_TEST
3398
START_TEST(test_ext_entity_value_abort)3399 START_TEST(test_ext_entity_value_abort) {
3400 const char *text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n"
3401 "<doc></doc>\n";
3402
3403 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3404 XML_SetExternalEntityRefHandler(g_parser, external_entity_value_aborter);
3405 g_resumable = XML_FALSE;
3406 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3407 == XML_STATUS_ERROR)
3408 xml_failure(g_parser);
3409 }
3410 END_TEST
3411
START_TEST(test_bad_public_doctype)3412 START_TEST(test_bad_public_doctype) {
3413 const char *text = "<?xml version='1.0' encoding='utf-8'?>\n"
3414 "<!DOCTYPE doc PUBLIC '{BadName}' 'test'>\n"
3415 "<doc></doc>";
3416
3417 /* Setting a handler provokes a particular code path */
3418 XML_SetDoctypeDeclHandler(g_parser, dummy_start_doctype_handler,
3419 dummy_end_doctype_handler);
3420 expect_failure(text, XML_ERROR_PUBLICID, "Bad Public ID not failed");
3421 }
3422 END_TEST
3423
3424 /* Test based on ibm/valid/P32/ibm32v04.xml */
START_TEST(test_attribute_enum_value)3425 START_TEST(test_attribute_enum_value) {
3426 const char *text = "<?xml version='1.0' standalone='no'?>\n"
3427 "<!DOCTYPE animal SYSTEM 'test.dtd'>\n"
3428 "<animal>This is a \n <a/> \n\nyellow tiger</animal>";
3429 ExtTest dtd_data
3430 = {"<!ELEMENT animal (#PCDATA|a)*>\n"
3431 "<!ELEMENT a EMPTY>\n"
3432 "<!ATTLIST animal xml:space (default|preserve) 'preserve'>",
3433 NULL, NULL};
3434 const XML_Char *expected = XCS("This is a \n \n\nyellow tiger");
3435
3436 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
3437 XML_SetUserData(g_parser, &dtd_data);
3438 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3439 /* An attribute list handler provokes a different code path */
3440 XML_SetAttlistDeclHandler(g_parser, dummy_attlist_decl_handler);
3441 run_ext_character_check(text, &dtd_data, expected);
3442 }
3443 END_TEST
3444
3445 /* Slightly bizarrely, the library seems to silently ignore entity
3446 * definitions for predefined entities, even when they are wrong. The
3447 * language of the XML 1.0 spec is somewhat unhelpful as to what ought
3448 * to happen, so this is currently treated as acceptable.
3449 */
START_TEST(test_predefined_entity_redefinition)3450 START_TEST(test_predefined_entity_redefinition) {
3451 const char *text = "<!DOCTYPE doc [\n"
3452 "<!ENTITY apos 'foo'>\n"
3453 "]>\n"
3454 "<doc>'</doc>";
3455 run_character_check(text, XCS("'"));
3456 }
3457 END_TEST
3458
3459 /* Test that the parser stops processing the DTD after an unresolved
3460 * parameter entity is encountered.
3461 */
START_TEST(test_dtd_stop_processing)3462 START_TEST(test_dtd_stop_processing) {
3463 const char *text = "<!DOCTYPE doc [\n"
3464 "%foo;\n"
3465 "<!ENTITY bar 'bas'>\n"
3466 "]><doc/>";
3467
3468 XML_SetEntityDeclHandler(g_parser, dummy_entity_decl_handler);
3469 init_dummy_handlers();
3470 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3471 == XML_STATUS_ERROR)
3472 xml_failure(g_parser);
3473 if (get_dummy_handler_flags() != 0)
3474 fail("DTD processing still going after undefined PE");
3475 }
3476 END_TEST
3477
3478 /* Test public notations with no system ID */
START_TEST(test_public_notation_no_sysid)3479 START_TEST(test_public_notation_no_sysid) {
3480 const char *text = "<!DOCTYPE doc [\n"
3481 "<!NOTATION note PUBLIC 'foo'>\n"
3482 "<!ELEMENT doc EMPTY>\n"
3483 "]>\n<doc/>";
3484
3485 init_dummy_handlers();
3486 XML_SetNotationDeclHandler(g_parser, dummy_notation_decl_handler);
3487 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3488 == XML_STATUS_ERROR)
3489 xml_failure(g_parser);
3490 if (get_dummy_handler_flags() != DUMMY_NOTATION_DECL_HANDLER_FLAG)
3491 fail("Notation declaration handler not called");
3492 }
3493 END_TEST
3494
START_TEST(test_nested_groups)3495 START_TEST(test_nested_groups) {
3496 const char *text
3497 = "<!DOCTYPE doc [\n"
3498 "<!ELEMENT doc "
3499 /* Sixteen elements per line */
3500 "(e,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,"
3501 "(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?"
3502 "))))))))))))))))))))))))))))))))>\n"
3503 "<!ELEMENT e EMPTY>"
3504 "]>\n"
3505 "<doc><e/></doc>";
3506 CharData storage;
3507
3508 CharData_Init(&storage);
3509 XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
3510 XML_SetStartElementHandler(g_parser, record_element_start_handler);
3511 XML_SetUserData(g_parser, &storage);
3512 init_dummy_handlers();
3513 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3514 == XML_STATUS_ERROR)
3515 xml_failure(g_parser);
3516 CharData_CheckXMLChars(&storage, XCS("doce"));
3517 if (get_dummy_handler_flags() != DUMMY_ELEMENT_DECL_HANDLER_FLAG)
3518 fail("Element handler not fired");
3519 }
3520 END_TEST
3521
START_TEST(test_group_choice)3522 START_TEST(test_group_choice) {
3523 const char *text = "<!DOCTYPE doc [\n"
3524 "<!ELEMENT doc (a|b|c)+>\n"
3525 "<!ELEMENT a EMPTY>\n"
3526 "<!ELEMENT b (#PCDATA)>\n"
3527 "<!ELEMENT c ANY>\n"
3528 "]>\n"
3529 "<doc>\n"
3530 "<a/>\n"
3531 "<b attr='foo'>This is a foo</b>\n"
3532 "<c></c>\n"
3533 "</doc>\n";
3534
3535 XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
3536 init_dummy_handlers();
3537 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3538 == XML_STATUS_ERROR)
3539 xml_failure(g_parser);
3540 if (get_dummy_handler_flags() != DUMMY_ELEMENT_DECL_HANDLER_FLAG)
3541 fail("Element handler flag not raised");
3542 }
3543 END_TEST
3544
START_TEST(test_standalone_parameter_entity)3545 START_TEST(test_standalone_parameter_entity) {
3546 const char *text = "<?xml version='1.0' standalone='yes'?>\n"
3547 "<!DOCTYPE doc SYSTEM 'http://example.org/' [\n"
3548 "<!ENTITY % entity '<!ELEMENT doc (#PCDATA)>'>\n"
3549 "%entity;\n"
3550 "]>\n"
3551 "<doc></doc>";
3552 char dtd_data[] = "<!ENTITY % e1 'foo'>\n";
3553
3554 XML_SetUserData(g_parser, dtd_data);
3555 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3556 XML_SetExternalEntityRefHandler(g_parser, external_entity_public);
3557 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3558 == XML_STATUS_ERROR)
3559 xml_failure(g_parser);
3560 }
3561 END_TEST
3562
3563 /* Test skipping of parameter entity in an external DTD */
3564 /* Derived from ibm/invalid/P69/ibm69i01.xml */
START_TEST(test_skipped_parameter_entity)3565 START_TEST(test_skipped_parameter_entity) {
3566 const char *text = "<?xml version='1.0'?>\n"
3567 "<!DOCTYPE root SYSTEM 'http://example.org/dtd.ent' [\n"
3568 "<!ELEMENT root (#PCDATA|a)* >\n"
3569 "]>\n"
3570 "<root></root>";
3571 ExtTest dtd_data = {"%pe2;", NULL, NULL};
3572
3573 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
3574 XML_SetUserData(g_parser, &dtd_data);
3575 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3576 XML_SetSkippedEntityHandler(g_parser, dummy_skip_handler);
3577 init_dummy_handlers();
3578 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3579 == XML_STATUS_ERROR)
3580 xml_failure(g_parser);
3581 if (get_dummy_handler_flags() != DUMMY_SKIP_HANDLER_FLAG)
3582 fail("Skip handler not executed");
3583 }
3584 END_TEST
3585
3586 /* Test recursive parameter entity definition rejected in external DTD */
START_TEST(test_recursive_external_parameter_entity)3587 START_TEST(test_recursive_external_parameter_entity) {
3588 const char *text = "<?xml version='1.0'?>\n"
3589 "<!DOCTYPE root SYSTEM 'http://example.org/dtd.ent' [\n"
3590 "<!ELEMENT root (#PCDATA|a)* >\n"
3591 "]>\n"
3592 "<root></root>";
3593 ExtFaults dtd_data = {"<!ENTITY % pe2 '%pe2;'>\n%pe2;",
3594 "Recursive external parameter entity not faulted", NULL,
3595 XML_ERROR_RECURSIVE_ENTITY_REF};
3596
3597 XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
3598 XML_SetUserData(g_parser, &dtd_data);
3599 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3600 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
3601 "Recursive external parameter not spotted");
3602 }
3603 END_TEST
3604
3605 /* Test undefined parameter entity in external entity handler */
START_TEST(test_undefined_ext_entity_in_external_dtd)3606 START_TEST(test_undefined_ext_entity_in_external_dtd) {
3607 const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n"
3608 "<doc></doc>\n";
3609
3610 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3611 XML_SetExternalEntityRefHandler(g_parser, external_entity_devaluer);
3612 XML_SetUserData(g_parser, NULL);
3613 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3614 == XML_STATUS_ERROR)
3615 xml_failure(g_parser);
3616
3617 /* Now repeat without the external entity ref handler invoking
3618 * another copy of itself.
3619 */
3620 XML_ParserReset(g_parser, NULL);
3621 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3622 XML_SetExternalEntityRefHandler(g_parser, external_entity_devaluer);
3623 XML_SetUserData(g_parser, g_parser); /* Any non-NULL value will do */
3624 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3625 == XML_STATUS_ERROR)
3626 xml_failure(g_parser);
3627 }
3628 END_TEST
3629
3630 /* Test suspending the parse on receiving an XML declaration works */
START_TEST(test_suspend_xdecl)3631 START_TEST(test_suspend_xdecl) {
3632 const char *text = long_character_data_text;
3633
3634 XML_SetXmlDeclHandler(g_parser, entity_suspending_xdecl_handler);
3635 XML_SetUserData(g_parser, g_parser);
3636 g_resumable = XML_TRUE;
3637 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3638 != XML_STATUS_SUSPENDED)
3639 xml_failure(g_parser);
3640 if (XML_GetErrorCode(g_parser) != XML_ERROR_NONE)
3641 xml_failure(g_parser);
3642 /* Attempt to start a new parse while suspended */
3643 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3644 != XML_STATUS_ERROR)
3645 fail("Attempt to parse while suspended not faulted");
3646 if (XML_GetErrorCode(g_parser) != XML_ERROR_SUSPENDED)
3647 fail("Suspended parse not faulted with correct error");
3648 }
3649 END_TEST
3650
3651 /* Test aborting the parse in an epilog works */
START_TEST(test_abort_epilog)3652 START_TEST(test_abort_epilog) {
3653 const char *text = "<doc></doc>\n\r\n";
3654 XML_Char trigger_char = XCS('\r');
3655
3656 XML_SetDefaultHandler(g_parser, selective_aborting_default_handler);
3657 XML_SetUserData(g_parser, &trigger_char);
3658 g_resumable = XML_FALSE;
3659 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3660 != XML_STATUS_ERROR)
3661 fail("Abort not triggered");
3662 if (XML_GetErrorCode(g_parser) != XML_ERROR_ABORTED)
3663 xml_failure(g_parser);
3664 }
3665 END_TEST
3666
3667 /* Test a different code path for abort in the epilog */
START_TEST(test_abort_epilog_2)3668 START_TEST(test_abort_epilog_2) {
3669 const char *text = "<doc></doc>\n";
3670 XML_Char trigger_char = XCS('\n');
3671
3672 XML_SetDefaultHandler(g_parser, selective_aborting_default_handler);
3673 XML_SetUserData(g_parser, &trigger_char);
3674 g_resumable = XML_FALSE;
3675 expect_failure(text, XML_ERROR_ABORTED, "Abort not triggered");
3676 }
3677 END_TEST
3678
3679 /* Test suspension from the epilog */
START_TEST(test_suspend_epilog)3680 START_TEST(test_suspend_epilog) {
3681 const char *text = "<doc></doc>\n";
3682 XML_Char trigger_char = XCS('\n');
3683
3684 XML_SetDefaultHandler(g_parser, selective_aborting_default_handler);
3685 XML_SetUserData(g_parser, &trigger_char);
3686 g_resumable = XML_TRUE;
3687 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3688 != XML_STATUS_SUSPENDED)
3689 xml_failure(g_parser);
3690 }
3691 END_TEST
3692
START_TEST(test_suspend_in_sole_empty_tag)3693 START_TEST(test_suspend_in_sole_empty_tag) {
3694 const char *text = "<doc/>";
3695 enum XML_Status rc;
3696
3697 XML_SetEndElementHandler(g_parser, suspending_end_handler);
3698 XML_SetUserData(g_parser, g_parser);
3699 rc = _XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE);
3700 if (rc == XML_STATUS_ERROR)
3701 xml_failure(g_parser);
3702 else if (rc != XML_STATUS_SUSPENDED)
3703 fail("Suspend not triggered");
3704 rc = XML_ResumeParser(g_parser);
3705 if (rc == XML_STATUS_ERROR)
3706 xml_failure(g_parser);
3707 else if (rc != XML_STATUS_OK)
3708 fail("Resume failed");
3709 }
3710 END_TEST
3711
START_TEST(test_unfinished_epilog)3712 START_TEST(test_unfinished_epilog) {
3713 const char *text = "<doc></doc><";
3714
3715 expect_failure(text, XML_ERROR_UNCLOSED_TOKEN,
3716 "Incomplete epilog entry not faulted");
3717 }
3718 END_TEST
3719
START_TEST(test_partial_char_in_epilog)3720 START_TEST(test_partial_char_in_epilog) {
3721 const char *text = "<doc></doc>\xe2\x82";
3722
3723 /* First check that no fault is raised if the parse is not finished */
3724 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE)
3725 == XML_STATUS_ERROR)
3726 xml_failure(g_parser);
3727 /* Now check that it is faulted once we finish */
3728 if (XML_ParseBuffer(g_parser, 0, XML_TRUE) != XML_STATUS_ERROR)
3729 fail("Partial character in epilog not faulted");
3730 if (XML_GetErrorCode(g_parser) != XML_ERROR_PARTIAL_CHAR)
3731 xml_failure(g_parser);
3732 }
3733 END_TEST
3734
3735 /* Test resuming a parse suspended in entity substitution */
START_TEST(test_suspend_resume_internal_entity)3736 START_TEST(test_suspend_resume_internal_entity) {
3737 const char *text
3738 = "<!DOCTYPE doc [\n"
3739 "<!ENTITY foo '<suspend>Hi<suspend>Ho</suspend></suspend>'>\n"
3740 "]>\n"
3741 "<doc>&foo;</doc>\n";
3742 const XML_Char *expected1 = XCS("Hi");
3743 const XML_Char *expected2 = XCS("HiHo");
3744 CharData storage;
3745
3746 CharData_Init(&storage);
3747 XML_SetStartElementHandler(g_parser, start_element_suspender);
3748 XML_SetCharacterDataHandler(g_parser, accumulate_characters);
3749 XML_SetUserData(g_parser, &storage);
3750 // can't use SINGLE_BYTES here, because it'll return early on suspension, and
3751 // we won't know exactly how much input we actually managed to give Expat.
3752 if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
3753 != XML_STATUS_SUSPENDED)
3754 xml_failure(g_parser);
3755 CharData_CheckXMLChars(&storage, XCS(""));
3756 if (XML_ResumeParser(g_parser) != XML_STATUS_SUSPENDED)
3757 xml_failure(g_parser);
3758 CharData_CheckXMLChars(&storage, expected1);
3759 if (XML_ResumeParser(g_parser) != XML_STATUS_OK)
3760 xml_failure(g_parser);
3761 CharData_CheckXMLChars(&storage, expected2);
3762 }
3763 END_TEST
3764
START_TEST(test_suspend_resume_internal_entity_issue_629)3765 START_TEST(test_suspend_resume_internal_entity_issue_629) {
3766 const char *const text
3767 = "<!DOCTYPE a [<!ENTITY e '<!--COMMENT-->a'>]><a>&e;<b>\n"
3768 "<"
3769 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3770 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3771 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3772 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3773 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3774 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3775 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3776 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3777 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3778 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3779 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3780 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3781 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3782 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3783 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3784 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3785 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3786 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3787 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3788 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3789 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3790 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3791 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3792 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3793 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3794 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3795 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3796 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3797 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3798 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3799 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3800 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3801 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3802 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3803 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3804 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3805 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3806 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3807 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3808 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3809 "/>"
3810 "</b></a>";
3811 const size_t firstChunkSizeBytes = 54;
3812
3813 XML_Parser parser = XML_ParserCreate(NULL);
3814 XML_SetUserData(parser, parser);
3815 XML_SetCommentHandler(parser, suspending_comment_handler);
3816
3817 if (XML_Parse(parser, text, (int)firstChunkSizeBytes, XML_FALSE)
3818 != XML_STATUS_SUSPENDED)
3819 xml_failure(parser);
3820 if (XML_ResumeParser(parser) != XML_STATUS_OK)
3821 xml_failure(parser);
3822 if (_XML_Parse_SINGLE_BYTES(parser, text + firstChunkSizeBytes,
3823 (int)(strlen(text) - firstChunkSizeBytes),
3824 XML_TRUE)
3825 != XML_STATUS_OK)
3826 xml_failure(parser);
3827 XML_ParserFree(parser);
3828 }
3829 END_TEST
3830
3831 /* Test syntax error is caught at parse resumption */
START_TEST(test_resume_entity_with_syntax_error)3832 START_TEST(test_resume_entity_with_syntax_error) {
3833 const char *text = "<!DOCTYPE doc [\n"
3834 "<!ENTITY foo '<suspend>Hi</wombat>'>\n"
3835 "]>\n"
3836 "<doc>&foo;</doc>\n";
3837
3838 XML_SetStartElementHandler(g_parser, start_element_suspender);
3839 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3840 != XML_STATUS_SUSPENDED)
3841 xml_failure(g_parser);
3842 if (XML_ResumeParser(g_parser) != XML_STATUS_ERROR)
3843 fail("Syntax error in entity not faulted");
3844 if (XML_GetErrorCode(g_parser) != XML_ERROR_TAG_MISMATCH)
3845 xml_failure(g_parser);
3846 }
3847 END_TEST
3848
3849 /* Test suspending and resuming in a parameter entity substitution */
START_TEST(test_suspend_resume_parameter_entity)3850 START_TEST(test_suspend_resume_parameter_entity) {
3851 const char *text = "<!DOCTYPE doc [\n"
3852 "<!ENTITY % foo '<!ELEMENT doc (#PCDATA)*>'>\n"
3853 "%foo;\n"
3854 "]>\n"
3855 "<doc>Hello, world</doc>";
3856 const XML_Char *expected = XCS("Hello, world");
3857 CharData storage;
3858
3859 CharData_Init(&storage);
3860 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3861 XML_SetElementDeclHandler(g_parser, element_decl_suspender);
3862 XML_SetCharacterDataHandler(g_parser, accumulate_characters);
3863 XML_SetUserData(g_parser, &storage);
3864 if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
3865 != XML_STATUS_SUSPENDED)
3866 xml_failure(g_parser);
3867 CharData_CheckXMLChars(&storage, XCS(""));
3868 if (XML_ResumeParser(g_parser) != XML_STATUS_OK)
3869 xml_failure(g_parser);
3870 CharData_CheckXMLChars(&storage, expected);
3871 }
3872 END_TEST
3873
3874 /* Test attempting to use parser after an error is faulted */
START_TEST(test_restart_on_error)3875 START_TEST(test_restart_on_error) {
3876 const char *text = "<$doc><doc></doc>";
3877
3878 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3879 != XML_STATUS_ERROR)
3880 fail("Invalid tag name not faulted");
3881 if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN)
3882 xml_failure(g_parser);
3883 if (XML_Parse(g_parser, NULL, 0, XML_TRUE) != XML_STATUS_ERROR)
3884 fail("Restarting invalid parse not faulted");
3885 if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN)
3886 xml_failure(g_parser);
3887 }
3888 END_TEST
3889
3890 /* Test that angle brackets in an attribute default value are faulted */
START_TEST(test_reject_lt_in_attribute_value)3891 START_TEST(test_reject_lt_in_attribute_value) {
3892 const char *text = "<!DOCTYPE doc [<!ATTLIST doc a CDATA '<bar>'>]>\n"
3893 "<doc></doc>";
3894
3895 expect_failure(text, XML_ERROR_INVALID_TOKEN,
3896 "Bad attribute default not faulted");
3897 }
3898 END_TEST
3899
START_TEST(test_reject_unfinished_param_in_att_value)3900 START_TEST(test_reject_unfinished_param_in_att_value) {
3901 const char *text = "<!DOCTYPE doc [<!ATTLIST doc a CDATA '&foo'>]>\n"
3902 "<doc></doc>";
3903
3904 expect_failure(text, XML_ERROR_INVALID_TOKEN,
3905 "Bad attribute default not faulted");
3906 }
3907 END_TEST
3908
START_TEST(test_trailing_cr_in_att_value)3909 START_TEST(test_trailing_cr_in_att_value) {
3910 const char *text = "<doc a='value\r'/>";
3911
3912 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3913 == XML_STATUS_ERROR)
3914 xml_failure(g_parser);
3915 }
3916 END_TEST
3917
3918 /* Try parsing a general entity within a parameter entity in a
3919 * standalone internal DTD. Covers a corner case in the parser.
3920 */
START_TEST(test_standalone_internal_entity)3921 START_TEST(test_standalone_internal_entity) {
3922 const char *text = "<?xml version='1.0' standalone='yes' ?>\n"
3923 "<!DOCTYPE doc [\n"
3924 " <!ELEMENT doc (#PCDATA)>\n"
3925 " <!ENTITY % pe '<!ATTLIST doc att2 CDATA \"≥\">'>\n"
3926 " <!ENTITY ge 'AttDefaultValue'>\n"
3927 " %pe;\n"
3928 "]>\n"
3929 "<doc att2='any'/>";
3930
3931 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3932 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3933 == XML_STATUS_ERROR)
3934 xml_failure(g_parser);
3935 }
3936 END_TEST
3937
3938 /* Test that a reference to an unknown external entity is skipped */
START_TEST(test_skipped_external_entity)3939 START_TEST(test_skipped_external_entity) {
3940 const char *text = "<!DOCTYPE doc SYSTEM 'http://example.org/'>\n"
3941 "<doc></doc>\n";
3942 ExtTest test_data = {"<!ELEMENT doc EMPTY>\n"
3943 "<!ENTITY % e2 '%e1;'>\n",
3944 NULL, NULL};
3945
3946 XML_SetUserData(g_parser, &test_data);
3947 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3948 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
3949 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3950 == XML_STATUS_ERROR)
3951 xml_failure(g_parser);
3952 }
3953 END_TEST
3954
3955 /* Test a different form of unknown external entity */
START_TEST(test_skipped_null_loaded_ext_entity)3956 START_TEST(test_skipped_null_loaded_ext_entity) {
3957 const char *text = "<!DOCTYPE doc SYSTEM 'http://example.org/one.ent'>\n"
3958 "<doc />";
3959 ExtHdlrData test_data
3960 = {"<!ENTITY % pe1 SYSTEM 'http://example.org/two.ent'>\n"
3961 "<!ENTITY % pe2 '%pe1;'>\n"
3962 "%pe2;\n",
3963 external_entity_null_loader};
3964
3965 XML_SetUserData(g_parser, &test_data);
3966 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3967 XML_SetExternalEntityRefHandler(g_parser, external_entity_oneshot_loader);
3968 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3969 == XML_STATUS_ERROR)
3970 xml_failure(g_parser);
3971 }
3972 END_TEST
3973
START_TEST(test_skipped_unloaded_ext_entity)3974 START_TEST(test_skipped_unloaded_ext_entity) {
3975 const char *text = "<!DOCTYPE doc SYSTEM 'http://example.org/one.ent'>\n"
3976 "<doc />";
3977 ExtHdlrData test_data
3978 = {"<!ENTITY % pe1 SYSTEM 'http://example.org/two.ent'>\n"
3979 "<!ENTITY % pe2 '%pe1;'>\n"
3980 "%pe2;\n",
3981 NULL};
3982
3983 XML_SetUserData(g_parser, &test_data);
3984 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3985 XML_SetExternalEntityRefHandler(g_parser, external_entity_oneshot_loader);
3986 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3987 == XML_STATUS_ERROR)
3988 xml_failure(g_parser);
3989 }
3990 END_TEST
3991
3992 /* Test that a parameter entity value ending with a carriage return
3993 * has it translated internally into a newline.
3994 */
START_TEST(test_param_entity_with_trailing_cr)3995 START_TEST(test_param_entity_with_trailing_cr) {
3996 #define PARAM_ENTITY_NAME "pe"
3997 #define PARAM_ENTITY_CORE_VALUE "<!ATTLIST doc att CDATA \"default\">"
3998 const char *text = "<!DOCTYPE doc SYSTEM 'http://example.org/'>\n"
3999 "<doc/>";
4000 ExtTest test_data
4001 = {"<!ENTITY % " PARAM_ENTITY_NAME " '" PARAM_ENTITY_CORE_VALUE "\r'>\n"
4002 "%" PARAM_ENTITY_NAME ";\n",
4003 NULL, NULL};
4004
4005 XML_SetUserData(g_parser, &test_data);
4006 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
4007 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
4008 XML_SetEntityDeclHandler(g_parser, param_entity_match_handler);
4009 param_entity_match_init(XCS(PARAM_ENTITY_NAME),
4010 XCS(PARAM_ENTITY_CORE_VALUE) XCS("\n"));
4011 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4012 == XML_STATUS_ERROR)
4013 xml_failure(g_parser);
4014 int entity_match_flag = get_param_entity_match_flag();
4015 if (entity_match_flag == ENTITY_MATCH_FAIL)
4016 fail("Parameter entity CR->NEWLINE conversion failed");
4017 else if (entity_match_flag == ENTITY_MATCH_NOT_FOUND)
4018 fail("Parameter entity not parsed");
4019 }
4020 #undef PARAM_ENTITY_NAME
4021 #undef PARAM_ENTITY_CORE_VALUE
4022 END_TEST
4023
START_TEST(test_invalid_character_entity)4024 START_TEST(test_invalid_character_entity) {
4025 const char *text = "<!DOCTYPE doc [\n"
4026 " <!ENTITY entity '�'>\n"
4027 "]>\n"
4028 "<doc>&entity;</doc>";
4029
4030 expect_failure(text, XML_ERROR_BAD_CHAR_REF,
4031 "Out of range character reference not faulted");
4032 }
4033 END_TEST
4034
START_TEST(test_invalid_character_entity_2)4035 START_TEST(test_invalid_character_entity_2) {
4036 const char *text = "<!DOCTYPE doc [\n"
4037 " <!ENTITY entity '&#xg0;'>\n"
4038 "]>\n"
4039 "<doc>&entity;</doc>";
4040
4041 expect_failure(text, XML_ERROR_INVALID_TOKEN,
4042 "Out of range character reference not faulted");
4043 }
4044 END_TEST
4045
START_TEST(test_invalid_character_entity_3)4046 START_TEST(test_invalid_character_entity_3) {
4047 const char text[] =
4048 /* <!DOCTYPE doc [\n */
4049 "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0o\0c\0 \0[\0\n"
4050 /* U+0E04 = KHO KHWAI
4051 * U+0E08 = CHO CHAN */
4052 /* <!ENTITY entity '&\u0e04\u0e08;'>\n */
4053 "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0e\0n\0t\0i\0t\0y\0 "
4054 "\0'\0&\x0e\x04\x0e\x08\0;\0'\0>\0\n"
4055 /* ]>\n */
4056 "\0]\0>\0\n"
4057 /* <doc>&entity;</doc> */
4058 "\0<\0d\0o\0c\0>\0&\0e\0n\0t\0i\0t\0y\0;\0<\0/\0d\0o\0c\0>";
4059
4060 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4061 != XML_STATUS_ERROR)
4062 fail("Invalid start of entity name not faulted");
4063 if (XML_GetErrorCode(g_parser) != XML_ERROR_UNDEFINED_ENTITY)
4064 xml_failure(g_parser);
4065 }
4066 END_TEST
4067
START_TEST(test_invalid_character_entity_4)4068 START_TEST(test_invalid_character_entity_4) {
4069 const char *text = "<!DOCTYPE doc [\n"
4070 " <!ENTITY entity '�'>\n" /* = � */
4071 "]>\n"
4072 "<doc>&entity;</doc>";
4073
4074 expect_failure(text, XML_ERROR_BAD_CHAR_REF,
4075 "Out of range character reference not faulted");
4076 }
4077 END_TEST
4078
4079 /* Test that processing instructions are picked up by a default handler */
START_TEST(test_pi_handled_in_default)4080 START_TEST(test_pi_handled_in_default) {
4081 const char *text = "<?test processing instruction?>\n<doc/>";
4082 const XML_Char *expected = XCS("<?test processing instruction?>\n<doc/>");
4083 CharData storage;
4084
4085 CharData_Init(&storage);
4086 XML_SetDefaultHandler(g_parser, accumulate_characters);
4087 XML_SetUserData(g_parser, &storage);
4088 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4089 == XML_STATUS_ERROR)
4090 xml_failure(g_parser);
4091 CharData_CheckXMLChars(&storage, expected);
4092 }
4093 END_TEST
4094
4095 /* Test that comments are picked up by a default handler */
START_TEST(test_comment_handled_in_default)4096 START_TEST(test_comment_handled_in_default) {
4097 const char *text = "<!-- This is a comment -->\n<doc/>";
4098 const XML_Char *expected = XCS("<!-- This is a comment -->\n<doc/>");
4099 CharData storage;
4100
4101 CharData_Init(&storage);
4102 XML_SetDefaultHandler(g_parser, accumulate_characters);
4103 XML_SetUserData(g_parser, &storage);
4104 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4105 == XML_STATUS_ERROR)
4106 xml_failure(g_parser);
4107 CharData_CheckXMLChars(&storage, expected);
4108 }
4109 END_TEST
4110
4111 /* Test PIs that look almost but not quite like XML declarations */
START_TEST(test_pi_yml)4112 START_TEST(test_pi_yml) {
4113 const char *text = "<?yml something like data?><doc/>";
4114 const XML_Char *expected = XCS("yml: something like data\n");
4115 CharData storage;
4116
4117 CharData_Init(&storage);
4118 XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters);
4119 XML_SetUserData(g_parser, &storage);
4120 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4121 == XML_STATUS_ERROR)
4122 xml_failure(g_parser);
4123 CharData_CheckXMLChars(&storage, expected);
4124 }
4125 END_TEST
4126
START_TEST(test_pi_xnl)4127 START_TEST(test_pi_xnl) {
4128 const char *text = "<?xnl nothing like data?><doc/>";
4129 const XML_Char *expected = XCS("xnl: nothing like data\n");
4130 CharData storage;
4131
4132 CharData_Init(&storage);
4133 XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters);
4134 XML_SetUserData(g_parser, &storage);
4135 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4136 == XML_STATUS_ERROR)
4137 xml_failure(g_parser);
4138 CharData_CheckXMLChars(&storage, expected);
4139 }
4140 END_TEST
4141
START_TEST(test_pi_xmm)4142 START_TEST(test_pi_xmm) {
4143 const char *text = "<?xmm everything like data?><doc/>";
4144 const XML_Char *expected = XCS("xmm: everything like data\n");
4145 CharData storage;
4146
4147 CharData_Init(&storage);
4148 XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters);
4149 XML_SetUserData(g_parser, &storage);
4150 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4151 == XML_STATUS_ERROR)
4152 xml_failure(g_parser);
4153 CharData_CheckXMLChars(&storage, expected);
4154 }
4155 END_TEST
4156
START_TEST(test_utf16_pi)4157 START_TEST(test_utf16_pi) {
4158 const char text[] =
4159 /* <?{KHO KHWAI}{CHO CHAN}?>
4160 * where {KHO KHWAI} = U+0E04
4161 * and {CHO CHAN} = U+0E08
4162 */
4163 "<\0?\0\x04\x0e\x08\x0e?\0>\0"
4164 /* <q/> */
4165 "<\0q\0/\0>\0";
4166 #ifdef XML_UNICODE
4167 const XML_Char *expected = XCS("\x0e04\x0e08: \n");
4168 #else
4169 const XML_Char *expected = XCS("\xe0\xb8\x84\xe0\xb8\x88: \n");
4170 #endif
4171 CharData storage;
4172
4173 CharData_Init(&storage);
4174 XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters);
4175 XML_SetUserData(g_parser, &storage);
4176 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4177 == XML_STATUS_ERROR)
4178 xml_failure(g_parser);
4179 CharData_CheckXMLChars(&storage, expected);
4180 }
4181 END_TEST
4182
START_TEST(test_utf16_be_pi)4183 START_TEST(test_utf16_be_pi) {
4184 const char text[] =
4185 /* <?{KHO KHWAI}{CHO CHAN}?>
4186 * where {KHO KHWAI} = U+0E04
4187 * and {CHO CHAN} = U+0E08
4188 */
4189 "\0<\0?\x0e\x04\x0e\x08\0?\0>"
4190 /* <q/> */
4191 "\0<\0q\0/\0>";
4192 #ifdef XML_UNICODE
4193 const XML_Char *expected = XCS("\x0e04\x0e08: \n");
4194 #else
4195 const XML_Char *expected = XCS("\xe0\xb8\x84\xe0\xb8\x88: \n");
4196 #endif
4197 CharData storage;
4198
4199 CharData_Init(&storage);
4200 XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters);
4201 XML_SetUserData(g_parser, &storage);
4202 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4203 == XML_STATUS_ERROR)
4204 xml_failure(g_parser);
4205 CharData_CheckXMLChars(&storage, expected);
4206 }
4207 END_TEST
4208
4209 /* Test that comments can be picked up and translated */
START_TEST(test_utf16_be_comment)4210 START_TEST(test_utf16_be_comment) {
4211 const char text[] =
4212 /* <!-- Comment A --> */
4213 "\0<\0!\0-\0-\0 \0C\0o\0m\0m\0e\0n\0t\0 \0A\0 \0-\0-\0>\0\n"
4214 /* <doc/> */
4215 "\0<\0d\0o\0c\0/\0>";
4216 const XML_Char *expected = XCS(" Comment A ");
4217 CharData storage;
4218
4219 CharData_Init(&storage);
4220 XML_SetCommentHandler(g_parser, accumulate_comment);
4221 XML_SetUserData(g_parser, &storage);
4222 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4223 == XML_STATUS_ERROR)
4224 xml_failure(g_parser);
4225 CharData_CheckXMLChars(&storage, expected);
4226 }
4227 END_TEST
4228
START_TEST(test_utf16_le_comment)4229 START_TEST(test_utf16_le_comment) {
4230 const char text[] =
4231 /* <!-- Comment B --> */
4232 "<\0!\0-\0-\0 \0C\0o\0m\0m\0e\0n\0t\0 \0B\0 \0-\0-\0>\0\n\0"
4233 /* <doc/> */
4234 "<\0d\0o\0c\0/\0>\0";
4235 const XML_Char *expected = XCS(" Comment B ");
4236 CharData storage;
4237
4238 CharData_Init(&storage);
4239 XML_SetCommentHandler(g_parser, accumulate_comment);
4240 XML_SetUserData(g_parser, &storage);
4241 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4242 == XML_STATUS_ERROR)
4243 xml_failure(g_parser);
4244 CharData_CheckXMLChars(&storage, expected);
4245 }
4246 END_TEST
4247
4248 /* Test that the unknown encoding handler with map entries that expect
4249 * conversion but no conversion function is faulted
4250 */
START_TEST(test_missing_encoding_conversion_fn)4251 START_TEST(test_missing_encoding_conversion_fn) {
4252 const char *text = "<?xml version='1.0' encoding='no-conv'?>\n"
4253 "<doc>\x81</doc>";
4254
4255 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4256 /* MiscEncodingHandler sets up an encoding with every top-bit-set
4257 * character introducing a two-byte sequence. For this, it
4258 * requires a convert function. The above function call doesn't
4259 * pass one through, so when BadEncodingHandler actually gets
4260 * called it should supply an invalid encoding.
4261 */
4262 expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
4263 "Encoding with missing convert() not faulted");
4264 }
4265 END_TEST
4266
START_TEST(test_failing_encoding_conversion_fn)4267 START_TEST(test_failing_encoding_conversion_fn) {
4268 const char *text = "<?xml version='1.0' encoding='failing-conv'?>\n"
4269 "<doc>\x81</doc>";
4270
4271 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4272 /* BadEncodingHandler sets up an encoding with every top-bit-set
4273 * character introducing a two-byte sequence. For this, it
4274 * requires a convert function. The above function call passes
4275 * one that insists all possible sequences are invalid anyway.
4276 */
4277 expect_failure(text, XML_ERROR_INVALID_TOKEN,
4278 "Encoding with failing convert() not faulted");
4279 }
4280 END_TEST
4281
4282 /* Test unknown encoding conversions */
START_TEST(test_unknown_encoding_success)4283 START_TEST(test_unknown_encoding_success) {
4284 const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4285 /* Equivalent to <eoc>Hello, world</eoc> */
4286 "<\x81\x64\x80oc>Hello, world</\x81\x64\x80oc>";
4287
4288 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4289 run_character_check(text, XCS("Hello, world"));
4290 }
4291 END_TEST
4292
4293 /* Test bad name character in unknown encoding */
START_TEST(test_unknown_encoding_bad_name)4294 START_TEST(test_unknown_encoding_bad_name) {
4295 const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4296 "<\xff\x64oc>Hello, world</\xff\x64oc>";
4297
4298 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4299 expect_failure(text, XML_ERROR_INVALID_TOKEN,
4300 "Bad name start in unknown encoding not faulted");
4301 }
4302 END_TEST
4303
4304 /* Test bad mid-name character in unknown encoding */
START_TEST(test_unknown_encoding_bad_name_2)4305 START_TEST(test_unknown_encoding_bad_name_2) {
4306 const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4307 "<d\xffoc>Hello, world</d\xffoc>";
4308
4309 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4310 expect_failure(text, XML_ERROR_INVALID_TOKEN,
4311 "Bad name in unknown encoding not faulted");
4312 }
4313 END_TEST
4314
4315 /* Test element name that is long enough to fill the conversion buffer
4316 * in an unknown encoding, finishing with an encoded character.
4317 */
START_TEST(test_unknown_encoding_long_name_1)4318 START_TEST(test_unknown_encoding_long_name_1) {
4319 const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4320 "<abcdefghabcdefghabcdefghijkl\x80m\x80n\x80o\x80p>"
4321 "Hi"
4322 "</abcdefghabcdefghabcdefghijkl\x80m\x80n\x80o\x80p>";
4323 const XML_Char *expected = XCS("abcdefghabcdefghabcdefghijklmnop");
4324 CharData storage;
4325
4326 CharData_Init(&storage);
4327 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4328 XML_SetStartElementHandler(g_parser, record_element_start_handler);
4329 XML_SetUserData(g_parser, &storage);
4330 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4331 == XML_STATUS_ERROR)
4332 xml_failure(g_parser);
4333 CharData_CheckXMLChars(&storage, expected);
4334 }
4335 END_TEST
4336
/* Test element name that is long enough to fill the conversion buffer
 * in an unknown encoding, finishing with a simple character.
 */
START_TEST(test_unknown_encoding_long_name_2)4340 START_TEST(test_unknown_encoding_long_name_2) {
4341 const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4342 "<abcdefghabcdefghabcdefghijklmnop>"
4343 "Hi"
4344 "</abcdefghabcdefghabcdefghijklmnop>";
4345 const XML_Char *expected = XCS("abcdefghabcdefghabcdefghijklmnop");
4346 CharData storage;
4347
4348 CharData_Init(&storage);
4349 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4350 XML_SetStartElementHandler(g_parser, record_element_start_handler);
4351 XML_SetUserData(g_parser, &storage);
4352 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4353 == XML_STATUS_ERROR)
4354 xml_failure(g_parser);
4355 CharData_CheckXMLChars(&storage, expected);
4356 }
4357 END_TEST
4358
START_TEST(test_invalid_unknown_encoding)4359 START_TEST(test_invalid_unknown_encoding) {
4360 const char *text = "<?xml version='1.0' encoding='invalid-9'?>\n"
4361 "<doc>Hello world</doc>";
4362
4363 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4364 expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
4365 "Invalid unknown encoding not faulted");
4366 }
4367 END_TEST
4368
START_TEST(test_unknown_ascii_encoding_ok)4369 START_TEST(test_unknown_ascii_encoding_ok) {
4370 const char *text = "<?xml version='1.0' encoding='ascii-like'?>\n"
4371 "<doc>Hello, world</doc>";
4372
4373 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4374 run_character_check(text, XCS("Hello, world"));
4375 }
4376 END_TEST
4377
START_TEST(test_unknown_ascii_encoding_fail)4378 START_TEST(test_unknown_ascii_encoding_fail) {
4379 const char *text = "<?xml version='1.0' encoding='ascii-like'?>\n"
4380 "<doc>Hello, \x80 world</doc>";
4381
4382 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4383 expect_failure(text, XML_ERROR_INVALID_TOKEN,
4384 "Invalid character not faulted");
4385 }
4386 END_TEST
4387
START_TEST(test_unknown_encoding_invalid_length)4388 START_TEST(test_unknown_encoding_invalid_length) {
4389 const char *text = "<?xml version='1.0' encoding='invalid-len'?>\n"
4390 "<doc>Hello, world</doc>";
4391
4392 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4393 expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
4394 "Invalid unknown encoding not faulted");
4395 }
4396 END_TEST
4397
START_TEST(test_unknown_encoding_invalid_topbit)4398 START_TEST(test_unknown_encoding_invalid_topbit) {
4399 const char *text = "<?xml version='1.0' encoding='invalid-a'?>\n"
4400 "<doc>Hello, world</doc>";
4401
4402 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4403 expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
4404 "Invalid unknown encoding not faulted");
4405 }
4406 END_TEST
4407
START_TEST(test_unknown_encoding_invalid_surrogate)4408 START_TEST(test_unknown_encoding_invalid_surrogate) {
4409 const char *text = "<?xml version='1.0' encoding='invalid-surrogate'?>\n"
4410 "<doc>Hello, \x82 world</doc>";
4411
4412 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4413 expect_failure(text, XML_ERROR_INVALID_TOKEN,
4414 "Invalid unknown encoding not faulted");
4415 }
4416 END_TEST
4417
START_TEST(test_unknown_encoding_invalid_high)4418 START_TEST(test_unknown_encoding_invalid_high) {
4419 const char *text = "<?xml version='1.0' encoding='invalid-high'?>\n"
4420 "<doc>Hello, world</doc>";
4421
4422 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4423 expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
4424 "Invalid unknown encoding not faulted");
4425 }
4426 END_TEST
4427
START_TEST(test_unknown_encoding_invalid_attr_value)4428 START_TEST(test_unknown_encoding_invalid_attr_value) {
4429 const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4430 "<doc attr='\xff\x30'/>";
4431
4432 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4433 expect_failure(text, XML_ERROR_INVALID_TOKEN,
4434 "Invalid attribute valid not faulted");
4435 }
4436 END_TEST
4437
4438 /* Test an external entity parser set to use latin-1 detects UTF-16
4439 * BOMs correctly.
4440 */
4441 /* Test that UTF-16 BOM does not select UTF-16 given explicit encoding */
START_TEST(test_ext_entity_latin1_utf16le_bom)4442 START_TEST(test_ext_entity_latin1_utf16le_bom) {
4443 const char *text = "<!DOCTYPE doc [\n"
4444 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4445 "]>\n"
4446 "<doc>&en;</doc>";
4447 ExtTest2 test_data
4448 = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */
4449 /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn,
4450 * 0x4c = L and 0x20 is a space
4451 */
4452 "\xff\xfe\x4c\x20", 4, XCS("iso-8859-1"), NULL};
4453 #ifdef XML_UNICODE
4454 const XML_Char *expected = XCS("\x00ff\x00feL ");
4455 #else
4456 /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */
4457 const XML_Char *expected = XCS("\xc3\xbf\xc3\xbeL ");
4458 #endif
4459 CharData storage;
4460
4461 CharData_Init(&storage);
4462 test_data.storage = &storage;
4463 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4464 XML_SetUserData(g_parser, &test_data);
4465 XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4466 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4467 == XML_STATUS_ERROR)
4468 xml_failure(g_parser);
4469 CharData_CheckXMLChars(&storage, expected);
4470 }
4471 END_TEST
4472
START_TEST(test_ext_entity_latin1_utf16be_bom)4473 START_TEST(test_ext_entity_latin1_utf16be_bom) {
4474 const char *text = "<!DOCTYPE doc [\n"
4475 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4476 "]>\n"
4477 "<doc>&en;</doc>";
4478 ExtTest2 test_data
4479 = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */
4480 /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn,
4481 * 0x4c = L and 0x20 is a space
4482 */
4483 "\xfe\xff\x20\x4c", 4, XCS("iso-8859-1"), NULL};
4484 #ifdef XML_UNICODE
4485 const XML_Char *expected = XCS("\x00fe\x00ff L");
4486 #else
4487 /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */
4488 const XML_Char *expected = XCS("\xc3\xbe\xc3\xbf L");
4489 #endif
4490 CharData storage;
4491
4492 CharData_Init(&storage);
4493 test_data.storage = &storage;
4494 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4495 XML_SetUserData(g_parser, &test_data);
4496 XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4497 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4498 == XML_STATUS_ERROR)
4499 xml_failure(g_parser);
4500 CharData_CheckXMLChars(&storage, expected);
4501 }
4502 END_TEST
4503
4504 /* Parsing the full buffer rather than a byte at a time makes a
4505 * difference to the encoding scanning code, so repeat the above tests
4506 * without breaking them down by byte.
4507 */
START_TEST(test_ext_entity_latin1_utf16le_bom2)4508 START_TEST(test_ext_entity_latin1_utf16le_bom2) {
4509 const char *text = "<!DOCTYPE doc [\n"
4510 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4511 "]>\n"
4512 "<doc>&en;</doc>";
4513 ExtTest2 test_data
4514 = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */
4515 /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn,
4516 * 0x4c = L and 0x20 is a space
4517 */
4518 "\xff\xfe\x4c\x20", 4, XCS("iso-8859-1"), NULL};
4519 #ifdef XML_UNICODE
4520 const XML_Char *expected = XCS("\x00ff\x00feL ");
4521 #else
4522 /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */
4523 const XML_Char *expected = XCS("\xc3\xbf\xc3\xbeL ");
4524 #endif
4525 CharData storage;
4526
4527 CharData_Init(&storage);
4528 test_data.storage = &storage;
4529 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4530 XML_SetUserData(g_parser, &test_data);
4531 XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4532 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4533 == XML_STATUS_ERROR)
4534 xml_failure(g_parser);
4535 CharData_CheckXMLChars(&storage, expected);
4536 }
4537 END_TEST
4538
START_TEST(test_ext_entity_latin1_utf16be_bom2)4539 START_TEST(test_ext_entity_latin1_utf16be_bom2) {
4540 const char *text = "<!DOCTYPE doc [\n"
4541 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4542 "]>\n"
4543 "<doc>&en;</doc>";
4544 ExtTest2 test_data
4545 = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */
4546 /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn,
4547 * 0x4c = L and 0x20 is a space
4548 */
4549 "\xfe\xff\x20\x4c", 4, XCS("iso-8859-1"), NULL};
4550 #ifdef XML_UNICODE
4551 const XML_Char *expected = XCS("\x00fe\x00ff L");
4552 #else
4553 /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */
4554 const XML_Char *expected = "\xc3\xbe\xc3\xbf L";
4555 #endif
4556 CharData storage;
4557
4558 CharData_Init(&storage);
4559 test_data.storage = &storage;
4560 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4561 XML_SetUserData(g_parser, &test_data);
4562 XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4563 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4564 == XML_STATUS_ERROR)
4565 xml_failure(g_parser);
4566 CharData_CheckXMLChars(&storage, expected);
4567 }
4568 END_TEST
4569
4570 /* Test little-endian UTF-16 given an explicit big-endian encoding */
START_TEST(test_ext_entity_utf16_be)4571 START_TEST(test_ext_entity_utf16_be) {
4572 const char *text = "<!DOCTYPE doc [\n"
4573 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4574 "]>\n"
4575 "<doc>&en;</doc>";
4576 ExtTest2 test_data = {"<\0e\0/\0>\0", 8, XCS("utf-16be"), NULL};
4577 #ifdef XML_UNICODE
4578 const XML_Char *expected = XCS("\x3c00\x6500\x2f00\x3e00");
4579 #else
4580 const XML_Char *expected = XCS("\xe3\xb0\x80" /* U+3C00 */
4581 "\xe6\x94\x80" /* U+6500 */
4582 "\xe2\xbc\x80" /* U+2F00 */
4583 "\xe3\xb8\x80"); /* U+3E00 */
4584 #endif
4585 CharData storage;
4586
4587 CharData_Init(&storage);
4588 test_data.storage = &storage;
4589 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4590 XML_SetUserData(g_parser, &test_data);
4591 XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4592 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4593 == XML_STATUS_ERROR)
4594 xml_failure(g_parser);
4595 CharData_CheckXMLChars(&storage, expected);
4596 }
4597 END_TEST
4598
4599 /* Test big-endian UTF-16 given an explicit little-endian encoding */
START_TEST(test_ext_entity_utf16_le)4600 START_TEST(test_ext_entity_utf16_le) {
4601 const char *text = "<!DOCTYPE doc [\n"
4602 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4603 "]>\n"
4604 "<doc>&en;</doc>";
4605 ExtTest2 test_data = {"\0<\0e\0/\0>", 8, XCS("utf-16le"), NULL};
4606 #ifdef XML_UNICODE
4607 const XML_Char *expected = XCS("\x3c00\x6500\x2f00\x3e00");
4608 #else
4609 const XML_Char *expected = XCS("\xe3\xb0\x80" /* U+3C00 */
4610 "\xe6\x94\x80" /* U+6500 */
4611 "\xe2\xbc\x80" /* U+2F00 */
4612 "\xe3\xb8\x80"); /* U+3E00 */
4613 #endif
4614 CharData storage;
4615
4616 CharData_Init(&storage);
4617 test_data.storage = &storage;
4618 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4619 XML_SetUserData(g_parser, &test_data);
4620 XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4621 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4622 == XML_STATUS_ERROR)
4623 xml_failure(g_parser);
4624 CharData_CheckXMLChars(&storage, expected);
4625 }
4626 END_TEST
4627
4628 /* Test little-endian UTF-16 given no explicit encoding.
4629 * The existing default encoding (UTF-8) is assumed to hold without a
4630 * BOM to contradict it, so the entity value will in fact provoke an
4631 * error because 0x00 is not a valid XML character. We parse the
4632 * whole buffer in one go rather than feeding it in byte by byte to
4633 * exercise different code paths in the initial scanning routines.
4634 */
START_TEST(test_ext_entity_utf16_unknown)4635 START_TEST(test_ext_entity_utf16_unknown) {
4636 const char *text = "<!DOCTYPE doc [\n"
4637 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4638 "]>\n"
4639 "<doc>&en;</doc>";
4640 ExtFaults2 test_data
4641 = {"a\0b\0c\0", 6, "Invalid character in entity not faulted", NULL,
4642 XML_ERROR_INVALID_TOKEN};
4643
4644 XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter2);
4645 XML_SetUserData(g_parser, &test_data);
4646 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
4647 "Invalid character should not have been accepted");
4648 }
4649 END_TEST
4650
4651 /* Test not-quite-UTF-8 BOM (0xEF 0xBB 0xBF) */
START_TEST(test_ext_entity_utf8_non_bom)4652 START_TEST(test_ext_entity_utf8_non_bom) {
4653 const char *text = "<!DOCTYPE doc [\n"
4654 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4655 "]>\n"
4656 "<doc>&en;</doc>";
4657 ExtTest2 test_data
4658 = {"\xef\xbb\x80", /* Arabic letter DAD medial form, U+FEC0 */
4659 3, NULL, NULL};
4660 #ifdef XML_UNICODE
4661 const XML_Char *expected = XCS("\xfec0");
4662 #else
4663 const XML_Char *expected = XCS("\xef\xbb\x80");
4664 #endif
4665 CharData storage;
4666
4667 CharData_Init(&storage);
4668 test_data.storage = &storage;
4669 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4670 XML_SetUserData(g_parser, &test_data);
4671 XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4672 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4673 == XML_STATUS_ERROR)
4674 xml_failure(g_parser);
4675 CharData_CheckXMLChars(&storage, expected);
4676 }
4677 END_TEST
4678
4679 /* Test that UTF-8 in a CDATA section is correctly passed through */
START_TEST(test_utf8_in_cdata_section)4680 START_TEST(test_utf8_in_cdata_section) {
4681 const char *text = "<doc><![CDATA[one \xc3\xa9 two]]></doc>";
4682 #ifdef XML_UNICODE
4683 const XML_Char *expected = XCS("one \x00e9 two");
4684 #else
4685 const XML_Char *expected = XCS("one \xc3\xa9 two");
4686 #endif
4687
4688 run_character_check(text, expected);
4689 }
4690 END_TEST
4691
/* Test that UTF-8 characters on either side of a lone ']' in a CDATA
 * section are correctly passed through
 */
START_TEST(test_utf8_in_cdata_section_2)4693 START_TEST(test_utf8_in_cdata_section_2) {
4694 const char *text = "<doc><![CDATA[\xc3\xa9]\xc3\xa9two]]></doc>";
4695 #ifdef XML_UNICODE
4696 const XML_Char *expected = XCS("\x00e9]\x00e9two");
4697 #else
4698 const XML_Char *expected = XCS("\xc3\xa9]\xc3\xa9two");
4699 #endif
4700
4701 run_character_check(text, expected);
4702 }
4703 END_TEST
4704
START_TEST(test_utf8_in_start_tags)4705 START_TEST(test_utf8_in_start_tags) {
4706 struct test_case {
4707 bool goodName;
4708 bool goodNameStart;
4709 const char *tagName;
4710 };
4711
4712 // The idea with the tests below is this:
4713 // We want to cover 1-, 2- and 3-byte sequences, 4-byte sequences
4714 // go to isNever and are hence not a concern.
4715 //
4716 // We start with a character that is a valid name character
4717 // (or even name-start character, see XML 1.0r4 spec) and then we flip
4718 // single bits at places where (1) the result leaves the UTF-8 encoding space
4719 // and (2) we stay in the same n-byte sequence family.
4720 //
4721 // The flipped bits are highlighted in angle brackets in comments,
4722 // e.g. "[<1>011 1001]" means we had [0011 1001] but we now flipped
4723 // the most significant bit to 1 to leave UTF-8 encoding space.
4724 struct test_case cases[] = {
4725 // 1-byte UTF-8: [0xxx xxxx]
4726 {true, true, "\x3A"}, // [0011 1010] = ASCII colon ':'
4727 {false, false, "\xBA"}, // [<1>011 1010]
4728 {true, false, "\x39"}, // [0011 1001] = ASCII nine '9'
4729 {false, false, "\xB9"}, // [<1>011 1001]
4730
4731 // 2-byte UTF-8: [110x xxxx] [10xx xxxx]
4732 {true, true, "\xDB\xA5"}, // [1101 1011] [1010 0101] =
4733 // Arabic small waw U+06E5
4734 {false, false, "\x9B\xA5"}, // [1<0>01 1011] [1010 0101]
4735 {false, false, "\xDB\x25"}, // [1101 1011] [<0>010 0101]
4736 {false, false, "\xDB\xE5"}, // [1101 1011] [1<1>10 0101]
4737 {true, false, "\xCC\x81"}, // [1100 1100] [1000 0001] =
4738 // combining char U+0301
4739 {false, false, "\x8C\x81"}, // [1<0>00 1100] [1000 0001]
4740 {false, false, "\xCC\x01"}, // [1100 1100] [<0>000 0001]
4741 {false, false, "\xCC\xC1"}, // [1100 1100] [1<1>00 0001]
4742
4743 // 3-byte UTF-8: [1110 xxxx] [10xx xxxx] [10xxxxxx]
4744 {true, true, "\xE0\xA4\x85"}, // [1110 0000] [1010 0100] [1000 0101] =
4745 // Devanagari Letter A U+0905
4746 {false, false, "\xA0\xA4\x85"}, // [1<0>10 0000] [1010 0100] [1000 0101]
4747 {false, false, "\xE0\x24\x85"}, // [1110 0000] [<0>010 0100] [1000 0101]
4748 {false, false, "\xE0\xE4\x85"}, // [1110 0000] [1<1>10 0100] [1000 0101]
4749 {false, false, "\xE0\xA4\x05"}, // [1110 0000] [1010 0100] [<0>000 0101]
4750 {false, false, "\xE0\xA4\xC5"}, // [1110 0000] [1010 0100] [1<1>00 0101]
4751 {true, false, "\xE0\xA4\x81"}, // [1110 0000] [1010 0100] [1000 0001] =
4752 // combining char U+0901
4753 {false, false, "\xA0\xA4\x81"}, // [1<0>10 0000] [1010 0100] [1000 0001]
4754 {false, false, "\xE0\x24\x81"}, // [1110 0000] [<0>010 0100] [1000 0001]
4755 {false, false, "\xE0\xE4\x81"}, // [1110 0000] [1<1>10 0100] [1000 0001]
4756 {false, false, "\xE0\xA4\x01"}, // [1110 0000] [1010 0100] [<0>000 0001]
4757 {false, false, "\xE0\xA4\xC1"}, // [1110 0000] [1010 0100] [1<1>00 0001]
4758 };
4759 const bool atNameStart[] = {true, false};
4760
4761 size_t i = 0;
4762 char doc[1024];
4763 size_t failCount = 0;
4764
4765 // we need all the bytes to be parsed, but we don't want the errors that can
4766 // trigger on isFinal=XML_TRUE, so we skip the test if the heuristic is on.
4767 if (g_reparseDeferralEnabledDefault) {
4768 return;
4769 }
4770
4771 for (; i < sizeof(cases) / sizeof(cases[0]); i++) {
4772 size_t j = 0;
4773 for (; j < sizeof(atNameStart) / sizeof(atNameStart[0]); j++) {
4774 const bool expectedSuccess
4775 = atNameStart[j] ? cases[i].goodNameStart : cases[i].goodName;
4776 snprintf(doc, sizeof(doc), "<%s%s><!--", atNameStart[j] ? "" : "a",
4777 cases[i].tagName);
4778 XML_Parser parser = XML_ParserCreate(NULL);
4779
4780 const enum XML_Status status = _XML_Parse_SINGLE_BYTES(
4781 parser, doc, (int)strlen(doc), /*isFinal=*/XML_FALSE);
4782
4783 bool success = true;
4784 if ((status == XML_STATUS_OK) != expectedSuccess) {
4785 success = false;
4786 }
4787 if ((status == XML_STATUS_ERROR)
4788 && (XML_GetErrorCode(parser) != XML_ERROR_INVALID_TOKEN)) {
4789 success = false;
4790 }
4791
4792 if (! success) {
4793 fprintf(
4794 stderr,
4795 "FAIL case %2u (%sat name start, %u-byte sequence, error code %d)\n",
4796 (unsigned)i + 1u, atNameStart[j] ? " " : "not ",
4797 (unsigned)strlen(cases[i].tagName), XML_GetErrorCode(parser));
4798 failCount++;
4799 }
4800
4801 XML_ParserFree(parser);
4802 }
4803 }
4804
4805 if (failCount > 0) {
4806 fail("UTF-8 regression detected");
4807 }
4808 }
4809 END_TEST
4810
4811 /* Test trailing spaces in elements are accepted */
START_TEST(test_trailing_spaces_in_elements)4812 START_TEST(test_trailing_spaces_in_elements) {
4813 const char *text = "<doc >Hi</doc >";
4814 const XML_Char *expected = XCS("doc/doc");
4815 CharData storage;
4816
4817 CharData_Init(&storage);
4818 XML_SetElementHandler(g_parser, record_element_start_handler,
4819 record_element_end_handler);
4820 XML_SetUserData(g_parser, &storage);
4821 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4822 == XML_STATUS_ERROR)
4823 xml_failure(g_parser);
4824 CharData_CheckXMLChars(&storage, expected);
4825 }
4826 END_TEST
4827
START_TEST(test_utf16_attribute)4828 START_TEST(test_utf16_attribute) {
4829 const char text[] =
4830 /* <d {KHO KHWAI}{CHO CHAN}='a'/>
4831 * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8
4832 * and {CHO CHAN} = U+0E08 = 0xe0 0xb8 0x88 in UTF-8
4833 */
4834 "<\0d\0 \0\x04\x0e\x08\x0e=\0'\0a\0'\0/\0>\0";
4835 const XML_Char *expected = XCS("a");
4836 CharData storage;
4837
4838 CharData_Init(&storage);
4839 XML_SetStartElementHandler(g_parser, accumulate_attribute);
4840 XML_SetUserData(g_parser, &storage);
4841 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4842 == XML_STATUS_ERROR)
4843 xml_failure(g_parser);
4844 CharData_CheckXMLChars(&storage, expected);
4845 }
4846 END_TEST
4847
START_TEST(test_utf16_second_attr)4848 START_TEST(test_utf16_second_attr) {
4849 /* <d a='1' {KHO KHWAI}{CHO CHAN}='2'/>
4850 * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8
4851 * and {CHO CHAN} = U+0E08 = 0xe0 0xb8 0x88 in UTF-8
4852 */
4853 const char text[] = "<\0d\0 \0a\0=\0'\0\x31\0'\0 \0"
4854 "\x04\x0e\x08\x0e=\0'\0\x32\0'\0/\0>\0";
4855 const XML_Char *expected = XCS("1");
4856 CharData storage;
4857
4858 CharData_Init(&storage);
4859 XML_SetStartElementHandler(g_parser, accumulate_attribute);
4860 XML_SetUserData(g_parser, &storage);
4861 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4862 == XML_STATUS_ERROR)
4863 xml_failure(g_parser);
4864 CharData_CheckXMLChars(&storage, expected);
4865 }
4866 END_TEST
4867
START_TEST(test_attr_after_solidus)4868 START_TEST(test_attr_after_solidus) {
4869 const char *text = "<doc attr1='a' / attr2='b'>";
4870
4871 expect_failure(text, XML_ERROR_INVALID_TOKEN, "Misplaced / not faulted");
4872 }
4873 END_TEST
4874
START_TEST(test_utf16_pe)4875 START_TEST(test_utf16_pe) {
4876 /* <!DOCTYPE doc [
4877 * <!ENTITY % {KHO KHWAI}{CHO CHAN} '<!ELEMENT doc (#PCDATA)>'>
4878 * %{KHO KHWAI}{CHO CHAN};
4879 * ]>
4880 * <doc></doc>
4881 *
4882 * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8
4883 * and {CHO CHAN} = U+0E08 = 0xe0 0xb8 0x88 in UTF-8
4884 */
4885 const char text[] = "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0o\0c\0 \0[\0\n"
4886 "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0%\0 \x0e\x04\x0e\x08\0 "
4887 "\0'\0<\0!\0E\0L\0E\0M\0E\0N\0T\0 "
4888 "\0d\0o\0c\0 \0(\0#\0P\0C\0D\0A\0T\0A\0)\0>\0'\0>\0\n"
4889 "\0%\x0e\x04\x0e\x08\0;\0\n"
4890 "\0]\0>\0\n"
4891 "\0<\0d\0o\0c\0>\0<\0/\0d\0o\0c\0>";
4892 #ifdef XML_UNICODE
4893 const XML_Char *expected = XCS("\x0e04\x0e08=<!ELEMENT doc (#PCDATA)>\n");
4894 #else
4895 const XML_Char *expected
4896 = XCS("\xe0\xb8\x84\xe0\xb8\x88=<!ELEMENT doc (#PCDATA)>\n");
4897 #endif
4898 CharData storage;
4899
4900 CharData_Init(&storage);
4901 XML_SetUserData(g_parser, &storage);
4902 XML_SetEntityDeclHandler(g_parser, accumulate_entity_decl);
4903 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4904 == XML_STATUS_ERROR)
4905 xml_failure(g_parser);
4906 CharData_CheckXMLChars(&storage, expected);
4907 }
4908 END_TEST
4909
4910 /* Test that duff attribute description keywords are rejected */
START_TEST(test_bad_attr_desc_keyword)4911 START_TEST(test_bad_attr_desc_keyword) {
4912 const char *text = "<!DOCTYPE doc [\n"
4913 " <!ATTLIST doc attr CDATA #!IMPLIED>\n"
4914 "]>\n"
4915 "<doc />";
4916
4917 expect_failure(text, XML_ERROR_INVALID_TOKEN,
4918 "Bad keyword !IMPLIED not faulted");
4919 }
4920 END_TEST
4921
4922 /* Test that an invalid attribute description keyword consisting of
4923 * UTF-16 characters with their top bytes non-zero are correctly
4924 * faulted
4925 */
START_TEST(test_bad_attr_desc_keyword_utf16)4926 START_TEST(test_bad_attr_desc_keyword_utf16) {
4927 /* <!DOCTYPE d [
4928 * <!ATTLIST d a CDATA #{KHO KHWAI}{CHO CHAN}>
4929 * ]><d/>
4930 *
4931 * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8
4932 * and {CHO CHAN} = U+0E08 = 0xe0 0xb8 0x88 in UTF-8
4933 */
4934 const char text[]
4935 = "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 \0[\0\n"
4936 "\0<\0!\0A\0T\0T\0L\0I\0S\0T\0 \0d\0 \0a\0 \0C\0D\0A\0T\0A\0 "
4937 "\0#\x0e\x04\x0e\x08\0>\0\n"
4938 "\0]\0>\0<\0d\0/\0>";
4939
4940 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4941 != XML_STATUS_ERROR)
4942 fail("Invalid UTF16 attribute keyword not faulted");
4943 if (XML_GetErrorCode(g_parser) != XML_ERROR_SYNTAX)
4944 xml_failure(g_parser);
4945 }
4946 END_TEST
4947
4948 /* Test that invalid syntax in a <!DOCTYPE> is rejected. Do this
4949 * using prefix-encoding (see above) to trigger specific code paths
4950 */
START_TEST(test_bad_doctype)4951 START_TEST(test_bad_doctype) {
4952 const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4953 "<!DOCTYPE doc [ \x80\x44 ]><doc/>";
4954
4955 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4956 expect_failure(text, XML_ERROR_SYNTAX,
4957 "Invalid bytes in DOCTYPE not faulted");
4958 }
4959 END_TEST
4960
START_TEST(test_bad_doctype_utf8)4961 START_TEST(test_bad_doctype_utf8) {
4962 const char *text = "<!DOCTYPE \xDB\x25"
4963 "doc><doc/>"; // [1101 1011] [<0>010 0101]
4964 expect_failure(text, XML_ERROR_INVALID_TOKEN,
4965 "Invalid UTF-8 in DOCTYPE not faulted");
4966 }
4967 END_TEST
4968
START_TEST(test_bad_doctype_utf16)4969 START_TEST(test_bad_doctype_utf16) {
4970 const char text[] =
4971 /* <!DOCTYPE doc [ \x06f2 ]><doc/>
4972 *
4973 * U+06F2 = EXTENDED ARABIC-INDIC DIGIT TWO, a valid number
4974 * (name character) but not a valid letter (name start character)
4975 */
4976 "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0o\0c\0 \0[\0 "
4977 "\x06\xf2"
4978 "\0 \0]\0>\0<\0d\0o\0c\0/\0>";
4979
4980 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4981 != XML_STATUS_ERROR)
4982 fail("Invalid bytes in DOCTYPE not faulted");
4983 if (XML_GetErrorCode(g_parser) != XML_ERROR_SYNTAX)
4984 xml_failure(g_parser);
4985 }
4986 END_TEST
4987
START_TEST(test_bad_doctype_plus)4988 START_TEST(test_bad_doctype_plus) {
4989 const char *text = "<!DOCTYPE 1+ [ <!ENTITY foo 'bar'> ]>\n"
4990 "<1+>&foo;</1+>";
4991
4992 expect_failure(text, XML_ERROR_INVALID_TOKEN,
4993 "'+' in document name not faulted");
4994 }
4995 END_TEST
4996
START_TEST(test_bad_doctype_star)4997 START_TEST(test_bad_doctype_star) {
4998 const char *text = "<!DOCTYPE 1* [ <!ENTITY foo 'bar'> ]>\n"
4999 "<1*>&foo;</1*>";
5000
5001 expect_failure(text, XML_ERROR_INVALID_TOKEN,
5002 "'*' in document name not faulted");
5003 }
5004 END_TEST
5005
START_TEST(test_bad_doctype_query)5006 START_TEST(test_bad_doctype_query) {
5007 const char *text = "<!DOCTYPE 1? [ <!ENTITY foo 'bar'> ]>\n"
5008 "<1?>&foo;</1?>";
5009
5010 expect_failure(text, XML_ERROR_INVALID_TOKEN,
5011 "'?' in document name not faulted");
5012 }
5013 END_TEST
5014
START_TEST(test_unknown_encoding_bad_ignore)5015 START_TEST(test_unknown_encoding_bad_ignore) {
5016 const char *text = "<?xml version='1.0' encoding='prefix-conv'?>"
5017 "<!DOCTYPE doc SYSTEM 'foo'>"
5018 "<doc><e>&entity;</e></doc>";
5019 ExtFaults fault = {"<![IGNORE[<!ELEMENT \xffG (#PCDATA)*>]]>",
5020 "Invalid character not faulted", XCS("prefix-conv"),
5021 XML_ERROR_INVALID_TOKEN};
5022
5023 XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
5024 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
5025 XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
5026 XML_SetUserData(g_parser, &fault);
5027 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
5028 "Bad IGNORE section with unknown encoding not failed");
5029 }
5030 END_TEST
5031
START_TEST(test_entity_in_utf16_be_attr)5032 START_TEST(test_entity_in_utf16_be_attr) {
5033 const char text[] =
5034 /* <e a='ä ä'></e> */
5035 "\0<\0e\0 \0a\0=\0'\0&\0#\0\x32\0\x32\0\x38\0;\0 "
5036 "\0&\0#\0x\0\x30\0\x30\0E\0\x34\0;\0'\0>\0<\0/\0e\0>";
5037 #ifdef XML_UNICODE
5038 const XML_Char *expected = XCS("\x00e4 \x00e4");
5039 #else
5040 const XML_Char *expected = XCS("\xc3\xa4 \xc3\xa4");
5041 #endif
5042 CharData storage;
5043
5044 CharData_Init(&storage);
5045 XML_SetUserData(g_parser, &storage);
5046 XML_SetStartElementHandler(g_parser, accumulate_attribute);
5047 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
5048 == XML_STATUS_ERROR)
5049 xml_failure(g_parser);
5050 CharData_CheckXMLChars(&storage, expected);
5051 }
5052 END_TEST
5053
START_TEST(test_entity_in_utf16_le_attr)5054 START_TEST(test_entity_in_utf16_le_attr) {
5055 const char text[] =
5056 /* <e a='ä ä'></e> */
5057 "<\0e\0 \0a\0=\0'\0&\0#\0\x32\0\x32\0\x38\0;\0 \0"
5058 "&\0#\0x\0\x30\0\x30\0E\0\x34\0;\0'\0>\0<\0/\0e\0>\0";
5059 #ifdef XML_UNICODE
5060 const XML_Char *expected = XCS("\x00e4 \x00e4");
5061 #else
5062 const XML_Char *expected = XCS("\xc3\xa4 \xc3\xa4");
5063 #endif
5064 CharData storage;
5065
5066 CharData_Init(&storage);
5067 XML_SetUserData(g_parser, &storage);
5068 XML_SetStartElementHandler(g_parser, accumulate_attribute);
5069 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
5070 == XML_STATUS_ERROR)
5071 xml_failure(g_parser);
5072 CharData_CheckXMLChars(&storage, expected);
5073 }
5074 END_TEST
5075
START_TEST(test_entity_public_utf16_be)5076 START_TEST(test_entity_public_utf16_be) {
5077 const char text[] =
5078 /* <!DOCTYPE d [ */
5079 "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 \0[\0\n"
5080 /* <!ENTITY % e PUBLIC 'foo' 'bar.ent'> */
5081 "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0%\0 \0e\0 \0P\0U\0B\0L\0I\0C\0 "
5082 "\0'\0f\0o\0o\0'\0 \0'\0b\0a\0r\0.\0e\0n\0t\0'\0>\0\n"
5083 /* %e; */
5084 "\0%\0e\0;\0\n"
5085 /* ]> */
5086 "\0]\0>\0\n"
5087 /* <d>&j;</d> */
5088 "\0<\0d\0>\0&\0j\0;\0<\0/\0d\0>";
5089 ExtTest2 test_data
5090 = {/* <!ENTITY j 'baz'> */
5091 "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0j\0 \0'\0b\0a\0z\0'\0>", 34, NULL, NULL};
5092 const XML_Char *expected = XCS("baz");
5093 CharData storage;
5094
5095 CharData_Init(&storage);
5096 test_data.storage = &storage;
5097 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
5098 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
5099 XML_SetUserData(g_parser, &test_data);
5100 XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
5101 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
5102 == XML_STATUS_ERROR)
5103 xml_failure(g_parser);
5104 CharData_CheckXMLChars(&storage, expected);
5105 }
5106 END_TEST
5107
START_TEST(test_entity_public_utf16_le)5108 START_TEST(test_entity_public_utf16_le) {
5109 const char text[] =
5110 /* <!DOCTYPE d [ */
5111 "<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 \0[\0\n\0"
5112 /* <!ENTITY % e PUBLIC 'foo' 'bar.ent'> */
5113 "<\0!\0E\0N\0T\0I\0T\0Y\0 \0%\0 \0e\0 \0P\0U\0B\0L\0I\0C\0 \0"
5114 "'\0f\0o\0o\0'\0 \0'\0b\0a\0r\0.\0e\0n\0t\0'\0>\0\n\0"
5115 /* %e; */
5116 "%\0e\0;\0\n\0"
5117 /* ]> */
5118 "]\0>\0\n\0"
5119 /* <d>&j;</d> */
5120 "<\0d\0>\0&\0j\0;\0<\0/\0d\0>\0";
5121 ExtTest2 test_data
5122 = {/* <!ENTITY j 'baz'> */
5123 "<\0!\0E\0N\0T\0I\0T\0Y\0 \0j\0 \0'\0b\0a\0z\0'\0>\0", 34, NULL, NULL};
5124 const XML_Char *expected = XCS("baz");
5125 CharData storage;
5126
5127 CharData_Init(&storage);
5128 test_data.storage = &storage;
5129 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
5130 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
5131 XML_SetUserData(g_parser, &test_data);
5132 XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
5133 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
5134 == XML_STATUS_ERROR)
5135 xml_failure(g_parser);
5136 CharData_CheckXMLChars(&storage, expected);
5137 }
5138 END_TEST
5139
5140 /* Test that a doctype with neither an internal nor external subset is
5141 * faulted
5142 */
START_TEST(test_short_doctype)5143 START_TEST(test_short_doctype) {
5144 const char *text = "<!DOCTYPE doc></doc>";
5145 expect_failure(text, XML_ERROR_INVALID_TOKEN,
5146 "DOCTYPE without subset not rejected");
5147 }
5148 END_TEST
5149
START_TEST(test_short_doctype_2)5150 START_TEST(test_short_doctype_2) {
5151 const char *text = "<!DOCTYPE doc PUBLIC></doc>";
5152 expect_failure(text, XML_ERROR_SYNTAX,
5153 "DOCTYPE without Public ID not rejected");
5154 }
5155 END_TEST
5156
START_TEST(test_short_doctype_3)5157 START_TEST(test_short_doctype_3) {
5158 const char *text = "<!DOCTYPE doc SYSTEM></doc>";
5159 expect_failure(text, XML_ERROR_SYNTAX,
5160 "DOCTYPE without System ID not rejected");
5161 }
5162 END_TEST
5163
START_TEST(test_long_doctype)5164 START_TEST(test_long_doctype) {
5165 const char *text = "<!DOCTYPE doc PUBLIC 'foo' 'bar' 'baz'></doc>";
5166 expect_failure(text, XML_ERROR_SYNTAX, "DOCTYPE with extra ID not rejected");
5167 }
5168 END_TEST
5169
START_TEST(test_bad_entity)5170 START_TEST(test_bad_entity) {
5171 const char *text = "<!DOCTYPE doc [\n"
5172 " <!ENTITY foo PUBLIC>\n"
5173 "]>\n"
5174 "<doc/>";
5175 expect_failure(text, XML_ERROR_SYNTAX,
5176 "ENTITY without Public ID is not rejected");
5177 }
5178 END_TEST
5179
5180 /* Test unquoted value is faulted */
START_TEST(test_bad_entity_2)5181 START_TEST(test_bad_entity_2) {
5182 const char *text = "<!DOCTYPE doc [\n"
5183 " <!ENTITY % foo bar>\n"
5184 "]>\n"
5185 "<doc/>";
5186 expect_failure(text, XML_ERROR_SYNTAX,
5187 "ENTITY without Public ID is not rejected");
5188 }
5189 END_TEST
5190
START_TEST(test_bad_entity_3)5191 START_TEST(test_bad_entity_3) {
5192 const char *text = "<!DOCTYPE doc [\n"
5193 " <!ENTITY % foo PUBLIC>\n"
5194 "]>\n"
5195 "<doc/>";
5196 expect_failure(text, XML_ERROR_SYNTAX,
5197 "Parameter ENTITY without Public ID is not rejected");
5198 }
5199 END_TEST
5200
START_TEST(test_bad_entity_4)5201 START_TEST(test_bad_entity_4) {
5202 const char *text = "<!DOCTYPE doc [\n"
5203 " <!ENTITY % foo SYSTEM>\n"
5204 "]>\n"
5205 "<doc/>";
5206 expect_failure(text, XML_ERROR_SYNTAX,
5207 "Parameter ENTITY without Public ID is not rejected");
5208 }
5209 END_TEST
5210
START_TEST(test_bad_notation)5211 START_TEST(test_bad_notation) {
5212 const char *text = "<!DOCTYPE doc [\n"
5213 " <!NOTATION n SYSTEM>\n"
5214 "]>\n"
5215 "<doc/>";
5216 expect_failure(text, XML_ERROR_SYNTAX,
5217 "Notation without System ID is not rejected");
5218 }
5219 END_TEST
5220
5221 /* Test for issue #11, wrongly suppressed default handler */
START_TEST(test_default_doctype_handler)5222 START_TEST(test_default_doctype_handler) {
5223 const char *text = "<!DOCTYPE doc PUBLIC 'pubname' 'test.dtd' [\n"
5224 " <!ENTITY foo 'bar'>\n"
5225 "]>\n"
5226 "<doc>&foo;</doc>";
5227 DefaultCheck test_data[] = {{XCS("'pubname'"), 9, XML_FALSE},
5228 {XCS("'test.dtd'"), 10, XML_FALSE},
5229 {NULL, 0, XML_FALSE}};
5230 int i;
5231
5232 XML_SetUserData(g_parser, &test_data);
5233 XML_SetDefaultHandler(g_parser, checking_default_handler);
5234 XML_SetEntityDeclHandler(g_parser, dummy_entity_decl_handler);
5235 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
5236 == XML_STATUS_ERROR)
5237 xml_failure(g_parser);
5238 for (i = 0; test_data[i].expected != NULL; i++)
5239 if (! test_data[i].seen)
5240 fail("Default handler not run for public !DOCTYPE");
5241 }
5242 END_TEST
5243
START_TEST(test_empty_element_abort)5244 START_TEST(test_empty_element_abort) {
5245 const char *text = "<abort/>";
5246
5247 XML_SetStartElementHandler(g_parser, start_element_suspender);
5248 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
5249 != XML_STATUS_ERROR)
5250 fail("Expected to error on abort");
5251 }
5252 END_TEST
5253
5254 /* Regression test for GH issue #612: unfinished m_declAttributeType
5255 * allocation in ->m_tempPool can corrupt following allocation.
5256 */
START_TEST(test_pool_integrity_with_unfinished_attr)5257 START_TEST(test_pool_integrity_with_unfinished_attr) {
5258 const char *text = "<?xml version='1.0' encoding='UTF-8'?>\n"
5259 "<!DOCTYPE foo [\n"
5260 "<!ELEMENT foo ANY>\n"
5261 "<!ENTITY % entp SYSTEM \"external.dtd\">\n"
5262 "%entp;\n"
5263 "]>\n"
5264 "<a></a>\n";
5265 const XML_Char *expected = XCS("COMMENT");
5266 CharData storage;
5267
5268 CharData_Init(&storage);
5269 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
5270 XML_SetExternalEntityRefHandler(g_parser, external_entity_unfinished_attlist);
5271 XML_SetAttlistDeclHandler(g_parser, dummy_attlist_decl_handler);
5272 XML_SetCommentHandler(g_parser, accumulate_comment);
5273 XML_SetUserData(g_parser, &storage);
5274 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
5275 == XML_STATUS_ERROR)
5276 xml_failure(g_parser);
5277 CharData_CheckXMLChars(&storage, expected);
5278 }
5279 END_TEST
5280
START_TEST(test_nested_entity_suspend)5281 START_TEST(test_nested_entity_suspend) {
5282 const char *const text = "<!DOCTYPE a [\n"
5283 " <!ENTITY e1 '<!--e1-->'>\n"
5284 " <!ENTITY e2 '<!--e2 head-->&e1;<!--e2 tail-->'>\n"
5285 " <!ENTITY e3 '<!--e3 head-->&e2;<!--e3 tail-->'>\n"
5286 "]>\n"
5287 "<a><!--start-->&e3;<!--end--></a>";
5288 const XML_Char *const expected = XCS("start") XCS("e3 head") XCS("e2 head")
5289 XCS("e1") XCS("e2 tail") XCS("e3 tail") XCS("end");
5290 CharData storage;
5291 CharData_Init(&storage);
5292 XML_Parser parser = XML_ParserCreate(NULL);
5293 ParserPlusStorage parserPlusStorage = {parser, &storage};
5294
5295 XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
5296 XML_SetCommentHandler(parser, accumulate_and_suspend_comment_handler);
5297 XML_SetUserData(parser, &parserPlusStorage);
5298
5299 enum XML_Status status = XML_Parse(parser, text, (int)strlen(text), XML_TRUE);
5300 while (status == XML_STATUS_SUSPENDED) {
5301 status = XML_ResumeParser(parser);
5302 }
5303 if (status != XML_STATUS_OK)
5304 xml_failure(parser);
5305
5306 CharData_CheckXMLChars(&storage, expected);
5307 XML_ParserFree(parser);
5308 }
5309 END_TEST
5310
#if defined(XML_TESTING)
/* Regression test for quadratic parsing on large tokens: for several kinds
 * of very long token, the number of bytes scanned (g_bytesScanned) must stay
 * within a small constant factor of the number of bytes fed in.
 */
START_TEST(test_big_tokens_scale_linearly) {
  const struct {
    const char *pre;
    const char *post;
  } wrappers[] = {
      {"<a>", "</a>"},                      // assumed good, used as baseline
      {"<b><![CDATA[ value: ", " ]]></b>"}, // CDATA, performed OK before patch
      {"<c attr='", "'></c>"},              // big attribute, used to be O(N²)
      {"<d><!-- ", " --></d>"},             // long comment, used to be O(N²)
      {"<e><", "/></e>"},                   // big elem name, used to be O(N²)
  };
  const int wrapper_count = sizeof(wrappers) / sizeof(wrappers[0]);
  char filler[4096];
  const int chunk_len = (int)sizeof(filler);
  const int chunk_count = 100;
  const unsigned payload_bytes = chunk_len * chunk_count; // ignore pre/post.
  const unsigned ratio_limit = 4;
  const unsigned scan_budget = ratio_limit * payload_bytes;

  if (! g_reparseDeferralEnabledDefault) {
    return; // heuristic is disabled; we would get O(n^2) and fail.
  }

  memset(filler, 'a', chunk_len);

  for (int w = 0; w < wrapper_count; ++w) {
    const char *const head = wrappers[w].pre;
    const char *const tail = wrappers[w].post;
    XML_Parser parser = XML_ParserCreate(NULL);
    assert_true(parser != NULL);
    set_subtest("text=\"%saaaaaa%s\"", head, tail);

    // feed the opening part of the token
    g_bytesScanned = 0;
    if (_XML_Parse_SINGLE_BYTES(parser, head, (int)strlen(head), XML_FALSE)
        != XML_STATUS_OK) {
      xml_failure(parser);
    }

    // feed lots of 'a', failing the test early if it takes too long
    unsigned over_budget_reports = 0;
    for (int round = 0; round < chunk_count; ++round) {
      if (_XML_Parse_SINGLE_BYTES(parser, filler, chunk_len, XML_FALSE)
          != XML_STATUS_OK) {
        xml_failure(parser);
      }
      if (g_bytesScanned <= scan_budget) {
        continue;
      }
      // We're not done, and have already passed the limit -- the test will
      // definitely fail. Reporting here allows us to save time by failing
      // early.
      const unsigned pushed = (unsigned)strlen(head) + (round + 1) * chunk_len;
      fprintf(
          stderr,
          "after %d/%d loops: pushed=%u scanned=%u (factor ~%.2f) max_scanned: %u (factor ~%u)\n",
          round + 1, chunk_count, pushed, g_bytesScanned,
          g_bytesScanned / (double)pushed, scan_budget, ratio_limit);
      ++over_budget_reports;
      // We are failing, but allow a few log prints first. If we don't reach
      // a count of five, the test will fail after the loop instead.
      assert_true(over_budget_reports < 5);
    }

    // feed the closing part of the token
    if (_XML_Parse_SINGLE_BYTES(parser, tail, (int)strlen(tail), XML_TRUE)
        != XML_STATUS_OK) {
      xml_failure(parser);
    }

    assert_true(g_bytesScanned > payload_bytes); // or the counter isn't working
    if (g_bytesScanned > scan_budget) {
      fprintf(
          stderr,
          "after all input: scanned=%u (factor ~%.2f) max_scanned: %u (factor ~%u)\n",
          g_bytesScanned, g_bytesScanned / (double)payload_bytes, scan_budget,
          ratio_limit);
      fail("scanned too many bytes");
    }

    XML_ParserFree(parser);
  }
}
END_TEST
#endif
5398
START_TEST(test_set_reparse_deferral)5399 START_TEST(test_set_reparse_deferral) {
5400 const char *const pre = "<d>";
5401 const char *const start = "<x attr='";
5402 const char *const end = "'></x>";
5403 char eeeeee[100];
5404 const int fillsize = (int)sizeof(eeeeee);
5405 memset(eeeeee, 'e', fillsize);
5406
5407 for (int enabled = 0; enabled <= 1; enabled += 1) {
5408 set_subtest("deferral=%d", enabled);
5409
5410 XML_Parser parser = XML_ParserCreate(NULL);
5411 assert_true(parser != NULL);
5412 assert_true(XML_SetReparseDeferralEnabled(parser, enabled));
5413 // pre-grow the buffer to avoid reparsing due to almost-fullness
5414 assert_true(XML_GetBuffer(parser, fillsize * 10103) != NULL);
5415
5416 CharData storage;
5417 CharData_Init(&storage);
5418 XML_SetUserData(parser, &storage);
5419 XML_SetStartElementHandler(parser, start_element_event_handler);
5420
5421 enum XML_Status status;
5422 // parse the start text
5423 status = XML_Parse(parser, pre, (int)strlen(pre), XML_FALSE);
5424 if (status != XML_STATUS_OK) {
5425 xml_failure(parser);
5426 }
5427 CharData_CheckXMLChars(&storage, XCS("d")); // first element should be done
5428
5429 // ..and the start of the token
5430 status = XML_Parse(parser, start, (int)strlen(start), XML_FALSE);
5431 if (status != XML_STATUS_OK) {
5432 xml_failure(parser);
5433 }
5434 CharData_CheckXMLChars(&storage, XCS("d")); // still just the first one
5435
5436 // try to parse lots of 'e', but the token isn't finished
5437 for (int c = 0; c < 100; ++c) {
5438 status = XML_Parse(parser, eeeeee, fillsize, XML_FALSE);
5439 if (status != XML_STATUS_OK) {
5440 xml_failure(parser);
5441 }
5442 }
5443 CharData_CheckXMLChars(&storage, XCS("d")); // *still* just the first one
5444
5445 // end the <x> token.
5446 status = XML_Parse(parser, end, (int)strlen(end), XML_FALSE);
5447 if (status != XML_STATUS_OK) {
5448 xml_failure(parser);
5449 }
5450
5451 if (enabled) {
5452 // In general, we may need to push more data to trigger a reparse attempt,
5453 // but in this test, the data is constructed to always require it.
5454 CharData_CheckXMLChars(&storage, XCS("d")); // or the test is incorrect
5455 // 2x the token length should suffice; the +1 covers the start and end.
5456 for (int c = 0; c < 101; ++c) {
5457 status = XML_Parse(parser, eeeeee, fillsize, XML_FALSE);
5458 if (status != XML_STATUS_OK) {
5459 xml_failure(parser);
5460 }
5461 }
5462 }
5463 CharData_CheckXMLChars(&storage, XCS("dx")); // the <x> should be done
5464
5465 XML_ParserFree(parser);
5466 }
5467 }
5468 END_TEST
5469
5470 struct element_decl_data {
5471 XML_Parser parser;
5472 int count;
5473 };
5474
5475 static void
element_decl_counter(void * userData,const XML_Char * name,XML_Content * model)5476 element_decl_counter(void *userData, const XML_Char *name, XML_Content *model) {
5477 UNUSED_P(name);
5478 struct element_decl_data *testdata = (struct element_decl_data *)userData;
5479 testdata->count += 1;
5480 XML_FreeContentModel(testdata->parser, model);
5481 }
5482
5483 static int
external_inherited_parser(XML_Parser p,const XML_Char * context,const XML_Char * base,const XML_Char * systemId,const XML_Char * publicId)5484 external_inherited_parser(XML_Parser p, const XML_Char *context,
5485 const XML_Char *base, const XML_Char *systemId,
5486 const XML_Char *publicId) {
5487 UNUSED_P(base);
5488 UNUSED_P(systemId);
5489 UNUSED_P(publicId);
5490 const char *const pre = "<!ELEMENT document ANY>\n";
5491 const char *const start = "<!ELEMENT ";
5492 const char *const end = " ANY>\n";
5493 const char *const post = "<!ELEMENT xyz ANY>\n";
5494 const int enabled = *(int *)XML_GetUserData(p);
5495 char eeeeee[100];
5496 char spaces[100];
5497 const int fillsize = (int)sizeof(eeeeee);
5498 assert_true(fillsize == (int)sizeof(spaces));
5499 memset(eeeeee, 'e', fillsize);
5500 memset(spaces, ' ', fillsize);
5501
5502 XML_Parser parser = XML_ExternalEntityParserCreate(p, context, NULL);
5503 assert_true(parser != NULL);
5504 // pre-grow the buffer to avoid reparsing due to almost-fullness
5505 assert_true(XML_GetBuffer(parser, fillsize * 10103) != NULL);
5506
5507 struct element_decl_data testdata;
5508 testdata.parser = parser;
5509 testdata.count = 0;
5510 XML_SetUserData(parser, &testdata);
5511 XML_SetElementDeclHandler(parser, element_decl_counter);
5512
5513 enum XML_Status status;
5514 // parse the initial text
5515 status = XML_Parse(parser, pre, (int)strlen(pre), XML_FALSE);
5516 if (status != XML_STATUS_OK) {
5517 xml_failure(parser);
5518 }
5519 assert_true(testdata.count == 1); // first element should be done
5520
5521 // ..and the start of the big token
5522 status = XML_Parse(parser, start, (int)strlen(start), XML_FALSE);
5523 if (status != XML_STATUS_OK) {
5524 xml_failure(parser);
5525 }
5526 assert_true(testdata.count == 1); // still just the first one
5527
5528 // try to parse lots of 'e', but the token isn't finished
5529 for (int c = 0; c < 100; ++c) {
5530 status = XML_Parse(parser, eeeeee, fillsize, XML_FALSE);
5531 if (status != XML_STATUS_OK) {
5532 xml_failure(parser);
5533 }
5534 }
5535 assert_true(testdata.count == 1); // *still* just the first one
5536
5537 // end the big token.
5538 status = XML_Parse(parser, end, (int)strlen(end), XML_FALSE);
5539 if (status != XML_STATUS_OK) {
5540 xml_failure(parser);
5541 }
5542
5543 if (enabled) {
5544 // In general, we may need to push more data to trigger a reparse attempt,
5545 // but in this test, the data is constructed to always require it.
5546 assert_true(testdata.count == 1); // or the test is incorrect
5547 // 2x the token length should suffice; the +1 covers the start and end.
5548 for (int c = 0; c < 101; ++c) {
5549 status = XML_Parse(parser, spaces, fillsize, XML_FALSE);
5550 if (status != XML_STATUS_OK) {
5551 xml_failure(parser);
5552 }
5553 }
5554 }
5555 assert_true(testdata.count == 2); // the big token should be done
5556
5557 // parse the final text
5558 status = XML_Parse(parser, post, (int)strlen(post), XML_TRUE);
5559 if (status != XML_STATUS_OK) {
5560 xml_failure(parser);
5561 }
5562 assert_true(testdata.count == 3); // after isFinal=XML_TRUE, all must be done
5563
5564 XML_ParserFree(parser);
5565 return XML_STATUS_OK;
5566 }
5567
START_TEST(test_reparse_deferral_is_inherited)5568 START_TEST(test_reparse_deferral_is_inherited) {
5569 const char *const text
5570 = "<!DOCTYPE document SYSTEM 'something.ext'><document/>";
5571 for (int enabled = 0; enabled <= 1; ++enabled) {
5572 set_subtest("deferral=%d", enabled);
5573
5574 XML_Parser parser = XML_ParserCreate(NULL);
5575 assert_true(parser != NULL);
5576 XML_SetUserData(parser, (void *)&enabled);
5577 XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
5578 // this handler creates a sub-parser and checks that its deferral behavior
5579 // is what we expected, based on the value of `enabled` (in userdata).
5580 XML_SetExternalEntityRefHandler(parser, external_inherited_parser);
5581 assert_true(XML_SetReparseDeferralEnabled(parser, enabled));
5582 if (XML_Parse(parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_OK)
5583 xml_failure(parser);
5584
5585 XML_ParserFree(parser);
5586 }
5587 }
5588 END_TEST
5589
START_TEST(test_set_reparse_deferral_on_null_parser)5590 START_TEST(test_set_reparse_deferral_on_null_parser) {
5591 assert_true(XML_SetReparseDeferralEnabled(NULL, 0) == XML_FALSE);
5592 assert_true(XML_SetReparseDeferralEnabled(NULL, 1) == XML_FALSE);
5593 assert_true(XML_SetReparseDeferralEnabled(NULL, 10) == XML_FALSE);
5594 assert_true(XML_SetReparseDeferralEnabled(NULL, 100) == XML_FALSE);
5595 assert_true(XML_SetReparseDeferralEnabled(NULL, (XML_Bool)INT_MIN)
5596 == XML_FALSE);
5597 assert_true(XML_SetReparseDeferralEnabled(NULL, (XML_Bool)INT_MAX)
5598 == XML_FALSE);
5599 }
5600 END_TEST
5601
START_TEST(test_set_reparse_deferral_on_the_fly)5602 START_TEST(test_set_reparse_deferral_on_the_fly) {
5603 const char *const pre = "<d><x attr='";
5604 const char *const end = "'></x>";
5605 char iiiiii[100];
5606 const int fillsize = (int)sizeof(iiiiii);
5607 memset(iiiiii, 'i', fillsize);
5608
5609 XML_Parser parser = XML_ParserCreate(NULL);
5610 assert_true(parser != NULL);
5611 assert_true(XML_SetReparseDeferralEnabled(parser, XML_TRUE));
5612
5613 CharData storage;
5614 CharData_Init(&storage);
5615 XML_SetUserData(parser, &storage);
5616 XML_SetStartElementHandler(parser, start_element_event_handler);
5617
5618 enum XML_Status status;
5619 // parse the start text
5620 status = XML_Parse(parser, pre, (int)strlen(pre), XML_FALSE);
5621 if (status != XML_STATUS_OK) {
5622 xml_failure(parser);
5623 }
5624 CharData_CheckXMLChars(&storage, XCS("d")); // first element should be done
5625
5626 // try to parse some 'i', but the token isn't finished
5627 status = XML_Parse(parser, iiiiii, fillsize, XML_FALSE);
5628 if (status != XML_STATUS_OK) {
5629 xml_failure(parser);
5630 }
5631 CharData_CheckXMLChars(&storage, XCS("d")); // *still* just the first one
5632
5633 // end the <x> token.
5634 status = XML_Parse(parser, end, (int)strlen(end), XML_FALSE);
5635 if (status != XML_STATUS_OK) {
5636 xml_failure(parser);
5637 }
5638 CharData_CheckXMLChars(&storage, XCS("d")); // not yet.
5639
5640 // now change the heuristic setting and add *no* data
5641 assert_true(XML_SetReparseDeferralEnabled(parser, XML_FALSE));
5642 // we avoid isFinal=XML_TRUE, because that would force-bypass the heuristic.
5643 status = XML_Parse(parser, "", 0, XML_FALSE);
5644 if (status != XML_STATUS_OK) {
5645 xml_failure(parser);
5646 }
5647 CharData_CheckXMLChars(&storage, XCS("dx"));
5648
5649 XML_ParserFree(parser);
5650 }
5651 END_TEST
5652
START_TEST(test_set_bad_reparse_option)5653 START_TEST(test_set_bad_reparse_option) {
5654 XML_Parser parser = XML_ParserCreate(NULL);
5655 assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 2));
5656 assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 3));
5657 assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 99));
5658 assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 127));
5659 assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 128));
5660 assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 129));
5661 assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 255));
5662 assert_true(XML_TRUE == XML_SetReparseDeferralEnabled(parser, 0));
5663 assert_true(XML_TRUE == XML_SetReparseDeferralEnabled(parser, 1));
5664 XML_ParserFree(parser);
5665 }
5666 END_TEST
5667
/* Bookkeeping for the counting allocators below: the sum of all requested
   sizes, and the largest single requested size.  Users are expected to zero
   both before taking a measurement. */
static size_t g_totalAlloc = 0;
static size_t g_biggestAlloc = 0;

/* realloc() wrapper that records the requested size in the counters above
   before delegating to realloc().  The size is counted whether or not the
   underlying call succeeds. */
static void *
counting_realloc(void *ptr, size_t size) {
  if (g_biggestAlloc < size) {
    g_biggestAlloc = size;
  }
  g_totalAlloc += size;
  return realloc(ptr, size);
}

/* malloc() counterpart: a fresh allocation is a counted realloc of NULL. */
static void *
counting_malloc(size_t size) {
  return counting_realloc(NULL, size);
}
5684
START_TEST(test_bypass_heuristic_when_close_to_bufsize)5685 START_TEST(test_bypass_heuristic_when_close_to_bufsize) {
5686 if (g_chunkSize != 0) {
5687 // this test does not use SINGLE_BYTES, because it depends on very precise
5688 // buffer fills.
5689 return;
5690 }
5691 if (! g_reparseDeferralEnabledDefault) {
5692 return; // this test is irrelevant when the deferral heuristic is disabled.
5693 }
5694
5695 const int document_length = 65536;
5696 char *const document = (char *)malloc(document_length);
5697
5698 const XML_Memory_Handling_Suite memfuncs = {
5699 counting_malloc,
5700 counting_realloc,
5701 free,
5702 };
5703
5704 const int leading_list[] = {0, 3, 61, 96, 400, 401, 4000, 4010, 4099, -1};
5705 const int bigtoken_list[] = {3000, 4000, 4001, 4096, 4099, 5000, 20000, -1};
5706 const int fillsize_list[] = {131, 256, 399, 400, 401, 1025, 4099, 4321, -1};
5707
5708 for (const int *leading = leading_list; *leading >= 0; leading++) {
5709 for (const int *bigtoken = bigtoken_list; *bigtoken >= 0; bigtoken++) {
5710 for (const int *fillsize = fillsize_list; *fillsize >= 0; fillsize++) {
5711 set_subtest("leading=%d bigtoken=%d fillsize=%d", *leading, *bigtoken,
5712 *fillsize);
5713 // start by checking that the test looks reasonably valid
5714 assert_true(*leading + *bigtoken <= document_length);
5715
5716 // put 'x' everywhere; some will be overwritten by elements.
5717 memset(document, 'x', document_length);
5718 // maybe add an initial tag
5719 if (*leading) {
5720 assert_true(*leading >= 3); // or the test case is invalid
5721 memcpy(document, "<a>", 3);
5722 }
5723 // add the large token
5724 document[*leading + 0] = '<';
5725 document[*leading + 1] = 'b';
5726 memset(&document[*leading + 2], ' ', *bigtoken - 2); // a spacy token
5727 document[*leading + *bigtoken - 1] = '>';
5728
5729 // 1 for 'b', plus 1 or 0 depending on the presence of 'a'
5730 const int expected_elem_total = 1 + (*leading ? 1 : 0);
5731
5732 XML_Parser parser = XML_ParserCreate_MM(NULL, &memfuncs, NULL);
5733 assert_true(parser != NULL);
5734
5735 CharData storage;
5736 CharData_Init(&storage);
5737 XML_SetUserData(parser, &storage);
5738 XML_SetStartElementHandler(parser, start_element_event_handler);
5739
5740 g_biggestAlloc = 0;
5741 g_totalAlloc = 0;
5742 int offset = 0;
5743 // fill data until the big token is covered (but not necessarily parsed)
5744 while (offset < *leading + *bigtoken) {
5745 assert_true(offset + *fillsize <= document_length);
5746 const enum XML_Status status
5747 = XML_Parse(parser, &document[offset], *fillsize, XML_FALSE);
5748 if (status != XML_STATUS_OK) {
5749 xml_failure(parser);
5750 }
5751 offset += *fillsize;
5752 }
5753 // Now, check that we've had a buffer allocation that could fit the
5754 // context bytes and our big token. In order to detect a special case,
5755 // we need to know how many bytes of our big token were included in the
5756 // first push that contained _any_ bytes of the big token:
5757 const int bigtok_first_chunk_bytes = *fillsize - (*leading % *fillsize);
5758 if (bigtok_first_chunk_bytes >= *bigtoken && XML_CONTEXT_BYTES == 0) {
5759 // Special case: we aren't saving any context, and the whole big token
5760 // was covered by a single fill, so Expat may have parsed directly
5761 // from our input pointer, without allocating an internal buffer.
5762 } else if (*leading < XML_CONTEXT_BYTES) {
5763 assert_true(g_biggestAlloc >= *leading + (size_t)*bigtoken);
5764 } else {
5765 assert_true(g_biggestAlloc >= XML_CONTEXT_BYTES + (size_t)*bigtoken);
5766 }
5767 // fill data until the big token is actually parsed
5768 while (storage.count < expected_elem_total) {
5769 const size_t alloc_before = g_totalAlloc;
5770 assert_true(offset + *fillsize <= document_length);
5771 const enum XML_Status status
5772 = XML_Parse(parser, &document[offset], *fillsize, XML_FALSE);
5773 if (status != XML_STATUS_OK) {
5774 xml_failure(parser);
5775 }
5776 offset += *fillsize;
5777 // since all the bytes of the big token are already in the buffer,
5778 // the bufsize ceiling should make us finish its parsing without any
5779 // further buffer allocations. We assume that there will be no other
5780 // large allocations in this test.
5781 assert_true(g_totalAlloc - alloc_before < 4096);
5782 }
5783 // test-the-test: was our alloc even called?
5784 assert_true(g_totalAlloc > 0);
5785 // test-the-test: there shouldn't be any extra start elements
5786 assert_true(storage.count == expected_elem_total);
5787
5788 XML_ParserFree(parser);
5789 }
5790 }
5791 }
5792 free(document);
5793 }
5794 END_TEST
5795
#if defined(XML_TESTING)
/* Pushes one very large start tag (7654321 bytes) into the parser using a
   number of different chunk-size sequences.  For each sequence it checks that
   the tag was parsed and that the bytes scanned in parse attempts
   (g_bytesScanned) stay within the expected bounds. */
START_TEST(test_varying_buffer_fills) {
  enum { KiB = 1024, MiB = 1024 * KiB };
  const int document_length = 16 * MiB;
  const int big = 7654321; // arbitrarily chosen between 4 and 8 MiB

  if (g_chunkSize != 0) {
    return; // this test is slow, and doesn't use _XML_Parse_SINGLE_BYTES().
  }

  // Build the document: "<t", a long run of spaces, ">", then 'x' padding.
  char *const document = (char *)malloc(document_length);
  assert_true(document != NULL);
  memset(document, 'x', document_length);
  document[0] = '<';
  document[1] = 't';
  memset(&document[2], ' ', big - 2); // a very spacy token
  document[big - 1] = '>';

  // Each testcase is a list of buffer fill sizes, terminated by a value < 0.
  // When reparse deferral is enabled, the final (negated) value is the expected
  // maximum number of bytes scanned in parse attempts.
  const int testcases[][30] = {
      {8 * MiB, -8 * MiB},
      {4 * MiB, 4 * MiB, -12 * MiB}, // try at 4MB, then 8MB = 12 MB total
      // zero-size fills shouldn't trigger the bypass
      {4 * MiB, 0, 4 * MiB, -12 * MiB},
      {4 * MiB, 0, 0, 4 * MiB, -12 * MiB},
      {4 * MiB, 0, 1 * MiB, 0, 3 * MiB, -12 * MiB},
      // try to hit the buffer ceiling only once (at the end)
      {4 * MiB, 2 * MiB, 1 * MiB, 512 * KiB, 256 * KiB, 256 * KiB, -12 * MiB},
      // try to hit the same buffer ceiling multiple times
      {4 * MiB + 1, 2 * MiB, 1 * MiB, 512 * KiB, -25 * MiB},

      // try to hit every ceiling, by always landing 1K shy of the buffer size
      {1 * KiB, 2 * KiB, 4 * KiB, 8 * KiB, 16 * KiB, 32 * KiB, 64 * KiB,
       128 * KiB, 256 * KiB, 512 * KiB, 1 * MiB, 2 * MiB, 4 * MiB, -16 * MiB},

      // try to avoid every ceiling, by always landing 1B past the buffer size
      // the normal 2x heuristic threshold still forces parse attempts.
      {2 * KiB + 1,          // will attempt 2KiB + 1 ==> total 2KiB + 1
       2 * KiB, 4 * KiB,     // will attempt 8KiB + 1 ==> total 10KiB + 2
       8 * KiB, 16 * KiB,    // will attempt 32KiB + 1 ==> total 42KiB + 3
       32 * KiB, 64 * KiB,   // will attempt 128KiB + 1 ==> total 170KiB + 4
       128 * KiB, 256 * KiB, // will attempt 512KiB + 1 ==> total 682KiB + 5
       512 * KiB, 1 * MiB,   // will attempt 2MiB + 1 ==> total 2M + 682K + 6
       2 * MiB, 4 * MiB,     // will attempt 8MiB + 1 ==> total 10M + 682K + 7
       -(10 * MiB + 682 * KiB + 7)},
      // try to avoid every ceiling again, except on our last fill.
      {2 * KiB + 1,          // will attempt 2KiB + 1 ==> total 2KiB + 1
       2 * KiB, 4 * KiB,     // will attempt 8KiB + 1 ==> total 10KiB + 2
       8 * KiB, 16 * KiB,    // will attempt 32KiB + 1 ==> total 42KiB + 3
       32 * KiB, 64 * KiB,   // will attempt 128KiB + 1 ==> total 170KiB + 4
       128 * KiB, 256 * KiB, // will attempt 512KiB + 1 ==> total 682KiB + 5
       512 * KiB, 1 * MiB,   // will attempt 2MiB + 1 ==> total 2M + 682K + 6
       2 * MiB, 4 * MiB - 1, // will attempt 8MiB ==> total 10M + 682K + 6
       -(10 * MiB + 682 * KiB + 6)},

      // try to hit ceilings on the way multiple times
      {512 * KiB + 1, 256 * KiB, 128 * KiB, 128 * KiB - 1, // 1 MiB buffer
       512 * KiB + 1, 256 * KiB, 128 * KiB, 128 * KiB - 1, // 2 MiB buffer
       1 * MiB + 1, 512 * KiB, 256 * KiB, 256 * KiB - 1,   // 4 MiB buffer
       2 * MiB + 1, 1 * MiB, 512 * KiB,                    // 8 MiB buffer
       // we'll make a parse attempt at every parse call
       -(45 * MiB + 12)},
  };
  const int case_total = sizeof(testcases) / sizeof(testcases[0]);
  for (int case_no = 0; case_no < case_total; case_no++) {
    const int *const fills = testcases[case_no];
    set_subtest("#%d {%d %d %d %d ...}", case_no, fills[0], fills[1], fills[2],
                fills[3]);
    XML_Parser parser = XML_ParserCreate(NULL);
    assert_true(parser != NULL);

    CharData storage;
    CharData_Init(&storage);
    XML_SetUserData(parser, &storage);
    XML_SetStartElementHandler(parser, start_element_event_handler);

    g_bytesScanned = 0;
    int worstcase_bytes = 0; // sum of (buffered bytes at each XML_Parse call)
    int offset = 0;
    int step = 0;
    // walk the fill sizes up to (not including) the negative terminator
    for (; fills[step] >= 0; step++) {
      const int chunk = fills[step];
      assert_true(offset + chunk <= document_length); // or test is invalid
      if (XML_Parse(parser, &document[offset], chunk, XML_FALSE)
          != XML_STATUS_OK) {
        xml_failure(parser);
      }
      offset += chunk;
      assert_true(offset <= INT_MAX - worstcase_bytes); // avoid overflow
      worstcase_bytes += offset; // we might've tried to parse all pending bytes
    }
    assert_true(storage.count == 1); // the big token should've been parsed
    assert_true(g_bytesScanned > 0); // test-the-test: does our counter work?
    if (g_reparseDeferralEnabledDefault) {
      // heuristic is enabled; some XML_Parse calls may have deferred reparsing.
      // fills[step] is the terminator, i.e. the negated scan limit.
      const unsigned max_bytes_scanned = -fills[step];
      if (g_bytesScanned > max_bytes_scanned) {
        fprintf(stderr,
                "bytes scanned in parse attempts: actual=%u limit=%u \n",
                g_bytesScanned, max_bytes_scanned);
        fail("too many bytes scanned in parse attempts");
      }
    }
    assert_true(g_bytesScanned <= (unsigned)worstcase_bytes);

    XML_ParserFree(parser);
  }
  free(document);
}
END_TEST
#endif
5910
/* Builds the "basic tests" test case: creates it, adds it to suite `s`,
   installs basic_setup/basic_teardown as its checked fixture, and registers
   the tests listed below in order.
   NOTE(review): the tcase_add_test__ifdef_xml_dtd and
   tcase_add_test__if_xml_ge helpers are defined outside this function;
   judging by their names they register a test only for certain build
   configurations -- confirm against their definitions. */
void
make_basic_test_case(Suite *s) {
  TCase *tc_basic = tcase_create("basic tests");

  suite_add_tcase(s, tc_basic);
  tcase_add_checked_fixture(tc_basic, basic_setup, basic_teardown);

  tcase_add_test(tc_basic, test_nul_byte);
  tcase_add_test(tc_basic, test_u0000_char);
  tcase_add_test(tc_basic, test_siphash_self);
  tcase_add_test(tc_basic, test_siphash_spec);
  tcase_add_test(tc_basic, test_bom_utf8);
  tcase_add_test(tc_basic, test_bom_utf16_be);
  tcase_add_test(tc_basic, test_bom_utf16_le);
  tcase_add_test(tc_basic, test_nobom_utf16_le);
  tcase_add_test(tc_basic, test_hash_collision);
  tcase_add_test(tc_basic, test_illegal_utf8);
  tcase_add_test(tc_basic, test_utf8_auto_align);
  tcase_add_test(tc_basic, test_utf16);
  tcase_add_test(tc_basic, test_utf16_le_epilog_newline);
  tcase_add_test(tc_basic, test_not_utf16);
  tcase_add_test(tc_basic, test_bad_encoding);
  tcase_add_test(tc_basic, test_latin1_umlauts);
  tcase_add_test(tc_basic, test_long_utf8_character);
  tcase_add_test(tc_basic, test_long_latin1_attribute);
  tcase_add_test(tc_basic, test_long_ascii_attribute);
  /* Regression test for SF bug #491986. */
  tcase_add_test(tc_basic, test_danish_latin1);
  /* Regression test for SF bug #514281. */
  tcase_add_test(tc_basic, test_french_charref_hexidecimal);
  tcase_add_test(tc_basic, test_french_charref_decimal);
  tcase_add_test(tc_basic, test_french_latin1);
  tcase_add_test(tc_basic, test_french_utf8);
  tcase_add_test(tc_basic, test_utf8_false_rejection);
  tcase_add_test(tc_basic, test_line_number_after_parse);
  tcase_add_test(tc_basic, test_column_number_after_parse);
  tcase_add_test(tc_basic, test_line_and_column_numbers_inside_handlers);
  tcase_add_test(tc_basic, test_line_number_after_error);
  tcase_add_test(tc_basic, test_column_number_after_error);
  tcase_add_test(tc_basic, test_really_long_lines);
  tcase_add_test(tc_basic, test_really_long_encoded_lines);
  tcase_add_test(tc_basic, test_end_element_events);
  tcase_add_test(tc_basic, test_helper_is_whitespace_normalized);
  tcase_add_test(tc_basic, test_attr_whitespace_normalization);
  tcase_add_test(tc_basic, test_xmldecl_misplaced);
  tcase_add_test(tc_basic, test_xmldecl_invalid);
  tcase_add_test(tc_basic, test_xmldecl_missing_attr);
  tcase_add_test(tc_basic, test_xmldecl_missing_value);
  tcase_add_test__if_xml_ge(tc_basic, test_unknown_encoding_internal_entity);
  tcase_add_test(tc_basic, test_unrecognised_encoding_internal_entity);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_set_encoding);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_no_handler);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_set_bom);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_bad_encoding);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_bad_encoding_2);
  tcase_add_test(tc_basic, test_wfc_undeclared_entity_unread_external_subset);
  tcase_add_test(tc_basic, test_wfc_undeclared_entity_no_external_subset);
  tcase_add_test(tc_basic, test_wfc_undeclared_entity_standalone);
  tcase_add_test(tc_basic,
                 test_wfc_undeclared_entity_with_external_subset_standalone);
  tcase_add_test(tc_basic, test_entity_with_external_subset_unless_standalone);
  tcase_add_test(tc_basic, test_wfc_undeclared_entity_with_external_subset);
  tcase_add_test(tc_basic, test_not_standalone_handler_reject);
  tcase_add_test(tc_basic, test_not_standalone_handler_accept);
  tcase_add_test__if_xml_ge(tc_basic, test_wfc_no_recursive_entity_refs);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_invalid_parse);
  tcase_add_test__if_xml_ge(tc_basic, test_dtd_default_handling);
  tcase_add_test(tc_basic, test_dtd_attr_handling);
  tcase_add_test(tc_basic, test_empty_ns_without_namespaces);
  tcase_add_test(tc_basic, test_ns_in_attribute_default_without_namespaces);
  tcase_add_test(tc_basic, test_stop_parser_between_char_data_calls);
  tcase_add_test(tc_basic, test_suspend_parser_between_char_data_calls);
  tcase_add_test(tc_basic, test_repeated_stop_parser_between_char_data_calls);
  tcase_add_test(tc_basic, test_good_cdata_ascii);
  tcase_add_test(tc_basic, test_good_cdata_utf16);
  tcase_add_test(tc_basic, test_good_cdata_utf16_le);
  tcase_add_test(tc_basic, test_long_cdata_utf16);
  tcase_add_test(tc_basic, test_multichar_cdata_utf16);
  tcase_add_test(tc_basic, test_utf16_bad_surrogate_pair);
  tcase_add_test(tc_basic, test_bad_cdata);
  tcase_add_test(tc_basic, test_bad_cdata_utf16);
  tcase_add_test(tc_basic, test_stop_parser_between_cdata_calls);
  tcase_add_test(tc_basic, test_suspend_parser_between_cdata_calls);
  tcase_add_test(tc_basic, test_memory_allocation);
  tcase_add_test__if_xml_ge(tc_basic, test_default_current);
  tcase_add_test(tc_basic, test_dtd_elements);
  tcase_add_test(tc_basic, test_dtd_elements_nesting);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_set_foreign_dtd);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_foreign_dtd_not_standalone);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_invalid_foreign_dtd);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_foreign_dtd_with_doctype);
  tcase_add_test__ifdef_xml_dtd(tc_basic,
                                test_foreign_dtd_without_external_subset);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_empty_foreign_dtd);
  tcase_add_test(tc_basic, test_set_base);
  tcase_add_test(tc_basic, test_attributes);
  tcase_add_test__if_xml_ge(tc_basic, test_reset_in_entity);
  tcase_add_test(tc_basic, test_resume_invalid_parse);
  tcase_add_test(tc_basic, test_resume_resuspended);
  tcase_add_test(tc_basic, test_cdata_default);
  tcase_add_test(tc_basic, test_subordinate_reset);
  tcase_add_test(tc_basic, test_subordinate_suspend);
  tcase_add_test__if_xml_ge(tc_basic, test_subordinate_xdecl_suspend);
  tcase_add_test__if_xml_ge(tc_basic, test_subordinate_xdecl_abort);
  tcase_add_test__ifdef_xml_dtd(tc_basic,
                                test_ext_entity_invalid_suspended_parse);
  tcase_add_test(tc_basic, test_explicit_encoding);
  tcase_add_test(tc_basic, test_trailing_cr);
  tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_trailing_cr);
  tcase_add_test(tc_basic, test_trailing_rsqb);
  tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_trailing_rsqb);
  tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_good_cdata);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_user_parameters);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_ref_parameter);
  tcase_add_test(tc_basic, test_empty_parse);
  tcase_add_test(tc_basic, test_negative_len_parse);
  tcase_add_test(tc_basic, test_negative_len_parse_buffer);
  tcase_add_test(tc_basic, test_get_buffer_1);
  tcase_add_test(tc_basic, test_get_buffer_2);
  /* Only registered when the build keeps context bytes. */
#if XML_CONTEXT_BYTES > 0
  tcase_add_test(tc_basic, test_get_buffer_3_overflow);
#endif
  tcase_add_test(tc_basic, test_buffer_can_grow_to_max);
  tcase_add_test(tc_basic, test_getbuffer_allocates_on_zero_len);
  tcase_add_test(tc_basic, test_byte_info_at_end);
  tcase_add_test(tc_basic, test_byte_info_at_error);
  tcase_add_test(tc_basic, test_byte_info_at_cdata);
  tcase_add_test(tc_basic, test_predefined_entities);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_invalid_tag_in_dtd);
  tcase_add_test(tc_basic, test_not_predefined_entities);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_ignore_section);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_ignore_section_utf16);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_ignore_section_utf16_be);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_bad_ignore_section);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_external_bom_consumed);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_external_entity_values);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_not_standalone);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_value_abort);
  tcase_add_test(tc_basic, test_bad_public_doctype);
  tcase_add_test(tc_basic, test_attribute_enum_value);
  tcase_add_test(tc_basic, test_predefined_entity_redefinition);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_dtd_stop_processing);
  tcase_add_test(tc_basic, test_public_notation_no_sysid);
  tcase_add_test(tc_basic, test_nested_groups);
  tcase_add_test(tc_basic, test_group_choice);
  tcase_add_test(tc_basic, test_standalone_parameter_entity);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_skipped_parameter_entity);
  tcase_add_test__ifdef_xml_dtd(tc_basic,
                                test_recursive_external_parameter_entity);
  tcase_add_test__ifdef_xml_dtd(tc_basic,
                                test_recursive_external_parameter_entity_2);
  tcase_add_test(tc_basic, test_undefined_ext_entity_in_external_dtd);
  tcase_add_test(tc_basic, test_suspend_xdecl);
  tcase_add_test(tc_basic, test_abort_epilog);
  tcase_add_test(tc_basic, test_abort_epilog_2);
  tcase_add_test(tc_basic, test_suspend_epilog);
  tcase_add_test(tc_basic, test_suspend_in_sole_empty_tag);
  tcase_add_test(tc_basic, test_unfinished_epilog);
  tcase_add_test(tc_basic, test_partial_char_in_epilog);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_suspend_resume_internal_entity);
  tcase_add_test__ifdef_xml_dtd(tc_basic,
                                test_suspend_resume_internal_entity_issue_629);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_resume_entity_with_syntax_error);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_suspend_resume_parameter_entity);
  tcase_add_test(tc_basic, test_restart_on_error);
  tcase_add_test(tc_basic, test_reject_lt_in_attribute_value);
  tcase_add_test(tc_basic, test_reject_unfinished_param_in_att_value);
  tcase_add_test(tc_basic, test_trailing_cr_in_att_value);
  tcase_add_test(tc_basic, test_standalone_internal_entity);
  tcase_add_test(tc_basic, test_skipped_external_entity);
  tcase_add_test(tc_basic, test_skipped_null_loaded_ext_entity);
  tcase_add_test(tc_basic, test_skipped_unloaded_ext_entity);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_param_entity_with_trailing_cr);
  tcase_add_test__if_xml_ge(tc_basic, test_invalid_character_entity);
  tcase_add_test__if_xml_ge(tc_basic, test_invalid_character_entity_2);
  tcase_add_test__if_xml_ge(tc_basic, test_invalid_character_entity_3);
  tcase_add_test__if_xml_ge(tc_basic, test_invalid_character_entity_4);
  tcase_add_test(tc_basic, test_pi_handled_in_default);
  tcase_add_test(tc_basic, test_comment_handled_in_default);
  tcase_add_test(tc_basic, test_pi_yml);
  tcase_add_test(tc_basic, test_pi_xnl);
  tcase_add_test(tc_basic, test_pi_xmm);
  tcase_add_test(tc_basic, test_utf16_pi);
  tcase_add_test(tc_basic, test_utf16_be_pi);
  tcase_add_test(tc_basic, test_utf16_be_comment);
  tcase_add_test(tc_basic, test_utf16_le_comment);
  tcase_add_test(tc_basic, test_missing_encoding_conversion_fn);
  tcase_add_test(tc_basic, test_failing_encoding_conversion_fn);
  tcase_add_test(tc_basic, test_unknown_encoding_success);
  tcase_add_test(tc_basic, test_unknown_encoding_bad_name);
  tcase_add_test(tc_basic, test_unknown_encoding_bad_name_2);
  tcase_add_test(tc_basic, test_unknown_encoding_long_name_1);
  tcase_add_test(tc_basic, test_unknown_encoding_long_name_2);
  tcase_add_test(tc_basic, test_invalid_unknown_encoding);
  tcase_add_test(tc_basic, test_unknown_ascii_encoding_ok);
  tcase_add_test(tc_basic, test_unknown_ascii_encoding_fail);
  tcase_add_test(tc_basic, test_unknown_encoding_invalid_length);
  tcase_add_test(tc_basic, test_unknown_encoding_invalid_topbit);
  tcase_add_test(tc_basic, test_unknown_encoding_invalid_surrogate);
  tcase_add_test(tc_basic, test_unknown_encoding_invalid_high);
  tcase_add_test(tc_basic, test_unknown_encoding_invalid_attr_value);
  tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_latin1_utf16le_bom);
  tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_latin1_utf16be_bom);
  tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_latin1_utf16le_bom2);
  tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_latin1_utf16be_bom2);
  tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_utf16_be);
  tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_utf16_le);
  tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_utf16_unknown);
  tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_utf8_non_bom);
  tcase_add_test(tc_basic, test_utf8_in_cdata_section);
  tcase_add_test(tc_basic, test_utf8_in_cdata_section_2);
  tcase_add_test(tc_basic, test_utf8_in_start_tags);
  tcase_add_test(tc_basic, test_trailing_spaces_in_elements);
  tcase_add_test(tc_basic, test_utf16_attribute);
  tcase_add_test(tc_basic, test_utf16_second_attr);
  tcase_add_test(tc_basic, test_attr_after_solidus);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_utf16_pe);
  tcase_add_test(tc_basic, test_bad_attr_desc_keyword);
  tcase_add_test(tc_basic, test_bad_attr_desc_keyword_utf16);
  tcase_add_test(tc_basic, test_bad_doctype);
  tcase_add_test(tc_basic, test_bad_doctype_utf8);
  tcase_add_test(tc_basic, test_bad_doctype_utf16);
  tcase_add_test(tc_basic, test_bad_doctype_plus);
  tcase_add_test(tc_basic, test_bad_doctype_star);
  tcase_add_test(tc_basic, test_bad_doctype_query);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_unknown_encoding_bad_ignore);
  tcase_add_test(tc_basic, test_entity_in_utf16_be_attr);
  tcase_add_test(tc_basic, test_entity_in_utf16_le_attr);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_entity_public_utf16_be);
  tcase_add_test__ifdef_xml_dtd(tc_basic, test_entity_public_utf16_le);
  tcase_add_test(tc_basic, test_short_doctype);
  tcase_add_test(tc_basic, test_short_doctype_2);
  tcase_add_test(tc_basic, test_short_doctype_3);
  tcase_add_test(tc_basic, test_long_doctype);
  tcase_add_test(tc_basic, test_bad_entity);
  tcase_add_test(tc_basic, test_bad_entity_2);
  tcase_add_test(tc_basic, test_bad_entity_3);
  tcase_add_test(tc_basic, test_bad_entity_4);
  tcase_add_test(tc_basic, test_bad_notation);
  tcase_add_test(tc_basic, test_default_doctype_handler);
  tcase_add_test(tc_basic, test_empty_element_abort);
  tcase_add_test__ifdef_xml_dtd(tc_basic,
                                test_pool_integrity_with_unfinished_attr);
  tcase_add_test__if_xml_ge(tc_basic, test_nested_entity_suspend);
  /* Registered only in XML_TESTING builds. */
#if defined(XML_TESTING)
  tcase_add_test(tc_basic, test_big_tokens_scale_linearly);
#endif
  tcase_add_test(tc_basic, test_set_reparse_deferral);
  tcase_add_test(tc_basic, test_reparse_deferral_is_inherited);
  tcase_add_test(tc_basic, test_set_reparse_deferral_on_null_parser);
  tcase_add_test(tc_basic, test_set_reparse_deferral_on_the_fly);
  tcase_add_test(tc_basic, test_set_bad_reparse_option);
  tcase_add_test(tc_basic, test_bypass_heuristic_when_close_to_bufsize);
  /* Registered only in XML_TESTING builds; the test itself is defined under
     the same guard. */
#if defined(XML_TESTING)
  tcase_add_test(tc_basic, test_varying_buffer_fills);
#endif
}
6168