1 /* Tests in the "miscellaneous" test case for the Expat test suite
2 __ __ _
3 ___\ \/ /_ __ __ _| |_
4 / _ \\ /| '_ \ / _` | __|
5 | __// \| |_) | (_| | |_
6 \___/_/\_\ .__/ \__,_|\__|
7 |_| XML parser
8
9 Copyright (c) 2001-2006 Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
10 Copyright (c) 2003 Greg Stein <gstein@users.sourceforge.net>
11 Copyright (c) 2005-2007 Steven Solie <steven@solie.ca>
12 Copyright (c) 2005-2012 Karl Waclawek <karl@waclawek.net>
13 Copyright (c) 2016-2024 Sebastian Pipping <sebastian@pipping.org>
14 Copyright (c) 2017-2022 Rhodri James <rhodri@wildebeest.org.uk>
15 Copyright (c) 2017 Joe Orton <jorton@redhat.com>
16 Copyright (c) 2017 José Gutiérrez de la Concha <jose@zeroc.com>
17 Copyright (c) 2018 Marco Maggi <marco.maggi-ipsu@poste.it>
18 Copyright (c) 2019 David Loffredo <loffredo@steptools.com>
19 Copyright (c) 2020 Tim Gates <tim.gates@iress.com>
20 Copyright (c) 2021 Donghee Na <donghee.na@python.org>
21 Copyright (c) 2023 Sony Corporation / Snild Dolkow <snild@sony.com>
22 Licensed under the MIT license:
23
24 Permission is hereby granted, free of charge, to any person obtaining
25 a copy of this software and associated documentation files (the
26 "Software"), to deal in the Software without restriction, including
27 without limitation the rights to use, copy, modify, merge, publish,
28 distribute, sublicense, and/or sell copies of the Software, and to permit
29 persons to whom the Software is furnished to do so, subject to the
30 following conditions:
31
32 The above copyright notice and this permission notice shall be included
33 in all copies or substantial portions of the Software.
34
35 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
36 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
37 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
38 NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
39 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
40 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
41 USE OR OTHER DEALINGS IN THE SOFTWARE.
42 */
43
44 #if defined(NDEBUG)
45 # undef NDEBUG /* because test suite relies on assert(...) at the moment */
46 #endif
47
48 #include <assert.h>
49 #include <string.h>
50
51 #include "expat_config.h"
52
53 #include "expat.h"
54 #include "internal.h"
55 #include "minicheck.h"
56 #include "memcheck.h"
57 #include "common.h"
58 #include "ascii.h" /* for ASCII_xxx */
59 #include "handlers.h"
60 #include "misc_tests.h"
61
62 /* Test that a failure to allocate the parser structure fails gracefully */
START_TEST(test_misc_alloc_create_parser)63 START_TEST(test_misc_alloc_create_parser) {
64 XML_Memory_Handling_Suite memsuite = {duff_allocator, realloc, free};
65 unsigned int i;
66 const unsigned int max_alloc_count = 10;
67
68 /* Something this simple shouldn't need more than 10 allocations */
69 for (i = 0; i < max_alloc_count; i++) {
70 g_allocation_count = i;
71 g_parser = XML_ParserCreate_MM(NULL, &memsuite, NULL);
72 if (g_parser != NULL)
73 break;
74 }
75 if (i == 0)
76 fail("Parser unexpectedly ignored failing allocator");
77 else if (i == max_alloc_count)
78 fail("Parser not created with max allocation count");
79 }
80 END_TEST
81
82 /* Test memory allocation failures for a parser with an encoding */
START_TEST(test_misc_alloc_create_parser_with_encoding)83 START_TEST(test_misc_alloc_create_parser_with_encoding) {
84 XML_Memory_Handling_Suite memsuite = {duff_allocator, realloc, free};
85 unsigned int i;
86 const unsigned int max_alloc_count = 10;
87
88 /* Try several levels of allocation */
89 for (i = 0; i < max_alloc_count; i++) {
90 g_allocation_count = i;
91 g_parser = XML_ParserCreate_MM(XCS("us-ascii"), &memsuite, NULL);
92 if (g_parser != NULL)
93 break;
94 }
95 if (i == 0)
96 fail("Parser ignored failing allocator");
97 else if (i == max_alloc_count)
98 fail("Parser not created with max allocation count");
99 }
100 END_TEST
101
102 /* Test that freeing a NULL parser doesn't cause an explosion.
103 * (Not actually tested anywhere else)
104 */
START_TEST(test_misc_null_parser)105 START_TEST(test_misc_null_parser) {
106 XML_ParserFree(NULL);
107 }
108 END_TEST
109
/* EXPAT_TESTS_UBSAN is 1 when the compiler provides __has_feature and
 * reports the undefined_behavior_sanitizer feature, and 0 otherwise.
 */
#if ! defined(__has_feature)
#  define EXPAT_TESTS_UBSAN 0
#else
#  if __has_feature(undefined_behavior_sanitizer)
#    define EXPAT_TESTS_UBSAN 1
#  else
#    define EXPAT_TESTS_UBSAN 0
#  endif
#endif
119
120 /* Test that XML_ErrorString rejects out-of-range codes */
START_TEST(test_misc_error_string)121 START_TEST(test_misc_error_string) {
122 #if ! EXPAT_TESTS_UBSAN // because this would trigger UBSan
123 union {
124 enum XML_Error xml_error;
125 int integer;
126 } trickery;
127
128 assert_true(sizeof(enum XML_Error) == sizeof(int)); // self-test
129
130 trickery.integer = -1;
131 if (XML_ErrorString(trickery.xml_error) != NULL)
132 fail("Negative error code not rejected");
133
134 trickery.integer = 100;
135 if (XML_ErrorString(trickery.xml_error) != NULL)
136 fail("Large error code not rejected");
137 #endif
138 }
139 END_TEST
140
141 /* Test the version information is consistent */
142
143 /* Since we are working in XML_LChars (potentially 16-bits), we
144 * can't use the standard C library functions for character
145 * manipulation and have to roll our own.
146 */
147 static int
parse_version(const XML_LChar * version_text,XML_Expat_Version * version_struct)148 parse_version(const XML_LChar *version_text,
149 XML_Expat_Version *version_struct) {
150 if (! version_text)
151 return XML_FALSE;
152
153 while (*version_text != 0x00) {
154 if (*version_text >= ASCII_0 && *version_text <= ASCII_9)
155 break;
156 version_text++;
157 }
158 if (*version_text == 0x00)
159 return XML_FALSE;
160
161 /* version_struct->major = strtoul(version_text, 10, &version_text) */
162 version_struct->major = 0;
163 while (*version_text >= ASCII_0 && *version_text <= ASCII_9) {
164 version_struct->major
165 = 10 * version_struct->major + (*version_text++ - ASCII_0);
166 }
167 if (*version_text++ != ASCII_PERIOD)
168 return XML_FALSE;
169
170 /* Now for the minor version number */
171 version_struct->minor = 0;
172 while (*version_text >= ASCII_0 && *version_text <= ASCII_9) {
173 version_struct->minor
174 = 10 * version_struct->minor + (*version_text++ - ASCII_0);
175 }
176 if (*version_text++ != ASCII_PERIOD)
177 return XML_FALSE;
178
179 /* Finally the micro version number */
180 version_struct->micro = 0;
181 while (*version_text >= ASCII_0 && *version_text <= ASCII_9) {
182 version_struct->micro
183 = 10 * version_struct->micro + (*version_text++ - ASCII_0);
184 }
185 if (*version_text != 0x00)
186 return XML_FALSE;
187 return XML_TRUE;
188 }
189
190 static int
versions_equal(const XML_Expat_Version * first,const XML_Expat_Version * second)191 versions_equal(const XML_Expat_Version *first,
192 const XML_Expat_Version *second) {
193 return (first->major == second->major && first->minor == second->minor
194 && first->micro == second->micro);
195 }
196
START_TEST(test_misc_version)197 START_TEST(test_misc_version) {
198 XML_Expat_Version read_version = XML_ExpatVersionInfo();
199 /* Silence compiler warning with the following assignment */
200 XML_Expat_Version parsed_version = {0, 0, 0};
201 const XML_LChar *version_text = XML_ExpatVersion();
202
203 if (version_text == NULL)
204 fail("Could not obtain version text");
205 assert(version_text != NULL);
206 if (! parse_version(version_text, &parsed_version))
207 fail("Unable to parse version text");
208 if (! versions_equal(&read_version, &parsed_version))
209 fail("Version mismatch");
210
211 if (xcstrcmp(version_text, XCS("expat_2.6.4"))) /* needs bump on releases */
212 fail("XML_*_VERSION in expat.h out of sync?\n");
213 }
214 END_TEST
215
216 /* Test feature information */
START_TEST(test_misc_features)217 START_TEST(test_misc_features) {
218 const XML_Feature *features = XML_GetFeatureList();
219
220 /* Prevent problems with double-freeing parsers */
221 g_parser = NULL;
222 if (features == NULL) {
223 fail("Failed to get feature information");
224 } else {
225 /* Loop through the features checking what we can */
226 while (features->feature != XML_FEATURE_END) {
227 switch (features->feature) {
228 case XML_FEATURE_SIZEOF_XML_CHAR:
229 if (features->value != sizeof(XML_Char))
230 fail("Incorrect size of XML_Char");
231 break;
232 case XML_FEATURE_SIZEOF_XML_LCHAR:
233 if (features->value != sizeof(XML_LChar))
234 fail("Incorrect size of XML_LChar");
235 break;
236 default:
237 break;
238 }
239 features++;
240 }
241 }
242 }
243 END_TEST
244
245 /* Regression test for GitHub Issue #17: memory leak parsing attribute
246 * values with mixed bound and unbound namespaces.
247 */
START_TEST(test_misc_attribute_leak)248 START_TEST(test_misc_attribute_leak) {
249 const char *text = "<D xmlns:L=\"D\" l:a='' L:a=''/>";
250 XML_Memory_Handling_Suite memsuite
251 = {tracking_malloc, tracking_realloc, tracking_free};
252
253 g_parser = XML_ParserCreate_MM(XCS("UTF-8"), &memsuite, XCS("\n"));
254 expect_failure(text, XML_ERROR_UNBOUND_PREFIX, "Unbound prefixes not found");
255 XML_ParserFree(g_parser);
256 /* Prevent the teardown trying to double free */
257 g_parser = NULL;
258
259 if (! tracking_report())
260 fail("Memory leak found");
261 }
262 END_TEST
263
264 /* Test parser created for UTF-16LE is successful */
START_TEST(test_misc_utf16le)265 START_TEST(test_misc_utf16le) {
266 const char text[] =
267 /* <?xml version='1.0'?><q>Hi</q> */
268 "<\0?\0x\0m\0l\0 \0"
269 "v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0?\0>\0"
270 "<\0q\0>\0H\0i\0<\0/\0q\0>\0";
271 const XML_Char *expected = XCS("Hi");
272 CharData storage;
273
274 g_parser = XML_ParserCreate(XCS("UTF-16LE"));
275 if (g_parser == NULL)
276 fail("Parser not created");
277
278 CharData_Init(&storage);
279 XML_SetUserData(g_parser, &storage);
280 XML_SetCharacterDataHandler(g_parser, accumulate_characters);
281 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
282 == XML_STATUS_ERROR)
283 xml_failure(g_parser);
284 CharData_CheckXMLChars(&storage, expected);
285 }
286 END_TEST
287
START_TEST(test_misc_stop_during_end_handler_issue_240_1)288 START_TEST(test_misc_stop_during_end_handler_issue_240_1) {
289 XML_Parser parser;
290 DataIssue240 *mydata;
291 enum XML_Status result;
292 const char *const doc1 = "<doc><e1/><e><foo/></e></doc>";
293
294 parser = XML_ParserCreate(NULL);
295 XML_SetElementHandler(parser, start_element_issue_240, end_element_issue_240);
296 mydata = (DataIssue240 *)malloc(sizeof(DataIssue240));
297 mydata->parser = parser;
298 mydata->deep = 0;
299 XML_SetUserData(parser, mydata);
300
301 result = _XML_Parse_SINGLE_BYTES(parser, doc1, (int)strlen(doc1), 1);
302 XML_ParserFree(parser);
303 free(mydata);
304 if (result != XML_STATUS_ERROR)
305 fail("Stopping the parser did not work as expected");
306 }
307 END_TEST
308
START_TEST(test_misc_stop_during_end_handler_issue_240_2)309 START_TEST(test_misc_stop_during_end_handler_issue_240_2) {
310 XML_Parser parser;
311 DataIssue240 *mydata;
312 enum XML_Status result;
313 const char *const doc2 = "<doc><elem/></doc>";
314
315 parser = XML_ParserCreate(NULL);
316 XML_SetElementHandler(parser, start_element_issue_240, end_element_issue_240);
317 mydata = (DataIssue240 *)malloc(sizeof(DataIssue240));
318 mydata->parser = parser;
319 mydata->deep = 0;
320 XML_SetUserData(parser, mydata);
321
322 result = _XML_Parse_SINGLE_BYTES(parser, doc2, (int)strlen(doc2), 1);
323 XML_ParserFree(parser);
324 free(mydata);
325 if (result != XML_STATUS_ERROR)
326 fail("Stopping the parser did not work as expected");
327 }
328 END_TEST
329
START_TEST(test_misc_deny_internal_entity_closing_doctype_issue_317)330 START_TEST(test_misc_deny_internal_entity_closing_doctype_issue_317) {
331 const char *const inputOne = "<!DOCTYPE d [\n"
332 "<!ENTITY % e ']><d/>'>\n"
333 "\n"
334 "%e;";
335 const char *const inputTwo
336 = "<!DOCTYPE d [\n"
337 "<!ENTITY % e1 ']><d/>'><!ENTITY % e2 '%e1;'>\n"
338 "\n"
339 "%e2;";
340 const char *const inputThree = "<!DOCTYPE d [\n"
341 "<!ENTITY % e ']><d'>\n"
342 "\n"
343 "%e;/>";
344 const char *const inputIssue317 = "<!DOCTYPE doc [\n"
345 "<!ENTITY % foo ']>\n"
346 "<doc>Hell<oc (#PCDATA)*>'>\n"
347 "%foo;\n"
348 "]>\n"
349 "<doc>Hello, world</dVc>";
350
351 const char *const inputs[] = {inputOne, inputTwo, inputThree, inputIssue317};
352 size_t inputIndex = 0;
353
354 for (; inputIndex < sizeof(inputs) / sizeof(inputs[0]); inputIndex++) {
355 set_subtest("%s", inputs[inputIndex]);
356 XML_Parser parser;
357 enum XML_Status parseResult;
358 int setParamEntityResult;
359 XML_Size lineNumber;
360 XML_Size columnNumber;
361 const char *const input = inputs[inputIndex];
362
363 parser = XML_ParserCreate(NULL);
364 setParamEntityResult
365 = XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
366 if (setParamEntityResult != 1)
367 fail("Failed to set XML_PARAM_ENTITY_PARSING_ALWAYS.");
368
369 parseResult = _XML_Parse_SINGLE_BYTES(parser, input, (int)strlen(input), 0);
370 if (parseResult != XML_STATUS_ERROR) {
371 parseResult = _XML_Parse_SINGLE_BYTES(parser, "", 0, 1);
372 if (parseResult != XML_STATUS_ERROR) {
373 fail("Parsing was expected to fail but succeeded.");
374 }
375 }
376
377 if (XML_GetErrorCode(parser) != XML_ERROR_INVALID_TOKEN)
378 fail("Error code does not match XML_ERROR_INVALID_TOKEN");
379
380 lineNumber = XML_GetCurrentLineNumber(parser);
381 if (lineNumber != 4)
382 fail("XML_GetCurrentLineNumber does not work as expected.");
383
384 columnNumber = XML_GetCurrentColumnNumber(parser);
385 if (columnNumber != 0)
386 fail("XML_GetCurrentColumnNumber does not work as expected.");
387
388 XML_ParserFree(parser);
389 }
390 }
391 END_TEST
392
START_TEST(test_misc_tag_mismatch_reset_leak)393 START_TEST(test_misc_tag_mismatch_reset_leak) {
394 #ifdef XML_NS
395 const char *const text = "<open xmlns='https://namespace1.test'></close>";
396 XML_Parser parser = XML_ParserCreateNS(NULL, XCS('\n'));
397
398 if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
399 != XML_STATUS_ERROR)
400 fail("Call to parse was expected to fail");
401 if (XML_GetErrorCode(parser) != XML_ERROR_TAG_MISMATCH)
402 fail("Call to parse was expected to fail from a closing tag mismatch");
403
404 XML_ParserReset(parser, NULL);
405
406 if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
407 != XML_STATUS_ERROR)
408 fail("Call to parse was expected to fail");
409 if (XML_GetErrorCode(parser) != XML_ERROR_TAG_MISMATCH)
410 fail("Call to parse was expected to fail from a closing tag mismatch");
411
412 XML_ParserFree(parser);
413 #endif
414 }
415 END_TEST
416
START_TEST(test_misc_create_external_entity_parser_with_null_context)417 START_TEST(test_misc_create_external_entity_parser_with_null_context) {
418 // With XML_DTD undefined, the only supported case of external entities
419 // is pattern "<!ENTITY entity123 SYSTEM 'filename123'>". A NULL context
420 // was causing a segfault through a null pointer dereference in function
421 // setContext, previously.
422 XML_Parser parser = XML_ParserCreate(NULL);
423 XML_Parser ext_parser = XML_ExternalEntityParserCreate(parser, NULL, NULL);
424 #ifdef XML_DTD
425 assert_true(ext_parser != NULL);
426 XML_ParserFree(ext_parser);
427 #else
428 assert_true(ext_parser == NULL);
429 #endif /* XML_DTD */
430 XML_ParserFree(parser);
431 }
432 END_TEST
433
START_TEST(test_misc_general_entities_support)434 START_TEST(test_misc_general_entities_support) {
435 const char *const doc
436 = "<!DOCTYPE r [\n"
437 "<!ENTITY e1 'v1'>\n"
438 "<!ENTITY e2 SYSTEM 'v2'>\n"
439 "]>\n"
440 "<r a1='[&e1;]'>[&e1;][&e2;][&'><"]</r>";
441
442 CharData storage;
443 CharData_Init(&storage);
444
445 XML_Parser parser = XML_ParserCreate(NULL);
446 XML_SetUserData(parser, &storage);
447 XML_SetStartElementHandler(parser, accumulate_start_element);
448 XML_SetExternalEntityRefHandler(parser,
449 external_entity_failer__if_not_xml_ge);
450 XML_SetEntityDeclHandler(parser, accumulate_entity_decl);
451 XML_SetCharacterDataHandler(parser, accumulate_characters);
452
453 if (_XML_Parse_SINGLE_BYTES(parser, doc, (int)strlen(doc), XML_TRUE)
454 != XML_STATUS_OK) {
455 xml_failure(parser);
456 }
457
458 XML_ParserFree(parser);
459
460 CharData_CheckXMLChars(&storage,
461 /* clang-format off */
462 #if XML_GE == 1
463 XCS("e1=v1\n")
464 XCS("e2=(null)\n")
465 XCS("(r(a1=[v1]))\n")
466 XCS("[v1][][&'><\"]")
467 #else
468 XCS("e1=&e1;\n")
469 XCS("e2=(null)\n")
470 XCS("(r(a1=[&e1;]))\n")
471 XCS("[&e1;][&e2;][&'><\"]")
472 #endif
473 );
474 /* clang-format on */
475 }
476 END_TEST
477
478 static void XMLCALL
resumable_stopping_character_handler(void * userData,const XML_Char * s,int len)479 resumable_stopping_character_handler(void *userData, const XML_Char *s,
480 int len) {
481 UNUSED_P(s);
482 UNUSED_P(len);
483 XML_Parser parser = (XML_Parser)userData;
484 XML_StopParser(parser, XML_TRUE);
485 }
486
487 // NOTE: This test needs active LeakSanitizer to be of actual use
START_TEST(test_misc_char_handler_stop_without_leak)488 START_TEST(test_misc_char_handler_stop_without_leak) {
489 const char *const data
490 = "<!DOCTYPE t1[<!ENTITY e1 'angle<'><!ENTITY e2 '&e1;'>]><t1>&e2;";
491 XML_Parser parser = XML_ParserCreate(NULL);
492 assert_true(parser != NULL);
493 XML_SetUserData(parser, parser);
494 XML_SetCharacterDataHandler(parser, resumable_stopping_character_handler);
495 _XML_Parse_SINGLE_BYTES(parser, data, (int)strlen(data), XML_FALSE);
496 XML_ParserFree(parser);
497 }
498 END_TEST
499
START_TEST(test_misc_resumeparser_not_crashing)500 START_TEST(test_misc_resumeparser_not_crashing) {
501 XML_Parser parser = XML_ParserCreate(NULL);
502 XML_GetBuffer(parser, 1);
503 XML_StopParser(parser, /*resumable=*/XML_TRUE);
504 XML_ResumeParser(parser); // could crash here, previously
505 XML_ParserFree(parser);
506 }
507 END_TEST
508
START_TEST(test_misc_stopparser_rejects_unstarted_parser)509 START_TEST(test_misc_stopparser_rejects_unstarted_parser) {
510 const XML_Bool cases[] = {XML_TRUE, XML_FALSE};
511 for (size_t i = 0; i < sizeof(cases) / sizeof(cases[0]); i++) {
512 const XML_Bool resumable = cases[i];
513 XML_Parser parser = XML_ParserCreate(NULL);
514 assert_true(XML_GetErrorCode(parser) == XML_ERROR_NONE);
515 assert_true(XML_StopParser(parser, resumable) == XML_STATUS_ERROR);
516 assert_true(XML_GetErrorCode(parser) == XML_ERROR_NOT_STARTED);
517 XML_ParserFree(parser);
518 }
519 }
520 END_TEST
521
522 void
make_miscellaneous_test_case(Suite * s)523 make_miscellaneous_test_case(Suite *s) {
524 TCase *tc_misc = tcase_create("miscellaneous tests");
525
526 suite_add_tcase(s, tc_misc);
527 tcase_add_checked_fixture(tc_misc, NULL, basic_teardown);
528
529 tcase_add_test(tc_misc, test_misc_alloc_create_parser);
530 tcase_add_test(tc_misc, test_misc_alloc_create_parser_with_encoding);
531 tcase_add_test(tc_misc, test_misc_null_parser);
532 tcase_add_test(tc_misc, test_misc_error_string);
533 tcase_add_test(tc_misc, test_misc_version);
534 tcase_add_test(tc_misc, test_misc_features);
535 tcase_add_test(tc_misc, test_misc_attribute_leak);
536 tcase_add_test(tc_misc, test_misc_utf16le);
537 tcase_add_test(tc_misc, test_misc_stop_during_end_handler_issue_240_1);
538 tcase_add_test(tc_misc, test_misc_stop_during_end_handler_issue_240_2);
539 tcase_add_test__ifdef_xml_dtd(
540 tc_misc, test_misc_deny_internal_entity_closing_doctype_issue_317);
541 tcase_add_test(tc_misc, test_misc_tag_mismatch_reset_leak);
542 tcase_add_test(tc_misc,
543 test_misc_create_external_entity_parser_with_null_context);
544 tcase_add_test(tc_misc, test_misc_general_entities_support);
545 tcase_add_test(tc_misc, test_misc_char_handler_stop_without_leak);
546 tcase_add_test(tc_misc, test_misc_resumeparser_not_crashing);
547 tcase_add_test(tc_misc, test_misc_stopparser_rejects_unstarted_parser);
548 }
549