xref: /freebsd/contrib/expat/tests/misc_tests.c (revision 5f757f3f)
1 /* Tests in the "miscellaneous" test case for the Expat test suite
2                             __  __            _
3                          ___\ \/ /_ __   __ _| |_
4                         / _ \\  /| '_ \ / _` | __|
5                        |  __//  \| |_) | (_| | |_
6                         \___/_/\_\ .__/ \__,_|\__|
7                                  |_| XML parser
8 
9    Copyright (c) 2001-2006 Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
10    Copyright (c) 2003      Greg Stein <gstein@users.sourceforge.net>
11    Copyright (c) 2005-2007 Steven Solie <steven@solie.ca>
12    Copyright (c) 2005-2012 Karl Waclawek <karl@waclawek.net>
13    Copyright (c) 2016-2024 Sebastian Pipping <sebastian@pipping.org>
14    Copyright (c) 2017-2022 Rhodri James <rhodri@wildebeest.org.uk>
15    Copyright (c) 2017      Joe Orton <jorton@redhat.com>
16    Copyright (c) 2017      José Gutiérrez de la Concha <jose@zeroc.com>
17    Copyright (c) 2018      Marco Maggi <marco.maggi-ipsu@poste.it>
18    Copyright (c) 2019      David Loffredo <loffredo@steptools.com>
19    Copyright (c) 2020      Tim Gates <tim.gates@iress.com>
20    Copyright (c) 2021      Donghee Na <donghee.na@python.org>
21    Copyright (c) 2023      Sony Corporation / Snild Dolkow <snild@sony.com>
22    Licensed under the MIT license:
23 
24    Permission is  hereby granted,  free of charge,  to any  person obtaining
25    a  copy  of  this  software   and  associated  documentation  files  (the
26    "Software"),  to  deal in  the  Software  without restriction,  including
27    without  limitation the  rights  to use,  copy,  modify, merge,  publish,
28    distribute, sublicense, and/or sell copies of the Software, and to permit
29    persons  to whom  the Software  is  furnished to  do so,  subject to  the
30    following conditions:
31 
32    The above copyright  notice and this permission notice  shall be included
33    in all copies or substantial portions of the Software.
34 
35    THE  SOFTWARE  IS  PROVIDED  "AS  IS",  WITHOUT  WARRANTY  OF  ANY  KIND,
36    EXPRESS  OR IMPLIED,  INCLUDING  BUT  NOT LIMITED  TO  THE WARRANTIES  OF
37    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
38    NO EVENT SHALL THE AUTHORS OR  COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
39    DAMAGES OR  OTHER LIABILITY, WHETHER  IN AN  ACTION OF CONTRACT,  TORT OR
40    OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
41    USE OR OTHER DEALINGS IN THE SOFTWARE.
42 */
43 
44 #if defined(NDEBUG)
45 #  undef NDEBUG /* because test suite relies on assert(...) at the moment */
46 #endif
47 
48 #include <assert.h>
49 #include <string.h>
50 
51 #include "expat_config.h"
52 
53 #include "expat.h"
54 #include "internal.h"
55 #include "minicheck.h"
56 #include "memcheck.h"
57 #include "common.h"
58 #include "ascii.h" /* for ASCII_xxx */
59 #include "handlers.h"
60 #include "misc_tests.h"
61 
62 /* Test that a failure to allocate the parser structure fails gracefully */
63 START_TEST(test_misc_alloc_create_parser) {
64   XML_Memory_Handling_Suite memsuite = {duff_allocator, realloc, free};
65   unsigned int i;
66   const unsigned int max_alloc_count = 10;
67 
68   /* Something this simple shouldn't need more than 10 allocations */
69   for (i = 0; i < max_alloc_count; i++) {
70     g_allocation_count = i;
71     g_parser = XML_ParserCreate_MM(NULL, &memsuite, NULL);
72     if (g_parser != NULL)
73       break;
74   }
75   if (i == 0)
76     fail("Parser unexpectedly ignored failing allocator");
77   else if (i == max_alloc_count)
78     fail("Parser not created with max allocation count");
79 }
80 END_TEST
81 
82 /* Test memory allocation failures for a parser with an encoding */
83 START_TEST(test_misc_alloc_create_parser_with_encoding) {
84   XML_Memory_Handling_Suite memsuite = {duff_allocator, realloc, free};
85   unsigned int i;
86   const unsigned int max_alloc_count = 10;
87 
88   /* Try several levels of allocation */
89   for (i = 0; i < max_alloc_count; i++) {
90     g_allocation_count = i;
91     g_parser = XML_ParserCreate_MM(XCS("us-ascii"), &memsuite, NULL);
92     if (g_parser != NULL)
93       break;
94   }
95   if (i == 0)
96     fail("Parser ignored failing allocator");
97   else if (i == max_alloc_count)
98     fail("Parser not created with max allocation count");
99 }
100 END_TEST
101 
102 /* Test that freeing a NULL parser doesn't cause an explosion.
103  * (Not actually tested anywhere else)
104  */
105 START_TEST(test_misc_null_parser) {
106   XML_ParserFree(NULL);
107 }
108 END_TEST
109 
/* EXPAT_TESTS_UBSAN is 1 when the compiler provides __has_feature and
 * reports undefined_behavior_sanitizer as enabled; it is 0 in every
 * other case, including compilers without __has_feature.
 */
#ifndef __has_feature
#  define EXPAT_TESTS_UBSAN 0
#else
#  if ! __has_feature(undefined_behavior_sanitizer)
#    define EXPAT_TESTS_UBSAN 0
#  else
#    define EXPAT_TESTS_UBSAN 1
#  endif
#endif
119 
120 /* Test that XML_ErrorString rejects out-of-range codes */
121 START_TEST(test_misc_error_string) {
122 #if ! EXPAT_TESTS_UBSAN // because this would trigger UBSan
123   union {
124     enum XML_Error xml_error;
125     int integer;
126   } trickery;
127 
128   assert_true(sizeof(enum XML_Error) == sizeof(int)); // self-test
129 
130   trickery.integer = -1;
131   if (XML_ErrorString(trickery.xml_error) != NULL)
132     fail("Negative error code not rejected");
133 
134   trickery.integer = 100;
135   if (XML_ErrorString(trickery.xml_error) != NULL)
136     fail("Large error code not rejected");
137 #endif
138 }
139 END_TEST
140 
141 /* Test the version information is consistent */
142 
143 /* Since we are working in XML_LChars (potentially 16-bits), we
144  * can't use the standard C library functions for character
145  * manipulation and have to roll our own.
146  */
147 static int
148 parse_version(const XML_LChar *version_text,
149               XML_Expat_Version *version_struct) {
150   if (! version_text)
151     return XML_FALSE;
152 
153   while (*version_text != 0x00) {
154     if (*version_text >= ASCII_0 && *version_text <= ASCII_9)
155       break;
156     version_text++;
157   }
158   if (*version_text == 0x00)
159     return XML_FALSE;
160 
161   /* version_struct->major = strtoul(version_text, 10, &version_text) */
162   version_struct->major = 0;
163   while (*version_text >= ASCII_0 && *version_text <= ASCII_9) {
164     version_struct->major
165         = 10 * version_struct->major + (*version_text++ - ASCII_0);
166   }
167   if (*version_text++ != ASCII_PERIOD)
168     return XML_FALSE;
169 
170   /* Now for the minor version number */
171   version_struct->minor = 0;
172   while (*version_text >= ASCII_0 && *version_text <= ASCII_9) {
173     version_struct->minor
174         = 10 * version_struct->minor + (*version_text++ - ASCII_0);
175   }
176   if (*version_text++ != ASCII_PERIOD)
177     return XML_FALSE;
178 
179   /* Finally the micro version number */
180   version_struct->micro = 0;
181   while (*version_text >= ASCII_0 && *version_text <= ASCII_9) {
182     version_struct->micro
183         = 10 * version_struct->micro + (*version_text++ - ASCII_0);
184   }
185   if (*version_text != 0x00)
186     return XML_FALSE;
187   return XML_TRUE;
188 }
189 
190 static int
191 versions_equal(const XML_Expat_Version *first,
192                const XML_Expat_Version *second) {
193   return (first->major == second->major && first->minor == second->minor
194           && first->micro == second->micro);
195 }
196 
197 START_TEST(test_misc_version) {
198   XML_Expat_Version read_version = XML_ExpatVersionInfo();
199   /* Silence compiler warning with the following assignment */
200   XML_Expat_Version parsed_version = {0, 0, 0};
201   const XML_LChar *version_text = XML_ExpatVersion();
202 
203   if (version_text == NULL)
204     fail("Could not obtain version text");
205   assert(version_text != NULL);
206   if (! parse_version(version_text, &parsed_version))
207     fail("Unable to parse version text");
208   if (! versions_equal(&read_version, &parsed_version))
209     fail("Version mismatch");
210 
211   if (xcstrcmp(version_text, XCS("expat_2.6.0"))) /* needs bump on releases */
212     fail("XML_*_VERSION in expat.h out of sync?\n");
213 }
214 END_TEST
215 
216 /* Test feature information */
217 START_TEST(test_misc_features) {
218   const XML_Feature *features = XML_GetFeatureList();
219 
220   /* Prevent problems with double-freeing parsers */
221   g_parser = NULL;
222   if (features == NULL) {
223     fail("Failed to get feature information");
224   } else {
225     /* Loop through the features checking what we can */
226     while (features->feature != XML_FEATURE_END) {
227       switch (features->feature) {
228       case XML_FEATURE_SIZEOF_XML_CHAR:
229         if (features->value != sizeof(XML_Char))
230           fail("Incorrect size of XML_Char");
231         break;
232       case XML_FEATURE_SIZEOF_XML_LCHAR:
233         if (features->value != sizeof(XML_LChar))
234           fail("Incorrect size of XML_LChar");
235         break;
236       default:
237         break;
238       }
239       features++;
240     }
241   }
242 }
243 END_TEST
244 
245 /* Regression test for GitHub Issue #17: memory leak parsing attribute
246  * values with mixed bound and unbound namespaces.
247  */
248 START_TEST(test_misc_attribute_leak) {
249   const char *text = "<D xmlns:L=\"D\" l:a='' L:a=''/>";
250   XML_Memory_Handling_Suite memsuite
251       = {tracking_malloc, tracking_realloc, tracking_free};
252 
253   g_parser = XML_ParserCreate_MM(XCS("UTF-8"), &memsuite, XCS("\n"));
254   expect_failure(text, XML_ERROR_UNBOUND_PREFIX, "Unbound prefixes not found");
255   XML_ParserFree(g_parser);
256   /* Prevent the teardown trying to double free */
257   g_parser = NULL;
258 
259   if (! tracking_report())
260     fail("Memory leak found");
261 }
262 END_TEST
263 
264 /* Test parser created for UTF-16LE is successful */
265 START_TEST(test_misc_utf16le) {
266   const char text[] =
267       /* <?xml version='1.0'?><q>Hi</q> */
268       "<\0?\0x\0m\0l\0 \0"
269       "v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0?\0>\0"
270       "<\0q\0>\0H\0i\0<\0/\0q\0>\0";
271   const XML_Char *expected = XCS("Hi");
272   CharData storage;
273 
274   g_parser = XML_ParserCreate(XCS("UTF-16LE"));
275   if (g_parser == NULL)
276     fail("Parser not created");
277 
278   CharData_Init(&storage);
279   XML_SetUserData(g_parser, &storage);
280   XML_SetCharacterDataHandler(g_parser, accumulate_characters);
281   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
282       == XML_STATUS_ERROR)
283     xml_failure(g_parser);
284   CharData_CheckXMLChars(&storage, expected);
285 }
286 END_TEST
287 
288 START_TEST(test_misc_stop_during_end_handler_issue_240_1) {
289   XML_Parser parser;
290   DataIssue240 *mydata;
291   enum XML_Status result;
292   const char *const doc1 = "<doc><e1/><e><foo/></e></doc>";
293 
294   parser = XML_ParserCreate(NULL);
295   XML_SetElementHandler(parser, start_element_issue_240, end_element_issue_240);
296   mydata = (DataIssue240 *)malloc(sizeof(DataIssue240));
297   mydata->parser = parser;
298   mydata->deep = 0;
299   XML_SetUserData(parser, mydata);
300 
301   result = _XML_Parse_SINGLE_BYTES(parser, doc1, (int)strlen(doc1), 1);
302   XML_ParserFree(parser);
303   free(mydata);
304   if (result != XML_STATUS_ERROR)
305     fail("Stopping the parser did not work as expected");
306 }
307 END_TEST
308 
309 START_TEST(test_misc_stop_during_end_handler_issue_240_2) {
310   XML_Parser parser;
311   DataIssue240 *mydata;
312   enum XML_Status result;
313   const char *const doc2 = "<doc><elem/></doc>";
314 
315   parser = XML_ParserCreate(NULL);
316   XML_SetElementHandler(parser, start_element_issue_240, end_element_issue_240);
317   mydata = (DataIssue240 *)malloc(sizeof(DataIssue240));
318   mydata->parser = parser;
319   mydata->deep = 0;
320   XML_SetUserData(parser, mydata);
321 
322   result = _XML_Parse_SINGLE_BYTES(parser, doc2, (int)strlen(doc2), 1);
323   XML_ParserFree(parser);
324   free(mydata);
325   if (result != XML_STATUS_ERROR)
326     fail("Stopping the parser did not work as expected");
327 }
328 END_TEST
329 
330 START_TEST(test_misc_deny_internal_entity_closing_doctype_issue_317) {
331   const char *const inputOne = "<!DOCTYPE d [\n"
332                                "<!ENTITY % e ']><d/>'>\n"
333                                "\n"
334                                "%e;";
335   const char *const inputTwo = "<!DOCTYPE d [\n"
336                                "<!ENTITY % e1 ']><d/>'><!ENTITY % e2 '&e1;'>\n"
337                                "\n"
338                                "%e2;";
339   const char *const inputThree = "<!DOCTYPE d [\n"
340                                  "<!ENTITY % e ']><d'>\n"
341                                  "\n"
342                                  "%e;";
343   const char *const inputIssue317 = "<!DOCTYPE doc [\n"
344                                     "<!ENTITY % foo ']>\n"
345                                     "<doc>Hell<oc (#PCDATA)*>'>\n"
346                                     "%foo;\n"
347                                     "]>\n"
348                                     "<doc>Hello, world</dVc>";
349 
350   const char *const inputs[] = {inputOne, inputTwo, inputThree, inputIssue317};
351   size_t inputIndex = 0;
352 
353   for (; inputIndex < sizeof(inputs) / sizeof(inputs[0]); inputIndex++) {
354     set_subtest("%s", inputs[inputIndex]);
355     XML_Parser parser;
356     enum XML_Status parseResult;
357     int setParamEntityResult;
358     XML_Size lineNumber;
359     XML_Size columnNumber;
360     const char *const input = inputs[inputIndex];
361 
362     parser = XML_ParserCreate(NULL);
363     setParamEntityResult
364         = XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
365     if (setParamEntityResult != 1)
366       fail("Failed to set XML_PARAM_ENTITY_PARSING_ALWAYS.");
367 
368     parseResult = _XML_Parse_SINGLE_BYTES(parser, input, (int)strlen(input), 0);
369     if (parseResult != XML_STATUS_ERROR) {
370       parseResult = _XML_Parse_SINGLE_BYTES(parser, "", 0, 1);
371       if (parseResult != XML_STATUS_ERROR) {
372         fail("Parsing was expected to fail but succeeded.");
373       }
374     }
375 
376     if (XML_GetErrorCode(parser) != XML_ERROR_INVALID_TOKEN)
377       fail("Error code does not match XML_ERROR_INVALID_TOKEN");
378 
379     lineNumber = XML_GetCurrentLineNumber(parser);
380     if (lineNumber != 4)
381       fail("XML_GetCurrentLineNumber does not work as expected.");
382 
383     columnNumber = XML_GetCurrentColumnNumber(parser);
384     if (columnNumber != 0)
385       fail("XML_GetCurrentColumnNumber does not work as expected.");
386 
387     XML_ParserFree(parser);
388   }
389 }
390 END_TEST
391 
392 START_TEST(test_misc_tag_mismatch_reset_leak) {
393 #ifdef XML_NS
394   const char *const text = "<open xmlns='https://namespace1.test'></close>";
395   XML_Parser parser = XML_ParserCreateNS(NULL, XCS('\n'));
396 
397   if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
398       != XML_STATUS_ERROR)
399     fail("Call to parse was expected to fail");
400   if (XML_GetErrorCode(parser) != XML_ERROR_TAG_MISMATCH)
401     fail("Call to parse was expected to fail from a closing tag mismatch");
402 
403   XML_ParserReset(parser, NULL);
404 
405   if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
406       != XML_STATUS_ERROR)
407     fail("Call to parse was expected to fail");
408   if (XML_GetErrorCode(parser) != XML_ERROR_TAG_MISMATCH)
409     fail("Call to parse was expected to fail from a closing tag mismatch");
410 
411   XML_ParserFree(parser);
412 #endif
413 }
414 END_TEST
415 
416 START_TEST(test_misc_create_external_entity_parser_with_null_context) {
417   // With XML_DTD undefined, the only supported case of external entities
418   // is pattern "<!ENTITY entity123 SYSTEM 'filename123'>". A NULL context
419   // was causing a segfault through a null pointer dereference in function
420   // setContext, previously.
421   XML_Parser parser = XML_ParserCreate(NULL);
422   XML_Parser ext_parser = XML_ExternalEntityParserCreate(parser, NULL, NULL);
423 #ifdef XML_DTD
424   assert_true(ext_parser != NULL);
425   XML_ParserFree(ext_parser);
426 #else
427   assert_true(ext_parser == NULL);
428 #endif /* XML_DTD */
429   XML_ParserFree(parser);
430 }
431 END_TEST
432 
433 START_TEST(test_misc_general_entities_support) {
434   const char *const doc
435       = "<!DOCTYPE r [\n"
436         "<!ENTITY e1 'v1'>\n"
437         "<!ENTITY e2 SYSTEM 'v2'>\n"
438         "]>\n"
439         "<r a1='[&e1;]'>[&e1;][&e2;][&amp;&apos;&gt;&lt;&quot;]</r>";
440 
441   CharData storage;
442   CharData_Init(&storage);
443 
444   XML_Parser parser = XML_ParserCreate(NULL);
445   XML_SetUserData(parser, &storage);
446   XML_SetStartElementHandler(parser, accumulate_start_element);
447   XML_SetExternalEntityRefHandler(parser,
448                                   external_entity_failer__if_not_xml_ge);
449   XML_SetEntityDeclHandler(parser, accumulate_entity_decl);
450   XML_SetCharacterDataHandler(parser, accumulate_char_data);
451 
452   if (_XML_Parse_SINGLE_BYTES(parser, doc, (int)strlen(doc), XML_TRUE)
453       != XML_STATUS_OK) {
454     xml_failure(parser);
455   }
456 
457   XML_ParserFree(parser);
458 
459   CharData_CheckXMLChars(&storage,
460   /* clang-format off */
461 #if XML_GE == 1
462                          XCS("e1=v1\n")
463                          XCS("e2=(null)\n")
464                          XCS("(r(a1=[v1]))\n")
465                          XCS("[v1][][&'><\"]")
466 #else
467                          XCS("e1=&amp;e1;\n")
468                          XCS("e2=(null)\n")
469                          XCS("(r(a1=[&e1;]))\n")
470                          XCS("[&e1;][&e2;][&'><\"]")
471 #endif
472   );
473   /* clang-format on */
474 }
475 END_TEST
476 
477 static void XMLCALL
478 resumable_stopping_character_handler(void *userData, const XML_Char *s,
479                                      int len) {
480   UNUSED_P(s);
481   UNUSED_P(len);
482   XML_Parser parser = (XML_Parser)userData;
483   XML_StopParser(parser, XML_TRUE);
484 }
485 
486 // NOTE: This test needs active LeakSanitizer to be of actual use
487 START_TEST(test_misc_char_handler_stop_without_leak) {
488   const char *const data
489       = "<!DOCTYPE t1[<!ENTITY e1 'angle<'><!ENTITY e2 '&e1;'>]><t1>&e2;";
490   XML_Parser parser = XML_ParserCreate(NULL);
491   assert_true(parser != NULL);
492   XML_SetUserData(parser, parser);
493   XML_SetCharacterDataHandler(parser, resumable_stopping_character_handler);
494   _XML_Parse_SINGLE_BYTES(parser, data, (int)strlen(data), XML_FALSE);
495   XML_ParserFree(parser);
496 }
497 END_TEST
498 
499 void
500 make_miscellaneous_test_case(Suite *s) {
501   TCase *tc_misc = tcase_create("miscellaneous tests");
502 
503   suite_add_tcase(s, tc_misc);
504   tcase_add_checked_fixture(tc_misc, NULL, basic_teardown);
505 
506   tcase_add_test(tc_misc, test_misc_alloc_create_parser);
507   tcase_add_test(tc_misc, test_misc_alloc_create_parser_with_encoding);
508   tcase_add_test(tc_misc, test_misc_null_parser);
509   tcase_add_test(tc_misc, test_misc_error_string);
510   tcase_add_test(tc_misc, test_misc_version);
511   tcase_add_test(tc_misc, test_misc_features);
512   tcase_add_test(tc_misc, test_misc_attribute_leak);
513   tcase_add_test(tc_misc, test_misc_utf16le);
514   tcase_add_test(tc_misc, test_misc_stop_during_end_handler_issue_240_1);
515   tcase_add_test(tc_misc, test_misc_stop_during_end_handler_issue_240_2);
516   tcase_add_test__ifdef_xml_dtd(
517       tc_misc, test_misc_deny_internal_entity_closing_doctype_issue_317);
518   tcase_add_test(tc_misc, test_misc_tag_mismatch_reset_leak);
519   tcase_add_test(tc_misc,
520                  test_misc_create_external_entity_parser_with_null_context);
521   tcase_add_test(tc_misc, test_misc_general_entities_support);
522   tcase_add_test(tc_misc, test_misc_char_handler_stop_without_leak);
523 }
524