xref: /freebsd/contrib/expat/tests/basic_tests.c (revision 4543ef51)
1*4543ef51SXin LI /* Tests in the "basic" test case for the Expat test suite
2*4543ef51SXin LI                             __  __            _
3*4543ef51SXin LI                          ___\ \/ /_ __   __ _| |_
4*4543ef51SXin LI                         / _ \\  /| '_ \ / _` | __|
5*4543ef51SXin LI                        |  __//  \| |_) | (_| | |_
6*4543ef51SXin LI                         \___/_/\_\ .__/ \__,_|\__|
7*4543ef51SXin LI                                  |_| XML parser
8*4543ef51SXin LI 
9*4543ef51SXin LI    Copyright (c) 2001-2006 Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
10*4543ef51SXin LI    Copyright (c) 2003      Greg Stein <gstein@users.sourceforge.net>
11*4543ef51SXin LI    Copyright (c) 2005-2007 Steven Solie <steven@solie.ca>
12*4543ef51SXin LI    Copyright (c) 2005-2012 Karl Waclawek <karl@waclawek.net>
13*4543ef51SXin LI    Copyright (c) 2016-2024 Sebastian Pipping <sebastian@pipping.org>
14*4543ef51SXin LI    Copyright (c) 2017-2022 Rhodri James <rhodri@wildebeest.org.uk>
15*4543ef51SXin LI    Copyright (c) 2017      Joe Orton <jorton@redhat.com>
16*4543ef51SXin LI    Copyright (c) 2017      José Gutiérrez de la Concha <jose@zeroc.com>
17*4543ef51SXin LI    Copyright (c) 2018      Marco Maggi <marco.maggi-ipsu@poste.it>
18*4543ef51SXin LI    Copyright (c) 2019      David Loffredo <loffredo@steptools.com>
19*4543ef51SXin LI    Copyright (c) 2020      Tim Gates <tim.gates@iress.com>
20*4543ef51SXin LI    Copyright (c) 2021      Donghee Na <donghee.na@python.org>
21*4543ef51SXin LI    Copyright (c) 2023-2024 Sony Corporation / Snild Dolkow <snild@sony.com>
22*4543ef51SXin LI    Licensed under the MIT license:
23*4543ef51SXin LI 
24*4543ef51SXin LI    Permission is  hereby granted,  free of charge,  to any  person obtaining
25*4543ef51SXin LI    a  copy  of  this  software   and  associated  documentation  files  (the
26*4543ef51SXin LI    "Software"),  to  deal in  the  Software  without restriction,  including
27*4543ef51SXin LI    without  limitation the  rights  to use,  copy,  modify, merge,  publish,
28*4543ef51SXin LI    distribute, sublicense, and/or sell copies of the Software, and to permit
29*4543ef51SXin LI    persons  to whom  the Software  is  furnished to  do so,  subject to  the
30*4543ef51SXin LI    following conditions:
31*4543ef51SXin LI 
32*4543ef51SXin LI    The above copyright  notice and this permission notice  shall be included
33*4543ef51SXin LI    in all copies or substantial portions of the Software.
34*4543ef51SXin LI 
35*4543ef51SXin LI    THE  SOFTWARE  IS  PROVIDED  "AS  IS",  WITHOUT  WARRANTY  OF  ANY  KIND,
36*4543ef51SXin LI    EXPRESS  OR IMPLIED,  INCLUDING  BUT  NOT LIMITED  TO  THE WARRANTIES  OF
37*4543ef51SXin LI    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
38*4543ef51SXin LI    NO EVENT SHALL THE AUTHORS OR  COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
39*4543ef51SXin LI    DAMAGES OR  OTHER LIABILITY, WHETHER  IN AN  ACTION OF CONTRACT,  TORT OR
40*4543ef51SXin LI    OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
41*4543ef51SXin LI    USE OR OTHER DEALINGS IN THE SOFTWARE.
42*4543ef51SXin LI */
43*4543ef51SXin LI 
44*4543ef51SXin LI #if defined(NDEBUG)
45*4543ef51SXin LI #  undef NDEBUG /* because test suite relies on assert(...) at the moment */
46*4543ef51SXin LI #endif
47*4543ef51SXin LI 
48*4543ef51SXin LI #include <assert.h>
49*4543ef51SXin LI 
50*4543ef51SXin LI #include <stdio.h>
51*4543ef51SXin LI #include <string.h>
52*4543ef51SXin LI #include <time.h>
53*4543ef51SXin LI 
54*4543ef51SXin LI #if ! defined(__cplusplus)
55*4543ef51SXin LI #  include <stdbool.h>
56*4543ef51SXin LI #endif
57*4543ef51SXin LI 
58*4543ef51SXin LI #include "expat_config.h"
59*4543ef51SXin LI 
60*4543ef51SXin LI #include "expat.h"
61*4543ef51SXin LI #include "internal.h"
62*4543ef51SXin LI #include "minicheck.h"
63*4543ef51SXin LI #include "structdata.h"
64*4543ef51SXin LI #include "common.h"
65*4543ef51SXin LI #include "dummy.h"
66*4543ef51SXin LI #include "handlers.h"
67*4543ef51SXin LI #include "siphash.h"
68*4543ef51SXin LI #include "basic_tests.h"
69*4543ef51SXin LI 
70*4543ef51SXin LI static void
basic_setup(void)71*4543ef51SXin LI basic_setup(void) {
72*4543ef51SXin LI   g_parser = XML_ParserCreate(NULL);
73*4543ef51SXin LI   if (g_parser == NULL)
74*4543ef51SXin LI     fail("Parser not created.");
75*4543ef51SXin LI }
76*4543ef51SXin LI 
77*4543ef51SXin LI /*
78*4543ef51SXin LI  * Character & encoding tests.
79*4543ef51SXin LI  */
80*4543ef51SXin LI 
START_TEST(test_nul_byte)81*4543ef51SXin LI START_TEST(test_nul_byte) {
82*4543ef51SXin LI   char text[] = "<doc>\0</doc>";
83*4543ef51SXin LI 
84*4543ef51SXin LI   /* test that a NUL byte (in US-ASCII data) is an error */
85*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE)
86*4543ef51SXin LI       == XML_STATUS_OK)
87*4543ef51SXin LI     fail("Parser did not report error on NUL-byte.");
88*4543ef51SXin LI   if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN)
89*4543ef51SXin LI     xml_failure(g_parser);
90*4543ef51SXin LI }
91*4543ef51SXin LI END_TEST
92*4543ef51SXin LI 
START_TEST(test_u0000_char)93*4543ef51SXin LI START_TEST(test_u0000_char) {
94*4543ef51SXin LI   /* test that a NUL byte (in US-ASCII data) is an error */
95*4543ef51SXin LI   expect_failure("<doc>&#0;</doc>", XML_ERROR_BAD_CHAR_REF,
96*4543ef51SXin LI                  "Parser did not report error on NUL-byte.");
97*4543ef51SXin LI }
98*4543ef51SXin LI END_TEST
99*4543ef51SXin LI 
START_TEST(test_siphash_self)100*4543ef51SXin LI START_TEST(test_siphash_self) {
101*4543ef51SXin LI   if (! sip24_valid())
102*4543ef51SXin LI     fail("SipHash self-test failed");
103*4543ef51SXin LI }
104*4543ef51SXin LI END_TEST
105*4543ef51SXin LI 
START_TEST(test_siphash_spec)106*4543ef51SXin LI START_TEST(test_siphash_spec) {
107*4543ef51SXin LI   /* https://131002.net/siphash/siphash.pdf (page 19, "Test values") */
108*4543ef51SXin LI   const char message[] = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09"
109*4543ef51SXin LI                          "\x0a\x0b\x0c\x0d\x0e";
110*4543ef51SXin LI   const size_t len = sizeof(message) - 1;
111*4543ef51SXin LI   const uint64_t expected = SIP_ULL(0xa129ca61U, 0x49be45e5U);
112*4543ef51SXin LI   struct siphash state;
113*4543ef51SXin LI   struct sipkey key;
114*4543ef51SXin LI 
115*4543ef51SXin LI   sip_tokey(&key, "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09"
116*4543ef51SXin LI                   "\x0a\x0b\x0c\x0d\x0e\x0f");
117*4543ef51SXin LI   sip24_init(&state, &key);
118*4543ef51SXin LI 
119*4543ef51SXin LI   /* Cover spread across calls */
120*4543ef51SXin LI   sip24_update(&state, message, 4);
121*4543ef51SXin LI   sip24_update(&state, message + 4, len - 4);
122*4543ef51SXin LI 
123*4543ef51SXin LI   /* Cover null length */
124*4543ef51SXin LI   sip24_update(&state, message, 0);
125*4543ef51SXin LI 
126*4543ef51SXin LI   if (sip24_final(&state) != expected)
127*4543ef51SXin LI     fail("sip24_final failed spec test\n");
128*4543ef51SXin LI 
129*4543ef51SXin LI   /* Cover wrapper */
130*4543ef51SXin LI   if (siphash24(message, len, &key) != expected)
131*4543ef51SXin LI     fail("siphash24 failed spec test\n");
132*4543ef51SXin LI }
133*4543ef51SXin LI END_TEST
134*4543ef51SXin LI 
START_TEST(test_bom_utf8)135*4543ef51SXin LI START_TEST(test_bom_utf8) {
136*4543ef51SXin LI   /* This test is really just making sure we don't core on a UTF-8 BOM. */
137*4543ef51SXin LI   const char *text = "\357\273\277<e/>";
138*4543ef51SXin LI 
139*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
140*4543ef51SXin LI       == XML_STATUS_ERROR)
141*4543ef51SXin LI     xml_failure(g_parser);
142*4543ef51SXin LI }
143*4543ef51SXin LI END_TEST
144*4543ef51SXin LI 
START_TEST(test_bom_utf16_be)145*4543ef51SXin LI START_TEST(test_bom_utf16_be) {
146*4543ef51SXin LI   char text[] = "\376\377\0<\0e\0/\0>";
147*4543ef51SXin LI 
148*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE)
149*4543ef51SXin LI       == XML_STATUS_ERROR)
150*4543ef51SXin LI     xml_failure(g_parser);
151*4543ef51SXin LI }
152*4543ef51SXin LI END_TEST
153*4543ef51SXin LI 
START_TEST(test_bom_utf16_le)154*4543ef51SXin LI START_TEST(test_bom_utf16_le) {
155*4543ef51SXin LI   char text[] = "\377\376<\0e\0/\0>\0";
156*4543ef51SXin LI 
157*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE)
158*4543ef51SXin LI       == XML_STATUS_ERROR)
159*4543ef51SXin LI     xml_failure(g_parser);
160*4543ef51SXin LI }
161*4543ef51SXin LI END_TEST
162*4543ef51SXin LI 
START_TEST(test_nobom_utf16_le)163*4543ef51SXin LI START_TEST(test_nobom_utf16_le) {
164*4543ef51SXin LI   char text[] = " \0<\0e\0/\0>\0";
165*4543ef51SXin LI 
166*4543ef51SXin LI   if (g_chunkSize == 1) {
167*4543ef51SXin LI     // TODO: with just the first byte, we can't tell the difference between
168*4543ef51SXin LI     // UTF-16-LE and UTF-8. Avoid the failure for now.
169*4543ef51SXin LI     return;
170*4543ef51SXin LI   }
171*4543ef51SXin LI 
172*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE)
173*4543ef51SXin LI       == XML_STATUS_ERROR)
174*4543ef51SXin LI     xml_failure(g_parser);
175*4543ef51SXin LI }
176*4543ef51SXin LI END_TEST
177*4543ef51SXin LI 
START_TEST(test_hash_collision)178*4543ef51SXin LI START_TEST(test_hash_collision) {
179*4543ef51SXin LI   /* For full coverage of the lookup routine, we need to ensure a
180*4543ef51SXin LI    * hash collision even though we can only tell that we have one
181*4543ef51SXin LI    * through breakpoint debugging or coverage statistics.  The
182*4543ef51SXin LI    * following will cause a hash collision on machines with a 64-bit
183*4543ef51SXin LI    * long type; others will have to experiment.  The full coverage
184*4543ef51SXin LI    * tests invoked from qa.sh usually provide a hash collision, but
185*4543ef51SXin LI    * not always.  This is an attempt to provide insurance.
186*4543ef51SXin LI    */
187*4543ef51SXin LI #define COLLIDING_HASH_SALT (unsigned long)SIP_ULL(0xffffffffU, 0xff99fc90U)
188*4543ef51SXin LI   const char *text
189*4543ef51SXin LI       = "<doc>\n"
190*4543ef51SXin LI         "<a1/><a2/><a3/><a4/><a5/><a6/><a7/><a8/>\n"
191*4543ef51SXin LI         "<b1></b1><b2 attr='foo'>This is a foo</b2><b3></b3><b4></b4>\n"
192*4543ef51SXin LI         "<b5></b5><b6></b6><b7></b7><b8></b8>\n"
193*4543ef51SXin LI         "<c1/><c2/><c3/><c4/><c5/><c6/><c7/><c8/>\n"
194*4543ef51SXin LI         "<d1/><d2/><d3/><d4/><d5/><d6/><d7/>\n"
195*4543ef51SXin LI         "<d8>This triggers the table growth and collides with b2</d8>\n"
196*4543ef51SXin LI         "</doc>\n";
197*4543ef51SXin LI 
198*4543ef51SXin LI   XML_SetHashSalt(g_parser, COLLIDING_HASH_SALT);
199*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
200*4543ef51SXin LI       == XML_STATUS_ERROR)
201*4543ef51SXin LI     xml_failure(g_parser);
202*4543ef51SXin LI }
203*4543ef51SXin LI END_TEST
204*4543ef51SXin LI #undef COLLIDING_HASH_SALT
205*4543ef51SXin LI 
206*4543ef51SXin LI /* Regression test for SF bug #491986. */
START_TEST(test_danish_latin1)207*4543ef51SXin LI START_TEST(test_danish_latin1) {
208*4543ef51SXin LI   const char *text = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
209*4543ef51SXin LI                      "<e>J\xF8rgen \xE6\xF8\xE5\xC6\xD8\xC5</e>";
210*4543ef51SXin LI #ifdef XML_UNICODE
211*4543ef51SXin LI   const XML_Char *expected
212*4543ef51SXin LI       = XCS("J\x00f8rgen \x00e6\x00f8\x00e5\x00c6\x00d8\x00c5");
213*4543ef51SXin LI #else
214*4543ef51SXin LI   const XML_Char *expected
215*4543ef51SXin LI       = XCS("J\xC3\xB8rgen \xC3\xA6\xC3\xB8\xC3\xA5\xC3\x86\xC3\x98\xC3\x85");
216*4543ef51SXin LI #endif
217*4543ef51SXin LI   run_character_check(text, expected);
218*4543ef51SXin LI }
219*4543ef51SXin LI END_TEST
220*4543ef51SXin LI 
221*4543ef51SXin LI /* Regression test for SF bug #514281. */
START_TEST(test_french_charref_hexidecimal)222*4543ef51SXin LI START_TEST(test_french_charref_hexidecimal) {
223*4543ef51SXin LI   const char *text = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
224*4543ef51SXin LI                      "<doc>&#xE9;&#xE8;&#xE0;&#xE7;&#xEA;&#xC8;</doc>";
225*4543ef51SXin LI #ifdef XML_UNICODE
226*4543ef51SXin LI   const XML_Char *expected = XCS("\x00e9\x00e8\x00e0\x00e7\x00ea\x00c8");
227*4543ef51SXin LI #else
228*4543ef51SXin LI   const XML_Char *expected
229*4543ef51SXin LI       = XCS("\xC3\xA9\xC3\xA8\xC3\xA0\xC3\xA7\xC3\xAA\xC3\x88");
230*4543ef51SXin LI #endif
231*4543ef51SXin LI   run_character_check(text, expected);
232*4543ef51SXin LI }
233*4543ef51SXin LI END_TEST
234*4543ef51SXin LI 
START_TEST(test_french_charref_decimal)235*4543ef51SXin LI START_TEST(test_french_charref_decimal) {
236*4543ef51SXin LI   const char *text = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
237*4543ef51SXin LI                      "<doc>&#233;&#232;&#224;&#231;&#234;&#200;</doc>";
238*4543ef51SXin LI #ifdef XML_UNICODE
239*4543ef51SXin LI   const XML_Char *expected = XCS("\x00e9\x00e8\x00e0\x00e7\x00ea\x00c8");
240*4543ef51SXin LI #else
241*4543ef51SXin LI   const XML_Char *expected
242*4543ef51SXin LI       = XCS("\xC3\xA9\xC3\xA8\xC3\xA0\xC3\xA7\xC3\xAA\xC3\x88");
243*4543ef51SXin LI #endif
244*4543ef51SXin LI   run_character_check(text, expected);
245*4543ef51SXin LI }
246*4543ef51SXin LI END_TEST
247*4543ef51SXin LI 
START_TEST(test_french_latin1)248*4543ef51SXin LI START_TEST(test_french_latin1) {
249*4543ef51SXin LI   const char *text = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
250*4543ef51SXin LI                      "<doc>\xE9\xE8\xE0\xE7\xEa\xC8</doc>";
251*4543ef51SXin LI #ifdef XML_UNICODE
252*4543ef51SXin LI   const XML_Char *expected = XCS("\x00e9\x00e8\x00e0\x00e7\x00ea\x00c8");
253*4543ef51SXin LI #else
254*4543ef51SXin LI   const XML_Char *expected
255*4543ef51SXin LI       = XCS("\xC3\xA9\xC3\xA8\xC3\xA0\xC3\xA7\xC3\xAA\xC3\x88");
256*4543ef51SXin LI #endif
257*4543ef51SXin LI   run_character_check(text, expected);
258*4543ef51SXin LI }
259*4543ef51SXin LI END_TEST
260*4543ef51SXin LI 
START_TEST(test_french_utf8)261*4543ef51SXin LI START_TEST(test_french_utf8) {
262*4543ef51SXin LI   const char *text = "<?xml version='1.0' encoding='utf-8'?>\n"
263*4543ef51SXin LI                      "<doc>\xC3\xA9</doc>";
264*4543ef51SXin LI #ifdef XML_UNICODE
265*4543ef51SXin LI   const XML_Char *expected = XCS("\x00e9");
266*4543ef51SXin LI #else
267*4543ef51SXin LI   const XML_Char *expected = XCS("\xC3\xA9");
268*4543ef51SXin LI #endif
269*4543ef51SXin LI   run_character_check(text, expected);
270*4543ef51SXin LI }
271*4543ef51SXin LI END_TEST
272*4543ef51SXin LI 
273*4543ef51SXin LI /* Regression test for SF bug #600479.
274*4543ef51SXin LI    XXX There should be a test that exercises all legal XML Unicode
275*4543ef51SXin LI    characters as PCDATA and attribute value content, and XML Name
276*4543ef51SXin LI    characters as part of element and attribute names.
277*4543ef51SXin LI */
START_TEST(test_utf8_false_rejection)278*4543ef51SXin LI START_TEST(test_utf8_false_rejection) {
279*4543ef51SXin LI   const char *text = "<doc>\xEF\xBA\xBF</doc>";
280*4543ef51SXin LI #ifdef XML_UNICODE
281*4543ef51SXin LI   const XML_Char *expected = XCS("\xfebf");
282*4543ef51SXin LI #else
283*4543ef51SXin LI   const XML_Char *expected = XCS("\xEF\xBA\xBF");
284*4543ef51SXin LI #endif
285*4543ef51SXin LI   run_character_check(text, expected);
286*4543ef51SXin LI }
287*4543ef51SXin LI END_TEST
288*4543ef51SXin LI 
289*4543ef51SXin LI /* Regression test for SF bug #477667.
290*4543ef51SXin LI    This test assures that any 8-bit character followed by a 7-bit
291*4543ef51SXin LI    character will not be mistakenly interpreted as a valid UTF-8
292*4543ef51SXin LI    sequence.
293*4543ef51SXin LI */
START_TEST(test_illegal_utf8)294*4543ef51SXin LI START_TEST(test_illegal_utf8) {
295*4543ef51SXin LI   char text[100];
296*4543ef51SXin LI   int i;
297*4543ef51SXin LI 
298*4543ef51SXin LI   for (i = 128; i <= 255; ++i) {
299*4543ef51SXin LI     snprintf(text, sizeof(text), "<e>%ccd</e>", i);
300*4543ef51SXin LI     if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
301*4543ef51SXin LI         == XML_STATUS_OK) {
302*4543ef51SXin LI       snprintf(text, sizeof(text),
303*4543ef51SXin LI                "expected token error for '%c' (ordinal %d) in UTF-8 text", i,
304*4543ef51SXin LI                i);
305*4543ef51SXin LI       fail(text);
306*4543ef51SXin LI     } else if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN)
307*4543ef51SXin LI       xml_failure(g_parser);
308*4543ef51SXin LI     /* Reset the parser since we use the same parser repeatedly. */
309*4543ef51SXin LI     XML_ParserReset(g_parser, NULL);
310*4543ef51SXin LI   }
311*4543ef51SXin LI }
312*4543ef51SXin LI END_TEST
313*4543ef51SXin LI 
314*4543ef51SXin LI /* Examples, not masks: */
315*4543ef51SXin LI #define UTF8_LEAD_1 "\x7f" /* 0b01111111 */
316*4543ef51SXin LI #define UTF8_LEAD_2 "\xdf" /* 0b11011111 */
317*4543ef51SXin LI #define UTF8_LEAD_3 "\xef" /* 0b11101111 */
318*4543ef51SXin LI #define UTF8_LEAD_4 "\xf7" /* 0b11110111 */
319*4543ef51SXin LI #define UTF8_FOLLOW "\xbf" /* 0b10111111 */
320*4543ef51SXin LI 
START_TEST(test_utf8_auto_align)321*4543ef51SXin LI START_TEST(test_utf8_auto_align) {
322*4543ef51SXin LI   struct TestCase {
323*4543ef51SXin LI     ptrdiff_t expectedMovementInChars;
324*4543ef51SXin LI     const char *input;
325*4543ef51SXin LI   };
326*4543ef51SXin LI 
327*4543ef51SXin LI   struct TestCase cases[] = {
328*4543ef51SXin LI       {00, ""},
329*4543ef51SXin LI 
330*4543ef51SXin LI       {00, UTF8_LEAD_1},
331*4543ef51SXin LI 
332*4543ef51SXin LI       {-1, UTF8_LEAD_2},
333*4543ef51SXin LI       {00, UTF8_LEAD_2 UTF8_FOLLOW},
334*4543ef51SXin LI 
335*4543ef51SXin LI       {-1, UTF8_LEAD_3},
336*4543ef51SXin LI       {-2, UTF8_LEAD_3 UTF8_FOLLOW},
337*4543ef51SXin LI       {00, UTF8_LEAD_3 UTF8_FOLLOW UTF8_FOLLOW},
338*4543ef51SXin LI 
339*4543ef51SXin LI       {-1, UTF8_LEAD_4},
340*4543ef51SXin LI       {-2, UTF8_LEAD_4 UTF8_FOLLOW},
341*4543ef51SXin LI       {-3, UTF8_LEAD_4 UTF8_FOLLOW UTF8_FOLLOW},
342*4543ef51SXin LI       {00, UTF8_LEAD_4 UTF8_FOLLOW UTF8_FOLLOW UTF8_FOLLOW},
343*4543ef51SXin LI   };
344*4543ef51SXin LI 
345*4543ef51SXin LI   size_t i = 0;
346*4543ef51SXin LI   bool success = true;
347*4543ef51SXin LI   for (; i < sizeof(cases) / sizeof(*cases); i++) {
348*4543ef51SXin LI     const char *fromLim = cases[i].input + strlen(cases[i].input);
349*4543ef51SXin LI     const char *const fromLimInitially = fromLim;
350*4543ef51SXin LI     ptrdiff_t actualMovementInChars;
351*4543ef51SXin LI 
352*4543ef51SXin LI     _INTERNAL_trim_to_complete_utf8_characters(cases[i].input, &fromLim);
353*4543ef51SXin LI 
354*4543ef51SXin LI     actualMovementInChars = (fromLim - fromLimInitially);
355*4543ef51SXin LI     if (actualMovementInChars != cases[i].expectedMovementInChars) {
356*4543ef51SXin LI       size_t j = 0;
357*4543ef51SXin LI       success = false;
358*4543ef51SXin LI       printf("[-] UTF-8 case %2u: Expected movement by %2d chars"
359*4543ef51SXin LI              ", actually moved by %2d chars: \"",
360*4543ef51SXin LI              (unsigned)(i + 1), (int)cases[i].expectedMovementInChars,
361*4543ef51SXin LI              (int)actualMovementInChars);
362*4543ef51SXin LI       for (; j < strlen(cases[i].input); j++) {
363*4543ef51SXin LI         printf("\\x%02x", (unsigned char)cases[i].input[j]);
364*4543ef51SXin LI       }
365*4543ef51SXin LI       printf("\"\n");
366*4543ef51SXin LI     }
367*4543ef51SXin LI   }
368*4543ef51SXin LI 
369*4543ef51SXin LI   if (! success) {
370*4543ef51SXin LI     fail("UTF-8 auto-alignment is not bullet-proof\n");
371*4543ef51SXin LI   }
372*4543ef51SXin LI }
373*4543ef51SXin LI END_TEST
374*4543ef51SXin LI 
START_TEST(test_utf16)375*4543ef51SXin LI START_TEST(test_utf16) {
376*4543ef51SXin LI   /* <?xml version="1.0" encoding="UTF-16"?>
377*4543ef51SXin LI    *  <doc a='123'>some {A} text</doc>
378*4543ef51SXin LI    *
379*4543ef51SXin LI    * where {A} is U+FF21, FULLWIDTH LATIN CAPITAL LETTER A
380*4543ef51SXin LI    */
381*4543ef51SXin LI   char text[]
382*4543ef51SXin LI       = "\000<\000?\000x\000m\000\154\000 \000v\000e\000r\000s\000i\000o"
383*4543ef51SXin LI         "\000n\000=\000'\0001\000.\000\060\000'\000 \000e\000n\000c\000o"
384*4543ef51SXin LI         "\000d\000i\000n\000g\000=\000'\000U\000T\000F\000-\0001\000\066"
385*4543ef51SXin LI         "\000'\000?\000>\000\n"
386*4543ef51SXin LI         "\000<\000d\000o\000c\000 \000a\000=\000'\0001\0002\0003\000'\000>"
387*4543ef51SXin LI         "\000s\000o\000m\000e\000 \xff\x21\000 \000t\000e\000x\000t\000"
388*4543ef51SXin LI         "<\000/\000d\000o\000c\000>";
389*4543ef51SXin LI #ifdef XML_UNICODE
390*4543ef51SXin LI   const XML_Char *expected = XCS("some \xff21 text");
391*4543ef51SXin LI #else
392*4543ef51SXin LI   const XML_Char *expected = XCS("some \357\274\241 text");
393*4543ef51SXin LI #endif
394*4543ef51SXin LI   CharData storage;
395*4543ef51SXin LI 
396*4543ef51SXin LI   CharData_Init(&storage);
397*4543ef51SXin LI   XML_SetUserData(g_parser, &storage);
398*4543ef51SXin LI   XML_SetCharacterDataHandler(g_parser, accumulate_characters);
399*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE)
400*4543ef51SXin LI       == XML_STATUS_ERROR)
401*4543ef51SXin LI     xml_failure(g_parser);
402*4543ef51SXin LI   CharData_CheckXMLChars(&storage, expected);
403*4543ef51SXin LI }
404*4543ef51SXin LI END_TEST
405*4543ef51SXin LI 
START_TEST(test_utf16_le_epilog_newline)406*4543ef51SXin LI START_TEST(test_utf16_le_epilog_newline) {
407*4543ef51SXin LI   unsigned int first_chunk_bytes = 17;
408*4543ef51SXin LI   char text[] = "\xFF\xFE"                  /* BOM */
409*4543ef51SXin LI                 "<\000e\000/\000>\000"      /* document element */
410*4543ef51SXin LI                 "\r\000\n\000\r\000\n\000"; /* epilog */
411*4543ef51SXin LI 
412*4543ef51SXin LI   if (first_chunk_bytes >= sizeof(text) - 1)
413*4543ef51SXin LI     fail("bad value of first_chunk_bytes");
414*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, first_chunk_bytes, XML_FALSE)
415*4543ef51SXin LI       == XML_STATUS_ERROR)
416*4543ef51SXin LI     xml_failure(g_parser);
417*4543ef51SXin LI   else {
418*4543ef51SXin LI     enum XML_Status rc;
419*4543ef51SXin LI     rc = _XML_Parse_SINGLE_BYTES(g_parser, text + first_chunk_bytes,
420*4543ef51SXin LI                                  sizeof(text) - first_chunk_bytes - 1,
421*4543ef51SXin LI                                  XML_TRUE);
422*4543ef51SXin LI     if (rc == XML_STATUS_ERROR)
423*4543ef51SXin LI       xml_failure(g_parser);
424*4543ef51SXin LI   }
425*4543ef51SXin LI }
426*4543ef51SXin LI END_TEST
427*4543ef51SXin LI 
428*4543ef51SXin LI /* Test that an outright lie in the encoding is faulted */
START_TEST(test_not_utf16)429*4543ef51SXin LI START_TEST(test_not_utf16) {
430*4543ef51SXin LI   const char *text = "<?xml version='1.0' encoding='utf-16'?>"
431*4543ef51SXin LI                      "<doc>Hi</doc>";
432*4543ef51SXin LI 
433*4543ef51SXin LI   /* Use a handler to provoke the appropriate code paths */
434*4543ef51SXin LI   XML_SetXmlDeclHandler(g_parser, dummy_xdecl_handler);
435*4543ef51SXin LI   expect_failure(text, XML_ERROR_INCORRECT_ENCODING,
436*4543ef51SXin LI                  "UTF-16 declared in UTF-8 not faulted");
437*4543ef51SXin LI }
438*4543ef51SXin LI END_TEST
439*4543ef51SXin LI 
440*4543ef51SXin LI /* Test that an unknown encoding is rejected */
START_TEST(test_bad_encoding)441*4543ef51SXin LI START_TEST(test_bad_encoding) {
442*4543ef51SXin LI   const char *text = "<doc>Hi</doc>";
443*4543ef51SXin LI 
444*4543ef51SXin LI   if (! XML_SetEncoding(g_parser, XCS("unknown-encoding")))
445*4543ef51SXin LI     fail("XML_SetEncoding failed");
446*4543ef51SXin LI   expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
447*4543ef51SXin LI                  "Unknown encoding not faulted");
448*4543ef51SXin LI }
449*4543ef51SXin LI END_TEST
450*4543ef51SXin LI 
451*4543ef51SXin LI /* Regression test for SF bug #481609, #774028. */
START_TEST(test_latin1_umlauts)452*4543ef51SXin LI START_TEST(test_latin1_umlauts) {
453*4543ef51SXin LI   const char *text
454*4543ef51SXin LI       = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
455*4543ef51SXin LI         "<e a='\xE4 \xF6 \xFC &#228; &#246; &#252; &#x00E4; &#x0F6; &#xFC; >'\n"
456*4543ef51SXin LI         "  >\xE4 \xF6 \xFC &#228; &#246; &#252; &#x00E4; &#x0F6; &#xFC; ></e>";
457*4543ef51SXin LI #ifdef XML_UNICODE
458*4543ef51SXin LI   /* Expected results in UTF-16 */
459*4543ef51SXin LI   const XML_Char *expected = XCS("\x00e4 \x00f6 \x00fc ")
460*4543ef51SXin LI       XCS("\x00e4 \x00f6 \x00fc ") XCS("\x00e4 \x00f6 \x00fc >");
461*4543ef51SXin LI #else
462*4543ef51SXin LI   /* Expected results in UTF-8 */
463*4543ef51SXin LI   const XML_Char *expected = XCS("\xC3\xA4 \xC3\xB6 \xC3\xBC ")
464*4543ef51SXin LI       XCS("\xC3\xA4 \xC3\xB6 \xC3\xBC ") XCS("\xC3\xA4 \xC3\xB6 \xC3\xBC >");
465*4543ef51SXin LI #endif
466*4543ef51SXin LI 
467*4543ef51SXin LI   run_character_check(text, expected);
468*4543ef51SXin LI   XML_ParserReset(g_parser, NULL);
469*4543ef51SXin LI   run_attribute_check(text, expected);
470*4543ef51SXin LI   /* Repeat with a default handler */
471*4543ef51SXin LI   XML_ParserReset(g_parser, NULL);
472*4543ef51SXin LI   XML_SetDefaultHandler(g_parser, dummy_default_handler);
473*4543ef51SXin LI   run_character_check(text, expected);
474*4543ef51SXin LI   XML_ParserReset(g_parser, NULL);
475*4543ef51SXin LI   XML_SetDefaultHandler(g_parser, dummy_default_handler);
476*4543ef51SXin LI   run_attribute_check(text, expected);
477*4543ef51SXin LI }
478*4543ef51SXin LI END_TEST
479*4543ef51SXin LI 
480*4543ef51SXin LI /* Test that an element name with a 4-byte UTF-8 character is rejected */
START_TEST(test_long_utf8_character)481*4543ef51SXin LI START_TEST(test_long_utf8_character) {
482*4543ef51SXin LI   const char *text
483*4543ef51SXin LI       = "<?xml version='1.0' encoding='utf-8'?>\n"
484*4543ef51SXin LI         /* 0xf0 0x90 0x80 0x80 = U+10000, the first Linear B character */
485*4543ef51SXin LI         "<do\xf0\x90\x80\x80/>";
486*4543ef51SXin LI   expect_failure(text, XML_ERROR_INVALID_TOKEN,
487*4543ef51SXin LI                  "4-byte UTF-8 character in element name not faulted");
488*4543ef51SXin LI }
489*4543ef51SXin LI END_TEST
490*4543ef51SXin LI 
491*4543ef51SXin LI /* Test that a long latin-1 attribute (too long to convert in one go)
492*4543ef51SXin LI  * is correctly converted
493*4543ef51SXin LI  */
START_TEST(test_long_latin1_attribute)494*4543ef51SXin LI START_TEST(test_long_latin1_attribute) {
495*4543ef51SXin LI   const char *text
496*4543ef51SXin LI       = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
497*4543ef51SXin LI         "<doc att='"
498*4543ef51SXin LI         /* 64 characters per line */
499*4543ef51SXin LI         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
500*4543ef51SXin LI         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
501*4543ef51SXin LI         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
502*4543ef51SXin LI         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
503*4543ef51SXin LI         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
504*4543ef51SXin LI         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
505*4543ef51SXin LI         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
506*4543ef51SXin LI         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
507*4543ef51SXin LI         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
508*4543ef51SXin LI         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
509*4543ef51SXin LI         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
510*4543ef51SXin LI         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
511*4543ef51SXin LI         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
512*4543ef51SXin LI         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
513*4543ef51SXin LI         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
514*4543ef51SXin LI         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNO"
515*4543ef51SXin LI         /* Last character splits across a buffer boundary */
516*4543ef51SXin LI         "\xe4'>\n</doc>";
517*4543ef51SXin LI 
518*4543ef51SXin LI   const XML_Char *expected =
519*4543ef51SXin LI       /* 64 characters per line */
520*4543ef51SXin LI       /* clang-format off */
521*4543ef51SXin LI         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
522*4543ef51SXin LI         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
523*4543ef51SXin LI         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
524*4543ef51SXin LI         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
525*4543ef51SXin LI         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
526*4543ef51SXin LI         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
527*4543ef51SXin LI         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
528*4543ef51SXin LI         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
529*4543ef51SXin LI         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
530*4543ef51SXin LI         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
531*4543ef51SXin LI         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
532*4543ef51SXin LI         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
533*4543ef51SXin LI         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
534*4543ef51SXin LI         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
535*4543ef51SXin LI         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
536*4543ef51SXin LI         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNO")
537*4543ef51SXin LI   /* clang-format on */
538*4543ef51SXin LI #ifdef XML_UNICODE
539*4543ef51SXin LI                                                   XCS("\x00e4");
540*4543ef51SXin LI #else
541*4543ef51SXin LI                                                   XCS("\xc3\xa4");
542*4543ef51SXin LI #endif
543*4543ef51SXin LI 
544*4543ef51SXin LI   run_attribute_check(text, expected);
545*4543ef51SXin LI }
546*4543ef51SXin LI END_TEST
547*4543ef51SXin LI 
548*4543ef51SXin LI /* Test that a long ASCII attribute (too long to convert in one go)
549*4543ef51SXin LI  * is correctly converted
550*4543ef51SXin LI  */
START_TEST(test_long_ascii_attribute)551*4543ef51SXin LI START_TEST(test_long_ascii_attribute) {
552*4543ef51SXin LI   const char *text
553*4543ef51SXin LI       = "<?xml version='1.0' encoding='us-ascii'?>\n"
554*4543ef51SXin LI         "<doc att='"
555*4543ef51SXin LI         /* 64 characters per line */
556*4543ef51SXin LI         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
557*4543ef51SXin LI         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
558*4543ef51SXin LI         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
559*4543ef51SXin LI         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
560*4543ef51SXin LI         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
561*4543ef51SXin LI         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
562*4543ef51SXin LI         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
563*4543ef51SXin LI         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
564*4543ef51SXin LI         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
565*4543ef51SXin LI         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
566*4543ef51SXin LI         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
567*4543ef51SXin LI         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
568*4543ef51SXin LI         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
569*4543ef51SXin LI         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
570*4543ef51SXin LI         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
571*4543ef51SXin LI         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
572*4543ef51SXin LI         "01234'>\n</doc>";
573*4543ef51SXin LI   const XML_Char *expected =
574*4543ef51SXin LI       /* 64 characters per line */
575*4543ef51SXin LI       /* clang-format off */
576*4543ef51SXin LI         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
577*4543ef51SXin LI         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
578*4543ef51SXin LI         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
579*4543ef51SXin LI         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
580*4543ef51SXin LI         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
581*4543ef51SXin LI         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
582*4543ef51SXin LI         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
583*4543ef51SXin LI         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
584*4543ef51SXin LI         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
585*4543ef51SXin LI         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
586*4543ef51SXin LI         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
587*4543ef51SXin LI         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
588*4543ef51SXin LI         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
589*4543ef51SXin LI         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
590*4543ef51SXin LI         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
591*4543ef51SXin LI         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
592*4543ef51SXin LI         XCS("01234");
593*4543ef51SXin LI   /* clang-format on */
594*4543ef51SXin LI 
595*4543ef51SXin LI   run_attribute_check(text, expected);
596*4543ef51SXin LI }
597*4543ef51SXin LI END_TEST
598*4543ef51SXin LI 
599*4543ef51SXin LI /* Regression test #1 for SF bug #653180. */
START_TEST(test_line_number_after_parse)600*4543ef51SXin LI START_TEST(test_line_number_after_parse) {
601*4543ef51SXin LI   const char *text = "<tag>\n"
602*4543ef51SXin LI                      "\n"
603*4543ef51SXin LI                      "\n</tag>";
604*4543ef51SXin LI   XML_Size lineno;
605*4543ef51SXin LI 
606*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
607*4543ef51SXin LI       == XML_STATUS_ERROR)
608*4543ef51SXin LI     xml_failure(g_parser);
609*4543ef51SXin LI   lineno = XML_GetCurrentLineNumber(g_parser);
610*4543ef51SXin LI   if (lineno != 4) {
611*4543ef51SXin LI     char buffer[100];
612*4543ef51SXin LI     snprintf(buffer, sizeof(buffer),
613*4543ef51SXin LI              "expected 4 lines, saw %" XML_FMT_INT_MOD "u", lineno);
614*4543ef51SXin LI     fail(buffer);
615*4543ef51SXin LI   }
616*4543ef51SXin LI }
617*4543ef51SXin LI END_TEST
618*4543ef51SXin LI 
619*4543ef51SXin LI /* Regression test #2 for SF bug #653180. */
START_TEST(test_column_number_after_parse)620*4543ef51SXin LI START_TEST(test_column_number_after_parse) {
621*4543ef51SXin LI   const char *text = "<tag></tag>";
622*4543ef51SXin LI   XML_Size colno;
623*4543ef51SXin LI 
624*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
625*4543ef51SXin LI       == XML_STATUS_ERROR)
626*4543ef51SXin LI     xml_failure(g_parser);
627*4543ef51SXin LI   colno = XML_GetCurrentColumnNumber(g_parser);
628*4543ef51SXin LI   if (colno != 11) {
629*4543ef51SXin LI     char buffer[100];
630*4543ef51SXin LI     snprintf(buffer, sizeof(buffer),
631*4543ef51SXin LI              "expected 11 columns, saw %" XML_FMT_INT_MOD "u", colno);
632*4543ef51SXin LI     fail(buffer);
633*4543ef51SXin LI   }
634*4543ef51SXin LI }
635*4543ef51SXin LI END_TEST
636*4543ef51SXin LI 
637*4543ef51SXin LI /* Regression test #3 for SF bug #653180. */
START_TEST(test_line_and_column_numbers_inside_handlers)638*4543ef51SXin LI START_TEST(test_line_and_column_numbers_inside_handlers) {
639*4543ef51SXin LI   const char *text = "<a>\n"      /* Unix end-of-line */
640*4543ef51SXin LI                      "  <b>\r\n"  /* Windows end-of-line */
641*4543ef51SXin LI                      "    <c/>\r" /* Mac OS end-of-line */
642*4543ef51SXin LI                      "  </b>\n"
643*4543ef51SXin LI                      "  <d>\n"
644*4543ef51SXin LI                      "    <f/>\n"
645*4543ef51SXin LI                      "  </d>\n"
646*4543ef51SXin LI                      "</a>";
647*4543ef51SXin LI   const StructDataEntry expected[]
648*4543ef51SXin LI       = {{XCS("a"), 0, 1, STRUCT_START_TAG}, {XCS("b"), 2, 2, STRUCT_START_TAG},
649*4543ef51SXin LI          {XCS("c"), 4, 3, STRUCT_START_TAG}, {XCS("c"), 8, 3, STRUCT_END_TAG},
650*4543ef51SXin LI          {XCS("b"), 2, 4, STRUCT_END_TAG},   {XCS("d"), 2, 5, STRUCT_START_TAG},
651*4543ef51SXin LI          {XCS("f"), 4, 6, STRUCT_START_TAG}, {XCS("f"), 8, 6, STRUCT_END_TAG},
652*4543ef51SXin LI          {XCS("d"), 2, 7, STRUCT_END_TAG},   {XCS("a"), 0, 8, STRUCT_END_TAG}};
653*4543ef51SXin LI   const int expected_count = sizeof(expected) / sizeof(StructDataEntry);
654*4543ef51SXin LI   StructData storage;
655*4543ef51SXin LI 
656*4543ef51SXin LI   StructData_Init(&storage);
657*4543ef51SXin LI   XML_SetUserData(g_parser, &storage);
658*4543ef51SXin LI   XML_SetStartElementHandler(g_parser, start_element_event_handler2);
659*4543ef51SXin LI   XML_SetEndElementHandler(g_parser, end_element_event_handler2);
660*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
661*4543ef51SXin LI       == XML_STATUS_ERROR)
662*4543ef51SXin LI     xml_failure(g_parser);
663*4543ef51SXin LI 
664*4543ef51SXin LI   StructData_CheckItems(&storage, expected, expected_count);
665*4543ef51SXin LI   StructData_Dispose(&storage);
666*4543ef51SXin LI }
667*4543ef51SXin LI END_TEST
668*4543ef51SXin LI 
669*4543ef51SXin LI /* Regression test #4 for SF bug #653180. */
START_TEST(test_line_number_after_error)670*4543ef51SXin LI START_TEST(test_line_number_after_error) {
671*4543ef51SXin LI   const char *text = "<a>\n"
672*4543ef51SXin LI                      "  <b>\n"
673*4543ef51SXin LI                      "  </a>"; /* missing </b> */
674*4543ef51SXin LI   XML_Size lineno;
675*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
676*4543ef51SXin LI       != XML_STATUS_ERROR)
677*4543ef51SXin LI     fail("Expected a parse error");
678*4543ef51SXin LI 
679*4543ef51SXin LI   lineno = XML_GetCurrentLineNumber(g_parser);
680*4543ef51SXin LI   if (lineno != 3) {
681*4543ef51SXin LI     char buffer[100];
682*4543ef51SXin LI     snprintf(buffer, sizeof(buffer),
683*4543ef51SXin LI              "expected 3 lines, saw %" XML_FMT_INT_MOD "u", lineno);
684*4543ef51SXin LI     fail(buffer);
685*4543ef51SXin LI   }
686*4543ef51SXin LI }
687*4543ef51SXin LI END_TEST
688*4543ef51SXin LI 
689*4543ef51SXin LI /* Regression test #5 for SF bug #653180. */
START_TEST(test_column_number_after_error)690*4543ef51SXin LI START_TEST(test_column_number_after_error) {
691*4543ef51SXin LI   const char *text = "<a>\n"
692*4543ef51SXin LI                      "  <b>\n"
693*4543ef51SXin LI                      "  </a>"; /* missing </b> */
694*4543ef51SXin LI   XML_Size colno;
695*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
696*4543ef51SXin LI       != XML_STATUS_ERROR)
697*4543ef51SXin LI     fail("Expected a parse error");
698*4543ef51SXin LI 
699*4543ef51SXin LI   colno = XML_GetCurrentColumnNumber(g_parser);
700*4543ef51SXin LI   if (colno != 4) {
701*4543ef51SXin LI     char buffer[100];
702*4543ef51SXin LI     snprintf(buffer, sizeof(buffer),
703*4543ef51SXin LI              "expected 4 columns, saw %" XML_FMT_INT_MOD "u", colno);
704*4543ef51SXin LI     fail(buffer);
705*4543ef51SXin LI   }
706*4543ef51SXin LI }
707*4543ef51SXin LI END_TEST
708*4543ef51SXin LI 
709*4543ef51SXin LI /* Regression test for SF bug #478332. */
START_TEST(test_really_long_lines)710*4543ef51SXin LI START_TEST(test_really_long_lines) {
711*4543ef51SXin LI   /* This parses an input line longer than INIT_DATA_BUF_SIZE
712*4543ef51SXin LI      characters long (defined to be 1024 in xmlparse.c).  We take a
713*4543ef51SXin LI      really cheesy approach to building the input buffer, because
714*4543ef51SXin LI      this avoids writing bugs in buffer-filling code.
715*4543ef51SXin LI   */
716*4543ef51SXin LI   const char *text
717*4543ef51SXin LI       = "<e>"
718*4543ef51SXin LI         /* 64 chars */
719*4543ef51SXin LI         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
720*4543ef51SXin LI         /* until we have at least 1024 characters on the line: */
721*4543ef51SXin LI         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
722*4543ef51SXin LI         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
723*4543ef51SXin LI         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
724*4543ef51SXin LI         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
725*4543ef51SXin LI         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
726*4543ef51SXin LI         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
727*4543ef51SXin LI         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
728*4543ef51SXin LI         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
729*4543ef51SXin LI         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
730*4543ef51SXin LI         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
731*4543ef51SXin LI         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
732*4543ef51SXin LI         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
733*4543ef51SXin LI         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
734*4543ef51SXin LI         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
735*4543ef51SXin LI         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
736*4543ef51SXin LI         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
737*4543ef51SXin LI         "</e>";
738*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
739*4543ef51SXin LI       == XML_STATUS_ERROR)
740*4543ef51SXin LI     xml_failure(g_parser);
741*4543ef51SXin LI }
742*4543ef51SXin LI END_TEST
743*4543ef51SXin LI 
744*4543ef51SXin LI /* Test cdata processing across a buffer boundary */
START_TEST(test_really_long_encoded_lines)745*4543ef51SXin LI START_TEST(test_really_long_encoded_lines) {
746*4543ef51SXin LI   /* As above, except that we want to provoke an output buffer
747*4543ef51SXin LI    * overflow with a non-trivial encoding.  For this we need to pass
748*4543ef51SXin LI    * the whole cdata in one go, not byte-by-byte.
749*4543ef51SXin LI    */
750*4543ef51SXin LI   void *buffer;
751*4543ef51SXin LI   const char *text
752*4543ef51SXin LI       = "<?xml version='1.0' encoding='iso-8859-1'?>"
753*4543ef51SXin LI         "<e>"
754*4543ef51SXin LI         /* 64 chars */
755*4543ef51SXin LI         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
756*4543ef51SXin LI         /* until we have at least 1024 characters on the line: */
757*4543ef51SXin LI         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
758*4543ef51SXin LI         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
759*4543ef51SXin LI         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
760*4543ef51SXin LI         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
761*4543ef51SXin LI         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
762*4543ef51SXin LI         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
763*4543ef51SXin LI         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
764*4543ef51SXin LI         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
765*4543ef51SXin LI         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
766*4543ef51SXin LI         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
767*4543ef51SXin LI         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
768*4543ef51SXin LI         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
769*4543ef51SXin LI         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
770*4543ef51SXin LI         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
771*4543ef51SXin LI         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
772*4543ef51SXin LI         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
773*4543ef51SXin LI         "</e>";
774*4543ef51SXin LI   int parse_len = (int)strlen(text);
775*4543ef51SXin LI 
776*4543ef51SXin LI   /* Need a cdata handler to provoke the code path we want to test */
777*4543ef51SXin LI   XML_SetCharacterDataHandler(g_parser, dummy_cdata_handler);
778*4543ef51SXin LI   buffer = XML_GetBuffer(g_parser, parse_len);
779*4543ef51SXin LI   if (buffer == NULL)
780*4543ef51SXin LI     fail("Could not allocate parse buffer");
781*4543ef51SXin LI   assert(buffer != NULL);
782*4543ef51SXin LI   memcpy(buffer, text, parse_len);
783*4543ef51SXin LI   if (XML_ParseBuffer(g_parser, parse_len, XML_TRUE) == XML_STATUS_ERROR)
784*4543ef51SXin LI     xml_failure(g_parser);
785*4543ef51SXin LI }
786*4543ef51SXin LI END_TEST
787*4543ef51SXin LI 
788*4543ef51SXin LI /*
789*4543ef51SXin LI  * Element event tests.
790*4543ef51SXin LI  */
791*4543ef51SXin LI 
START_TEST(test_end_element_events)792*4543ef51SXin LI START_TEST(test_end_element_events) {
793*4543ef51SXin LI   const char *text = "<a><b><c/></b><d><f/></d></a>";
794*4543ef51SXin LI   const XML_Char *expected = XCS("/c/b/f/d/a");
795*4543ef51SXin LI   CharData storage;
796*4543ef51SXin LI 
797*4543ef51SXin LI   CharData_Init(&storage);
798*4543ef51SXin LI   XML_SetUserData(g_parser, &storage);
799*4543ef51SXin LI   XML_SetEndElementHandler(g_parser, end_element_event_handler);
800*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
801*4543ef51SXin LI       == XML_STATUS_ERROR)
802*4543ef51SXin LI     xml_failure(g_parser);
803*4543ef51SXin LI   CharData_CheckXMLChars(&storage, expected);
804*4543ef51SXin LI }
805*4543ef51SXin LI END_TEST
806*4543ef51SXin LI 
807*4543ef51SXin LI /*
808*4543ef51SXin LI  * Attribute tests.
809*4543ef51SXin LI  */
810*4543ef51SXin LI 
811*4543ef51SXin LI /* Helper used by the following tests; this checks any "attr" and "refs"
812*4543ef51SXin LI    attributes to make sure whitespace has been normalized.
813*4543ef51SXin LI 
814*4543ef51SXin LI    Return true if whitespace has been normalized in a string, using
815*4543ef51SXin LI    the rules for attribute value normalization.  The 'is_cdata' flag
816*4543ef51SXin LI    is needed since CDATA attributes don't need to have multiple
817*4543ef51SXin LI    whitespace characters collapsed to a single space, while other
818*4543ef51SXin LI    attribute data types do.  (Section 3.3.3 of the recommendation.)
819*4543ef51SXin LI */
820*4543ef51SXin LI static int
is_whitespace_normalized(const XML_Char * s,int is_cdata)821*4543ef51SXin LI is_whitespace_normalized(const XML_Char *s, int is_cdata) {
822*4543ef51SXin LI   int blanks = 0;
823*4543ef51SXin LI   int at_start = 1;
824*4543ef51SXin LI   while (*s) {
825*4543ef51SXin LI     if (*s == XCS(' '))
826*4543ef51SXin LI       ++blanks;
827*4543ef51SXin LI     else if (*s == XCS('\t') || *s == XCS('\n') || *s == XCS('\r'))
828*4543ef51SXin LI       return 0;
829*4543ef51SXin LI     else {
830*4543ef51SXin LI       if (at_start) {
831*4543ef51SXin LI         at_start = 0;
832*4543ef51SXin LI         if (blanks && ! is_cdata)
833*4543ef51SXin LI           /* illegal leading blanks */
834*4543ef51SXin LI           return 0;
835*4543ef51SXin LI       } else if (blanks > 1 && ! is_cdata)
836*4543ef51SXin LI         return 0;
837*4543ef51SXin LI       blanks = 0;
838*4543ef51SXin LI     }
839*4543ef51SXin LI     ++s;
840*4543ef51SXin LI   }
841*4543ef51SXin LI   if (blanks && ! is_cdata)
842*4543ef51SXin LI     return 0;
843*4543ef51SXin LI   return 1;
844*4543ef51SXin LI }
845*4543ef51SXin LI 
846*4543ef51SXin LI /* Check the attribute whitespace checker: */
START_TEST(test_helper_is_whitespace_normalized)847*4543ef51SXin LI START_TEST(test_helper_is_whitespace_normalized) {
848*4543ef51SXin LI   assert(is_whitespace_normalized(XCS("abc"), 0));
849*4543ef51SXin LI   assert(is_whitespace_normalized(XCS("abc"), 1));
850*4543ef51SXin LI   assert(is_whitespace_normalized(XCS("abc def ghi"), 0));
851*4543ef51SXin LI   assert(is_whitespace_normalized(XCS("abc def ghi"), 1));
852*4543ef51SXin LI   assert(! is_whitespace_normalized(XCS(" abc def ghi"), 0));
853*4543ef51SXin LI   assert(is_whitespace_normalized(XCS(" abc def ghi"), 1));
854*4543ef51SXin LI   assert(! is_whitespace_normalized(XCS("abc  def ghi"), 0));
855*4543ef51SXin LI   assert(is_whitespace_normalized(XCS("abc  def ghi"), 1));
856*4543ef51SXin LI   assert(! is_whitespace_normalized(XCS("abc def ghi "), 0));
857*4543ef51SXin LI   assert(is_whitespace_normalized(XCS("abc def ghi "), 1));
858*4543ef51SXin LI   assert(! is_whitespace_normalized(XCS(" "), 0));
859*4543ef51SXin LI   assert(is_whitespace_normalized(XCS(" "), 1));
860*4543ef51SXin LI   assert(! is_whitespace_normalized(XCS("\t"), 0));
861*4543ef51SXin LI   assert(! is_whitespace_normalized(XCS("\t"), 1));
862*4543ef51SXin LI   assert(! is_whitespace_normalized(XCS("\n"), 0));
863*4543ef51SXin LI   assert(! is_whitespace_normalized(XCS("\n"), 1));
864*4543ef51SXin LI   assert(! is_whitespace_normalized(XCS("\r"), 0));
865*4543ef51SXin LI   assert(! is_whitespace_normalized(XCS("\r"), 1));
866*4543ef51SXin LI   assert(! is_whitespace_normalized(XCS("abc\t def"), 1));
867*4543ef51SXin LI }
868*4543ef51SXin LI END_TEST
869*4543ef51SXin LI 
870*4543ef51SXin LI static void XMLCALL
check_attr_contains_normalized_whitespace(void * userData,const XML_Char * name,const XML_Char ** atts)871*4543ef51SXin LI check_attr_contains_normalized_whitespace(void *userData, const XML_Char *name,
872*4543ef51SXin LI                                           const XML_Char **atts) {
873*4543ef51SXin LI   int i;
874*4543ef51SXin LI   UNUSED_P(userData);
875*4543ef51SXin LI   UNUSED_P(name);
876*4543ef51SXin LI   for (i = 0; atts[i] != NULL; i += 2) {
877*4543ef51SXin LI     const XML_Char *attrname = atts[i];
878*4543ef51SXin LI     const XML_Char *value = atts[i + 1];
879*4543ef51SXin LI     if (xcstrcmp(XCS("attr"), attrname) == 0
880*4543ef51SXin LI         || xcstrcmp(XCS("ents"), attrname) == 0
881*4543ef51SXin LI         || xcstrcmp(XCS("refs"), attrname) == 0) {
882*4543ef51SXin LI       if (! is_whitespace_normalized(value, 0)) {
883*4543ef51SXin LI         char buffer[256];
884*4543ef51SXin LI         snprintf(buffer, sizeof(buffer),
885*4543ef51SXin LI                  "attribute value not normalized: %" XML_FMT_STR
886*4543ef51SXin LI                  "='%" XML_FMT_STR "'",
887*4543ef51SXin LI                  attrname, value);
888*4543ef51SXin LI         fail(buffer);
889*4543ef51SXin LI       }
890*4543ef51SXin LI     }
891*4543ef51SXin LI   }
892*4543ef51SXin LI }
893*4543ef51SXin LI 
START_TEST(test_attr_whitespace_normalization)894*4543ef51SXin LI START_TEST(test_attr_whitespace_normalization) {
895*4543ef51SXin LI   const char *text
896*4543ef51SXin LI       = "<!DOCTYPE doc [\n"
897*4543ef51SXin LI         "  <!ATTLIST doc\n"
898*4543ef51SXin LI         "            attr NMTOKENS #REQUIRED\n"
899*4543ef51SXin LI         "            ents ENTITIES #REQUIRED\n"
900*4543ef51SXin LI         "            refs IDREFS   #REQUIRED>\n"
901*4543ef51SXin LI         "]>\n"
902*4543ef51SXin LI         "<doc attr='    a  b c\t\td\te\t' refs=' id-1   \t  id-2\t\t'  \n"
903*4543ef51SXin LI         "     ents=' ent-1   \t\r\n"
904*4543ef51SXin LI         "            ent-2  ' >\n"
905*4543ef51SXin LI         "  <e id='id-1'/>\n"
906*4543ef51SXin LI         "  <e id='id-2'/>\n"
907*4543ef51SXin LI         "</doc>";
908*4543ef51SXin LI 
909*4543ef51SXin LI   XML_SetStartElementHandler(g_parser,
910*4543ef51SXin LI                              check_attr_contains_normalized_whitespace);
911*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
912*4543ef51SXin LI       == XML_STATUS_ERROR)
913*4543ef51SXin LI     xml_failure(g_parser);
914*4543ef51SXin LI }
915*4543ef51SXin LI END_TEST
916*4543ef51SXin LI 
917*4543ef51SXin LI /*
918*4543ef51SXin LI  * XML declaration tests.
919*4543ef51SXin LI  */
920*4543ef51SXin LI 
START_TEST(test_xmldecl_misplaced)921*4543ef51SXin LI START_TEST(test_xmldecl_misplaced) {
922*4543ef51SXin LI   expect_failure("\n"
923*4543ef51SXin LI                  "<?xml version='1.0'?>\n"
924*4543ef51SXin LI                  "<a/>",
925*4543ef51SXin LI                  XML_ERROR_MISPLACED_XML_PI,
926*4543ef51SXin LI                  "failed to report misplaced XML declaration");
927*4543ef51SXin LI }
928*4543ef51SXin LI END_TEST
929*4543ef51SXin LI 
START_TEST(test_xmldecl_invalid)930*4543ef51SXin LI START_TEST(test_xmldecl_invalid) {
931*4543ef51SXin LI   expect_failure("<?xml version='1.0' \xc3\xa7?>\n<doc/>", XML_ERROR_XML_DECL,
932*4543ef51SXin LI                  "Failed to report invalid XML declaration");
933*4543ef51SXin LI }
934*4543ef51SXin LI END_TEST
935*4543ef51SXin LI 
START_TEST(test_xmldecl_missing_attr)936*4543ef51SXin LI START_TEST(test_xmldecl_missing_attr) {
937*4543ef51SXin LI   expect_failure("<?xml ='1.0'?>\n<doc/>\n", XML_ERROR_XML_DECL,
938*4543ef51SXin LI                  "Failed to report missing XML declaration attribute");
939*4543ef51SXin LI }
940*4543ef51SXin LI END_TEST
941*4543ef51SXin LI 
START_TEST(test_xmldecl_missing_value)942*4543ef51SXin LI START_TEST(test_xmldecl_missing_value) {
943*4543ef51SXin LI   expect_failure("<?xml version='1.0' encoding='us-ascii' standalone?>\n"
944*4543ef51SXin LI                  "<doc/>",
945*4543ef51SXin LI                  XML_ERROR_XML_DECL,
946*4543ef51SXin LI                  "Failed to report missing attribute value");
947*4543ef51SXin LI }
948*4543ef51SXin LI END_TEST
949*4543ef51SXin LI 
950*4543ef51SXin LI /* Regression test for SF bug #584832. */
START_TEST(test_unknown_encoding_internal_entity)951*4543ef51SXin LI START_TEST(test_unknown_encoding_internal_entity) {
952*4543ef51SXin LI   const char *text = "<?xml version='1.0' encoding='unsupported-encoding'?>\n"
953*4543ef51SXin LI                      "<!DOCTYPE test [<!ENTITY foo 'bar'>]>\n"
954*4543ef51SXin LI                      "<test a='&foo;'/>";
955*4543ef51SXin LI 
956*4543ef51SXin LI   XML_SetUnknownEncodingHandler(g_parser, UnknownEncodingHandler, NULL);
957*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
958*4543ef51SXin LI       == XML_STATUS_ERROR)
959*4543ef51SXin LI     xml_failure(g_parser);
960*4543ef51SXin LI }
961*4543ef51SXin LI END_TEST
962*4543ef51SXin LI 
963*4543ef51SXin LI /* Test unrecognised encoding handler */
START_TEST(test_unrecognised_encoding_internal_entity)964*4543ef51SXin LI START_TEST(test_unrecognised_encoding_internal_entity) {
965*4543ef51SXin LI   const char *text = "<?xml version='1.0' encoding='unsupported-encoding'?>\n"
966*4543ef51SXin LI                      "<!DOCTYPE test [<!ENTITY foo 'bar'>]>\n"
967*4543ef51SXin LI                      "<test a='&foo;'/>";
968*4543ef51SXin LI 
969*4543ef51SXin LI   XML_SetUnknownEncodingHandler(g_parser, UnrecognisedEncodingHandler, NULL);
970*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
971*4543ef51SXin LI       != XML_STATUS_ERROR)
972*4543ef51SXin LI     fail("Unrecognised encoding not rejected");
973*4543ef51SXin LI }
974*4543ef51SXin LI END_TEST
975*4543ef51SXin LI 
976*4543ef51SXin LI /* Regression test for SF bug #620106. */
START_TEST(test_ext_entity_set_encoding)977*4543ef51SXin LI START_TEST(test_ext_entity_set_encoding) {
978*4543ef51SXin LI   const char *text = "<!DOCTYPE doc [\n"
979*4543ef51SXin LI                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
980*4543ef51SXin LI                      "]>\n"
981*4543ef51SXin LI                      "<doc>&en;</doc>";
982*4543ef51SXin LI   ExtTest test_data
983*4543ef51SXin LI       = {/* This text says it's an unsupported encoding, but it's really
984*4543ef51SXin LI             UTF-8, which we tell Expat using XML_SetEncoding().
985*4543ef51SXin LI          */
986*4543ef51SXin LI          "<?xml encoding='iso-8859-3'?>\xC3\xA9", XCS("utf-8"), NULL};
987*4543ef51SXin LI #ifdef XML_UNICODE
988*4543ef51SXin LI   const XML_Char *expected = XCS("\x00e9");
989*4543ef51SXin LI #else
990*4543ef51SXin LI   const XML_Char *expected = XCS("\xc3\xa9");
991*4543ef51SXin LI #endif
992*4543ef51SXin LI 
993*4543ef51SXin LI   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
994*4543ef51SXin LI   run_ext_character_check(text, &test_data, expected);
995*4543ef51SXin LI }
996*4543ef51SXin LI END_TEST
997*4543ef51SXin LI 
998*4543ef51SXin LI /* Test external entities with no handler */
START_TEST(test_ext_entity_no_handler)999*4543ef51SXin LI START_TEST(test_ext_entity_no_handler) {
1000*4543ef51SXin LI   const char *text = "<!DOCTYPE doc [\n"
1001*4543ef51SXin LI                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
1002*4543ef51SXin LI                      "]>\n"
1003*4543ef51SXin LI                      "<doc>&en;</doc>";
1004*4543ef51SXin LI 
1005*4543ef51SXin LI   XML_SetDefaultHandler(g_parser, dummy_default_handler);
1006*4543ef51SXin LI   run_character_check(text, XCS(""));
1007*4543ef51SXin LI }
1008*4543ef51SXin LI END_TEST
1009*4543ef51SXin LI 
1010*4543ef51SXin LI /* Test UTF-8 BOM is accepted */
START_TEST(test_ext_entity_set_bom)1011*4543ef51SXin LI START_TEST(test_ext_entity_set_bom) {
1012*4543ef51SXin LI   const char *text = "<!DOCTYPE doc [\n"
1013*4543ef51SXin LI                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
1014*4543ef51SXin LI                      "]>\n"
1015*4543ef51SXin LI                      "<doc>&en;</doc>";
1016*4543ef51SXin LI   ExtTest test_data = {"\xEF\xBB\xBF" /* BOM */
1017*4543ef51SXin LI                        "<?xml encoding='iso-8859-3'?>"
1018*4543ef51SXin LI                        "\xC3\xA9",
1019*4543ef51SXin LI                        XCS("utf-8"), NULL};
1020*4543ef51SXin LI #ifdef XML_UNICODE
1021*4543ef51SXin LI   const XML_Char *expected = XCS("\x00e9");
1022*4543ef51SXin LI #else
1023*4543ef51SXin LI   const XML_Char *expected = XCS("\xc3\xa9");
1024*4543ef51SXin LI #endif
1025*4543ef51SXin LI 
1026*4543ef51SXin LI   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1027*4543ef51SXin LI   run_ext_character_check(text, &test_data, expected);
1028*4543ef51SXin LI }
1029*4543ef51SXin LI END_TEST
1030*4543ef51SXin LI 
1031*4543ef51SXin LI /* Test that bad encodings are faulted */
START_TEST(test_ext_entity_bad_encoding)1032*4543ef51SXin LI START_TEST(test_ext_entity_bad_encoding) {
1033*4543ef51SXin LI   const char *text = "<!DOCTYPE doc [\n"
1034*4543ef51SXin LI                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
1035*4543ef51SXin LI                      "]>\n"
1036*4543ef51SXin LI                      "<doc>&en;</doc>";
1037*4543ef51SXin LI   ExtFaults fault
1038*4543ef51SXin LI       = {"<?xml encoding='iso-8859-3'?>u", "Unsupported encoding not faulted",
1039*4543ef51SXin LI          XCS("unknown"), XML_ERROR_UNKNOWN_ENCODING};
1040*4543ef51SXin LI 
1041*4543ef51SXin LI   XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
1042*4543ef51SXin LI   XML_SetUserData(g_parser, &fault);
1043*4543ef51SXin LI   expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
1044*4543ef51SXin LI                  "Bad encoding should not have been accepted");
1045*4543ef51SXin LI }
1046*4543ef51SXin LI END_TEST
1047*4543ef51SXin LI 
1048*4543ef51SXin LI /* Try handing an invalid encoding to an external entity parser */
START_TEST(test_ext_entity_bad_encoding_2)1049*4543ef51SXin LI START_TEST(test_ext_entity_bad_encoding_2) {
1050*4543ef51SXin LI   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
1051*4543ef51SXin LI                      "<!DOCTYPE doc SYSTEM 'foo'>\n"
1052*4543ef51SXin LI                      "<doc>&entity;</doc>";
1053*4543ef51SXin LI   ExtFaults fault
1054*4543ef51SXin LI       = {"<!ELEMENT doc (#PCDATA)*>", "Unknown encoding not faulted",
1055*4543ef51SXin LI          XCS("unknown-encoding"), XML_ERROR_UNKNOWN_ENCODING};
1056*4543ef51SXin LI 
1057*4543ef51SXin LI   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
1058*4543ef51SXin LI   XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
1059*4543ef51SXin LI   XML_SetUserData(g_parser, &fault);
1060*4543ef51SXin LI   expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
1061*4543ef51SXin LI                  "Bad encoding not faulted in external entity handler");
1062*4543ef51SXin LI }
1063*4543ef51SXin LI END_TEST
1064*4543ef51SXin LI 
1065*4543ef51SXin LI /* Test that no error is reported for unknown entities if we don't
1066*4543ef51SXin LI    read an external subset.  This was fixed in Expat 1.95.5.
1067*4543ef51SXin LI */
START_TEST(test_wfc_undeclared_entity_unread_external_subset)1068*4543ef51SXin LI START_TEST(test_wfc_undeclared_entity_unread_external_subset) {
1069*4543ef51SXin LI   const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n"
1070*4543ef51SXin LI                      "<doc>&entity;</doc>";
1071*4543ef51SXin LI 
1072*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1073*4543ef51SXin LI       == XML_STATUS_ERROR)
1074*4543ef51SXin LI     xml_failure(g_parser);
1075*4543ef51SXin LI }
1076*4543ef51SXin LI END_TEST
1077*4543ef51SXin LI 
1078*4543ef51SXin LI /* Test that an error is reported for unknown entities if we don't
1079*4543ef51SXin LI    have an external subset.
1080*4543ef51SXin LI */
START_TEST(test_wfc_undeclared_entity_no_external_subset)1081*4543ef51SXin LI START_TEST(test_wfc_undeclared_entity_no_external_subset) {
1082*4543ef51SXin LI   expect_failure("<doc>&entity;</doc>", XML_ERROR_UNDEFINED_ENTITY,
1083*4543ef51SXin LI                  "Parser did not report undefined entity w/out a DTD.");
1084*4543ef51SXin LI }
1085*4543ef51SXin LI END_TEST
1086*4543ef51SXin LI 
1087*4543ef51SXin LI /* Test that an error is reported for unknown entities if we don't
1088*4543ef51SXin LI    read an external subset, but have been declared standalone.
1089*4543ef51SXin LI */
START_TEST(test_wfc_undeclared_entity_standalone)1090*4543ef51SXin LI START_TEST(test_wfc_undeclared_entity_standalone) {
1091*4543ef51SXin LI   const char *text
1092*4543ef51SXin LI       = "<?xml version='1.0' encoding='us-ascii' standalone='yes'?>\n"
1093*4543ef51SXin LI         "<!DOCTYPE doc SYSTEM 'foo'>\n"
1094*4543ef51SXin LI         "<doc>&entity;</doc>";
1095*4543ef51SXin LI 
1096*4543ef51SXin LI   expect_failure(text, XML_ERROR_UNDEFINED_ENTITY,
1097*4543ef51SXin LI                  "Parser did not report undefined entity (standalone).");
1098*4543ef51SXin LI }
1099*4543ef51SXin LI END_TEST
1100*4543ef51SXin LI 
1101*4543ef51SXin LI /* Test that an error is reported for unknown entities if we have read
1102*4543ef51SXin LI    an external subset, and standalone is true.
1103*4543ef51SXin LI */
START_TEST(test_wfc_undeclared_entity_with_external_subset_standalone)1104*4543ef51SXin LI START_TEST(test_wfc_undeclared_entity_with_external_subset_standalone) {
1105*4543ef51SXin LI   const char *text
1106*4543ef51SXin LI       = "<?xml version='1.0' encoding='us-ascii' standalone='yes'?>\n"
1107*4543ef51SXin LI         "<!DOCTYPE doc SYSTEM 'foo'>\n"
1108*4543ef51SXin LI         "<doc>&entity;</doc>";
1109*4543ef51SXin LI   ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
1110*4543ef51SXin LI 
1111*4543ef51SXin LI   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
1112*4543ef51SXin LI   XML_SetUserData(g_parser, &test_data);
1113*4543ef51SXin LI   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1114*4543ef51SXin LI   expect_failure(text, XML_ERROR_UNDEFINED_ENTITY,
1115*4543ef51SXin LI                  "Parser did not report undefined entity (external DTD).");
1116*4543ef51SXin LI }
1117*4543ef51SXin LI END_TEST
1118*4543ef51SXin LI 
1119*4543ef51SXin LI /* Test that external entity handling is not done if the parsing flag
1120*4543ef51SXin LI  * is set to UNLESS_STANDALONE
1121*4543ef51SXin LI  */
START_TEST(test_entity_with_external_subset_unless_standalone)1122*4543ef51SXin LI START_TEST(test_entity_with_external_subset_unless_standalone) {
1123*4543ef51SXin LI   const char *text
1124*4543ef51SXin LI       = "<?xml version='1.0' encoding='us-ascii' standalone='yes'?>\n"
1125*4543ef51SXin LI         "<!DOCTYPE doc SYSTEM 'foo'>\n"
1126*4543ef51SXin LI         "<doc>&entity;</doc>";
1127*4543ef51SXin LI   ExtTest test_data = {"<!ENTITY entity 'bar'>", NULL, NULL};
1128*4543ef51SXin LI 
1129*4543ef51SXin LI   XML_SetParamEntityParsing(g_parser,
1130*4543ef51SXin LI                             XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE);
1131*4543ef51SXin LI   XML_SetUserData(g_parser, &test_data);
1132*4543ef51SXin LI   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1133*4543ef51SXin LI   expect_failure(text, XML_ERROR_UNDEFINED_ENTITY,
1134*4543ef51SXin LI                  "Parser did not report undefined entity");
1135*4543ef51SXin LI }
1136*4543ef51SXin LI END_TEST
1137*4543ef51SXin LI 
1138*4543ef51SXin LI /* Test that no error is reported for unknown entities if we have read
1139*4543ef51SXin LI    an external subset, and standalone is false.
1140*4543ef51SXin LI */
START_TEST(test_wfc_undeclared_entity_with_external_subset)1141*4543ef51SXin LI START_TEST(test_wfc_undeclared_entity_with_external_subset) {
1142*4543ef51SXin LI   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
1143*4543ef51SXin LI                      "<!DOCTYPE doc SYSTEM 'foo'>\n"
1144*4543ef51SXin LI                      "<doc>&entity;</doc>";
1145*4543ef51SXin LI   ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
1146*4543ef51SXin LI 
1147*4543ef51SXin LI   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
1148*4543ef51SXin LI   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1149*4543ef51SXin LI   run_ext_character_check(text, &test_data, XCS(""));
1150*4543ef51SXin LI }
1151*4543ef51SXin LI END_TEST
1152*4543ef51SXin LI 
1153*4543ef51SXin LI /* Test that an error is reported if our NotStandalone handler fails */
START_TEST(test_not_standalone_handler_reject)1154*4543ef51SXin LI START_TEST(test_not_standalone_handler_reject) {
1155*4543ef51SXin LI   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
1156*4543ef51SXin LI                      "<!DOCTYPE doc SYSTEM 'foo'>\n"
1157*4543ef51SXin LI                      "<doc>&entity;</doc>";
1158*4543ef51SXin LI   ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
1159*4543ef51SXin LI 
1160*4543ef51SXin LI   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
1161*4543ef51SXin LI   XML_SetUserData(g_parser, &test_data);
1162*4543ef51SXin LI   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1163*4543ef51SXin LI   XML_SetNotStandaloneHandler(g_parser, reject_not_standalone_handler);
1164*4543ef51SXin LI   expect_failure(text, XML_ERROR_NOT_STANDALONE,
1165*4543ef51SXin LI                  "NotStandalone handler failed to reject");
1166*4543ef51SXin LI 
1167*4543ef51SXin LI   /* Try again but without external entity handling */
1168*4543ef51SXin LI   XML_ParserReset(g_parser, NULL);
1169*4543ef51SXin LI   XML_SetNotStandaloneHandler(g_parser, reject_not_standalone_handler);
1170*4543ef51SXin LI   expect_failure(text, XML_ERROR_NOT_STANDALONE,
1171*4543ef51SXin LI                  "NotStandalone handler failed to reject");
1172*4543ef51SXin LI }
1173*4543ef51SXin LI END_TEST
1174*4543ef51SXin LI 
1175*4543ef51SXin LI /* Test that no error is reported if our NotStandalone handler succeeds */
START_TEST(test_not_standalone_handler_accept)1176*4543ef51SXin LI START_TEST(test_not_standalone_handler_accept) {
1177*4543ef51SXin LI   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
1178*4543ef51SXin LI                      "<!DOCTYPE doc SYSTEM 'foo'>\n"
1179*4543ef51SXin LI                      "<doc>&entity;</doc>";
1180*4543ef51SXin LI   ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
1181*4543ef51SXin LI 
1182*4543ef51SXin LI   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
1183*4543ef51SXin LI   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1184*4543ef51SXin LI   XML_SetNotStandaloneHandler(g_parser, accept_not_standalone_handler);
1185*4543ef51SXin LI   run_ext_character_check(text, &test_data, XCS(""));
1186*4543ef51SXin LI 
1187*4543ef51SXin LI   /* Repeat without the external entity handler */
1188*4543ef51SXin LI   XML_ParserReset(g_parser, NULL);
1189*4543ef51SXin LI   XML_SetNotStandaloneHandler(g_parser, accept_not_standalone_handler);
1190*4543ef51SXin LI   run_character_check(text, XCS(""));
1191*4543ef51SXin LI }
1192*4543ef51SXin LI END_TEST
1193*4543ef51SXin LI 
START_TEST(test_wfc_no_recursive_entity_refs)1194*4543ef51SXin LI START_TEST(test_wfc_no_recursive_entity_refs) {
1195*4543ef51SXin LI   const char *text = "<!DOCTYPE doc [\n"
1196*4543ef51SXin LI                      "  <!ENTITY entity '&#38;entity;'>\n"
1197*4543ef51SXin LI                      "]>\n"
1198*4543ef51SXin LI                      "<doc>&entity;</doc>";
1199*4543ef51SXin LI 
1200*4543ef51SXin LI   expect_failure(text, XML_ERROR_RECURSIVE_ENTITY_REF,
1201*4543ef51SXin LI                  "Parser did not report recursive entity reference.");
1202*4543ef51SXin LI }
1203*4543ef51SXin LI END_TEST
1204*4543ef51SXin LI 
1205*4543ef51SXin LI /* Test incomplete external entities are faulted */
START_TEST(test_ext_entity_invalid_parse)1206*4543ef51SXin LI START_TEST(test_ext_entity_invalid_parse) {
1207*4543ef51SXin LI   const char *text = "<!DOCTYPE doc [\n"
1208*4543ef51SXin LI                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
1209*4543ef51SXin LI                      "]>\n"
1210*4543ef51SXin LI                      "<doc>&en;</doc>";
1211*4543ef51SXin LI   const ExtFaults faults[]
1212*4543ef51SXin LI       = {{"<", "Incomplete element declaration not faulted", NULL,
1213*4543ef51SXin LI           XML_ERROR_UNCLOSED_TOKEN},
1214*4543ef51SXin LI          {"<\xe2\x82", /* First two bytes of a three-byte char */
1215*4543ef51SXin LI           "Incomplete character not faulted", NULL, XML_ERROR_PARTIAL_CHAR},
1216*4543ef51SXin LI          {"<tag>\xe2\x82", "Incomplete character in CDATA not faulted", NULL,
1217*4543ef51SXin LI           XML_ERROR_PARTIAL_CHAR},
1218*4543ef51SXin LI          {NULL, NULL, NULL, XML_ERROR_NONE}};
1219*4543ef51SXin LI   const ExtFaults *fault = faults;
1220*4543ef51SXin LI 
1221*4543ef51SXin LI   for (; fault->parse_text != NULL; fault++) {
1222*4543ef51SXin LI     set_subtest("\"%s\"", fault->parse_text);
1223*4543ef51SXin LI     XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
1224*4543ef51SXin LI     XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
1225*4543ef51SXin LI     XML_SetUserData(g_parser, (void *)fault);
1226*4543ef51SXin LI     expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
1227*4543ef51SXin LI                    "Parser did not report external entity error");
1228*4543ef51SXin LI     XML_ParserReset(g_parser, NULL);
1229*4543ef51SXin LI   }
1230*4543ef51SXin LI }
1231*4543ef51SXin LI END_TEST
1232*4543ef51SXin LI 
1233*4543ef51SXin LI /* Regression test for SF bug #483514. */
START_TEST(test_dtd_default_handling)1234*4543ef51SXin LI START_TEST(test_dtd_default_handling) {
1235*4543ef51SXin LI   const char *text = "<!DOCTYPE doc [\n"
1236*4543ef51SXin LI                      "<!ENTITY e SYSTEM 'http://example.org/e'>\n"
1237*4543ef51SXin LI                      "<!NOTATION n SYSTEM 'http://example.org/n'>\n"
1238*4543ef51SXin LI                      "<!ELEMENT doc EMPTY>\n"
1239*4543ef51SXin LI                      "<!ATTLIST doc a CDATA #IMPLIED>\n"
1240*4543ef51SXin LI                      "<?pi in dtd?>\n"
1241*4543ef51SXin LI                      "<!--comment in dtd-->\n"
1242*4543ef51SXin LI                      "]><doc/>";
1243*4543ef51SXin LI 
1244*4543ef51SXin LI   XML_SetDefaultHandler(g_parser, accumulate_characters);
1245*4543ef51SXin LI   XML_SetStartDoctypeDeclHandler(g_parser, dummy_start_doctype_handler);
1246*4543ef51SXin LI   XML_SetEndDoctypeDeclHandler(g_parser, dummy_end_doctype_handler);
1247*4543ef51SXin LI   XML_SetEntityDeclHandler(g_parser, dummy_entity_decl_handler);
1248*4543ef51SXin LI   XML_SetNotationDeclHandler(g_parser, dummy_notation_decl_handler);
1249*4543ef51SXin LI   XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
1250*4543ef51SXin LI   XML_SetAttlistDeclHandler(g_parser, dummy_attlist_decl_handler);
1251*4543ef51SXin LI   XML_SetProcessingInstructionHandler(g_parser, dummy_pi_handler);
1252*4543ef51SXin LI   XML_SetCommentHandler(g_parser, dummy_comment_handler);
1253*4543ef51SXin LI   XML_SetStartCdataSectionHandler(g_parser, dummy_start_cdata_handler);
1254*4543ef51SXin LI   XML_SetEndCdataSectionHandler(g_parser, dummy_end_cdata_handler);
1255*4543ef51SXin LI   run_character_check(text, XCS("\n\n\n\n\n\n\n<doc/>"));
1256*4543ef51SXin LI }
1257*4543ef51SXin LI END_TEST
1258*4543ef51SXin LI 
1259*4543ef51SXin LI /* Test handling of attribute declarations */
START_TEST(test_dtd_attr_handling)1260*4543ef51SXin LI START_TEST(test_dtd_attr_handling) {
1261*4543ef51SXin LI   const char *prolog = "<!DOCTYPE doc [\n"
1262*4543ef51SXin LI                        "<!ELEMENT doc EMPTY>\n";
1263*4543ef51SXin LI   AttTest attr_data[]
1264*4543ef51SXin LI       = {{"<!ATTLIST doc a ( one | two | three ) #REQUIRED>\n"
1265*4543ef51SXin LI           "]>"
1266*4543ef51SXin LI           "<doc a='two'/>",
1267*4543ef51SXin LI           XCS("doc"), XCS("a"),
1268*4543ef51SXin LI           XCS("(one|two|three)"), /* Extraneous spaces will be removed */
1269*4543ef51SXin LI           NULL, XML_TRUE},
1270*4543ef51SXin LI          {"<!NOTATION foo SYSTEM 'http://example.org/foo'>\n"
1271*4543ef51SXin LI           "<!ATTLIST doc a NOTATION (foo) #IMPLIED>\n"
1272*4543ef51SXin LI           "]>"
1273*4543ef51SXin LI           "<doc/>",
1274*4543ef51SXin LI           XCS("doc"), XCS("a"), XCS("NOTATION(foo)"), NULL, XML_FALSE},
1275*4543ef51SXin LI          {"<!ATTLIST doc a NOTATION (foo) 'bar'>\n"
1276*4543ef51SXin LI           "]>"
1277*4543ef51SXin LI           "<doc/>",
1278*4543ef51SXin LI           XCS("doc"), XCS("a"), XCS("NOTATION(foo)"), XCS("bar"), XML_FALSE},
1279*4543ef51SXin LI          {"<!ATTLIST doc a CDATA '\xdb\xb2'>\n"
1280*4543ef51SXin LI           "]>"
1281*4543ef51SXin LI           "<doc/>",
1282*4543ef51SXin LI           XCS("doc"), XCS("a"), XCS("CDATA"),
1283*4543ef51SXin LI #ifdef XML_UNICODE
1284*4543ef51SXin LI           XCS("\x06f2"),
1285*4543ef51SXin LI #else
1286*4543ef51SXin LI           XCS("\xdb\xb2"),
1287*4543ef51SXin LI #endif
1288*4543ef51SXin LI           XML_FALSE},
1289*4543ef51SXin LI          {NULL, NULL, NULL, NULL, NULL, XML_FALSE}};
1290*4543ef51SXin LI   AttTest *test;
1291*4543ef51SXin LI 
1292*4543ef51SXin LI   for (test = attr_data; test->definition != NULL; test++) {
1293*4543ef51SXin LI     set_subtest("%s", test->definition);
1294*4543ef51SXin LI     XML_SetAttlistDeclHandler(g_parser, verify_attlist_decl_handler);
1295*4543ef51SXin LI     XML_SetUserData(g_parser, test);
1296*4543ef51SXin LI     if (_XML_Parse_SINGLE_BYTES(g_parser, prolog, (int)strlen(prolog),
1297*4543ef51SXin LI                                 XML_FALSE)
1298*4543ef51SXin LI         == XML_STATUS_ERROR)
1299*4543ef51SXin LI       xml_failure(g_parser);
1300*4543ef51SXin LI     if (_XML_Parse_SINGLE_BYTES(g_parser, test->definition,
1301*4543ef51SXin LI                                 (int)strlen(test->definition), XML_TRUE)
1302*4543ef51SXin LI         == XML_STATUS_ERROR)
1303*4543ef51SXin LI       xml_failure(g_parser);
1304*4543ef51SXin LI     XML_ParserReset(g_parser, NULL);
1305*4543ef51SXin LI   }
1306*4543ef51SXin LI }
1307*4543ef51SXin LI END_TEST
1308*4543ef51SXin LI 
1309*4543ef51SXin LI /* See related SF bug #673791.
1310*4543ef51SXin LI    When namespace processing is enabled, setting the namespace URI for
1311*4543ef51SXin LI    a prefix is not allowed; this test ensures that it *is* allowed
1312*4543ef51SXin LI    when namespace processing is not enabled.
1313*4543ef51SXin LI    (See Namespaces in XML, section 2.)
1314*4543ef51SXin LI */
START_TEST(test_empty_ns_without_namespaces)1315*4543ef51SXin LI START_TEST(test_empty_ns_without_namespaces) {
1316*4543ef51SXin LI   const char *text = "<doc xmlns:prefix='http://example.org/'>\n"
1317*4543ef51SXin LI                      "  <e xmlns:prefix=''/>\n"
1318*4543ef51SXin LI                      "</doc>";
1319*4543ef51SXin LI 
1320*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1321*4543ef51SXin LI       == XML_STATUS_ERROR)
1322*4543ef51SXin LI     xml_failure(g_parser);
1323*4543ef51SXin LI }
1324*4543ef51SXin LI END_TEST
1325*4543ef51SXin LI 
1326*4543ef51SXin LI /* Regression test for SF bug #824420.
1327*4543ef51SXin LI    Checks that an xmlns:prefix attribute set in an attribute's default
1328*4543ef51SXin LI    value isn't misinterpreted.
1329*4543ef51SXin LI */
START_TEST(test_ns_in_attribute_default_without_namespaces)1330*4543ef51SXin LI START_TEST(test_ns_in_attribute_default_without_namespaces) {
1331*4543ef51SXin LI   const char *text = "<!DOCTYPE e:element [\n"
1332*4543ef51SXin LI                      "  <!ATTLIST e:element\n"
1333*4543ef51SXin LI                      "    xmlns:e CDATA 'http://example.org/'>\n"
1334*4543ef51SXin LI                      "      ]>\n"
1335*4543ef51SXin LI                      "<e:element/>";
1336*4543ef51SXin LI 
1337*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1338*4543ef51SXin LI       == XML_STATUS_ERROR)
1339*4543ef51SXin LI     xml_failure(g_parser);
1340*4543ef51SXin LI }
1341*4543ef51SXin LI END_TEST
1342*4543ef51SXin LI 
1343*4543ef51SXin LI /* Regression test for SF bug #1515266: missing check of stopped
1344*4543ef51SXin LI    parser in doContext() 'for' loop. */
START_TEST(test_stop_parser_between_char_data_calls)1345*4543ef51SXin LI START_TEST(test_stop_parser_between_char_data_calls) {
1346*4543ef51SXin LI   /* The sample data must be big enough that there are two calls to
1347*4543ef51SXin LI      the character data handler from within the inner "for" loop of
1348*4543ef51SXin LI      the XML_TOK_DATA_CHARS case in doContent(), and the character
1349*4543ef51SXin LI      handler must stop the parser and clear the character data
1350*4543ef51SXin LI      handler.
1351*4543ef51SXin LI   */
1352*4543ef51SXin LI   const char *text = long_character_data_text;
1353*4543ef51SXin LI 
1354*4543ef51SXin LI   XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
1355*4543ef51SXin LI   g_resumable = XML_FALSE;
1356*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1357*4543ef51SXin LI       != XML_STATUS_ERROR)
1358*4543ef51SXin LI     xml_failure(g_parser);
1359*4543ef51SXin LI   if (XML_GetErrorCode(g_parser) != XML_ERROR_ABORTED)
1360*4543ef51SXin LI     xml_failure(g_parser);
1361*4543ef51SXin LI }
1362*4543ef51SXin LI END_TEST
1363*4543ef51SXin LI 
1364*4543ef51SXin LI /* Regression test for SF bug #1515266: missing check of stopped
1365*4543ef51SXin LI    parser in doContext() 'for' loop. */
START_TEST(test_suspend_parser_between_char_data_calls)1366*4543ef51SXin LI START_TEST(test_suspend_parser_between_char_data_calls) {
1367*4543ef51SXin LI   /* The sample data must be big enough that there are two calls to
1368*4543ef51SXin LI      the character data handler from within the inner "for" loop of
1369*4543ef51SXin LI      the XML_TOK_DATA_CHARS case in doContent(), and the character
1370*4543ef51SXin LI      handler must stop the parser and clear the character data
1371*4543ef51SXin LI      handler.
1372*4543ef51SXin LI   */
1373*4543ef51SXin LI   const char *text = long_character_data_text;
1374*4543ef51SXin LI 
1375*4543ef51SXin LI   XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
1376*4543ef51SXin LI   g_resumable = XML_TRUE;
1377*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1378*4543ef51SXin LI       != XML_STATUS_SUSPENDED)
1379*4543ef51SXin LI     xml_failure(g_parser);
1380*4543ef51SXin LI   if (XML_GetErrorCode(g_parser) != XML_ERROR_NONE)
1381*4543ef51SXin LI     xml_failure(g_parser);
1382*4543ef51SXin LI   /* Try parsing directly */
1383*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1384*4543ef51SXin LI       != XML_STATUS_ERROR)
1385*4543ef51SXin LI     fail("Attempt to continue parse while suspended not faulted");
1386*4543ef51SXin LI   if (XML_GetErrorCode(g_parser) != XML_ERROR_SUSPENDED)
1387*4543ef51SXin LI     fail("Suspended parse not faulted with correct error");
1388*4543ef51SXin LI }
1389*4543ef51SXin LI END_TEST
1390*4543ef51SXin LI 
1391*4543ef51SXin LI /* Test repeated calls to XML_StopParser are handled correctly */
START_TEST(test_repeated_stop_parser_between_char_data_calls)1392*4543ef51SXin LI START_TEST(test_repeated_stop_parser_between_char_data_calls) {
1393*4543ef51SXin LI   const char *text = long_character_data_text;
1394*4543ef51SXin LI 
1395*4543ef51SXin LI   XML_SetCharacterDataHandler(g_parser, parser_stop_character_handler);
1396*4543ef51SXin LI   g_resumable = XML_FALSE;
1397*4543ef51SXin LI   g_abortable = XML_FALSE;
1398*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1399*4543ef51SXin LI       != XML_STATUS_ERROR)
1400*4543ef51SXin LI     fail("Failed to double-stop parser");
1401*4543ef51SXin LI 
1402*4543ef51SXin LI   XML_ParserReset(g_parser, NULL);
1403*4543ef51SXin LI   XML_SetCharacterDataHandler(g_parser, parser_stop_character_handler);
1404*4543ef51SXin LI   g_resumable = XML_TRUE;
1405*4543ef51SXin LI   g_abortable = XML_FALSE;
1406*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1407*4543ef51SXin LI       != XML_STATUS_SUSPENDED)
1408*4543ef51SXin LI     fail("Failed to double-suspend parser");
1409*4543ef51SXin LI 
1410*4543ef51SXin LI   XML_ParserReset(g_parser, NULL);
1411*4543ef51SXin LI   XML_SetCharacterDataHandler(g_parser, parser_stop_character_handler);
1412*4543ef51SXin LI   g_resumable = XML_TRUE;
1413*4543ef51SXin LI   g_abortable = XML_TRUE;
1414*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1415*4543ef51SXin LI       != XML_STATUS_ERROR)
1416*4543ef51SXin LI     fail("Failed to suspend-abort parser");
1417*4543ef51SXin LI }
1418*4543ef51SXin LI END_TEST
1419*4543ef51SXin LI 
START_TEST(test_good_cdata_ascii)1420*4543ef51SXin LI START_TEST(test_good_cdata_ascii) {
1421*4543ef51SXin LI   const char *text = "<a><![CDATA[<greeting>Hello, world!</greeting>]]></a>";
1422*4543ef51SXin LI   const XML_Char *expected = XCS("<greeting>Hello, world!</greeting>");
1423*4543ef51SXin LI 
1424*4543ef51SXin LI   CharData storage;
1425*4543ef51SXin LI   CharData_Init(&storage);
1426*4543ef51SXin LI   XML_SetUserData(g_parser, &storage);
1427*4543ef51SXin LI   XML_SetCharacterDataHandler(g_parser, accumulate_characters);
1428*4543ef51SXin LI   /* Add start and end handlers for coverage */
1429*4543ef51SXin LI   XML_SetStartCdataSectionHandler(g_parser, dummy_start_cdata_handler);
1430*4543ef51SXin LI   XML_SetEndCdataSectionHandler(g_parser, dummy_end_cdata_handler);
1431*4543ef51SXin LI 
1432*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1433*4543ef51SXin LI       == XML_STATUS_ERROR)
1434*4543ef51SXin LI     xml_failure(g_parser);
1435*4543ef51SXin LI   CharData_CheckXMLChars(&storage, expected);
1436*4543ef51SXin LI 
1437*4543ef51SXin LI   /* Try again, this time with a default handler */
1438*4543ef51SXin LI   XML_ParserReset(g_parser, NULL);
1439*4543ef51SXin LI   CharData_Init(&storage);
1440*4543ef51SXin LI   XML_SetUserData(g_parser, &storage);
1441*4543ef51SXin LI   XML_SetCharacterDataHandler(g_parser, accumulate_characters);
1442*4543ef51SXin LI   XML_SetDefaultHandler(g_parser, dummy_default_handler);
1443*4543ef51SXin LI 
1444*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1445*4543ef51SXin LI       == XML_STATUS_ERROR)
1446*4543ef51SXin LI     xml_failure(g_parser);
1447*4543ef51SXin LI   CharData_CheckXMLChars(&storage, expected);
1448*4543ef51SXin LI }
1449*4543ef51SXin LI END_TEST
1450*4543ef51SXin LI 
START_TEST(test_good_cdata_utf16)1451*4543ef51SXin LI START_TEST(test_good_cdata_utf16) {
1452*4543ef51SXin LI   /* Test data is:
1453*4543ef51SXin LI    *   <?xml version='1.0' encoding='utf-16'?>
1454*4543ef51SXin LI    *   <a><![CDATA[hello]]></a>
1455*4543ef51SXin LI    */
1456*4543ef51SXin LI   const char text[]
1457*4543ef51SXin LI       = "\0<\0?\0x\0m\0l\0"
1458*4543ef51SXin LI         " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
1459*4543ef51SXin LI         " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0"
1460*4543ef51SXin LI         "1\0"
1461*4543ef51SXin LI         "6\0'"
1462*4543ef51SXin LI         "\0?\0>\0\n"
1463*4543ef51SXin LI         "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0[\0h\0e\0l\0l\0o\0]\0]\0>\0<\0/\0a\0>";
1464*4543ef51SXin LI   const XML_Char *expected = XCS("hello");
1465*4543ef51SXin LI 
1466*4543ef51SXin LI   CharData storage;
1467*4543ef51SXin LI   CharData_Init(&storage);
1468*4543ef51SXin LI   XML_SetUserData(g_parser, &storage);
1469*4543ef51SXin LI   XML_SetCharacterDataHandler(g_parser, accumulate_characters);
1470*4543ef51SXin LI 
1471*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
1472*4543ef51SXin LI       == XML_STATUS_ERROR)
1473*4543ef51SXin LI     xml_failure(g_parser);
1474*4543ef51SXin LI   CharData_CheckXMLChars(&storage, expected);
1475*4543ef51SXin LI }
1476*4543ef51SXin LI END_TEST
1477*4543ef51SXin LI 
START_TEST(test_good_cdata_utf16_le)1478*4543ef51SXin LI START_TEST(test_good_cdata_utf16_le) {
1479*4543ef51SXin LI   /* Test data is:
1480*4543ef51SXin LI    *   <?xml version='1.0' encoding='utf-16'?>
1481*4543ef51SXin LI    *   <a><![CDATA[hello]]></a>
1482*4543ef51SXin LI    */
1483*4543ef51SXin LI   const char text[]
1484*4543ef51SXin LI       = "<\0?\0x\0m\0l\0"
1485*4543ef51SXin LI         " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
1486*4543ef51SXin LI         " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0"
1487*4543ef51SXin LI         "1\0"
1488*4543ef51SXin LI         "6\0'"
1489*4543ef51SXin LI         "\0?\0>\0\n"
1490*4543ef51SXin LI         "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0[\0h\0e\0l\0l\0o\0]\0]\0>\0<\0/\0a\0>\0";
1491*4543ef51SXin LI   const XML_Char *expected = XCS("hello");
1492*4543ef51SXin LI 
1493*4543ef51SXin LI   CharData storage;
1494*4543ef51SXin LI   CharData_Init(&storage);
1495*4543ef51SXin LI   XML_SetUserData(g_parser, &storage);
1496*4543ef51SXin LI   XML_SetCharacterDataHandler(g_parser, accumulate_characters);
1497*4543ef51SXin LI 
1498*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
1499*4543ef51SXin LI       == XML_STATUS_ERROR)
1500*4543ef51SXin LI     xml_failure(g_parser);
1501*4543ef51SXin LI   CharData_CheckXMLChars(&storage, expected);
1502*4543ef51SXin LI }
1503*4543ef51SXin LI END_TEST
1504*4543ef51SXin LI 
1505*4543ef51SXin LI /* Test UTF16 conversion of a long cdata string */
1506*4543ef51SXin LI 
1507*4543ef51SXin LI /* 16 characters: handy macro to reduce visual clutter */
1508*4543ef51SXin LI #define A_TO_P_IN_UTF16 "\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M\0N\0O\0P"
1509*4543ef51SXin LI 
START_TEST(test_long_cdata_utf16)1510*4543ef51SXin LI START_TEST(test_long_cdata_utf16) {
1511*4543ef51SXin LI   /* Test data is:
1512*4543ef51SXin LI    * <?xlm version='1.0' encoding='utf-16'?>
1513*4543ef51SXin LI    * <a><![CDATA[
1514*4543ef51SXin LI    * ABCDEFGHIJKLMNOP
1515*4543ef51SXin LI    * ]]></a>
1516*4543ef51SXin LI    */
1517*4543ef51SXin LI   const char text[]
1518*4543ef51SXin LI       = "\0<\0?\0x\0m\0l\0 "
1519*4543ef51SXin LI         "\0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0 "
1520*4543ef51SXin LI         "\0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0\x31\0\x36\0'\0?\0>"
1521*4543ef51SXin LI         "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0["
1522*4543ef51SXin LI       /* 64 characters per line */
1523*4543ef51SXin LI       /* clang-format off */
1524*4543ef51SXin LI         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1525*4543ef51SXin LI         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1526*4543ef51SXin LI         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1527*4543ef51SXin LI         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1528*4543ef51SXin LI         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1529*4543ef51SXin LI         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1530*4543ef51SXin LI         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1531*4543ef51SXin LI         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1532*4543ef51SXin LI         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1533*4543ef51SXin LI         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1534*4543ef51SXin LI         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1535*4543ef51SXin LI         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1536*4543ef51SXin LI         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1537*4543ef51SXin LI         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1538*4543ef51SXin LI         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1539*4543ef51SXin LI         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1540*4543ef51SXin LI         A_TO_P_IN_UTF16
1541*4543ef51SXin LI         /* clang-format on */
1542*4543ef51SXin LI         "\0]\0]\0>\0<\0/\0a\0>";
1543*4543ef51SXin LI   const XML_Char *expected =
1544*4543ef51SXin LI       /* clang-format off */
1545*4543ef51SXin LI         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1546*4543ef51SXin LI         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1547*4543ef51SXin LI         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1548*4543ef51SXin LI         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1549*4543ef51SXin LI         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1550*4543ef51SXin LI         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1551*4543ef51SXin LI         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1552*4543ef51SXin LI         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1553*4543ef51SXin LI         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1554*4543ef51SXin LI         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1555*4543ef51SXin LI         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1556*4543ef51SXin LI         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1557*4543ef51SXin LI         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1558*4543ef51SXin LI         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1559*4543ef51SXin LI         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1560*4543ef51SXin LI         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1561*4543ef51SXin LI         XCS("ABCDEFGHIJKLMNOP");
1562*4543ef51SXin LI   /* clang-format on */
1563*4543ef51SXin LI   CharData storage;
1564*4543ef51SXin LI   void *buffer;
1565*4543ef51SXin LI 
1566*4543ef51SXin LI   CharData_Init(&storage);
1567*4543ef51SXin LI   XML_SetUserData(g_parser, &storage);
1568*4543ef51SXin LI   XML_SetCharacterDataHandler(g_parser, accumulate_characters);
1569*4543ef51SXin LI   buffer = XML_GetBuffer(g_parser, sizeof(text) - 1);
1570*4543ef51SXin LI   if (buffer == NULL)
1571*4543ef51SXin LI     fail("Could not allocate parse buffer");
1572*4543ef51SXin LI   assert(buffer != NULL);
1573*4543ef51SXin LI   memcpy(buffer, text, sizeof(text) - 1);
1574*4543ef51SXin LI   if (XML_ParseBuffer(g_parser, sizeof(text) - 1, XML_TRUE) == XML_STATUS_ERROR)
1575*4543ef51SXin LI     xml_failure(g_parser);
1576*4543ef51SXin LI   CharData_CheckXMLChars(&storage, expected);
1577*4543ef51SXin LI }
1578*4543ef51SXin LI END_TEST
1579*4543ef51SXin LI 
1580*4543ef51SXin LI /* Test handling of multiple unit UTF-16 characters */
START_TEST(test_multichar_cdata_utf16)1581*4543ef51SXin LI START_TEST(test_multichar_cdata_utf16) {
1582*4543ef51SXin LI   /* Test data is:
1583*4543ef51SXin LI    *   <?xml version='1.0' encoding='utf-16'?>
1584*4543ef51SXin LI    *   <a><![CDATA[{MINIM}{CROTCHET}]]></a>
1585*4543ef51SXin LI    *
1586*4543ef51SXin LI    * where {MINIM} is U+1d15e (a minim or half-note)
1587*4543ef51SXin LI    *   UTF-16: 0xd834 0xdd5e
1588*4543ef51SXin LI    *   UTF-8:  0xf0 0x9d 0x85 0x9e
1589*4543ef51SXin LI    * and {CROTCHET} is U+1d15f (a crotchet or quarter-note)
1590*4543ef51SXin LI    *   UTF-16: 0xd834 0xdd5f
1591*4543ef51SXin LI    *   UTF-8:  0xf0 0x9d 0x85 0x9f
1592*4543ef51SXin LI    */
1593*4543ef51SXin LI   const char text[] = "\0<\0?\0x\0m\0l\0"
1594*4543ef51SXin LI                       " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
1595*4543ef51SXin LI                       " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0"
1596*4543ef51SXin LI                       "1\0"
1597*4543ef51SXin LI                       "6\0'"
1598*4543ef51SXin LI                       "\0?\0>\0\n"
1599*4543ef51SXin LI                       "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0["
1600*4543ef51SXin LI                       "\xd8\x34\xdd\x5e\xd8\x34\xdd\x5f"
1601*4543ef51SXin LI                       "\0]\0]\0>\0<\0/\0a\0>";
1602*4543ef51SXin LI #ifdef XML_UNICODE
1603*4543ef51SXin LI   const XML_Char *expected = XCS("\xd834\xdd5e\xd834\xdd5f");
1604*4543ef51SXin LI #else
1605*4543ef51SXin LI   const XML_Char *expected = XCS("\xf0\x9d\x85\x9e\xf0\x9d\x85\x9f");
1606*4543ef51SXin LI #endif
1607*4543ef51SXin LI   CharData storage;
1608*4543ef51SXin LI 
1609*4543ef51SXin LI   CharData_Init(&storage);
1610*4543ef51SXin LI   XML_SetUserData(g_parser, &storage);
1611*4543ef51SXin LI   XML_SetCharacterDataHandler(g_parser, accumulate_characters);
1612*4543ef51SXin LI 
1613*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
1614*4543ef51SXin LI       == XML_STATUS_ERROR)
1615*4543ef51SXin LI     xml_failure(g_parser);
1616*4543ef51SXin LI   CharData_CheckXMLChars(&storage, expected);
1617*4543ef51SXin LI }
1618*4543ef51SXin LI END_TEST
1619*4543ef51SXin LI 
/* Test that a reversed UTF-16 surrogate pair in CDATA is rejected */
START_TEST(test_utf16_bad_surrogate_pair)1621*4543ef51SXin LI START_TEST(test_utf16_bad_surrogate_pair) {
1622*4543ef51SXin LI   /* Test data is:
1623*4543ef51SXin LI    *   <?xml version='1.0' encoding='utf-16'?>
1624*4543ef51SXin LI    *   <a><![CDATA[{BADLINB}]]></a>
1625*4543ef51SXin LI    *
1626*4543ef51SXin LI    * where {BADLINB} is U+10000 (the first Linear B character)
1627*4543ef51SXin LI    * with the UTF-16 surrogate pair in the wrong order, i.e.
1628*4543ef51SXin LI    *   0xdc00 0xd800
1629*4543ef51SXin LI    */
1630*4543ef51SXin LI   const char text[] = "\0<\0?\0x\0m\0l\0"
1631*4543ef51SXin LI                       " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
1632*4543ef51SXin LI                       " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0"
1633*4543ef51SXin LI                       "1\0"
1634*4543ef51SXin LI                       "6\0'"
1635*4543ef51SXin LI                       "\0?\0>\0\n"
1636*4543ef51SXin LI                       "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0["
1637*4543ef51SXin LI                       "\xdc\x00\xd8\x00"
1638*4543ef51SXin LI                       "\0]\0]\0>\0<\0/\0a\0>";
1639*4543ef51SXin LI 
1640*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
1641*4543ef51SXin LI       != XML_STATUS_ERROR)
1642*4543ef51SXin LI     fail("Reversed UTF-16 surrogate pair not faulted");
1643*4543ef51SXin LI   if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN)
1644*4543ef51SXin LI     xml_failure(g_parser);
1645*4543ef51SXin LI }
1646*4543ef51SXin LI END_TEST
1647*4543ef51SXin LI 
START_TEST(test_bad_cdata)1648*4543ef51SXin LI START_TEST(test_bad_cdata) {
1649*4543ef51SXin LI   struct CaseData {
1650*4543ef51SXin LI     const char *text;
1651*4543ef51SXin LI     enum XML_Error expectedError;
1652*4543ef51SXin LI   };
1653*4543ef51SXin LI 
1654*4543ef51SXin LI   struct CaseData cases[]
1655*4543ef51SXin LI       = {{"<a><", XML_ERROR_UNCLOSED_TOKEN},
1656*4543ef51SXin LI          {"<a><!", XML_ERROR_UNCLOSED_TOKEN},
1657*4543ef51SXin LI          {"<a><![", XML_ERROR_UNCLOSED_TOKEN},
1658*4543ef51SXin LI          {"<a><![C", XML_ERROR_UNCLOSED_TOKEN},
1659*4543ef51SXin LI          {"<a><![CD", XML_ERROR_UNCLOSED_TOKEN},
1660*4543ef51SXin LI          {"<a><![CDA", XML_ERROR_UNCLOSED_TOKEN},
1661*4543ef51SXin LI          {"<a><![CDAT", XML_ERROR_UNCLOSED_TOKEN},
1662*4543ef51SXin LI          {"<a><![CDATA", XML_ERROR_UNCLOSED_TOKEN},
1663*4543ef51SXin LI 
1664*4543ef51SXin LI          {"<a><![CDATA[", XML_ERROR_UNCLOSED_CDATA_SECTION},
1665*4543ef51SXin LI          {"<a><![CDATA[]", XML_ERROR_UNCLOSED_CDATA_SECTION},
1666*4543ef51SXin LI          {"<a><![CDATA[]]", XML_ERROR_UNCLOSED_CDATA_SECTION},
1667*4543ef51SXin LI 
1668*4543ef51SXin LI          {"<a><!<a/>", XML_ERROR_INVALID_TOKEN},
1669*4543ef51SXin LI          {"<a><![<a/>", XML_ERROR_UNCLOSED_TOKEN},  /* ?! */
1670*4543ef51SXin LI          {"<a><![C<a/>", XML_ERROR_UNCLOSED_TOKEN}, /* ?! */
1671*4543ef51SXin LI          {"<a><![CD<a/>", XML_ERROR_INVALID_TOKEN},
1672*4543ef51SXin LI          {"<a><![CDA<a/>", XML_ERROR_INVALID_TOKEN},
1673*4543ef51SXin LI          {"<a><![CDAT<a/>", XML_ERROR_INVALID_TOKEN},
1674*4543ef51SXin LI          {"<a><![CDATA<a/>", XML_ERROR_INVALID_TOKEN},
1675*4543ef51SXin LI 
1676*4543ef51SXin LI          {"<a><![CDATA[<a/>", XML_ERROR_UNCLOSED_CDATA_SECTION},
1677*4543ef51SXin LI          {"<a><![CDATA[]<a/>", XML_ERROR_UNCLOSED_CDATA_SECTION},
1678*4543ef51SXin LI          {"<a><![CDATA[]]<a/>", XML_ERROR_UNCLOSED_CDATA_SECTION}};
1679*4543ef51SXin LI 
1680*4543ef51SXin LI   size_t i = 0;
1681*4543ef51SXin LI   for (; i < sizeof(cases) / sizeof(struct CaseData); i++) {
1682*4543ef51SXin LI     set_subtest("%s", cases[i].text);
1683*4543ef51SXin LI     const enum XML_Status actualStatus = _XML_Parse_SINGLE_BYTES(
1684*4543ef51SXin LI         g_parser, cases[i].text, (int)strlen(cases[i].text), XML_TRUE);
1685*4543ef51SXin LI     const enum XML_Error actualError = XML_GetErrorCode(g_parser);
1686*4543ef51SXin LI 
1687*4543ef51SXin LI     assert(actualStatus == XML_STATUS_ERROR);
1688*4543ef51SXin LI 
1689*4543ef51SXin LI     if (actualError != cases[i].expectedError) {
1690*4543ef51SXin LI       char message[100];
1691*4543ef51SXin LI       snprintf(message, sizeof(message),
1692*4543ef51SXin LI                "Expected error %d but got error %d for case %u: \"%s\"\n",
1693*4543ef51SXin LI                cases[i].expectedError, actualError, (unsigned int)i + 1,
1694*4543ef51SXin LI                cases[i].text);
1695*4543ef51SXin LI       fail(message);
1696*4543ef51SXin LI     }
1697*4543ef51SXin LI 
1698*4543ef51SXin LI     XML_ParserReset(g_parser, NULL);
1699*4543ef51SXin LI   }
1700*4543ef51SXin LI }
1701*4543ef51SXin LI END_TEST
1702*4543ef51SXin LI 
1703*4543ef51SXin LI /* Test failures in UTF-16 CDATA */
START_TEST(test_bad_cdata_utf16)1704*4543ef51SXin LI START_TEST(test_bad_cdata_utf16) {
1705*4543ef51SXin LI   struct CaseData {
1706*4543ef51SXin LI     size_t text_bytes;
1707*4543ef51SXin LI     const char *text;
1708*4543ef51SXin LI     enum XML_Error expected_error;
1709*4543ef51SXin LI   };
1710*4543ef51SXin LI 
1711*4543ef51SXin LI   const char prolog[] = "\0<\0?\0x\0m\0l\0"
1712*4543ef51SXin LI                         " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
1713*4543ef51SXin LI                         " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0"
1714*4543ef51SXin LI                         "1\0"
1715*4543ef51SXin LI                         "6\0'"
1716*4543ef51SXin LI                         "\0?\0>\0\n"
1717*4543ef51SXin LI                         "\0<\0a\0>";
1718*4543ef51SXin LI   struct CaseData cases[] = {
1719*4543ef51SXin LI       {1, "\0", XML_ERROR_UNCLOSED_TOKEN},
1720*4543ef51SXin LI       {2, "\0<", XML_ERROR_UNCLOSED_TOKEN},
1721*4543ef51SXin LI       {3, "\0<\0", XML_ERROR_UNCLOSED_TOKEN},
1722*4543ef51SXin LI       {4, "\0<\0!", XML_ERROR_UNCLOSED_TOKEN},
1723*4543ef51SXin LI       {5, "\0<\0!\0", XML_ERROR_UNCLOSED_TOKEN},
1724*4543ef51SXin LI       {6, "\0<\0!\0[", XML_ERROR_UNCLOSED_TOKEN},
1725*4543ef51SXin LI       {7, "\0<\0!\0[\0", XML_ERROR_UNCLOSED_TOKEN},
1726*4543ef51SXin LI       {8, "\0<\0!\0[\0C", XML_ERROR_UNCLOSED_TOKEN},
1727*4543ef51SXin LI       {9, "\0<\0!\0[\0C\0", XML_ERROR_UNCLOSED_TOKEN},
1728*4543ef51SXin LI       {10, "\0<\0!\0[\0C\0D", XML_ERROR_UNCLOSED_TOKEN},
1729*4543ef51SXin LI       {11, "\0<\0!\0[\0C\0D\0", XML_ERROR_UNCLOSED_TOKEN},
1730*4543ef51SXin LI       {12, "\0<\0!\0[\0C\0D\0A", XML_ERROR_UNCLOSED_TOKEN},
1731*4543ef51SXin LI       {13, "\0<\0!\0[\0C\0D\0A\0", XML_ERROR_UNCLOSED_TOKEN},
1732*4543ef51SXin LI       {14, "\0<\0!\0[\0C\0D\0A\0T", XML_ERROR_UNCLOSED_TOKEN},
1733*4543ef51SXin LI       {15, "\0<\0!\0[\0C\0D\0A\0T\0", XML_ERROR_UNCLOSED_TOKEN},
1734*4543ef51SXin LI       {16, "\0<\0!\0[\0C\0D\0A\0T\0A", XML_ERROR_UNCLOSED_TOKEN},
1735*4543ef51SXin LI       {17, "\0<\0!\0[\0C\0D\0A\0T\0A\0", XML_ERROR_UNCLOSED_TOKEN},
1736*4543ef51SXin LI       {18, "\0<\0!\0[\0C\0D\0A\0T\0A\0[", XML_ERROR_UNCLOSED_CDATA_SECTION},
1737*4543ef51SXin LI       {19, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0", XML_ERROR_UNCLOSED_CDATA_SECTION},
1738*4543ef51SXin LI       {20, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z", XML_ERROR_UNCLOSED_CDATA_SECTION},
1739*4543ef51SXin LI       /* Now add a four-byte UTF-16 character */
1740*4543ef51SXin LI       {21, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8",
1741*4543ef51SXin LI        XML_ERROR_UNCLOSED_CDATA_SECTION},
1742*4543ef51SXin LI       {22, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8\x34", XML_ERROR_PARTIAL_CHAR},
1743*4543ef51SXin LI       {23, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8\x34\xdd",
1744*4543ef51SXin LI        XML_ERROR_PARTIAL_CHAR},
1745*4543ef51SXin LI       {24, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8\x34\xdd\x5e",
1746*4543ef51SXin LI        XML_ERROR_UNCLOSED_CDATA_SECTION}};
1747*4543ef51SXin LI   size_t i;
1748*4543ef51SXin LI 
1749*4543ef51SXin LI   for (i = 0; i < sizeof(cases) / sizeof(struct CaseData); i++) {
1750*4543ef51SXin LI     set_subtest("case %lu", (long unsigned)(i + 1));
1751*4543ef51SXin LI     enum XML_Status actual_status;
1752*4543ef51SXin LI     enum XML_Error actual_error;
1753*4543ef51SXin LI 
1754*4543ef51SXin LI     if (_XML_Parse_SINGLE_BYTES(g_parser, prolog, (int)sizeof(prolog) - 1,
1755*4543ef51SXin LI                                 XML_FALSE)
1756*4543ef51SXin LI         == XML_STATUS_ERROR)
1757*4543ef51SXin LI       xml_failure(g_parser);
1758*4543ef51SXin LI     actual_status = _XML_Parse_SINGLE_BYTES(g_parser, cases[i].text,
1759*4543ef51SXin LI                                             (int)cases[i].text_bytes, XML_TRUE);
1760*4543ef51SXin LI     assert(actual_status == XML_STATUS_ERROR);
1761*4543ef51SXin LI     actual_error = XML_GetErrorCode(g_parser);
1762*4543ef51SXin LI     if (actual_error != cases[i].expected_error) {
1763*4543ef51SXin LI       char message[1024];
1764*4543ef51SXin LI 
1765*4543ef51SXin LI       snprintf(message, sizeof(message),
1766*4543ef51SXin LI                "Expected error %d (%" XML_FMT_STR "), got %d (%" XML_FMT_STR
1767*4543ef51SXin LI                ") for case %lu\n",
1768*4543ef51SXin LI                cases[i].expected_error,
1769*4543ef51SXin LI                XML_ErrorString(cases[i].expected_error), actual_error,
1770*4543ef51SXin LI                XML_ErrorString(actual_error), (long unsigned)(i + 1));
1771*4543ef51SXin LI       fail(message);
1772*4543ef51SXin LI     }
1773*4543ef51SXin LI     XML_ParserReset(g_parser, NULL);
1774*4543ef51SXin LI   }
1775*4543ef51SXin LI }
1776*4543ef51SXin LI END_TEST
1777*4543ef51SXin LI 
1778*4543ef51SXin LI /* Test stopping the parser in cdata handler */
START_TEST(test_stop_parser_between_cdata_calls)1779*4543ef51SXin LI START_TEST(test_stop_parser_between_cdata_calls) {
1780*4543ef51SXin LI   const char *text = long_cdata_text;
1781*4543ef51SXin LI 
1782*4543ef51SXin LI   XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
1783*4543ef51SXin LI   g_resumable = XML_FALSE;
1784*4543ef51SXin LI   expect_failure(text, XML_ERROR_ABORTED, "Parse not aborted in CDATA handler");
1785*4543ef51SXin LI }
1786*4543ef51SXin LI END_TEST
1787*4543ef51SXin LI 
1788*4543ef51SXin LI /* Test suspending the parser in cdata handler */
START_TEST(test_suspend_parser_between_cdata_calls)1789*4543ef51SXin LI START_TEST(test_suspend_parser_between_cdata_calls) {
1790*4543ef51SXin LI   const char *text = long_cdata_text;
1791*4543ef51SXin LI   enum XML_Status result;
1792*4543ef51SXin LI 
1793*4543ef51SXin LI   XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
1794*4543ef51SXin LI   g_resumable = XML_TRUE;
1795*4543ef51SXin LI   result = _XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE);
1796*4543ef51SXin LI   if (result != XML_STATUS_SUSPENDED) {
1797*4543ef51SXin LI     if (result == XML_STATUS_ERROR)
1798*4543ef51SXin LI       xml_failure(g_parser);
1799*4543ef51SXin LI     fail("Parse not suspended in CDATA handler");
1800*4543ef51SXin LI   }
1801*4543ef51SXin LI   if (XML_GetErrorCode(g_parser) != XML_ERROR_NONE)
1802*4543ef51SXin LI     xml_failure(g_parser);
1803*4543ef51SXin LI }
1804*4543ef51SXin LI END_TEST
1805*4543ef51SXin LI 
1806*4543ef51SXin LI /* Test memory allocation functions */
START_TEST(test_memory_allocation)1807*4543ef51SXin LI START_TEST(test_memory_allocation) {
1808*4543ef51SXin LI   char *buffer = (char *)XML_MemMalloc(g_parser, 256);
1809*4543ef51SXin LI   char *p;
1810*4543ef51SXin LI 
1811*4543ef51SXin LI   if (buffer == NULL) {
1812*4543ef51SXin LI     fail("Allocation failed");
1813*4543ef51SXin LI   } else {
1814*4543ef51SXin LI     /* Try writing to memory; some OSes try to cheat! */
1815*4543ef51SXin LI     buffer[0] = 'T';
1816*4543ef51SXin LI     buffer[1] = 'E';
1817*4543ef51SXin LI     buffer[2] = 'S';
1818*4543ef51SXin LI     buffer[3] = 'T';
1819*4543ef51SXin LI     buffer[4] = '\0';
1820*4543ef51SXin LI     if (strcmp(buffer, "TEST") != 0) {
1821*4543ef51SXin LI       fail("Memory not writable");
1822*4543ef51SXin LI     } else {
1823*4543ef51SXin LI       p = (char *)XML_MemRealloc(g_parser, buffer, 512);
1824*4543ef51SXin LI       if (p == NULL) {
1825*4543ef51SXin LI         fail("Reallocation failed");
1826*4543ef51SXin LI       } else {
1827*4543ef51SXin LI         /* Write again, just to be sure */
1828*4543ef51SXin LI         buffer = p;
1829*4543ef51SXin LI         buffer[0] = 'V';
1830*4543ef51SXin LI         if (strcmp(buffer, "VEST") != 0) {
1831*4543ef51SXin LI           fail("Reallocated memory not writable");
1832*4543ef51SXin LI         }
1833*4543ef51SXin LI       }
1834*4543ef51SXin LI     }
1835*4543ef51SXin LI     XML_MemFree(g_parser, buffer);
1836*4543ef51SXin LI   }
1837*4543ef51SXin LI }
1838*4543ef51SXin LI END_TEST
1839*4543ef51SXin LI 
1840*4543ef51SXin LI /* Test XML_DefaultCurrent() passes handling on correctly */
START_TEST(test_default_current)1841*4543ef51SXin LI START_TEST(test_default_current) {
1842*4543ef51SXin LI   const char *text = "<doc>hell]</doc>";
1843*4543ef51SXin LI   const char *entity_text = "<!DOCTYPE doc [\n"
1844*4543ef51SXin LI                             "<!ENTITY entity '&#37;'>\n"
1845*4543ef51SXin LI                             "]>\n"
1846*4543ef51SXin LI                             "<doc>&entity;</doc>";
1847*4543ef51SXin LI 
1848*4543ef51SXin LI   set_subtest("with defaulting");
1849*4543ef51SXin LI   {
1850*4543ef51SXin LI     struct handler_record_list storage;
1851*4543ef51SXin LI     storage.count = 0;
1852*4543ef51SXin LI     XML_SetDefaultHandler(g_parser, record_default_handler);
1853*4543ef51SXin LI     XML_SetCharacterDataHandler(g_parser, record_cdata_handler);
1854*4543ef51SXin LI     XML_SetUserData(g_parser, &storage);
1855*4543ef51SXin LI     if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1856*4543ef51SXin LI         == XML_STATUS_ERROR)
1857*4543ef51SXin LI       xml_failure(g_parser);
1858*4543ef51SXin LI     int i = 0;
1859*4543ef51SXin LI     assert_record_handler_called(&storage, i++, "record_default_handler", 5);
1860*4543ef51SXin LI     // we should have gotten one or more cdata callbacks, totaling 5 chars
1861*4543ef51SXin LI     int cdata_len_remaining = 5;
1862*4543ef51SXin LI     while (cdata_len_remaining > 0) {
1863*4543ef51SXin LI       const struct handler_record_entry *c_entry
1864*4543ef51SXin LI           = handler_record_get(&storage, i++);
1865*4543ef51SXin LI       assert_true(strcmp(c_entry->name, "record_cdata_handler") == 0);
1866*4543ef51SXin LI       assert_true(c_entry->arg > 0);
1867*4543ef51SXin LI       assert_true(c_entry->arg <= cdata_len_remaining);
1868*4543ef51SXin LI       cdata_len_remaining -= c_entry->arg;
1869*4543ef51SXin LI       // default handler must follow, with the exact same len argument.
1870*4543ef51SXin LI       assert_record_handler_called(&storage, i++, "record_default_handler",
1871*4543ef51SXin LI                                    c_entry->arg);
1872*4543ef51SXin LI     }
1873*4543ef51SXin LI     assert_record_handler_called(&storage, i++, "record_default_handler", 6);
1874*4543ef51SXin LI     assert_true(storage.count == i);
1875*4543ef51SXin LI   }
1876*4543ef51SXin LI 
1877*4543ef51SXin LI   /* Again, without the defaulting */
1878*4543ef51SXin LI   set_subtest("no defaulting");
1879*4543ef51SXin LI   {
1880*4543ef51SXin LI     struct handler_record_list storage;
1881*4543ef51SXin LI     storage.count = 0;
1882*4543ef51SXin LI     XML_ParserReset(g_parser, NULL);
1883*4543ef51SXin LI     XML_SetDefaultHandler(g_parser, record_default_handler);
1884*4543ef51SXin LI     XML_SetCharacterDataHandler(g_parser, record_cdata_nodefault_handler);
1885*4543ef51SXin LI     XML_SetUserData(g_parser, &storage);
1886*4543ef51SXin LI     if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1887*4543ef51SXin LI         == XML_STATUS_ERROR)
1888*4543ef51SXin LI       xml_failure(g_parser);
1889*4543ef51SXin LI     int i = 0;
1890*4543ef51SXin LI     assert_record_handler_called(&storage, i++, "record_default_handler", 5);
1891*4543ef51SXin LI     // we should have gotten one or more cdata callbacks, totaling 5 chars
1892*4543ef51SXin LI     int cdata_len_remaining = 5;
1893*4543ef51SXin LI     while (cdata_len_remaining > 0) {
1894*4543ef51SXin LI       const struct handler_record_entry *c_entry
1895*4543ef51SXin LI           = handler_record_get(&storage, i++);
1896*4543ef51SXin LI       assert_true(strcmp(c_entry->name, "record_cdata_nodefault_handler") == 0);
1897*4543ef51SXin LI       assert_true(c_entry->arg > 0);
1898*4543ef51SXin LI       assert_true(c_entry->arg <= cdata_len_remaining);
1899*4543ef51SXin LI       cdata_len_remaining -= c_entry->arg;
1900*4543ef51SXin LI     }
1901*4543ef51SXin LI     assert_record_handler_called(&storage, i++, "record_default_handler", 6);
1902*4543ef51SXin LI     assert_true(storage.count == i);
1903*4543ef51SXin LI   }
1904*4543ef51SXin LI 
1905*4543ef51SXin LI   /* Now with an internal entity to complicate matters */
1906*4543ef51SXin LI   set_subtest("with internal entity");
1907*4543ef51SXin LI   {
1908*4543ef51SXin LI     struct handler_record_list storage;
1909*4543ef51SXin LI     storage.count = 0;
1910*4543ef51SXin LI     XML_ParserReset(g_parser, NULL);
1911*4543ef51SXin LI     XML_SetDefaultHandler(g_parser, record_default_handler);
1912*4543ef51SXin LI     XML_SetCharacterDataHandler(g_parser, record_cdata_handler);
1913*4543ef51SXin LI     XML_SetUserData(g_parser, &storage);
1914*4543ef51SXin LI     if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text),
1915*4543ef51SXin LI                                 XML_TRUE)
1916*4543ef51SXin LI         == XML_STATUS_ERROR)
1917*4543ef51SXin LI       xml_failure(g_parser);
1918*4543ef51SXin LI     /* The default handler suppresses the entity */
1919*4543ef51SXin LI     assert_record_handler_called(&storage, 0, "record_default_handler", 9);
1920*4543ef51SXin LI     assert_record_handler_called(&storage, 1, "record_default_handler", 1);
1921*4543ef51SXin LI     assert_record_handler_called(&storage, 2, "record_default_handler", 3);
1922*4543ef51SXin LI     assert_record_handler_called(&storage, 3, "record_default_handler", 1);
1923*4543ef51SXin LI     assert_record_handler_called(&storage, 4, "record_default_handler", 1);
1924*4543ef51SXin LI     assert_record_handler_called(&storage, 5, "record_default_handler", 1);
1925*4543ef51SXin LI     assert_record_handler_called(&storage, 6, "record_default_handler", 8);
1926*4543ef51SXin LI     assert_record_handler_called(&storage, 7, "record_default_handler", 1);
1927*4543ef51SXin LI     assert_record_handler_called(&storage, 8, "record_default_handler", 6);
1928*4543ef51SXin LI     assert_record_handler_called(&storage, 9, "record_default_handler", 1);
1929*4543ef51SXin LI     assert_record_handler_called(&storage, 10, "record_default_handler", 7);
1930*4543ef51SXin LI     assert_record_handler_called(&storage, 11, "record_default_handler", 1);
1931*4543ef51SXin LI     assert_record_handler_called(&storage, 12, "record_default_handler", 1);
1932*4543ef51SXin LI     assert_record_handler_called(&storage, 13, "record_default_handler", 1);
1933*4543ef51SXin LI     assert_record_handler_called(&storage, 14, "record_default_handler", 1);
1934*4543ef51SXin LI     assert_record_handler_called(&storage, 15, "record_default_handler", 1);
1935*4543ef51SXin LI     assert_record_handler_called(&storage, 16, "record_default_handler", 5);
1936*4543ef51SXin LI     assert_record_handler_called(&storage, 17, "record_default_handler", 8);
1937*4543ef51SXin LI     assert_record_handler_called(&storage, 18, "record_default_handler", 6);
1938*4543ef51SXin LI     assert_true(storage.count == 19);
1939*4543ef51SXin LI   }
1940*4543ef51SXin LI 
1941*4543ef51SXin LI   /* Again, with a skip handler */
1942*4543ef51SXin LI   set_subtest("with skip handler");
1943*4543ef51SXin LI   {
1944*4543ef51SXin LI     struct handler_record_list storage;
1945*4543ef51SXin LI     storage.count = 0;
1946*4543ef51SXin LI     XML_ParserReset(g_parser, NULL);
1947*4543ef51SXin LI     XML_SetDefaultHandler(g_parser, record_default_handler);
1948*4543ef51SXin LI     XML_SetCharacterDataHandler(g_parser, record_cdata_handler);
1949*4543ef51SXin LI     XML_SetSkippedEntityHandler(g_parser, record_skip_handler);
1950*4543ef51SXin LI     XML_SetUserData(g_parser, &storage);
1951*4543ef51SXin LI     if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text),
1952*4543ef51SXin LI                                 XML_TRUE)
1953*4543ef51SXin LI         == XML_STATUS_ERROR)
1954*4543ef51SXin LI       xml_failure(g_parser);
1955*4543ef51SXin LI     /* The default handler suppresses the entity */
1956*4543ef51SXin LI     assert_record_handler_called(&storage, 0, "record_default_handler", 9);
1957*4543ef51SXin LI     assert_record_handler_called(&storage, 1, "record_default_handler", 1);
1958*4543ef51SXin LI     assert_record_handler_called(&storage, 2, "record_default_handler", 3);
1959*4543ef51SXin LI     assert_record_handler_called(&storage, 3, "record_default_handler", 1);
1960*4543ef51SXin LI     assert_record_handler_called(&storage, 4, "record_default_handler", 1);
1961*4543ef51SXin LI     assert_record_handler_called(&storage, 5, "record_default_handler", 1);
1962*4543ef51SXin LI     assert_record_handler_called(&storage, 6, "record_default_handler", 8);
1963*4543ef51SXin LI     assert_record_handler_called(&storage, 7, "record_default_handler", 1);
1964*4543ef51SXin LI     assert_record_handler_called(&storage, 8, "record_default_handler", 6);
1965*4543ef51SXin LI     assert_record_handler_called(&storage, 9, "record_default_handler", 1);
1966*4543ef51SXin LI     assert_record_handler_called(&storage, 10, "record_default_handler", 7);
1967*4543ef51SXin LI     assert_record_handler_called(&storage, 11, "record_default_handler", 1);
1968*4543ef51SXin LI     assert_record_handler_called(&storage, 12, "record_default_handler", 1);
1969*4543ef51SXin LI     assert_record_handler_called(&storage, 13, "record_default_handler", 1);
1970*4543ef51SXin LI     assert_record_handler_called(&storage, 14, "record_default_handler", 1);
1971*4543ef51SXin LI     assert_record_handler_called(&storage, 15, "record_default_handler", 1);
1972*4543ef51SXin LI     assert_record_handler_called(&storage, 16, "record_default_handler", 5);
1973*4543ef51SXin LI     assert_record_handler_called(&storage, 17, "record_skip_handler", 0);
1974*4543ef51SXin LI     assert_record_handler_called(&storage, 18, "record_default_handler", 6);
1975*4543ef51SXin LI     assert_true(storage.count == 19);
1976*4543ef51SXin LI   }
1977*4543ef51SXin LI 
1978*4543ef51SXin LI   /* This time, allow the entity through */
1979*4543ef51SXin LI   set_subtest("allow entity");
1980*4543ef51SXin LI   {
1981*4543ef51SXin LI     struct handler_record_list storage;
1982*4543ef51SXin LI     storage.count = 0;
1983*4543ef51SXin LI     XML_ParserReset(g_parser, NULL);
1984*4543ef51SXin LI     XML_SetDefaultHandlerExpand(g_parser, record_default_handler);
1985*4543ef51SXin LI     XML_SetCharacterDataHandler(g_parser, record_cdata_handler);
1986*4543ef51SXin LI     XML_SetUserData(g_parser, &storage);
1987*4543ef51SXin LI     if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text),
1988*4543ef51SXin LI                                 XML_TRUE)
1989*4543ef51SXin LI         == XML_STATUS_ERROR)
1990*4543ef51SXin LI       xml_failure(g_parser);
1991*4543ef51SXin LI     assert_record_handler_called(&storage, 0, "record_default_handler", 9);
1992*4543ef51SXin LI     assert_record_handler_called(&storage, 1, "record_default_handler", 1);
1993*4543ef51SXin LI     assert_record_handler_called(&storage, 2, "record_default_handler", 3);
1994*4543ef51SXin LI     assert_record_handler_called(&storage, 3, "record_default_handler", 1);
1995*4543ef51SXin LI     assert_record_handler_called(&storage, 4, "record_default_handler", 1);
1996*4543ef51SXin LI     assert_record_handler_called(&storage, 5, "record_default_handler", 1);
1997*4543ef51SXin LI     assert_record_handler_called(&storage, 6, "record_default_handler", 8);
1998*4543ef51SXin LI     assert_record_handler_called(&storage, 7, "record_default_handler", 1);
1999*4543ef51SXin LI     assert_record_handler_called(&storage, 8, "record_default_handler", 6);
2000*4543ef51SXin LI     assert_record_handler_called(&storage, 9, "record_default_handler", 1);
2001*4543ef51SXin LI     assert_record_handler_called(&storage, 10, "record_default_handler", 7);
2002*4543ef51SXin LI     assert_record_handler_called(&storage, 11, "record_default_handler", 1);
2003*4543ef51SXin LI     assert_record_handler_called(&storage, 12, "record_default_handler", 1);
2004*4543ef51SXin LI     assert_record_handler_called(&storage, 13, "record_default_handler", 1);
2005*4543ef51SXin LI     assert_record_handler_called(&storage, 14, "record_default_handler", 1);
2006*4543ef51SXin LI     assert_record_handler_called(&storage, 15, "record_default_handler", 1);
2007*4543ef51SXin LI     assert_record_handler_called(&storage, 16, "record_default_handler", 5);
2008*4543ef51SXin LI     assert_record_handler_called(&storage, 17, "record_cdata_handler", 1);
2009*4543ef51SXin LI     assert_record_handler_called(&storage, 18, "record_default_handler", 1);
2010*4543ef51SXin LI     assert_record_handler_called(&storage, 19, "record_default_handler", 6);
2011*4543ef51SXin LI     assert_true(storage.count == 20);
2012*4543ef51SXin LI   }
2013*4543ef51SXin LI 
2014*4543ef51SXin LI   /* Finally, without passing the cdata to the default handler */
2015*4543ef51SXin LI   set_subtest("not passing cdata");
2016*4543ef51SXin LI   {
2017*4543ef51SXin LI     struct handler_record_list storage;
2018*4543ef51SXin LI     storage.count = 0;
2019*4543ef51SXin LI     XML_ParserReset(g_parser, NULL);
2020*4543ef51SXin LI     XML_SetDefaultHandlerExpand(g_parser, record_default_handler);
2021*4543ef51SXin LI     XML_SetCharacterDataHandler(g_parser, record_cdata_nodefault_handler);
2022*4543ef51SXin LI     XML_SetUserData(g_parser, &storage);
2023*4543ef51SXin LI     if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text),
2024*4543ef51SXin LI                                 XML_TRUE)
2025*4543ef51SXin LI         == XML_STATUS_ERROR)
2026*4543ef51SXin LI       xml_failure(g_parser);
2027*4543ef51SXin LI     assert_record_handler_called(&storage, 0, "record_default_handler", 9);
2028*4543ef51SXin LI     assert_record_handler_called(&storage, 1, "record_default_handler", 1);
2029*4543ef51SXin LI     assert_record_handler_called(&storage, 2, "record_default_handler", 3);
2030*4543ef51SXin LI     assert_record_handler_called(&storage, 3, "record_default_handler", 1);
2031*4543ef51SXin LI     assert_record_handler_called(&storage, 4, "record_default_handler", 1);
2032*4543ef51SXin LI     assert_record_handler_called(&storage, 5, "record_default_handler", 1);
2033*4543ef51SXin LI     assert_record_handler_called(&storage, 6, "record_default_handler", 8);
2034*4543ef51SXin LI     assert_record_handler_called(&storage, 7, "record_default_handler", 1);
2035*4543ef51SXin LI     assert_record_handler_called(&storage, 8, "record_default_handler", 6);
2036*4543ef51SXin LI     assert_record_handler_called(&storage, 9, "record_default_handler", 1);
2037*4543ef51SXin LI     assert_record_handler_called(&storage, 10, "record_default_handler", 7);
2038*4543ef51SXin LI     assert_record_handler_called(&storage, 11, "record_default_handler", 1);
2039*4543ef51SXin LI     assert_record_handler_called(&storage, 12, "record_default_handler", 1);
2040*4543ef51SXin LI     assert_record_handler_called(&storage, 13, "record_default_handler", 1);
2041*4543ef51SXin LI     assert_record_handler_called(&storage, 14, "record_default_handler", 1);
2042*4543ef51SXin LI     assert_record_handler_called(&storage, 15, "record_default_handler", 1);
2043*4543ef51SXin LI     assert_record_handler_called(&storage, 16, "record_default_handler", 5);
2044*4543ef51SXin LI     assert_record_handler_called(&storage, 17, "record_cdata_nodefault_handler",
2045*4543ef51SXin LI                                  1);
2046*4543ef51SXin LI     assert_record_handler_called(&storage, 18, "record_default_handler", 6);
2047*4543ef51SXin LI     assert_true(storage.count == 19);
2048*4543ef51SXin LI   }
2049*4543ef51SXin LI }
2050*4543ef51SXin LI END_TEST
2051*4543ef51SXin LI 
2052*4543ef51SXin LI /* Test DTD element parsing code paths */
START_TEST(test_dtd_elements)2053*4543ef51SXin LI START_TEST(test_dtd_elements) {
2054*4543ef51SXin LI   const char *text = "<!DOCTYPE doc [\n"
2055*4543ef51SXin LI                      "<!ELEMENT doc (chapter)>\n"
2056*4543ef51SXin LI                      "<!ELEMENT chapter (#PCDATA)>\n"
2057*4543ef51SXin LI                      "]>\n"
2058*4543ef51SXin LI                      "<doc><chapter>Wombats are go</chapter></doc>";
2059*4543ef51SXin LI 
2060*4543ef51SXin LI   XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
2061*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2062*4543ef51SXin LI       == XML_STATUS_ERROR)
2063*4543ef51SXin LI     xml_failure(g_parser);
2064*4543ef51SXin LI }
2065*4543ef51SXin LI END_TEST
2066*4543ef51SXin LI 
2067*4543ef51SXin LI static void XMLCALL
element_decl_check_model(void * userData,const XML_Char * name,XML_Content * model)2068*4543ef51SXin LI element_decl_check_model(void *userData, const XML_Char *name,
2069*4543ef51SXin LI                          XML_Content *model) {
2070*4543ef51SXin LI   UNUSED_P(userData);
2071*4543ef51SXin LI   uint32_t errorFlags = 0;
2072*4543ef51SXin LI 
2073*4543ef51SXin LI   /* Expected model array structure is this:
2074*4543ef51SXin LI    * [0] (type 6, quant 0)
2075*4543ef51SXin LI    *   [1] (type 5, quant 0)
2076*4543ef51SXin LI    *     [3] (type 4, quant 0, name "bar")
2077*4543ef51SXin LI    *     [4] (type 4, quant 0, name "foo")
2078*4543ef51SXin LI    *     [5] (type 4, quant 3, name "xyz")
2079*4543ef51SXin LI    *   [2] (type 4, quant 2, name "zebra")
2080*4543ef51SXin LI    */
2081*4543ef51SXin LI   errorFlags |= ((xcstrcmp(name, XCS("junk")) == 0) ? 0 : (1u << 0));
2082*4543ef51SXin LI   errorFlags |= ((model != NULL) ? 0 : (1u << 1));
2083*4543ef51SXin LI 
2084*4543ef51SXin LI   if (model != NULL) {
2085*4543ef51SXin LI     errorFlags |= ((model[0].type == XML_CTYPE_SEQ) ? 0 : (1u << 2));
2086*4543ef51SXin LI     errorFlags |= ((model[0].quant == XML_CQUANT_NONE) ? 0 : (1u << 3));
2087*4543ef51SXin LI     errorFlags |= ((model[0].numchildren == 2) ? 0 : (1u << 4));
2088*4543ef51SXin LI     errorFlags |= ((model[0].children == &model[1]) ? 0 : (1u << 5));
2089*4543ef51SXin LI     errorFlags |= ((model[0].name == NULL) ? 0 : (1u << 6));
2090*4543ef51SXin LI 
2091*4543ef51SXin LI     errorFlags |= ((model[1].type == XML_CTYPE_CHOICE) ? 0 : (1u << 7));
2092*4543ef51SXin LI     errorFlags |= ((model[1].quant == XML_CQUANT_NONE) ? 0 : (1u << 8));
2093*4543ef51SXin LI     errorFlags |= ((model[1].numchildren == 3) ? 0 : (1u << 9));
2094*4543ef51SXin LI     errorFlags |= ((model[1].children == &model[3]) ? 0 : (1u << 10));
2095*4543ef51SXin LI     errorFlags |= ((model[1].name == NULL) ? 0 : (1u << 11));
2096*4543ef51SXin LI 
2097*4543ef51SXin LI     errorFlags |= ((model[2].type == XML_CTYPE_NAME) ? 0 : (1u << 12));
2098*4543ef51SXin LI     errorFlags |= ((model[2].quant == XML_CQUANT_REP) ? 0 : (1u << 13));
2099*4543ef51SXin LI     errorFlags |= ((model[2].numchildren == 0) ? 0 : (1u << 14));
2100*4543ef51SXin LI     errorFlags |= ((model[2].children == NULL) ? 0 : (1u << 15));
2101*4543ef51SXin LI     errorFlags
2102*4543ef51SXin LI         |= ((xcstrcmp(model[2].name, XCS("zebra")) == 0) ? 0 : (1u << 16));
2103*4543ef51SXin LI 
2104*4543ef51SXin LI     errorFlags |= ((model[3].type == XML_CTYPE_NAME) ? 0 : (1u << 17));
2105*4543ef51SXin LI     errorFlags |= ((model[3].quant == XML_CQUANT_NONE) ? 0 : (1u << 18));
2106*4543ef51SXin LI     errorFlags |= ((model[3].numchildren == 0) ? 0 : (1u << 19));
2107*4543ef51SXin LI     errorFlags |= ((model[3].children == NULL) ? 0 : (1u << 20));
2108*4543ef51SXin LI     errorFlags |= ((xcstrcmp(model[3].name, XCS("bar")) == 0) ? 0 : (1u << 21));
2109*4543ef51SXin LI 
2110*4543ef51SXin LI     errorFlags |= ((model[4].type == XML_CTYPE_NAME) ? 0 : (1u << 22));
2111*4543ef51SXin LI     errorFlags |= ((model[4].quant == XML_CQUANT_NONE) ? 0 : (1u << 23));
2112*4543ef51SXin LI     errorFlags |= ((model[4].numchildren == 0) ? 0 : (1u << 24));
2113*4543ef51SXin LI     errorFlags |= ((model[4].children == NULL) ? 0 : (1u << 25));
2114*4543ef51SXin LI     errorFlags |= ((xcstrcmp(model[4].name, XCS("foo")) == 0) ? 0 : (1u << 26));
2115*4543ef51SXin LI 
2116*4543ef51SXin LI     errorFlags |= ((model[5].type == XML_CTYPE_NAME) ? 0 : (1u << 27));
2117*4543ef51SXin LI     errorFlags |= ((model[5].quant == XML_CQUANT_PLUS) ? 0 : (1u << 28));
2118*4543ef51SXin LI     errorFlags |= ((model[5].numchildren == 0) ? 0 : (1u << 29));
2119*4543ef51SXin LI     errorFlags |= ((model[5].children == NULL) ? 0 : (1u << 30));
2120*4543ef51SXin LI     errorFlags |= ((xcstrcmp(model[5].name, XCS("xyz")) == 0) ? 0 : (1u << 31));
2121*4543ef51SXin LI   }
2122*4543ef51SXin LI 
2123*4543ef51SXin LI   XML_SetUserData(g_parser, (void *)(uintptr_t)errorFlags);
2124*4543ef51SXin LI   XML_FreeContentModel(g_parser, model);
2125*4543ef51SXin LI }
2126*4543ef51SXin LI 
START_TEST(test_dtd_elements_nesting)2127*4543ef51SXin LI START_TEST(test_dtd_elements_nesting) {
2128*4543ef51SXin LI   // Payload inspired by a test in Perl's XML::Parser
2129*4543ef51SXin LI   const char *text = "<!DOCTYPE foo [\n"
2130*4543ef51SXin LI                      "<!ELEMENT junk ((bar|foo|xyz+), zebra*)>\n"
2131*4543ef51SXin LI                      "]>\n"
2132*4543ef51SXin LI                      "<foo/>";
2133*4543ef51SXin LI 
2134*4543ef51SXin LI   XML_SetUserData(g_parser, (void *)(uintptr_t)-1);
2135*4543ef51SXin LI 
2136*4543ef51SXin LI   XML_SetElementDeclHandler(g_parser, element_decl_check_model);
2137*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2138*4543ef51SXin LI       == XML_STATUS_ERROR)
2139*4543ef51SXin LI     xml_failure(g_parser);
2140*4543ef51SXin LI 
2141*4543ef51SXin LI   if ((uint32_t)(uintptr_t)XML_GetUserData(g_parser) != 0)
2142*4543ef51SXin LI     fail("Element declaration model regression detected");
2143*4543ef51SXin LI }
2144*4543ef51SXin LI END_TEST
2145*4543ef51SXin LI 
2146*4543ef51SXin LI /* Test foreign DTD handling */
START_TEST(test_set_foreign_dtd)2147*4543ef51SXin LI START_TEST(test_set_foreign_dtd) {
2148*4543ef51SXin LI   const char *text1 = "<?xml version='1.0' encoding='us-ascii'?>\n";
2149*4543ef51SXin LI   const char *text2 = "<doc>&entity;</doc>";
2150*4543ef51SXin LI   ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
2151*4543ef51SXin LI 
2152*4543ef51SXin LI   /* Check hash salt is passed through too */
2153*4543ef51SXin LI   XML_SetHashSalt(g_parser, 0x12345678);
2154*4543ef51SXin LI   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2155*4543ef51SXin LI   XML_SetUserData(g_parser, &test_data);
2156*4543ef51SXin LI   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
2157*4543ef51SXin LI   /* Add a default handler to exercise more code paths */
2158*4543ef51SXin LI   XML_SetDefaultHandler(g_parser, dummy_default_handler);
2159*4543ef51SXin LI   if (XML_UseForeignDTD(g_parser, XML_TRUE) != XML_ERROR_NONE)
2160*4543ef51SXin LI     fail("Could not set foreign DTD");
2161*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text1, (int)strlen(text1), XML_FALSE)
2162*4543ef51SXin LI       == XML_STATUS_ERROR)
2163*4543ef51SXin LI     xml_failure(g_parser);
2164*4543ef51SXin LI 
2165*4543ef51SXin LI   /* Ensure that trying to set the DTD after parsing has started
2166*4543ef51SXin LI    * is faulted, even if it's the same setting.
2167*4543ef51SXin LI    */
2168*4543ef51SXin LI   if (XML_UseForeignDTD(g_parser, XML_TRUE)
2169*4543ef51SXin LI       != XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING)
2170*4543ef51SXin LI     fail("Failed to reject late foreign DTD setting");
2171*4543ef51SXin LI   /* Ditto for the hash salt */
2172*4543ef51SXin LI   if (XML_SetHashSalt(g_parser, 0x23456789))
2173*4543ef51SXin LI     fail("Failed to reject late hash salt change");
2174*4543ef51SXin LI 
2175*4543ef51SXin LI   /* Now finish the parse */
2176*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text2, (int)strlen(text2), XML_TRUE)
2177*4543ef51SXin LI       == XML_STATUS_ERROR)
2178*4543ef51SXin LI     xml_failure(g_parser);
2179*4543ef51SXin LI }
2180*4543ef51SXin LI END_TEST
2181*4543ef51SXin LI 
2182*4543ef51SXin LI /* Test foreign DTD handling with a failing NotStandalone handler */
START_TEST(test_foreign_dtd_not_standalone)2183*4543ef51SXin LI START_TEST(test_foreign_dtd_not_standalone) {
2184*4543ef51SXin LI   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2185*4543ef51SXin LI                      "<doc>&entity;</doc>";
2186*4543ef51SXin LI   ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
2187*4543ef51SXin LI 
2188*4543ef51SXin LI   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2189*4543ef51SXin LI   XML_SetUserData(g_parser, &test_data);
2190*4543ef51SXin LI   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
2191*4543ef51SXin LI   XML_SetNotStandaloneHandler(g_parser, reject_not_standalone_handler);
2192*4543ef51SXin LI   if (XML_UseForeignDTD(g_parser, XML_TRUE) != XML_ERROR_NONE)
2193*4543ef51SXin LI     fail("Could not set foreign DTD");
2194*4543ef51SXin LI   expect_failure(text, XML_ERROR_NOT_STANDALONE,
2195*4543ef51SXin LI                  "NotStandalonehandler failed to reject");
2196*4543ef51SXin LI }
2197*4543ef51SXin LI END_TEST
2198*4543ef51SXin LI 
2199*4543ef51SXin LI /* Test invalid character in a foreign DTD is faulted */
START_TEST(test_invalid_foreign_dtd)2200*4543ef51SXin LI START_TEST(test_invalid_foreign_dtd) {
2201*4543ef51SXin LI   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2202*4543ef51SXin LI                      "<doc>&entity;</doc>";
2203*4543ef51SXin LI   ExtFaults test_data
2204*4543ef51SXin LI       = {"$", "Dollar not faulted", NULL, XML_ERROR_INVALID_TOKEN};
2205*4543ef51SXin LI 
2206*4543ef51SXin LI   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2207*4543ef51SXin LI   XML_SetUserData(g_parser, &test_data);
2208*4543ef51SXin LI   XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
2209*4543ef51SXin LI   XML_UseForeignDTD(g_parser, XML_TRUE);
2210*4543ef51SXin LI   expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
2211*4543ef51SXin LI                  "Bad DTD should not have been accepted");
2212*4543ef51SXin LI }
2213*4543ef51SXin LI END_TEST
2214*4543ef51SXin LI 
2215*4543ef51SXin LI /* Test foreign DTD use with a doctype */
START_TEST(test_foreign_dtd_with_doctype)2216*4543ef51SXin LI START_TEST(test_foreign_dtd_with_doctype) {
2217*4543ef51SXin LI   const char *text1 = "<?xml version='1.0' encoding='us-ascii'?>\n"
2218*4543ef51SXin LI                       "<!DOCTYPE doc [<!ENTITY entity 'hello world'>]>\n";
2219*4543ef51SXin LI   const char *text2 = "<doc>&entity;</doc>";
2220*4543ef51SXin LI   ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
2221*4543ef51SXin LI 
2222*4543ef51SXin LI   /* Check hash salt is passed through too */
2223*4543ef51SXin LI   XML_SetHashSalt(g_parser, 0x12345678);
2224*4543ef51SXin LI   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2225*4543ef51SXin LI   XML_SetUserData(g_parser, &test_data);
2226*4543ef51SXin LI   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
2227*4543ef51SXin LI   /* Add a default handler to exercise more code paths */
2228*4543ef51SXin LI   XML_SetDefaultHandler(g_parser, dummy_default_handler);
2229*4543ef51SXin LI   if (XML_UseForeignDTD(g_parser, XML_TRUE) != XML_ERROR_NONE)
2230*4543ef51SXin LI     fail("Could not set foreign DTD");
2231*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text1, (int)strlen(text1), XML_FALSE)
2232*4543ef51SXin LI       == XML_STATUS_ERROR)
2233*4543ef51SXin LI     xml_failure(g_parser);
2234*4543ef51SXin LI 
2235*4543ef51SXin LI   /* Ensure that trying to set the DTD after parsing has started
2236*4543ef51SXin LI    * is faulted, even if it's the same setting.
2237*4543ef51SXin LI    */
2238*4543ef51SXin LI   if (XML_UseForeignDTD(g_parser, XML_TRUE)
2239*4543ef51SXin LI       != XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING)
2240*4543ef51SXin LI     fail("Failed to reject late foreign DTD setting");
2241*4543ef51SXin LI   /* Ditto for the hash salt */
2242*4543ef51SXin LI   if (XML_SetHashSalt(g_parser, 0x23456789))
2243*4543ef51SXin LI     fail("Failed to reject late hash salt change");
2244*4543ef51SXin LI 
2245*4543ef51SXin LI   /* Now finish the parse */
2246*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text2, (int)strlen(text2), XML_TRUE)
2247*4543ef51SXin LI       == XML_STATUS_ERROR)
2248*4543ef51SXin LI     xml_failure(g_parser);
2249*4543ef51SXin LI }
2250*4543ef51SXin LI END_TEST
2251*4543ef51SXin LI 
2252*4543ef51SXin LI /* Test XML_UseForeignDTD with no external subset present */
START_TEST(test_foreign_dtd_without_external_subset)2253*4543ef51SXin LI START_TEST(test_foreign_dtd_without_external_subset) {
2254*4543ef51SXin LI   const char *text = "<!DOCTYPE doc [<!ENTITY foo 'bar'>]>\n"
2255*4543ef51SXin LI                      "<doc>&foo;</doc>";
2256*4543ef51SXin LI 
2257*4543ef51SXin LI   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2258*4543ef51SXin LI   XML_SetUserData(g_parser, NULL);
2259*4543ef51SXin LI   XML_SetExternalEntityRefHandler(g_parser, external_entity_null_loader);
2260*4543ef51SXin LI   XML_UseForeignDTD(g_parser, XML_TRUE);
2261*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2262*4543ef51SXin LI       == XML_STATUS_ERROR)
2263*4543ef51SXin LI     xml_failure(g_parser);
2264*4543ef51SXin LI }
2265*4543ef51SXin LI END_TEST
2266*4543ef51SXin LI 
START_TEST(test_empty_foreign_dtd)2267*4543ef51SXin LI START_TEST(test_empty_foreign_dtd) {
2268*4543ef51SXin LI   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2269*4543ef51SXin LI                      "<doc>&entity;</doc>";
2270*4543ef51SXin LI 
2271*4543ef51SXin LI   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2272*4543ef51SXin LI   XML_SetExternalEntityRefHandler(g_parser, external_entity_null_loader);
2273*4543ef51SXin LI   XML_UseForeignDTD(g_parser, XML_TRUE);
2274*4543ef51SXin LI   expect_failure(text, XML_ERROR_UNDEFINED_ENTITY,
2275*4543ef51SXin LI                  "Undefined entity not faulted");
2276*4543ef51SXin LI }
2277*4543ef51SXin LI END_TEST
2278*4543ef51SXin LI 
2279*4543ef51SXin LI /* Test XML Base is set and unset appropriately */
START_TEST(test_set_base)2280*4543ef51SXin LI START_TEST(test_set_base) {
2281*4543ef51SXin LI   const XML_Char *old_base;
2282*4543ef51SXin LI   const XML_Char *new_base = XCS("/local/file/name.xml");
2283*4543ef51SXin LI 
2284*4543ef51SXin LI   old_base = XML_GetBase(g_parser);
2285*4543ef51SXin LI   if (XML_SetBase(g_parser, new_base) != XML_STATUS_OK)
2286*4543ef51SXin LI     fail("Unable to set base");
2287*4543ef51SXin LI   if (xcstrcmp(XML_GetBase(g_parser), new_base) != 0)
2288*4543ef51SXin LI     fail("Base setting not correct");
2289*4543ef51SXin LI   if (XML_SetBase(g_parser, NULL) != XML_STATUS_OK)
2290*4543ef51SXin LI     fail("Unable to NULL base");
2291*4543ef51SXin LI   if (XML_GetBase(g_parser) != NULL)
2292*4543ef51SXin LI     fail("Base setting not nulled");
2293*4543ef51SXin LI   XML_SetBase(g_parser, old_base);
2294*4543ef51SXin LI }
2295*4543ef51SXin LI END_TEST
2296*4543ef51SXin LI 
2297*4543ef51SXin LI /* Test attribute counts, indexing, etc */
START_TEST(test_attributes)2298*4543ef51SXin LI START_TEST(test_attributes) {
2299*4543ef51SXin LI   const char *text = "<!DOCTYPE doc [\n"
2300*4543ef51SXin LI                      "<!ELEMENT doc (tag)>\n"
2301*4543ef51SXin LI                      "<!ATTLIST doc id ID #REQUIRED>\n"
2302*4543ef51SXin LI                      "]>"
2303*4543ef51SXin LI                      "<doc a='1' id='one' b='2'>"
2304*4543ef51SXin LI                      "<tag c='3'/>"
2305*4543ef51SXin LI                      "</doc>";
2306*4543ef51SXin LI   AttrInfo doc_info[] = {{XCS("a"), XCS("1")},
2307*4543ef51SXin LI                          {XCS("b"), XCS("2")},
2308*4543ef51SXin LI                          {XCS("id"), XCS("one")},
2309*4543ef51SXin LI                          {NULL, NULL}};
2310*4543ef51SXin LI   AttrInfo tag_info[] = {{XCS("c"), XCS("3")}, {NULL, NULL}};
2311*4543ef51SXin LI   ElementInfo info[] = {{XCS("doc"), 3, XCS("id"), NULL},
2312*4543ef51SXin LI                         {XCS("tag"), 1, NULL, NULL},
2313*4543ef51SXin LI                         {NULL, 0, NULL, NULL}};
2314*4543ef51SXin LI   info[0].attributes = doc_info;
2315*4543ef51SXin LI   info[1].attributes = tag_info;
2316*4543ef51SXin LI 
2317*4543ef51SXin LI   XML_SetStartElementHandler(g_parser, counting_start_element_handler);
2318*4543ef51SXin LI   XML_SetUserData(g_parser, info);
2319*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2320*4543ef51SXin LI       == XML_STATUS_ERROR)
2321*4543ef51SXin LI     xml_failure(g_parser);
2322*4543ef51SXin LI }
2323*4543ef51SXin LI END_TEST
2324*4543ef51SXin LI 
2325*4543ef51SXin LI /* Test reset works correctly in the middle of processing an internal
2326*4543ef51SXin LI  * entity.  Exercises some obscure code in XML_ParserReset().
2327*4543ef51SXin LI  */
START_TEST(test_reset_in_entity)2328*4543ef51SXin LI START_TEST(test_reset_in_entity) {
2329*4543ef51SXin LI   const char *text = "<!DOCTYPE doc [\n"
2330*4543ef51SXin LI                      "<!ENTITY wombat 'wom'>\n"
2331*4543ef51SXin LI                      "<!ENTITY entity 'hi &wom; there'>\n"
2332*4543ef51SXin LI                      "]>\n"
2333*4543ef51SXin LI                      "<doc>&entity;</doc>";
2334*4543ef51SXin LI   XML_ParsingStatus status;
2335*4543ef51SXin LI 
2336*4543ef51SXin LI   g_resumable = XML_TRUE;
2337*4543ef51SXin LI   XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
2338*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2339*4543ef51SXin LI       == XML_STATUS_ERROR)
2340*4543ef51SXin LI     xml_failure(g_parser);
2341*4543ef51SXin LI   XML_GetParsingStatus(g_parser, &status);
2342*4543ef51SXin LI   if (status.parsing != XML_SUSPENDED)
2343*4543ef51SXin LI     fail("Parsing status not SUSPENDED");
2344*4543ef51SXin LI   XML_ParserReset(g_parser, NULL);
2345*4543ef51SXin LI   XML_GetParsingStatus(g_parser, &status);
2346*4543ef51SXin LI   if (status.parsing != XML_INITIALIZED)
2347*4543ef51SXin LI     fail("Parsing status doesn't reset to INITIALIZED");
2348*4543ef51SXin LI }
2349*4543ef51SXin LI END_TEST
2350*4543ef51SXin LI 
2351*4543ef51SXin LI /* Test that resume correctly passes through parse errors */
START_TEST(test_resume_invalid_parse)2352*4543ef51SXin LI START_TEST(test_resume_invalid_parse) {
2353*4543ef51SXin LI   const char *text = "<doc>Hello</doc"; /* Missing closing wedge */
2354*4543ef51SXin LI 
2355*4543ef51SXin LI   g_resumable = XML_TRUE;
2356*4543ef51SXin LI   XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
2357*4543ef51SXin LI   if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
2358*4543ef51SXin LI       == XML_STATUS_ERROR)
2359*4543ef51SXin LI     xml_failure(g_parser);
2360*4543ef51SXin LI   if (XML_ResumeParser(g_parser) == XML_STATUS_OK)
2361*4543ef51SXin LI     fail("Resumed invalid parse not faulted");
2362*4543ef51SXin LI   if (XML_GetErrorCode(g_parser) != XML_ERROR_UNCLOSED_TOKEN)
2363*4543ef51SXin LI     fail("Invalid parse not correctly faulted");
2364*4543ef51SXin LI }
2365*4543ef51SXin LI END_TEST
2366*4543ef51SXin LI 
2367*4543ef51SXin LI /* Test that re-suspended parses are correctly passed through */
START_TEST(test_resume_resuspended)2368*4543ef51SXin LI START_TEST(test_resume_resuspended) {
2369*4543ef51SXin LI   const char *text = "<doc>Hello<meep/>world</doc>";
2370*4543ef51SXin LI 
2371*4543ef51SXin LI   g_resumable = XML_TRUE;
2372*4543ef51SXin LI   XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
2373*4543ef51SXin LI   if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
2374*4543ef51SXin LI       == XML_STATUS_ERROR)
2375*4543ef51SXin LI     xml_failure(g_parser);
2376*4543ef51SXin LI   g_resumable = XML_TRUE;
2377*4543ef51SXin LI   XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
2378*4543ef51SXin LI   if (XML_ResumeParser(g_parser) != XML_STATUS_SUSPENDED)
2379*4543ef51SXin LI     fail("Resumption not suspended");
2380*4543ef51SXin LI   /* This one should succeed and finish up */
2381*4543ef51SXin LI   if (XML_ResumeParser(g_parser) != XML_STATUS_OK)
2382*4543ef51SXin LI     xml_failure(g_parser);
2383*4543ef51SXin LI }
2384*4543ef51SXin LI END_TEST
2385*4543ef51SXin LI 
2386*4543ef51SXin LI /* Test that CDATA shows up correctly through a default handler */
START_TEST(test_cdata_default)2387*4543ef51SXin LI START_TEST(test_cdata_default) {
2388*4543ef51SXin LI   const char *text = "<doc><![CDATA[Hello\nworld]]></doc>";
2389*4543ef51SXin LI   const XML_Char *expected = XCS("<doc><![CDATA[Hello\nworld]]></doc>");
2390*4543ef51SXin LI   CharData storage;
2391*4543ef51SXin LI 
2392*4543ef51SXin LI   CharData_Init(&storage);
2393*4543ef51SXin LI   XML_SetUserData(g_parser, &storage);
2394*4543ef51SXin LI   XML_SetDefaultHandler(g_parser, accumulate_characters);
2395*4543ef51SXin LI 
2396*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2397*4543ef51SXin LI       == XML_STATUS_ERROR)
2398*4543ef51SXin LI     xml_failure(g_parser);
2399*4543ef51SXin LI   CharData_CheckXMLChars(&storage, expected);
2400*4543ef51SXin LI }
2401*4543ef51SXin LI END_TEST
2402*4543ef51SXin LI 
2403*4543ef51SXin LI /* Test resetting a subordinate parser does exactly nothing */
START_TEST(test_subordinate_reset)2404*4543ef51SXin LI START_TEST(test_subordinate_reset) {
2405*4543ef51SXin LI   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2406*4543ef51SXin LI                      "<!DOCTYPE doc SYSTEM 'foo'>\n"
2407*4543ef51SXin LI                      "<doc>&entity;</doc>";
2408*4543ef51SXin LI 
2409*4543ef51SXin LI   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2410*4543ef51SXin LI   XML_SetExternalEntityRefHandler(g_parser, external_entity_resetter);
2411*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2412*4543ef51SXin LI       == XML_STATUS_ERROR)
2413*4543ef51SXin LI     xml_failure(g_parser);
2414*4543ef51SXin LI }
2415*4543ef51SXin LI END_TEST
2416*4543ef51SXin LI 
2417*4543ef51SXin LI /* Test suspending a subordinate parser */
START_TEST(test_subordinate_suspend)2418*4543ef51SXin LI START_TEST(test_subordinate_suspend) {
2419*4543ef51SXin LI   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2420*4543ef51SXin LI                      "<!DOCTYPE doc SYSTEM 'foo'>\n"
2421*4543ef51SXin LI                      "<doc>&entity;</doc>";
2422*4543ef51SXin LI 
2423*4543ef51SXin LI   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2424*4543ef51SXin LI   XML_SetExternalEntityRefHandler(g_parser, external_entity_suspender);
2425*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2426*4543ef51SXin LI       == XML_STATUS_ERROR)
2427*4543ef51SXin LI     xml_failure(g_parser);
2428*4543ef51SXin LI }
2429*4543ef51SXin LI END_TEST
2430*4543ef51SXin LI 
2431*4543ef51SXin LI /* Test suspending a subordinate parser from an XML declaration */
2432*4543ef51SXin LI /* Increases code coverage of the tests */
2433*4543ef51SXin LI 
START_TEST(test_subordinate_xdecl_suspend)2434*4543ef51SXin LI START_TEST(test_subordinate_xdecl_suspend) {
2435*4543ef51SXin LI   const char *text
2436*4543ef51SXin LI       = "<!DOCTYPE doc [\n"
2437*4543ef51SXin LI         "  <!ENTITY entity SYSTEM 'http://example.org/dummy.ent'>\n"
2438*4543ef51SXin LI         "]>\n"
2439*4543ef51SXin LI         "<doc>&entity;</doc>";
2440*4543ef51SXin LI 
2441*4543ef51SXin LI   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2442*4543ef51SXin LI   XML_SetExternalEntityRefHandler(g_parser, external_entity_suspend_xmldecl);
2443*4543ef51SXin LI   g_resumable = XML_TRUE;
2444*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2445*4543ef51SXin LI       == XML_STATUS_ERROR)
2446*4543ef51SXin LI     xml_failure(g_parser);
2447*4543ef51SXin LI }
2448*4543ef51SXin LI END_TEST
2449*4543ef51SXin LI 
START_TEST(test_subordinate_xdecl_abort)2450*4543ef51SXin LI START_TEST(test_subordinate_xdecl_abort) {
2451*4543ef51SXin LI   const char *text
2452*4543ef51SXin LI       = "<!DOCTYPE doc [\n"
2453*4543ef51SXin LI         "  <!ENTITY entity SYSTEM 'http://example.org/dummy.ent'>\n"
2454*4543ef51SXin LI         "]>\n"
2455*4543ef51SXin LI         "<doc>&entity;</doc>";
2456*4543ef51SXin LI 
2457*4543ef51SXin LI   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2458*4543ef51SXin LI   XML_SetExternalEntityRefHandler(g_parser, external_entity_suspend_xmldecl);
2459*4543ef51SXin LI   g_resumable = XML_FALSE;
2460*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2461*4543ef51SXin LI       == XML_STATUS_ERROR)
2462*4543ef51SXin LI     xml_failure(g_parser);
2463*4543ef51SXin LI }
2464*4543ef51SXin LI END_TEST
2465*4543ef51SXin LI 
2466*4543ef51SXin LI /* Test external entity fault handling with suspension */
START_TEST(test_ext_entity_invalid_suspended_parse)2467*4543ef51SXin LI START_TEST(test_ext_entity_invalid_suspended_parse) {
2468*4543ef51SXin LI   const char *text = "<!DOCTYPE doc [\n"
2469*4543ef51SXin LI                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
2470*4543ef51SXin LI                      "]>\n"
2471*4543ef51SXin LI                      "<doc>&en;</doc>";
2472*4543ef51SXin LI   ExtFaults faults[]
2473*4543ef51SXin LI       = {{"<?xml version='1.0' encoding='us-ascii'?><",
2474*4543ef51SXin LI           "Incomplete element declaration not faulted", NULL,
2475*4543ef51SXin LI           XML_ERROR_UNCLOSED_TOKEN},
2476*4543ef51SXin LI          {/* First two bytes of a three-byte char */
2477*4543ef51SXin LI           "<?xml version='1.0' encoding='utf-8'?>\xe2\x82",
2478*4543ef51SXin LI           "Incomplete character not faulted", NULL, XML_ERROR_PARTIAL_CHAR},
2479*4543ef51SXin LI          {NULL, NULL, NULL, XML_ERROR_NONE}};
2480*4543ef51SXin LI   ExtFaults *fault;
2481*4543ef51SXin LI 
2482*4543ef51SXin LI   for (fault = &faults[0]; fault->parse_text != NULL; fault++) {
2483*4543ef51SXin LI     set_subtest("%s", fault->parse_text);
2484*4543ef51SXin LI     XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2485*4543ef51SXin LI     XML_SetExternalEntityRefHandler(g_parser,
2486*4543ef51SXin LI                                     external_entity_suspending_faulter);
2487*4543ef51SXin LI     XML_SetUserData(g_parser, fault);
2488*4543ef51SXin LI     expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
2489*4543ef51SXin LI                    "Parser did not report external entity error");
2490*4543ef51SXin LI     XML_ParserReset(g_parser, NULL);
2491*4543ef51SXin LI   }
2492*4543ef51SXin LI }
2493*4543ef51SXin LI END_TEST
2494*4543ef51SXin LI 
2495*4543ef51SXin LI /* Test setting an explicit encoding */
START_TEST(test_explicit_encoding)2496*4543ef51SXin LI START_TEST(test_explicit_encoding) {
2497*4543ef51SXin LI   const char *text1 = "<doc>Hello ";
2498*4543ef51SXin LI   const char *text2 = " World</doc>";
2499*4543ef51SXin LI 
2500*4543ef51SXin LI   /* Just check that we can set the encoding to NULL before starting */
2501*4543ef51SXin LI   if (XML_SetEncoding(g_parser, NULL) != XML_STATUS_OK)
2502*4543ef51SXin LI     fail("Failed to initialise encoding to NULL");
2503*4543ef51SXin LI   /* Say we are UTF-8 */
2504*4543ef51SXin LI   if (XML_SetEncoding(g_parser, XCS("utf-8")) != XML_STATUS_OK)
2505*4543ef51SXin LI     fail("Failed to set explicit encoding");
2506*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text1, (int)strlen(text1), XML_FALSE)
2507*4543ef51SXin LI       == XML_STATUS_ERROR)
2508*4543ef51SXin LI     xml_failure(g_parser);
2509*4543ef51SXin LI   /* Try to switch encodings mid-parse */
2510*4543ef51SXin LI   if (XML_SetEncoding(g_parser, XCS("us-ascii")) != XML_STATUS_ERROR)
2511*4543ef51SXin LI     fail("Allowed encoding change");
2512*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text2, (int)strlen(text2), XML_TRUE)
2513*4543ef51SXin LI       == XML_STATUS_ERROR)
2514*4543ef51SXin LI     xml_failure(g_parser);
2515*4543ef51SXin LI   /* Try now the parse is over */
2516*4543ef51SXin LI   if (XML_SetEncoding(g_parser, NULL) != XML_STATUS_OK)
2517*4543ef51SXin LI     fail("Failed to unset encoding");
2518*4543ef51SXin LI }
2519*4543ef51SXin LI END_TEST
2520*4543ef51SXin LI 
2521*4543ef51SXin LI /* Test handling of trailing CR (rather than newline) */
START_TEST(test_trailing_cr)2522*4543ef51SXin LI START_TEST(test_trailing_cr) {
2523*4543ef51SXin LI   const char *text = "<doc>\r";
2524*4543ef51SXin LI   int found_cr;
2525*4543ef51SXin LI 
2526*4543ef51SXin LI   /* Try with a character handler, for code coverage */
2527*4543ef51SXin LI   XML_SetCharacterDataHandler(g_parser, cr_cdata_handler);
2528*4543ef51SXin LI   XML_SetUserData(g_parser, &found_cr);
2529*4543ef51SXin LI   found_cr = 0;
2530*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2531*4543ef51SXin LI       == XML_STATUS_OK)
2532*4543ef51SXin LI     fail("Failed to fault unclosed doc");
2533*4543ef51SXin LI   if (found_cr == 0)
2534*4543ef51SXin LI     fail("Did not catch the carriage return");
2535*4543ef51SXin LI   XML_ParserReset(g_parser, NULL);
2536*4543ef51SXin LI 
2537*4543ef51SXin LI   /* Now with a default handler instead */
2538*4543ef51SXin LI   XML_SetDefaultHandler(g_parser, cr_cdata_handler);
2539*4543ef51SXin LI   XML_SetUserData(g_parser, &found_cr);
2540*4543ef51SXin LI   found_cr = 0;
2541*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2542*4543ef51SXin LI       == XML_STATUS_OK)
2543*4543ef51SXin LI     fail("Failed to fault unclosed doc");
2544*4543ef51SXin LI   if (found_cr == 0)
2545*4543ef51SXin LI     fail("Did not catch default carriage return");
2546*4543ef51SXin LI }
2547*4543ef51SXin LI END_TEST
2548*4543ef51SXin LI 
2549*4543ef51SXin LI /* Test trailing CR in an external entity parse */
START_TEST(test_ext_entity_trailing_cr)2550*4543ef51SXin LI START_TEST(test_ext_entity_trailing_cr) {
2551*4543ef51SXin LI   const char *text = "<!DOCTYPE doc [\n"
2552*4543ef51SXin LI                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
2553*4543ef51SXin LI                      "]>\n"
2554*4543ef51SXin LI                      "<doc>&en;</doc>";
2555*4543ef51SXin LI   int found_cr;
2556*4543ef51SXin LI 
2557*4543ef51SXin LI   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2558*4543ef51SXin LI   XML_SetExternalEntityRefHandler(g_parser, external_entity_cr_catcher);
2559*4543ef51SXin LI   XML_SetUserData(g_parser, &found_cr);
2560*4543ef51SXin LI   found_cr = 0;
2561*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2562*4543ef51SXin LI       != XML_STATUS_OK)
2563*4543ef51SXin LI     xml_failure(g_parser);
2564*4543ef51SXin LI   if (found_cr == 0)
2565*4543ef51SXin LI     fail("No carriage return found");
2566*4543ef51SXin LI   XML_ParserReset(g_parser, NULL);
2567*4543ef51SXin LI 
2568*4543ef51SXin LI   /* Try again with a different trailing CR */
2569*4543ef51SXin LI   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2570*4543ef51SXin LI   XML_SetExternalEntityRefHandler(g_parser, external_entity_bad_cr_catcher);
2571*4543ef51SXin LI   XML_SetUserData(g_parser, &found_cr);
2572*4543ef51SXin LI   found_cr = 0;
2573*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2574*4543ef51SXin LI       != XML_STATUS_OK)
2575*4543ef51SXin LI     xml_failure(g_parser);
2576*4543ef51SXin LI   if (found_cr == 0)
2577*4543ef51SXin LI     fail("No carriage return found");
2578*4543ef51SXin LI }
2579*4543ef51SXin LI END_TEST
2580*4543ef51SXin LI 
2581*4543ef51SXin LI /* Test handling of trailing square bracket */
START_TEST(test_trailing_rsqb)2582*4543ef51SXin LI START_TEST(test_trailing_rsqb) {
2583*4543ef51SXin LI   const char *text8 = "<doc>]";
2584*4543ef51SXin LI   const char text16[] = "\xFF\xFE<\000d\000o\000c\000>\000]\000";
2585*4543ef51SXin LI   int found_rsqb;
2586*4543ef51SXin LI   int text8_len = (int)strlen(text8);
2587*4543ef51SXin LI 
2588*4543ef51SXin LI   XML_SetCharacterDataHandler(g_parser, rsqb_handler);
2589*4543ef51SXin LI   XML_SetUserData(g_parser, &found_rsqb);
2590*4543ef51SXin LI   found_rsqb = 0;
2591*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text8, text8_len, XML_TRUE)
2592*4543ef51SXin LI       == XML_STATUS_OK)
2593*4543ef51SXin LI     fail("Failed to fault unclosed doc");
2594*4543ef51SXin LI   if (found_rsqb == 0)
2595*4543ef51SXin LI     fail("Did not catch the right square bracket");
2596*4543ef51SXin LI 
2597*4543ef51SXin LI   /* Try again with a different encoding */
2598*4543ef51SXin LI   XML_ParserReset(g_parser, NULL);
2599*4543ef51SXin LI   XML_SetCharacterDataHandler(g_parser, rsqb_handler);
2600*4543ef51SXin LI   XML_SetUserData(g_parser, &found_rsqb);
2601*4543ef51SXin LI   found_rsqb = 0;
2602*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text16, (int)sizeof(text16) - 1,
2603*4543ef51SXin LI                               XML_TRUE)
2604*4543ef51SXin LI       == XML_STATUS_OK)
2605*4543ef51SXin LI     fail("Failed to fault unclosed doc");
2606*4543ef51SXin LI   if (found_rsqb == 0)
2607*4543ef51SXin LI     fail("Did not catch the right square bracket");
2608*4543ef51SXin LI 
2609*4543ef51SXin LI   /* And finally with a default handler */
2610*4543ef51SXin LI   XML_ParserReset(g_parser, NULL);
2611*4543ef51SXin LI   XML_SetDefaultHandler(g_parser, rsqb_handler);
2612*4543ef51SXin LI   XML_SetUserData(g_parser, &found_rsqb);
2613*4543ef51SXin LI   found_rsqb = 0;
2614*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text16, (int)sizeof(text16) - 1,
2615*4543ef51SXin LI                               XML_TRUE)
2616*4543ef51SXin LI       == XML_STATUS_OK)
2617*4543ef51SXin LI     fail("Failed to fault unclosed doc");
2618*4543ef51SXin LI   if (found_rsqb == 0)
2619*4543ef51SXin LI     fail("Did not catch the right square bracket");
2620*4543ef51SXin LI }
2621*4543ef51SXin LI END_TEST
2622*4543ef51SXin LI 
2623*4543ef51SXin LI /* Test trailing right square bracket in an external entity parse */
START_TEST(test_ext_entity_trailing_rsqb)2624*4543ef51SXin LI START_TEST(test_ext_entity_trailing_rsqb) {
2625*4543ef51SXin LI   const char *text = "<!DOCTYPE doc [\n"
2626*4543ef51SXin LI                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
2627*4543ef51SXin LI                      "]>\n"
2628*4543ef51SXin LI                      "<doc>&en;</doc>";
2629*4543ef51SXin LI   int found_rsqb;
2630*4543ef51SXin LI 
2631*4543ef51SXin LI   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2632*4543ef51SXin LI   XML_SetExternalEntityRefHandler(g_parser, external_entity_rsqb_catcher);
2633*4543ef51SXin LI   XML_SetUserData(g_parser, &found_rsqb);
2634*4543ef51SXin LI   found_rsqb = 0;
2635*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2636*4543ef51SXin LI       != XML_STATUS_OK)
2637*4543ef51SXin LI     xml_failure(g_parser);
2638*4543ef51SXin LI   if (found_rsqb == 0)
2639*4543ef51SXin LI     fail("No right square bracket found");
2640*4543ef51SXin LI }
2641*4543ef51SXin LI END_TEST
2642*4543ef51SXin LI 
2643*4543ef51SXin LI /* Test CDATA handling in an external entity */
START_TEST(test_ext_entity_good_cdata)2644*4543ef51SXin LI START_TEST(test_ext_entity_good_cdata) {
2645*4543ef51SXin LI   const char *text = "<!DOCTYPE doc [\n"
2646*4543ef51SXin LI                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
2647*4543ef51SXin LI                      "]>\n"
2648*4543ef51SXin LI                      "<doc>&en;</doc>";
2649*4543ef51SXin LI 
2650*4543ef51SXin LI   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2651*4543ef51SXin LI   XML_SetExternalEntityRefHandler(g_parser, external_entity_good_cdata_ascii);
2652*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2653*4543ef51SXin LI       != XML_STATUS_OK)
2654*4543ef51SXin LI     xml_failure(g_parser);
2655*4543ef51SXin LI }
2656*4543ef51SXin LI END_TEST
2657*4543ef51SXin LI 
2658*4543ef51SXin LI /* Test user parameter settings */
START_TEST(test_user_parameters)2659*4543ef51SXin LI START_TEST(test_user_parameters) {
2660*4543ef51SXin LI   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2661*4543ef51SXin LI                      "<!-- Primary parse -->\n"
2662*4543ef51SXin LI                      "<!DOCTYPE doc SYSTEM 'foo'>\n"
2663*4543ef51SXin LI                      "<doc>&entity;";
2664*4543ef51SXin LI   const char *epilog = "<!-- Back to primary parser -->\n"
2665*4543ef51SXin LI                        "</doc>";
2666*4543ef51SXin LI 
2667*4543ef51SXin LI   g_comment_count = 0;
2668*4543ef51SXin LI   g_skip_count = 0;
2669*4543ef51SXin LI   g_xdecl_count = 0;
2670*4543ef51SXin LI   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2671*4543ef51SXin LI   XML_SetXmlDeclHandler(g_parser, xml_decl_handler);
2672*4543ef51SXin LI   XML_SetExternalEntityRefHandler(g_parser, external_entity_param_checker);
2673*4543ef51SXin LI   XML_SetCommentHandler(g_parser, data_check_comment_handler);
2674*4543ef51SXin LI   XML_SetSkippedEntityHandler(g_parser, param_check_skip_handler);
2675*4543ef51SXin LI   XML_UseParserAsHandlerArg(g_parser);
2676*4543ef51SXin LI   XML_SetUserData(g_parser, (void *)1);
2677*4543ef51SXin LI   g_handler_data = g_parser;
2678*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE)
2679*4543ef51SXin LI       == XML_STATUS_ERROR)
2680*4543ef51SXin LI     xml_failure(g_parser);
2681*4543ef51SXin LI   /* Ensure we can't change policy mid-parse */
2682*4543ef51SXin LI   if (XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_NEVER))
2683*4543ef51SXin LI     fail("Changed param entity parsing policy while parsing");
2684*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, epilog, (int)strlen(epilog), XML_TRUE)
2685*4543ef51SXin LI       == XML_STATUS_ERROR)
2686*4543ef51SXin LI     xml_failure(g_parser);
2687*4543ef51SXin LI   if (g_comment_count != 3)
2688*4543ef51SXin LI     fail("Comment handler not invoked enough times");
2689*4543ef51SXin LI   if (g_skip_count != 1)
2690*4543ef51SXin LI     fail("Skip handler not invoked enough times");
2691*4543ef51SXin LI   if (g_xdecl_count != 1)
2692*4543ef51SXin LI     fail("XML declaration handler not invoked");
2693*4543ef51SXin LI }
2694*4543ef51SXin LI END_TEST
2695*4543ef51SXin LI 
2696*4543ef51SXin LI /* Test that an explicit external entity handler argument replaces
2697*4543ef51SXin LI  * the parser as the first argument.
2698*4543ef51SXin LI  *
2699*4543ef51SXin LI  * We do not call the first parameter to the external entity handler
2700*4543ef51SXin LI  * 'parser' for once, since the first time the handler is called it
2701*4543ef51SXin LI  * will actually be a text string.  We need to be able to access the
2702*4543ef51SXin LI  * global 'parser' variable to create our external entity parser from,
2703*4543ef51SXin LI  * since there are code paths we need to ensure get executed.
2704*4543ef51SXin LI  */
START_TEST(test_ext_entity_ref_parameter)2705*4543ef51SXin LI START_TEST(test_ext_entity_ref_parameter) {
2706*4543ef51SXin LI   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2707*4543ef51SXin LI                      "<!DOCTYPE doc SYSTEM 'foo'>\n"
2708*4543ef51SXin LI                      "<doc>&entity;</doc>";
2709*4543ef51SXin LI 
2710*4543ef51SXin LI   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2711*4543ef51SXin LI   XML_SetExternalEntityRefHandler(g_parser, external_entity_ref_param_checker);
2712*4543ef51SXin LI   /* Set a handler arg that is not NULL and not parser (which is
2713*4543ef51SXin LI    * what NULL would cause to be passed.
2714*4543ef51SXin LI    */
2715*4543ef51SXin LI   XML_SetExternalEntityRefHandlerArg(g_parser, (void *)text);
2716*4543ef51SXin LI   g_handler_data = text;
2717*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2718*4543ef51SXin LI       == XML_STATUS_ERROR)
2719*4543ef51SXin LI     xml_failure(g_parser);
2720*4543ef51SXin LI 
2721*4543ef51SXin LI   /* Now try again with unset args */
2722*4543ef51SXin LI   XML_ParserReset(g_parser, NULL);
2723*4543ef51SXin LI   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2724*4543ef51SXin LI   XML_SetExternalEntityRefHandler(g_parser, external_entity_ref_param_checker);
2725*4543ef51SXin LI   XML_SetExternalEntityRefHandlerArg(g_parser, NULL);
2726*4543ef51SXin LI   g_handler_data = g_parser;
2727*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2728*4543ef51SXin LI       == XML_STATUS_ERROR)
2729*4543ef51SXin LI     xml_failure(g_parser);
2730*4543ef51SXin LI }
2731*4543ef51SXin LI END_TEST
2732*4543ef51SXin LI 
2733*4543ef51SXin LI /* Test the parsing of an empty string */
START_TEST(test_empty_parse)2734*4543ef51SXin LI START_TEST(test_empty_parse) {
2735*4543ef51SXin LI   const char *text = "<doc></doc>";
2736*4543ef51SXin LI   const char *partial = "<doc>";
2737*4543ef51SXin LI 
2738*4543ef51SXin LI   if (XML_Parse(g_parser, NULL, 0, XML_FALSE) == XML_STATUS_ERROR)
2739*4543ef51SXin LI     fail("Parsing empty string faulted");
2740*4543ef51SXin LI   if (XML_Parse(g_parser, NULL, 0, XML_TRUE) != XML_STATUS_ERROR)
2741*4543ef51SXin LI     fail("Parsing final empty string not faulted");
2742*4543ef51SXin LI   if (XML_GetErrorCode(g_parser) != XML_ERROR_NO_ELEMENTS)
2743*4543ef51SXin LI     fail("Parsing final empty string faulted for wrong reason");
2744*4543ef51SXin LI 
2745*4543ef51SXin LI   /* Now try with valid text before the empty end */
2746*4543ef51SXin LI   XML_ParserReset(g_parser, NULL);
2747*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE)
2748*4543ef51SXin LI       == XML_STATUS_ERROR)
2749*4543ef51SXin LI     xml_failure(g_parser);
2750*4543ef51SXin LI   if (XML_Parse(g_parser, NULL, 0, XML_TRUE) == XML_STATUS_ERROR)
2751*4543ef51SXin LI     fail("Parsing final empty string faulted");
2752*4543ef51SXin LI 
2753*4543ef51SXin LI   /* Now try with invalid text before the empty end */
2754*4543ef51SXin LI   XML_ParserReset(g_parser, NULL);
2755*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, partial, (int)strlen(partial),
2756*4543ef51SXin LI                               XML_FALSE)
2757*4543ef51SXin LI       == XML_STATUS_ERROR)
2758*4543ef51SXin LI     xml_failure(g_parser);
2759*4543ef51SXin LI   if (XML_Parse(g_parser, NULL, 0, XML_TRUE) != XML_STATUS_ERROR)
2760*4543ef51SXin LI     fail("Parsing final incomplete empty string not faulted");
2761*4543ef51SXin LI }
2762*4543ef51SXin LI END_TEST
2763*4543ef51SXin LI 
2764*4543ef51SXin LI /* Test odd corners of the XML_GetBuffer interface */
2765*4543ef51SXin LI static enum XML_Status
get_feature(enum XML_FeatureEnum feature_id,long * presult)2766*4543ef51SXin LI get_feature(enum XML_FeatureEnum feature_id, long *presult) {
2767*4543ef51SXin LI   const XML_Feature *feature = XML_GetFeatureList();
2768*4543ef51SXin LI 
2769*4543ef51SXin LI   if (feature == NULL)
2770*4543ef51SXin LI     return XML_STATUS_ERROR;
2771*4543ef51SXin LI   for (; feature->feature != XML_FEATURE_END; feature++) {
2772*4543ef51SXin LI     if (feature->feature == feature_id) {
2773*4543ef51SXin LI       *presult = feature->value;
2774*4543ef51SXin LI       return XML_STATUS_OK;
2775*4543ef51SXin LI     }
2776*4543ef51SXin LI   }
2777*4543ef51SXin LI   return XML_STATUS_ERROR;
2778*4543ef51SXin LI }
2779*4543ef51SXin LI 
2780*4543ef51SXin LI /* Test odd corners of the XML_GetBuffer interface */
START_TEST(test_get_buffer_1)2781*4543ef51SXin LI START_TEST(test_get_buffer_1) {
2782*4543ef51SXin LI   const char *text = get_buffer_test_text;
2783*4543ef51SXin LI   void *buffer;
2784*4543ef51SXin LI   long context_bytes;
2785*4543ef51SXin LI 
2786*4543ef51SXin LI   /* Attempt to allocate a negative length buffer */
2787*4543ef51SXin LI   if (XML_GetBuffer(g_parser, -12) != NULL)
2788*4543ef51SXin LI     fail("Negative length buffer not failed");
2789*4543ef51SXin LI 
2790*4543ef51SXin LI   /* Now get a small buffer and extend it past valid length */
2791*4543ef51SXin LI   buffer = XML_GetBuffer(g_parser, 1536);
2792*4543ef51SXin LI   if (buffer == NULL)
2793*4543ef51SXin LI     fail("1.5K buffer failed");
2794*4543ef51SXin LI   assert(buffer != NULL);
2795*4543ef51SXin LI   memcpy(buffer, text, strlen(text));
2796*4543ef51SXin LI   if (XML_ParseBuffer(g_parser, (int)strlen(text), XML_FALSE)
2797*4543ef51SXin LI       == XML_STATUS_ERROR)
2798*4543ef51SXin LI     xml_failure(g_parser);
2799*4543ef51SXin LI   if (XML_GetBuffer(g_parser, INT_MAX) != NULL)
2800*4543ef51SXin LI     fail("INT_MAX buffer not failed");
2801*4543ef51SXin LI 
2802*4543ef51SXin LI   /* Now try extending it a more reasonable but still too large
2803*4543ef51SXin LI    * amount.  The allocator in XML_GetBuffer() doubles the buffer
2804*4543ef51SXin LI    * size until it exceeds the requested amount or INT_MAX.  If it
2805*4543ef51SXin LI    * exceeds INT_MAX, it rejects the request, so we want a request
2806*4543ef51SXin LI    * between INT_MAX and INT_MAX/2.  A gap of 1K seems comfortable,
2807*4543ef51SXin LI    * with an extra byte just to ensure that the request is off any
2808*4543ef51SXin LI    * boundary.  The request will be inflated internally by
2809*4543ef51SXin LI    * XML_CONTEXT_BYTES (if >=1), so we subtract that from our
2810*4543ef51SXin LI    * request.
2811*4543ef51SXin LI    */
2812*4543ef51SXin LI   if (get_feature(XML_FEATURE_CONTEXT_BYTES, &context_bytes) != XML_STATUS_OK)
2813*4543ef51SXin LI     context_bytes = 0;
2814*4543ef51SXin LI   if (XML_GetBuffer(g_parser, INT_MAX - (context_bytes + 1025)) != NULL)
2815*4543ef51SXin LI     fail("INT_MAX- buffer not failed");
2816*4543ef51SXin LI 
2817*4543ef51SXin LI   /* Now try extending it a carefully crafted amount */
2818*4543ef51SXin LI   if (XML_GetBuffer(g_parser, 1000) == NULL)
2819*4543ef51SXin LI     fail("1000 buffer failed");
2820*4543ef51SXin LI }
2821*4543ef51SXin LI END_TEST
2822*4543ef51SXin LI 
2823*4543ef51SXin LI /* Test more corners of the XML_GetBuffer interface */
START_TEST(test_get_buffer_2)2824*4543ef51SXin LI START_TEST(test_get_buffer_2) {
2825*4543ef51SXin LI   const char *text = get_buffer_test_text;
2826*4543ef51SXin LI   void *buffer;
2827*4543ef51SXin LI 
2828*4543ef51SXin LI   /* Now get a decent buffer */
2829*4543ef51SXin LI   buffer = XML_GetBuffer(g_parser, 1536);
2830*4543ef51SXin LI   if (buffer == NULL)
2831*4543ef51SXin LI     fail("1.5K buffer failed");
2832*4543ef51SXin LI   assert(buffer != NULL);
2833*4543ef51SXin LI   memcpy(buffer, text, strlen(text));
2834*4543ef51SXin LI   if (XML_ParseBuffer(g_parser, (int)strlen(text), XML_FALSE)
2835*4543ef51SXin LI       == XML_STATUS_ERROR)
2836*4543ef51SXin LI     xml_failure(g_parser);
2837*4543ef51SXin LI 
2838*4543ef51SXin LI   /* Extend it, to catch a different code path */
2839*4543ef51SXin LI   if (XML_GetBuffer(g_parser, 1024) == NULL)
2840*4543ef51SXin LI     fail("1024 buffer failed");
2841*4543ef51SXin LI }
2842*4543ef51SXin LI END_TEST
2843*4543ef51SXin LI 
/* Test for signed integer overflow CVE-2022-23852.
 *
 * Only built when XML_CONTEXT_BYTES is positive.  Runs on a private parser
 * rather than the shared g_parser.
 */
#if XML_CONTEXT_BYTES > 0
START_TEST(test_get_buffer_3_overflow) {
  XML_Parser parser = XML_ParserCreate(NULL);
  assert(parser != NULL);

  const char *const input = "\n";
  const int input_len = (int)strlen(input);
  /* Once the input has been parsed, variable "keep" inside XML_GetBuffer
   * will hold this value.
   */
  const int expected_keep = input_len;
  enum XML_Status status;

  status = _XML_Parse_SINGLE_BYTES(parser, input, input_len,
                                   XML_FALSE /* isFinal */);
  if (status == XML_STATUS_ERROR)
    xml_failure(parser);

  /* The request is sized so that adding "keep" to it would go past
   * INT_MAX; it has to be refused.
   */
  assert(expected_keep > 0);
  if (XML_GetBuffer(parser, INT_MAX - expected_keep + 1) != NULL)
    fail("enlarging buffer not failed");

  XML_ParserFree(parser);
}
END_TEST
#endif /* XML_CONTEXT_BYTES > 0 */
2868*4543ef51SXin LI 
START_TEST(test_buffer_can_grow_to_max)2869*4543ef51SXin LI START_TEST(test_buffer_can_grow_to_max) {
2870*4543ef51SXin LI   const char *const prefixes[] = {
2871*4543ef51SXin LI       "",
2872*4543ef51SXin LI       "<",
2873*4543ef51SXin LI       "<x a='",
2874*4543ef51SXin LI       "<doc><x a='",
2875*4543ef51SXin LI       "<document><x a='",
2876*4543ef51SXin LI       "<averylongelementnamesuchthatitwillhopefullystretchacrossmultiplelinesand"
2877*4543ef51SXin LI       "lookprettyridiculousitsalsoveryhardtoreadandifyouredoingitihavetowonderif"
2878*4543ef51SXin LI       "youreallydonthaveanythingbettertodoofcourseiguessicouldveputsomethingbadin"
2879*4543ef51SXin LI       "herebutipromisethatididntheybtwhowgreatarespacesandpunctuationforhelping"
2880*4543ef51SXin LI       "withreadabilityprettygreatithinkanywaysthisisprobablylongenoughbye><x a='"};
2881*4543ef51SXin LI   const int num_prefixes = sizeof(prefixes) / sizeof(prefixes[0]);
2882*4543ef51SXin LI   int maxbuf = INT_MAX / 2 + (INT_MAX & 1); // round up without overflow
2883*4543ef51SXin LI #if defined(__MINGW32__) && ! defined(__MINGW64__)
2884*4543ef51SXin LI   // workaround for mingw/wine32 on GitHub CI not being able to reach 1GiB
2885*4543ef51SXin LI   // Can we make a big allocation?
2886*4543ef51SXin LI   void *big = malloc(maxbuf);
2887*4543ef51SXin LI   if (! big) {
2888*4543ef51SXin LI     // The big allocation failed. Let's be a little lenient.
2889*4543ef51SXin LI     maxbuf = maxbuf / 2;
2890*4543ef51SXin LI   }
2891*4543ef51SXin LI   free(big);
2892*4543ef51SXin LI #endif
2893*4543ef51SXin LI 
2894*4543ef51SXin LI   for (int i = 0; i < num_prefixes; ++i) {
2895*4543ef51SXin LI     set_subtest("\"%s\"", prefixes[i]);
2896*4543ef51SXin LI     XML_Parser parser = XML_ParserCreate(NULL);
2897*4543ef51SXin LI     const int prefix_len = (int)strlen(prefixes[i]);
2898*4543ef51SXin LI     const enum XML_Status s
2899*4543ef51SXin LI         = _XML_Parse_SINGLE_BYTES(parser, prefixes[i], prefix_len, XML_FALSE);
2900*4543ef51SXin LI     if (s != XML_STATUS_OK)
2901*4543ef51SXin LI       xml_failure(parser);
2902*4543ef51SXin LI 
2903*4543ef51SXin LI     // XML_CONTEXT_BYTES of the prefix may remain in the buffer;
2904*4543ef51SXin LI     // subtracting the whole prefix is easiest, and close enough.
2905*4543ef51SXin LI     assert_true(XML_GetBuffer(parser, maxbuf - prefix_len) != NULL);
2906*4543ef51SXin LI     // The limit should be consistent; no prefix should allow us to
2907*4543ef51SXin LI     // reach above the max buffer size.
2908*4543ef51SXin LI     assert_true(XML_GetBuffer(parser, maxbuf + 1) == NULL);
2909*4543ef51SXin LI     XML_ParserFree(parser);
2910*4543ef51SXin LI   }
2911*4543ef51SXin LI }
2912*4543ef51SXin LI END_TEST
2913*4543ef51SXin LI 
START_TEST(test_getbuffer_allocates_on_zero_len)2914*4543ef51SXin LI START_TEST(test_getbuffer_allocates_on_zero_len) {
2915*4543ef51SXin LI   for (int first_len = 1; first_len >= 0; first_len--) {
2916*4543ef51SXin LI     set_subtest("with len=%d first", first_len);
2917*4543ef51SXin LI     XML_Parser parser = XML_ParserCreate(NULL);
2918*4543ef51SXin LI     assert_true(parser != NULL);
2919*4543ef51SXin LI     assert_true(XML_GetBuffer(parser, first_len) != NULL);
2920*4543ef51SXin LI     assert_true(XML_GetBuffer(parser, 0) != NULL);
2921*4543ef51SXin LI     if (XML_ParseBuffer(parser, 0, XML_FALSE) != XML_STATUS_OK)
2922*4543ef51SXin LI       xml_failure(parser);
2923*4543ef51SXin LI     XML_ParserFree(parser);
2924*4543ef51SXin LI   }
2925*4543ef51SXin LI }
2926*4543ef51SXin LI END_TEST
2927*4543ef51SXin LI 
2928*4543ef51SXin LI /* Test position information macros */
START_TEST(test_byte_info_at_end)2929*4543ef51SXin LI START_TEST(test_byte_info_at_end) {
2930*4543ef51SXin LI   const char *text = "<doc></doc>";
2931*4543ef51SXin LI 
2932*4543ef51SXin LI   if (XML_GetCurrentByteIndex(g_parser) != -1
2933*4543ef51SXin LI       || XML_GetCurrentByteCount(g_parser) != 0)
2934*4543ef51SXin LI     fail("Byte index/count incorrect at start of parse");
2935*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2936*4543ef51SXin LI       == XML_STATUS_ERROR)
2937*4543ef51SXin LI     xml_failure(g_parser);
2938*4543ef51SXin LI   /* At end, the count will be zero and the index the end of string */
2939*4543ef51SXin LI   if (XML_GetCurrentByteCount(g_parser) != 0)
2940*4543ef51SXin LI     fail("Terminal byte count incorrect");
2941*4543ef51SXin LI   if (XML_GetCurrentByteIndex(g_parser) != (XML_Index)strlen(text))
2942*4543ef51SXin LI     fail("Terminal byte index incorrect");
2943*4543ef51SXin LI }
2944*4543ef51SXin LI END_TEST
2945*4543ef51SXin LI 
2946*4543ef51SXin LI /* Test position information from errors */
2947*4543ef51SXin LI #define PRE_ERROR_STR "<doc></"
2948*4543ef51SXin LI #define POST_ERROR_STR "wombat></doc>"
START_TEST(test_byte_info_at_error)2949*4543ef51SXin LI START_TEST(test_byte_info_at_error) {
2950*4543ef51SXin LI   const char *text = PRE_ERROR_STR POST_ERROR_STR;
2951*4543ef51SXin LI 
2952*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2953*4543ef51SXin LI       == XML_STATUS_OK)
2954*4543ef51SXin LI     fail("Syntax error not faulted");
2955*4543ef51SXin LI   if (XML_GetCurrentByteCount(g_parser) != 0)
2956*4543ef51SXin LI     fail("Error byte count incorrect");
2957*4543ef51SXin LI   if (XML_GetCurrentByteIndex(g_parser) != strlen(PRE_ERROR_STR))
2958*4543ef51SXin LI     fail("Error byte index incorrect");
2959*4543ef51SXin LI }
2960*4543ef51SXin LI END_TEST
2961*4543ef51SXin LI #undef PRE_ERROR_STR
2962*4543ef51SXin LI #undef POST_ERROR_STR
2963*4543ef51SXin LI 
2964*4543ef51SXin LI /* Test position information in handler */
2965*4543ef51SXin LI #define START_ELEMENT "<e>"
2966*4543ef51SXin LI #define CDATA_TEXT "Hello"
2967*4543ef51SXin LI #define END_ELEMENT "</e>"
START_TEST(test_byte_info_at_cdata)2968*4543ef51SXin LI START_TEST(test_byte_info_at_cdata) {
2969*4543ef51SXin LI   const char *text = START_ELEMENT CDATA_TEXT END_ELEMENT;
2970*4543ef51SXin LI   int offset, size;
2971*4543ef51SXin LI   ByteTestData data;
2972*4543ef51SXin LI 
2973*4543ef51SXin LI   /* Check initial context is empty */
2974*4543ef51SXin LI   if (XML_GetInputContext(g_parser, &offset, &size) != NULL)
2975*4543ef51SXin LI     fail("Unexpected context at start of parse");
2976*4543ef51SXin LI 
2977*4543ef51SXin LI   data.start_element_len = (int)strlen(START_ELEMENT);
2978*4543ef51SXin LI   data.cdata_len = (int)strlen(CDATA_TEXT);
2979*4543ef51SXin LI   data.total_string_len = (int)strlen(text);
2980*4543ef51SXin LI   XML_SetCharacterDataHandler(g_parser, byte_character_handler);
2981*4543ef51SXin LI   XML_SetUserData(g_parser, &data);
2982*4543ef51SXin LI   if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_OK)
2983*4543ef51SXin LI     xml_failure(g_parser);
2984*4543ef51SXin LI }
2985*4543ef51SXin LI END_TEST
2986*4543ef51SXin LI #undef START_ELEMENT
2987*4543ef51SXin LI #undef CDATA_TEXT
2988*4543ef51SXin LI #undef END_ELEMENT
2989*4543ef51SXin LI 
2990*4543ef51SXin LI /* Test predefined entities are correctly recognised */
START_TEST(test_predefined_entities)2991*4543ef51SXin LI START_TEST(test_predefined_entities) {
2992*4543ef51SXin LI   const char *text = "<doc>&lt;&gt;&amp;&quot;&apos;</doc>";
2993*4543ef51SXin LI   const XML_Char *expected = XCS("<doc>&lt;&gt;&amp;&quot;&apos;</doc>");
2994*4543ef51SXin LI   const XML_Char *result = XCS("<>&\"'");
2995*4543ef51SXin LI   CharData storage;
2996*4543ef51SXin LI 
2997*4543ef51SXin LI   XML_SetDefaultHandler(g_parser, accumulate_characters);
2998*4543ef51SXin LI   /* run_character_check uses XML_SetCharacterDataHandler(), which
2999*4543ef51SXin LI    * unfortunately heads off a code path that we need to exercise.
3000*4543ef51SXin LI    */
3001*4543ef51SXin LI   CharData_Init(&storage);
3002*4543ef51SXin LI   XML_SetUserData(g_parser, &storage);
3003*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3004*4543ef51SXin LI       == XML_STATUS_ERROR)
3005*4543ef51SXin LI     xml_failure(g_parser);
3006*4543ef51SXin LI   /* The default handler doesn't translate the entities */
3007*4543ef51SXin LI   CharData_CheckXMLChars(&storage, expected);
3008*4543ef51SXin LI 
3009*4543ef51SXin LI   /* Now try again and check the translation */
3010*4543ef51SXin LI   XML_ParserReset(g_parser, NULL);
3011*4543ef51SXin LI   run_character_check(text, result);
3012*4543ef51SXin LI }
3013*4543ef51SXin LI END_TEST
3014*4543ef51SXin LI 
3015*4543ef51SXin LI /* Regression test that an invalid tag in an external parameter
3016*4543ef51SXin LI  * reference in an external DTD is correctly faulted.
3017*4543ef51SXin LI  *
3018*4543ef51SXin LI  * Only a few specific tags are legal in DTDs ignoring comments and
3019*4543ef51SXin LI  * processing instructions, all of which begin with an exclamation
3020*4543ef51SXin LI  * mark.  "<el/>" is not one of them, so the parser should raise an
3021*4543ef51SXin LI  * error on encountering it.
3022*4543ef51SXin LI  */
START_TEST(test_invalid_tag_in_dtd)3023*4543ef51SXin LI START_TEST(test_invalid_tag_in_dtd) {
3024*4543ef51SXin LI   const char *text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n"
3025*4543ef51SXin LI                      "<doc></doc>\n";
3026*4543ef51SXin LI 
3027*4543ef51SXin LI   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3028*4543ef51SXin LI   XML_SetExternalEntityRefHandler(g_parser, external_entity_param);
3029*4543ef51SXin LI   expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
3030*4543ef51SXin LI                  "Invalid tag IN DTD external param not rejected");
3031*4543ef51SXin LI }
3032*4543ef51SXin LI END_TEST
3033*4543ef51SXin LI 
3034*4543ef51SXin LI /* Test entities not quite the predefined ones are not mis-recognised */
START_TEST(test_not_predefined_entities)3035*4543ef51SXin LI START_TEST(test_not_predefined_entities) {
3036*4543ef51SXin LI   const char *text[] = {"<doc>&pt;</doc>", "<doc>&amo;</doc>",
3037*4543ef51SXin LI                         "<doc>&quid;</doc>", "<doc>&apod;</doc>", NULL};
3038*4543ef51SXin LI   int i = 0;
3039*4543ef51SXin LI 
3040*4543ef51SXin LI   while (text[i] != NULL) {
3041*4543ef51SXin LI     expect_failure(text[i], XML_ERROR_UNDEFINED_ENTITY,
3042*4543ef51SXin LI                    "Undefined entity not rejected");
3043*4543ef51SXin LI     XML_ParserReset(g_parser, NULL);
3044*4543ef51SXin LI     i++;
3045*4543ef51SXin LI   }
3046*4543ef51SXin LI }
3047*4543ef51SXin LI END_TEST
3048*4543ef51SXin LI 
3049*4543ef51SXin LI /* Test conditional inclusion (IGNORE) */
START_TEST(test_ignore_section)3050*4543ef51SXin LI START_TEST(test_ignore_section) {
3051*4543ef51SXin LI   const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n"
3052*4543ef51SXin LI                      "<doc><e>&entity;</e></doc>";
3053*4543ef51SXin LI   const XML_Char *expected
3054*4543ef51SXin LI       = XCS("<![IGNORE[<!ELEMENT e (#PCDATA)*>]]>\n&entity;");
3055*4543ef51SXin LI   CharData storage;
3056*4543ef51SXin LI 
3057*4543ef51SXin LI   CharData_Init(&storage);
3058*4543ef51SXin LI   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3059*4543ef51SXin LI   XML_SetUserData(g_parser, &storage);
3060*4543ef51SXin LI   XML_SetExternalEntityRefHandler(g_parser, external_entity_load_ignore);
3061*4543ef51SXin LI   XML_SetDefaultHandler(g_parser, accumulate_characters);
3062*4543ef51SXin LI   XML_SetStartDoctypeDeclHandler(g_parser, dummy_start_doctype_handler);
3063*4543ef51SXin LI   XML_SetEndDoctypeDeclHandler(g_parser, dummy_end_doctype_handler);
3064*4543ef51SXin LI   XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
3065*4543ef51SXin LI   XML_SetStartElementHandler(g_parser, dummy_start_element);
3066*4543ef51SXin LI   XML_SetEndElementHandler(g_parser, dummy_end_element);
3067*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3068*4543ef51SXin LI       == XML_STATUS_ERROR)
3069*4543ef51SXin LI     xml_failure(g_parser);
3070*4543ef51SXin LI   CharData_CheckXMLChars(&storage, expected);
3071*4543ef51SXin LI }
3072*4543ef51SXin LI END_TEST
3073*4543ef51SXin LI 
START_TEST(test_ignore_section_utf16)3074*4543ef51SXin LI START_TEST(test_ignore_section_utf16) {
3075*4543ef51SXin LI   const char text[] =
3076*4543ef51SXin LI       /* <!DOCTYPE d SYSTEM 's'> */
3077*4543ef51SXin LI       "<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 "
3078*4543ef51SXin LI       "\0S\0Y\0S\0T\0E\0M\0 \0'\0s\0'\0>\0\n\0"
3079*4543ef51SXin LI       /* <d><e>&en;</e></d> */
3080*4543ef51SXin LI       "<\0d\0>\0<\0e\0>\0&\0e\0n\0;\0<\0/\0e\0>\0<\0/\0d\0>\0";
3081*4543ef51SXin LI   const XML_Char *expected = XCS("<![IGNORE[<!ELEMENT e (#PCDATA)*>]]>\n&en;");
3082*4543ef51SXin LI   CharData storage;
3083*4543ef51SXin LI 
3084*4543ef51SXin LI   CharData_Init(&storage);
3085*4543ef51SXin LI   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3086*4543ef51SXin LI   XML_SetUserData(g_parser, &storage);
3087*4543ef51SXin LI   XML_SetExternalEntityRefHandler(g_parser, external_entity_load_ignore_utf16);
3088*4543ef51SXin LI   XML_SetDefaultHandler(g_parser, accumulate_characters);
3089*4543ef51SXin LI   XML_SetStartDoctypeDeclHandler(g_parser, dummy_start_doctype_handler);
3090*4543ef51SXin LI   XML_SetEndDoctypeDeclHandler(g_parser, dummy_end_doctype_handler);
3091*4543ef51SXin LI   XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
3092*4543ef51SXin LI   XML_SetStartElementHandler(g_parser, dummy_start_element);
3093*4543ef51SXin LI   XML_SetEndElementHandler(g_parser, dummy_end_element);
3094*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
3095*4543ef51SXin LI       == XML_STATUS_ERROR)
3096*4543ef51SXin LI     xml_failure(g_parser);
3097*4543ef51SXin LI   CharData_CheckXMLChars(&storage, expected);
3098*4543ef51SXin LI }
3099*4543ef51SXin LI END_TEST
3100*4543ef51SXin LI 
START_TEST(test_ignore_section_utf16_be)3101*4543ef51SXin LI START_TEST(test_ignore_section_utf16_be) {
3102*4543ef51SXin LI   const char text[] =
3103*4543ef51SXin LI       /* <!DOCTYPE d SYSTEM 's'> */
3104*4543ef51SXin LI       "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 "
3105*4543ef51SXin LI       "\0S\0Y\0S\0T\0E\0M\0 \0'\0s\0'\0>\0\n"
3106*4543ef51SXin LI       /* <d><e>&en;</e></d> */
3107*4543ef51SXin LI       "\0<\0d\0>\0<\0e\0>\0&\0e\0n\0;\0<\0/\0e\0>\0<\0/\0d\0>";
3108*4543ef51SXin LI   const XML_Char *expected = XCS("<![IGNORE[<!ELEMENT e (#PCDATA)*>]]>\n&en;");
3109*4543ef51SXin LI   CharData storage;
3110*4543ef51SXin LI 
3111*4543ef51SXin LI   CharData_Init(&storage);
3112*4543ef51SXin LI   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3113*4543ef51SXin LI   XML_SetUserData(g_parser, &storage);
3114*4543ef51SXin LI   XML_SetExternalEntityRefHandler(g_parser,
3115*4543ef51SXin LI                                   external_entity_load_ignore_utf16_be);
3116*4543ef51SXin LI   XML_SetDefaultHandler(g_parser, accumulate_characters);
3117*4543ef51SXin LI   XML_SetStartDoctypeDeclHandler(g_parser, dummy_start_doctype_handler);
3118*4543ef51SXin LI   XML_SetEndDoctypeDeclHandler(g_parser, dummy_end_doctype_handler);
3119*4543ef51SXin LI   XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
3120*4543ef51SXin LI   XML_SetStartElementHandler(g_parser, dummy_start_element);
3121*4543ef51SXin LI   XML_SetEndElementHandler(g_parser, dummy_end_element);
3122*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
3123*4543ef51SXin LI       == XML_STATUS_ERROR)
3124*4543ef51SXin LI     xml_failure(g_parser);
3125*4543ef51SXin LI   CharData_CheckXMLChars(&storage, expected);
3126*4543ef51SXin LI }
3127*4543ef51SXin LI END_TEST
3128*4543ef51SXin LI 
3129*4543ef51SXin LI /* Test mis-formatted conditional exclusion */
START_TEST(test_bad_ignore_section)3130*4543ef51SXin LI START_TEST(test_bad_ignore_section) {
3131*4543ef51SXin LI   const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n"
3132*4543ef51SXin LI                      "<doc><e>&entity;</e></doc>";
3133*4543ef51SXin LI   ExtFaults faults[]
3134*4543ef51SXin LI       = {{"<![IGNORE[<!ELEM", "Broken-off declaration not faulted", NULL,
3135*4543ef51SXin LI           XML_ERROR_SYNTAX},
3136*4543ef51SXin LI          {"<![IGNORE[\x01]]>", "Invalid XML character not faulted", NULL,
3137*4543ef51SXin LI           XML_ERROR_INVALID_TOKEN},
3138*4543ef51SXin LI          {/* FIrst two bytes of a three-byte char */
3139*4543ef51SXin LI           "<![IGNORE[\xe2\x82", "Partial XML character not faulted", NULL,
3140*4543ef51SXin LI           XML_ERROR_PARTIAL_CHAR},
3141*4543ef51SXin LI          {NULL, NULL, NULL, XML_ERROR_NONE}};
3142*4543ef51SXin LI   ExtFaults *fault;
3143*4543ef51SXin LI 
3144*4543ef51SXin LI   for (fault = &faults[0]; fault->parse_text != NULL; fault++) {
3145*4543ef51SXin LI     set_subtest("%s", fault->parse_text);
3146*4543ef51SXin LI     XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3147*4543ef51SXin LI     XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
3148*4543ef51SXin LI     XML_SetUserData(g_parser, fault);
3149*4543ef51SXin LI     expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
3150*4543ef51SXin LI                    "Incomplete IGNORE section not failed");
3151*4543ef51SXin LI     XML_ParserReset(g_parser, NULL);
3152*4543ef51SXin LI   }
3153*4543ef51SXin LI }
3154*4543ef51SXin LI END_TEST
3155*4543ef51SXin LI 
3156*4543ef51SXin LI struct bom_testdata {
3157*4543ef51SXin LI   const char *external;
3158*4543ef51SXin LI   int split;
3159*4543ef51SXin LI   XML_Bool nested_callback_happened;
3160*4543ef51SXin LI };
3161*4543ef51SXin LI 
3162*4543ef51SXin LI static int XMLCALL
external_bom_checker(XML_Parser parser,const XML_Char * context,const XML_Char * base,const XML_Char * systemId,const XML_Char * publicId)3163*4543ef51SXin LI external_bom_checker(XML_Parser parser, const XML_Char *context,
3164*4543ef51SXin LI                      const XML_Char *base, const XML_Char *systemId,
3165*4543ef51SXin LI                      const XML_Char *publicId) {
3166*4543ef51SXin LI   const char *text;
3167*4543ef51SXin LI   UNUSED_P(base);
3168*4543ef51SXin LI   UNUSED_P(systemId);
3169*4543ef51SXin LI   UNUSED_P(publicId);
3170*4543ef51SXin LI 
3171*4543ef51SXin LI   XML_Parser ext_parser = XML_ExternalEntityParserCreate(parser, context, NULL);
3172*4543ef51SXin LI   if (ext_parser == NULL)
3173*4543ef51SXin LI     fail("Could not create external entity parser");
3174*4543ef51SXin LI 
3175*4543ef51SXin LI   if (! xcstrcmp(systemId, XCS("004-2.ent"))) {
3176*4543ef51SXin LI     struct bom_testdata *const testdata
3177*4543ef51SXin LI         = (struct bom_testdata *)XML_GetUserData(parser);
3178*4543ef51SXin LI     const char *const external = testdata->external;
3179*4543ef51SXin LI     const int split = testdata->split;
3180*4543ef51SXin LI     testdata->nested_callback_happened = XML_TRUE;
3181*4543ef51SXin LI 
3182*4543ef51SXin LI     if (_XML_Parse_SINGLE_BYTES(ext_parser, external, split, XML_FALSE)
3183*4543ef51SXin LI         != XML_STATUS_OK) {
3184*4543ef51SXin LI       xml_failure(ext_parser);
3185*4543ef51SXin LI     }
3186*4543ef51SXin LI     text = external + split; // the parse below will continue where we left off.
3187*4543ef51SXin LI   } else if (! xcstrcmp(systemId, XCS("004-1.ent"))) {
3188*4543ef51SXin LI     text = "<!ELEMENT doc EMPTY>\n"
3189*4543ef51SXin LI            "<!ENTITY % e1 SYSTEM '004-2.ent'>\n"
3190*4543ef51SXin LI            "<!ENTITY % e2 '%e1;'>\n";
3191*4543ef51SXin LI   } else {
3192*4543ef51SXin LI     fail("unknown systemId");
3193*4543ef51SXin LI   }
3194*4543ef51SXin LI 
3195*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(ext_parser, text, (int)strlen(text), XML_TRUE)
3196*4543ef51SXin LI       != XML_STATUS_OK)
3197*4543ef51SXin LI     xml_failure(ext_parser);
3198*4543ef51SXin LI 
3199*4543ef51SXin LI   XML_ParserFree(ext_parser);
3200*4543ef51SXin LI   return XML_STATUS_OK;
3201*4543ef51SXin LI }
3202*4543ef51SXin LI 
3203*4543ef51SXin LI /* regression test: BOM should be consumed when followed by a partial token. */
START_TEST(test_external_bom_consumed)3204*4543ef51SXin LI START_TEST(test_external_bom_consumed) {
3205*4543ef51SXin LI   const char *const text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n"
3206*4543ef51SXin LI                            "<doc></doc>\n";
3207*4543ef51SXin LI   const char *const external = "\xEF\xBB\xBF<!ATTLIST doc a1 CDATA 'value'>";
3208*4543ef51SXin LI   const int len = (int)strlen(external);
3209*4543ef51SXin LI   for (int split = 0; split <= len; ++split) {
3210*4543ef51SXin LI     set_subtest("split at byte %d", split);
3211*4543ef51SXin LI 
3212*4543ef51SXin LI     struct bom_testdata testdata;
3213*4543ef51SXin LI     testdata.external = external;
3214*4543ef51SXin LI     testdata.split = split;
3215*4543ef51SXin LI     testdata.nested_callback_happened = XML_FALSE;
3216*4543ef51SXin LI 
3217*4543ef51SXin LI     XML_Parser parser = XML_ParserCreate(NULL);
3218*4543ef51SXin LI     if (parser == NULL) {
3219*4543ef51SXin LI       fail("Couldn't create parser");
3220*4543ef51SXin LI     }
3221*4543ef51SXin LI     XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3222*4543ef51SXin LI     XML_SetExternalEntityRefHandler(parser, external_bom_checker);
3223*4543ef51SXin LI     XML_SetUserData(parser, &testdata);
3224*4543ef51SXin LI     if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
3225*4543ef51SXin LI         == XML_STATUS_ERROR)
3226*4543ef51SXin LI       xml_failure(parser);
3227*4543ef51SXin LI     if (! testdata.nested_callback_happened) {
3228*4543ef51SXin LI       fail("ref handler not called");
3229*4543ef51SXin LI     }
3230*4543ef51SXin LI     XML_ParserFree(parser);
3231*4543ef51SXin LI   }
3232*4543ef51SXin LI }
3233*4543ef51SXin LI END_TEST
3234*4543ef51SXin LI 
3235*4543ef51SXin LI /* Test recursive parsing */
START_TEST(test_external_entity_values)3236*4543ef51SXin LI START_TEST(test_external_entity_values) {
3237*4543ef51SXin LI   const char *text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n"
3238*4543ef51SXin LI                      "<doc></doc>\n";
3239*4543ef51SXin LI   ExtFaults data_004_2[] = {
3240*4543ef51SXin LI       {"<!ATTLIST doc a1 CDATA 'value'>", NULL, NULL, XML_ERROR_NONE},
3241*4543ef51SXin LI       {"<!ATTLIST $doc a1 CDATA 'value'>", "Invalid token not faulted", NULL,
3242*4543ef51SXin LI        XML_ERROR_INVALID_TOKEN},
3243*4543ef51SXin LI       {"'wombat", "Unterminated string not faulted", NULL,
3244*4543ef51SXin LI        XML_ERROR_UNCLOSED_TOKEN},
3245*4543ef51SXin LI       {"\xe2\x82", "Partial UTF-8 character not faulted", NULL,
3246*4543ef51SXin LI        XML_ERROR_PARTIAL_CHAR},
3247*4543ef51SXin LI       {"<?xml version='1.0' encoding='utf-8'?>\n", NULL, NULL, XML_ERROR_NONE},
3248*4543ef51SXin LI       {"<?xml?>", "Malformed XML declaration not faulted", NULL,
3249*4543ef51SXin LI        XML_ERROR_XML_DECL},
3250*4543ef51SXin LI       {/* UTF-8 BOM */
3251*4543ef51SXin LI        "\xEF\xBB\xBF<!ATTLIST doc a1 CDATA 'value'>", NULL, NULL,
3252*4543ef51SXin LI        XML_ERROR_NONE},
3253*4543ef51SXin LI       {"<?xml version='1.0' encoding='utf-8'?>\n$",
3254*4543ef51SXin LI        "Invalid token after text declaration not faulted", NULL,
3255*4543ef51SXin LI        XML_ERROR_INVALID_TOKEN},
3256*4543ef51SXin LI       {"<?xml version='1.0' encoding='utf-8'?>\n'wombat",
3257*4543ef51SXin LI        "Unterminated string after text decl not faulted", NULL,
3258*4543ef51SXin LI        XML_ERROR_UNCLOSED_TOKEN},
3259*4543ef51SXin LI       {"<?xml version='1.0' encoding='utf-8'?>\n\xe2\x82",
3260*4543ef51SXin LI        "Partial UTF-8 character after text decl not faulted", NULL,
3261*4543ef51SXin LI        XML_ERROR_PARTIAL_CHAR},
3262*4543ef51SXin LI       {"%e1;", "Recursive parameter entity not faulted", NULL,
3263*4543ef51SXin LI        XML_ERROR_RECURSIVE_ENTITY_REF},
3264*4543ef51SXin LI       {NULL, NULL, NULL, XML_ERROR_NONE}};
3265*4543ef51SXin LI   int i;
3266*4543ef51SXin LI 
3267*4543ef51SXin LI   for (i = 0; data_004_2[i].parse_text != NULL; i++) {
3268*4543ef51SXin LI     set_subtest("%s", data_004_2[i].parse_text);
3269*4543ef51SXin LI     XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3270*4543ef51SXin LI     XML_SetExternalEntityRefHandler(g_parser, external_entity_valuer);
3271*4543ef51SXin LI     XML_SetUserData(g_parser, &data_004_2[i]);
3272*4543ef51SXin LI     if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3273*4543ef51SXin LI         == XML_STATUS_ERROR)
3274*4543ef51SXin LI       xml_failure(g_parser);
3275*4543ef51SXin LI     XML_ParserReset(g_parser, NULL);
3276*4543ef51SXin LI   }
3277*4543ef51SXin LI }
3278*4543ef51SXin LI END_TEST
3279*4543ef51SXin LI 
3280*4543ef51SXin LI /* Test the recursive parse interacts with a not standalone handler */
START_TEST(test_ext_entity_not_standalone)3281*4543ef51SXin LI START_TEST(test_ext_entity_not_standalone) {
3282*4543ef51SXin LI   const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n"
3283*4543ef51SXin LI                      "<doc></doc>";
3284*4543ef51SXin LI 
3285*4543ef51SXin LI   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3286*4543ef51SXin LI   XML_SetExternalEntityRefHandler(g_parser, external_entity_not_standalone);
3287*4543ef51SXin LI   expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
3288*4543ef51SXin LI                  "Standalone rejection not caught");
3289*4543ef51SXin LI }
3290*4543ef51SXin LI END_TEST
3291*4543ef51SXin LI 
START_TEST(test_ext_entity_value_abort)3292*4543ef51SXin LI START_TEST(test_ext_entity_value_abort) {
3293*4543ef51SXin LI   const char *text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n"
3294*4543ef51SXin LI                      "<doc></doc>\n";
3295*4543ef51SXin LI 
3296*4543ef51SXin LI   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3297*4543ef51SXin LI   XML_SetExternalEntityRefHandler(g_parser, external_entity_value_aborter);
3298*4543ef51SXin LI   g_resumable = XML_FALSE;
3299*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3300*4543ef51SXin LI       == XML_STATUS_ERROR)
3301*4543ef51SXin LI     xml_failure(g_parser);
3302*4543ef51SXin LI }
3303*4543ef51SXin LI END_TEST
3304*4543ef51SXin LI 
START_TEST(test_bad_public_doctype)3305*4543ef51SXin LI START_TEST(test_bad_public_doctype) {
3306*4543ef51SXin LI   const char *text = "<?xml version='1.0' encoding='utf-8'?>\n"
3307*4543ef51SXin LI                      "<!DOCTYPE doc PUBLIC '{BadName}' 'test'>\n"
3308*4543ef51SXin LI                      "<doc></doc>";
3309*4543ef51SXin LI 
3310*4543ef51SXin LI   /* Setting a handler provokes a particular code path */
3311*4543ef51SXin LI   XML_SetDoctypeDeclHandler(g_parser, dummy_start_doctype_handler,
3312*4543ef51SXin LI                             dummy_end_doctype_handler);
3313*4543ef51SXin LI   expect_failure(text, XML_ERROR_PUBLICID, "Bad Public ID not failed");
3314*4543ef51SXin LI }
3315*4543ef51SXin LI END_TEST
3316*4543ef51SXin LI 
3317*4543ef51SXin LI /* Test based on ibm/valid/P32/ibm32v04.xml */
START_TEST(test_attribute_enum_value)3318*4543ef51SXin LI START_TEST(test_attribute_enum_value) {
3319*4543ef51SXin LI   const char *text = "<?xml version='1.0' standalone='no'?>\n"
3320*4543ef51SXin LI                      "<!DOCTYPE animal SYSTEM 'test.dtd'>\n"
3321*4543ef51SXin LI                      "<animal>This is a \n    <a/>  \n\nyellow tiger</animal>";
3322*4543ef51SXin LI   ExtTest dtd_data
3323*4543ef51SXin LI       = {"<!ELEMENT animal (#PCDATA|a)*>\n"
3324*4543ef51SXin LI          "<!ELEMENT a EMPTY>\n"
3325*4543ef51SXin LI          "<!ATTLIST animal xml:space (default|preserve) 'preserve'>",
3326*4543ef51SXin LI          NULL, NULL};
3327*4543ef51SXin LI   const XML_Char *expected = XCS("This is a \n      \n\nyellow tiger");
3328*4543ef51SXin LI 
3329*4543ef51SXin LI   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
3330*4543ef51SXin LI   XML_SetUserData(g_parser, &dtd_data);
3331*4543ef51SXin LI   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3332*4543ef51SXin LI   /* An attribute list handler provokes a different code path */
3333*4543ef51SXin LI   XML_SetAttlistDeclHandler(g_parser, dummy_attlist_decl_handler);
3334*4543ef51SXin LI   run_ext_character_check(text, &dtd_data, expected);
3335*4543ef51SXin LI }
3336*4543ef51SXin LI END_TEST
3337*4543ef51SXin LI 
3338*4543ef51SXin LI /* Slightly bizarrely, the library seems to silently ignore entity
3339*4543ef51SXin LI  * definitions for predefined entities, even when they are wrong.  The
3340*4543ef51SXin LI  * language of the XML 1.0 spec is somewhat unhelpful as to what ought
3341*4543ef51SXin LI  * to happen, so this is currently treated as acceptable.
3342*4543ef51SXin LI  */
START_TEST(test_predefined_entity_redefinition)3343*4543ef51SXin LI START_TEST(test_predefined_entity_redefinition) {
3344*4543ef51SXin LI   const char *text = "<!DOCTYPE doc [\n"
3345*4543ef51SXin LI                      "<!ENTITY apos 'foo'>\n"
3346*4543ef51SXin LI                      "]>\n"
3347*4543ef51SXin LI                      "<doc>&apos;</doc>";
3348*4543ef51SXin LI   run_character_check(text, XCS("'"));
3349*4543ef51SXin LI }
3350*4543ef51SXin LI END_TEST
3351*4543ef51SXin LI 
3352*4543ef51SXin LI /* Test that the parser stops processing the DTD after an unresolved
3353*4543ef51SXin LI  * parameter entity is encountered.
3354*4543ef51SXin LI  */
START_TEST(test_dtd_stop_processing)3355*4543ef51SXin LI START_TEST(test_dtd_stop_processing) {
3356*4543ef51SXin LI   const char *text = "<!DOCTYPE doc [\n"
3357*4543ef51SXin LI                      "%foo;\n"
3358*4543ef51SXin LI                      "<!ENTITY bar 'bas'>\n"
3359*4543ef51SXin LI                      "]><doc/>";
3360*4543ef51SXin LI 
3361*4543ef51SXin LI   XML_SetEntityDeclHandler(g_parser, dummy_entity_decl_handler);
3362*4543ef51SXin LI   init_dummy_handlers();
3363*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3364*4543ef51SXin LI       == XML_STATUS_ERROR)
3365*4543ef51SXin LI     xml_failure(g_parser);
3366*4543ef51SXin LI   if (get_dummy_handler_flags() != 0)
3367*4543ef51SXin LI     fail("DTD processing still going after undefined PE");
3368*4543ef51SXin LI }
3369*4543ef51SXin LI END_TEST
3370*4543ef51SXin LI 
3371*4543ef51SXin LI /* Test public notations with no system ID */
START_TEST(test_public_notation_no_sysid)3372*4543ef51SXin LI START_TEST(test_public_notation_no_sysid) {
3373*4543ef51SXin LI   const char *text = "<!DOCTYPE doc [\n"
3374*4543ef51SXin LI                      "<!NOTATION note PUBLIC 'foo'>\n"
3375*4543ef51SXin LI                      "<!ELEMENT doc EMPTY>\n"
3376*4543ef51SXin LI                      "]>\n<doc/>";
3377*4543ef51SXin LI 
3378*4543ef51SXin LI   init_dummy_handlers();
3379*4543ef51SXin LI   XML_SetNotationDeclHandler(g_parser, dummy_notation_decl_handler);
3380*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3381*4543ef51SXin LI       == XML_STATUS_ERROR)
3382*4543ef51SXin LI     xml_failure(g_parser);
3383*4543ef51SXin LI   if (get_dummy_handler_flags() != DUMMY_NOTATION_DECL_HANDLER_FLAG)
3384*4543ef51SXin LI     fail("Notation declaration handler not called");
3385*4543ef51SXin LI }
3386*4543ef51SXin LI END_TEST
3387*4543ef51SXin LI 
START_TEST(test_nested_groups)3388*4543ef51SXin LI START_TEST(test_nested_groups) {
3389*4543ef51SXin LI   const char *text
3390*4543ef51SXin LI       = "<!DOCTYPE doc [\n"
3391*4543ef51SXin LI         "<!ELEMENT doc "
3392*4543ef51SXin LI         /* Sixteen elements per line */
3393*4543ef51SXin LI         "(e,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,"
3394*4543ef51SXin LI         "(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?"
3395*4543ef51SXin LI         "))))))))))))))))))))))))))))))))>\n"
3396*4543ef51SXin LI         "<!ELEMENT e EMPTY>"
3397*4543ef51SXin LI         "]>\n"
3398*4543ef51SXin LI         "<doc><e/></doc>";
3399*4543ef51SXin LI   CharData storage;
3400*4543ef51SXin LI 
3401*4543ef51SXin LI   CharData_Init(&storage);
3402*4543ef51SXin LI   XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
3403*4543ef51SXin LI   XML_SetStartElementHandler(g_parser, record_element_start_handler);
3404*4543ef51SXin LI   XML_SetUserData(g_parser, &storage);
3405*4543ef51SXin LI   init_dummy_handlers();
3406*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3407*4543ef51SXin LI       == XML_STATUS_ERROR)
3408*4543ef51SXin LI     xml_failure(g_parser);
3409*4543ef51SXin LI   CharData_CheckXMLChars(&storage, XCS("doce"));
3410*4543ef51SXin LI   if (get_dummy_handler_flags() != DUMMY_ELEMENT_DECL_HANDLER_FLAG)
3411*4543ef51SXin LI     fail("Element handler not fired");
3412*4543ef51SXin LI }
3413*4543ef51SXin LI END_TEST
3414*4543ef51SXin LI 
START_TEST(test_group_choice)3415*4543ef51SXin LI START_TEST(test_group_choice) {
3416*4543ef51SXin LI   const char *text = "<!DOCTYPE doc [\n"
3417*4543ef51SXin LI                      "<!ELEMENT doc (a|b|c)+>\n"
3418*4543ef51SXin LI                      "<!ELEMENT a EMPTY>\n"
3419*4543ef51SXin LI                      "<!ELEMENT b (#PCDATA)>\n"
3420*4543ef51SXin LI                      "<!ELEMENT c ANY>\n"
3421*4543ef51SXin LI                      "]>\n"
3422*4543ef51SXin LI                      "<doc>\n"
3423*4543ef51SXin LI                      "<a/>\n"
3424*4543ef51SXin LI                      "<b attr='foo'>This is a foo</b>\n"
3425*4543ef51SXin LI                      "<c></c>\n"
3426*4543ef51SXin LI                      "</doc>\n";
3427*4543ef51SXin LI 
3428*4543ef51SXin LI   XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
3429*4543ef51SXin LI   init_dummy_handlers();
3430*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3431*4543ef51SXin LI       == XML_STATUS_ERROR)
3432*4543ef51SXin LI     xml_failure(g_parser);
3433*4543ef51SXin LI   if (get_dummy_handler_flags() != DUMMY_ELEMENT_DECL_HANDLER_FLAG)
3434*4543ef51SXin LI     fail("Element handler flag not raised");
3435*4543ef51SXin LI }
3436*4543ef51SXin LI END_TEST
3437*4543ef51SXin LI 
START_TEST(test_standalone_parameter_entity)3438*4543ef51SXin LI START_TEST(test_standalone_parameter_entity) {
3439*4543ef51SXin LI   const char *text = "<?xml version='1.0' standalone='yes'?>\n"
3440*4543ef51SXin LI                      "<!DOCTYPE doc SYSTEM 'http://example.org/' [\n"
3441*4543ef51SXin LI                      "<!ENTITY % entity '<!ELEMENT doc (#PCDATA)>'>\n"
3442*4543ef51SXin LI                      "%entity;\n"
3443*4543ef51SXin LI                      "]>\n"
3444*4543ef51SXin LI                      "<doc></doc>";
3445*4543ef51SXin LI   char dtd_data[] = "<!ENTITY % e1 'foo'>\n";
3446*4543ef51SXin LI 
3447*4543ef51SXin LI   XML_SetUserData(g_parser, dtd_data);
3448*4543ef51SXin LI   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3449*4543ef51SXin LI   XML_SetExternalEntityRefHandler(g_parser, external_entity_public);
3450*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3451*4543ef51SXin LI       == XML_STATUS_ERROR)
3452*4543ef51SXin LI     xml_failure(g_parser);
3453*4543ef51SXin LI }
3454*4543ef51SXin LI END_TEST
3455*4543ef51SXin LI 
3456*4543ef51SXin LI /* Test skipping of parameter entity in an external DTD */
3457*4543ef51SXin LI /* Derived from ibm/invalid/P69/ibm69i01.xml */
START_TEST(test_skipped_parameter_entity)3458*4543ef51SXin LI START_TEST(test_skipped_parameter_entity) {
3459*4543ef51SXin LI   const char *text = "<?xml version='1.0'?>\n"
3460*4543ef51SXin LI                      "<!DOCTYPE root SYSTEM 'http://example.org/dtd.ent' [\n"
3461*4543ef51SXin LI                      "<!ELEMENT root (#PCDATA|a)* >\n"
3462*4543ef51SXin LI                      "]>\n"
3463*4543ef51SXin LI                      "<root></root>";
3464*4543ef51SXin LI   ExtTest dtd_data = {"%pe2;", NULL, NULL};
3465*4543ef51SXin LI 
3466*4543ef51SXin LI   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
3467*4543ef51SXin LI   XML_SetUserData(g_parser, &dtd_data);
3468*4543ef51SXin LI   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3469*4543ef51SXin LI   XML_SetSkippedEntityHandler(g_parser, dummy_skip_handler);
3470*4543ef51SXin LI   init_dummy_handlers();
3471*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3472*4543ef51SXin LI       == XML_STATUS_ERROR)
3473*4543ef51SXin LI     xml_failure(g_parser);
3474*4543ef51SXin LI   if (get_dummy_handler_flags() != DUMMY_SKIP_HANDLER_FLAG)
3475*4543ef51SXin LI     fail("Skip handler not executed");
3476*4543ef51SXin LI }
3477*4543ef51SXin LI END_TEST
3478*4543ef51SXin LI 
3479*4543ef51SXin LI /* Test recursive parameter entity definition rejected in external DTD */
START_TEST(test_recursive_external_parameter_entity)3480*4543ef51SXin LI START_TEST(test_recursive_external_parameter_entity) {
3481*4543ef51SXin LI   const char *text = "<?xml version='1.0'?>\n"
3482*4543ef51SXin LI                      "<!DOCTYPE root SYSTEM 'http://example.org/dtd.ent' [\n"
3483*4543ef51SXin LI                      "<!ELEMENT root (#PCDATA|a)* >\n"
3484*4543ef51SXin LI                      "]>\n"
3485*4543ef51SXin LI                      "<root></root>";
3486*4543ef51SXin LI   ExtFaults dtd_data = {"<!ENTITY % pe2 '&#37;pe2;'>\n%pe2;",
3487*4543ef51SXin LI                         "Recursive external parameter entity not faulted", NULL,
3488*4543ef51SXin LI                         XML_ERROR_RECURSIVE_ENTITY_REF};
3489*4543ef51SXin LI 
3490*4543ef51SXin LI   XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
3491*4543ef51SXin LI   XML_SetUserData(g_parser, &dtd_data);
3492*4543ef51SXin LI   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3493*4543ef51SXin LI   expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
3494*4543ef51SXin LI                  "Recursive external parameter not spotted");
3495*4543ef51SXin LI }
3496*4543ef51SXin LI END_TEST
3497*4543ef51SXin LI 
3498*4543ef51SXin LI /* Test undefined parameter entity in external entity handler */
START_TEST(test_undefined_ext_entity_in_external_dtd)3499*4543ef51SXin LI START_TEST(test_undefined_ext_entity_in_external_dtd) {
3500*4543ef51SXin LI   const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n"
3501*4543ef51SXin LI                      "<doc></doc>\n";
3502*4543ef51SXin LI 
3503*4543ef51SXin LI   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3504*4543ef51SXin LI   XML_SetExternalEntityRefHandler(g_parser, external_entity_devaluer);
3505*4543ef51SXin LI   XML_SetUserData(g_parser, NULL);
3506*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3507*4543ef51SXin LI       == XML_STATUS_ERROR)
3508*4543ef51SXin LI     xml_failure(g_parser);
3509*4543ef51SXin LI 
3510*4543ef51SXin LI   /* Now repeat without the external entity ref handler invoking
3511*4543ef51SXin LI    * another copy of itself.
3512*4543ef51SXin LI    */
3513*4543ef51SXin LI   XML_ParserReset(g_parser, NULL);
3514*4543ef51SXin LI   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3515*4543ef51SXin LI   XML_SetExternalEntityRefHandler(g_parser, external_entity_devaluer);
3516*4543ef51SXin LI   XML_SetUserData(g_parser, g_parser); /* Any non-NULL value will do */
3517*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3518*4543ef51SXin LI       == XML_STATUS_ERROR)
3519*4543ef51SXin LI     xml_failure(g_parser);
3520*4543ef51SXin LI }
3521*4543ef51SXin LI END_TEST
3522*4543ef51SXin LI 
3523*4543ef51SXin LI /* Test suspending the parse on receiving an XML declaration works */
START_TEST(test_suspend_xdecl)3524*4543ef51SXin LI START_TEST(test_suspend_xdecl) {
3525*4543ef51SXin LI   const char *text = long_character_data_text;
3526*4543ef51SXin LI 
3527*4543ef51SXin LI   XML_SetXmlDeclHandler(g_parser, entity_suspending_xdecl_handler);
3528*4543ef51SXin LI   XML_SetUserData(g_parser, g_parser);
3529*4543ef51SXin LI   g_resumable = XML_TRUE;
3530*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3531*4543ef51SXin LI       != XML_STATUS_SUSPENDED)
3532*4543ef51SXin LI     xml_failure(g_parser);
3533*4543ef51SXin LI   if (XML_GetErrorCode(g_parser) != XML_ERROR_NONE)
3534*4543ef51SXin LI     xml_failure(g_parser);
3535*4543ef51SXin LI   /* Attempt to start a new parse while suspended */
3536*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3537*4543ef51SXin LI       != XML_STATUS_ERROR)
3538*4543ef51SXin LI     fail("Attempt to parse while suspended not faulted");
3539*4543ef51SXin LI   if (XML_GetErrorCode(g_parser) != XML_ERROR_SUSPENDED)
3540*4543ef51SXin LI     fail("Suspended parse not faulted with correct error");
3541*4543ef51SXin LI }
3542*4543ef51SXin LI END_TEST
3543*4543ef51SXin LI 
3544*4543ef51SXin LI /* Test aborting the parse in an epilog works */
START_TEST(test_abort_epilog)3545*4543ef51SXin LI START_TEST(test_abort_epilog) {
3546*4543ef51SXin LI   const char *text = "<doc></doc>\n\r\n";
3547*4543ef51SXin LI   XML_Char trigger_char = XCS('\r');
3548*4543ef51SXin LI 
3549*4543ef51SXin LI   XML_SetDefaultHandler(g_parser, selective_aborting_default_handler);
3550*4543ef51SXin LI   XML_SetUserData(g_parser, &trigger_char);
3551*4543ef51SXin LI   g_resumable = XML_FALSE;
3552*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3553*4543ef51SXin LI       != XML_STATUS_ERROR)
3554*4543ef51SXin LI     fail("Abort not triggered");
3555*4543ef51SXin LI   if (XML_GetErrorCode(g_parser) != XML_ERROR_ABORTED)
3556*4543ef51SXin LI     xml_failure(g_parser);
3557*4543ef51SXin LI }
3558*4543ef51SXin LI END_TEST
3559*4543ef51SXin LI 
3560*4543ef51SXin LI /* Test a different code path for abort in the epilog */
START_TEST(test_abort_epilog_2)3561*4543ef51SXin LI START_TEST(test_abort_epilog_2) {
3562*4543ef51SXin LI   const char *text = "<doc></doc>\n";
3563*4543ef51SXin LI   XML_Char trigger_char = XCS('\n');
3564*4543ef51SXin LI 
3565*4543ef51SXin LI   XML_SetDefaultHandler(g_parser, selective_aborting_default_handler);
3566*4543ef51SXin LI   XML_SetUserData(g_parser, &trigger_char);
3567*4543ef51SXin LI   g_resumable = XML_FALSE;
3568*4543ef51SXin LI   expect_failure(text, XML_ERROR_ABORTED, "Abort not triggered");
3569*4543ef51SXin LI }
3570*4543ef51SXin LI END_TEST
3571*4543ef51SXin LI 
3572*4543ef51SXin LI /* Test suspension from the epilog */
START_TEST(test_suspend_epilog)3573*4543ef51SXin LI START_TEST(test_suspend_epilog) {
3574*4543ef51SXin LI   const char *text = "<doc></doc>\n";
3575*4543ef51SXin LI   XML_Char trigger_char = XCS('\n');
3576*4543ef51SXin LI 
3577*4543ef51SXin LI   XML_SetDefaultHandler(g_parser, selective_aborting_default_handler);
3578*4543ef51SXin LI   XML_SetUserData(g_parser, &trigger_char);
3579*4543ef51SXin LI   g_resumable = XML_TRUE;
3580*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3581*4543ef51SXin LI       != XML_STATUS_SUSPENDED)
3582*4543ef51SXin LI     xml_failure(g_parser);
3583*4543ef51SXin LI }
3584*4543ef51SXin LI END_TEST
3585*4543ef51SXin LI 
START_TEST(test_suspend_in_sole_empty_tag)3586*4543ef51SXin LI START_TEST(test_suspend_in_sole_empty_tag) {
3587*4543ef51SXin LI   const char *text = "<doc/>";
3588*4543ef51SXin LI   enum XML_Status rc;
3589*4543ef51SXin LI 
3590*4543ef51SXin LI   XML_SetEndElementHandler(g_parser, suspending_end_handler);
3591*4543ef51SXin LI   XML_SetUserData(g_parser, g_parser);
3592*4543ef51SXin LI   rc = _XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE);
3593*4543ef51SXin LI   if (rc == XML_STATUS_ERROR)
3594*4543ef51SXin LI     xml_failure(g_parser);
3595*4543ef51SXin LI   else if (rc != XML_STATUS_SUSPENDED)
3596*4543ef51SXin LI     fail("Suspend not triggered");
3597*4543ef51SXin LI   rc = XML_ResumeParser(g_parser);
3598*4543ef51SXin LI   if (rc == XML_STATUS_ERROR)
3599*4543ef51SXin LI     xml_failure(g_parser);
3600*4543ef51SXin LI   else if (rc != XML_STATUS_OK)
3601*4543ef51SXin LI     fail("Resume failed");
3602*4543ef51SXin LI }
3603*4543ef51SXin LI END_TEST
3604*4543ef51SXin LI 
START_TEST(test_unfinished_epilog)3605*4543ef51SXin LI START_TEST(test_unfinished_epilog) {
3606*4543ef51SXin LI   const char *text = "<doc></doc><";
3607*4543ef51SXin LI 
3608*4543ef51SXin LI   expect_failure(text, XML_ERROR_UNCLOSED_TOKEN,
3609*4543ef51SXin LI                  "Incomplete epilog entry not faulted");
3610*4543ef51SXin LI }
3611*4543ef51SXin LI END_TEST
3612*4543ef51SXin LI 
START_TEST(test_partial_char_in_epilog)3613*4543ef51SXin LI START_TEST(test_partial_char_in_epilog) {
3614*4543ef51SXin LI   const char *text = "<doc></doc>\xe2\x82";
3615*4543ef51SXin LI 
3616*4543ef51SXin LI   /* First check that no fault is raised if the parse is not finished */
3617*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE)
3618*4543ef51SXin LI       == XML_STATUS_ERROR)
3619*4543ef51SXin LI     xml_failure(g_parser);
3620*4543ef51SXin LI   /* Now check that it is faulted once we finish */
3621*4543ef51SXin LI   if (XML_ParseBuffer(g_parser, 0, XML_TRUE) != XML_STATUS_ERROR)
3622*4543ef51SXin LI     fail("Partial character in epilog not faulted");
3623*4543ef51SXin LI   if (XML_GetErrorCode(g_parser) != XML_ERROR_PARTIAL_CHAR)
3624*4543ef51SXin LI     xml_failure(g_parser);
3625*4543ef51SXin LI }
3626*4543ef51SXin LI END_TEST
3627*4543ef51SXin LI 
3628*4543ef51SXin LI /* Test resuming a parse suspended in entity substitution */
START_TEST(test_suspend_resume_internal_entity)3629*4543ef51SXin LI START_TEST(test_suspend_resume_internal_entity) {
3630*4543ef51SXin LI   const char *text
3631*4543ef51SXin LI       = "<!DOCTYPE doc [\n"
3632*4543ef51SXin LI         "<!ENTITY foo '<suspend>Hi<suspend>Ho</suspend></suspend>'>\n"
3633*4543ef51SXin LI         "]>\n"
3634*4543ef51SXin LI         "<doc>&foo;</doc>\n";
3635*4543ef51SXin LI   const XML_Char *expected1 = XCS("Hi");
3636*4543ef51SXin LI   const XML_Char *expected2 = XCS("HiHo");
3637*4543ef51SXin LI   CharData storage;
3638*4543ef51SXin LI 
3639*4543ef51SXin LI   CharData_Init(&storage);
3640*4543ef51SXin LI   XML_SetStartElementHandler(g_parser, start_element_suspender);
3641*4543ef51SXin LI   XML_SetCharacterDataHandler(g_parser, accumulate_characters);
3642*4543ef51SXin LI   XML_SetUserData(g_parser, &storage);
3643*4543ef51SXin LI   // can't use SINGLE_BYTES here, because it'll return early on suspension, and
3644*4543ef51SXin LI   // we won't know exactly how much input we actually managed to give Expat.
3645*4543ef51SXin LI   if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
3646*4543ef51SXin LI       != XML_STATUS_SUSPENDED)
3647*4543ef51SXin LI     xml_failure(g_parser);
3648*4543ef51SXin LI   CharData_CheckXMLChars(&storage, XCS(""));
3649*4543ef51SXin LI   if (XML_ResumeParser(g_parser) != XML_STATUS_SUSPENDED)
3650*4543ef51SXin LI     xml_failure(g_parser);
3651*4543ef51SXin LI   CharData_CheckXMLChars(&storage, expected1);
3652*4543ef51SXin LI   if (XML_ResumeParser(g_parser) != XML_STATUS_OK)
3653*4543ef51SXin LI     xml_failure(g_parser);
3654*4543ef51SXin LI   CharData_CheckXMLChars(&storage, expected2);
3655*4543ef51SXin LI }
3656*4543ef51SXin LI END_TEST
3657*4543ef51SXin LI 
START_TEST(test_suspend_resume_internal_entity_issue_629)3658*4543ef51SXin LI START_TEST(test_suspend_resume_internal_entity_issue_629) {
3659*4543ef51SXin LI   const char *const text
3660*4543ef51SXin LI       = "<!DOCTYPE a [<!ENTITY e '<!--COMMENT-->a'>]><a>&e;<b>\n"
3661*4543ef51SXin LI         "<"
3662*4543ef51SXin LI         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3663*4543ef51SXin LI         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3664*4543ef51SXin LI         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3665*4543ef51SXin LI         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3666*4543ef51SXin LI         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3667*4543ef51SXin LI         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3668*4543ef51SXin LI         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3669*4543ef51SXin LI         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3670*4543ef51SXin LI         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3671*4543ef51SXin LI         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3672*4543ef51SXin LI         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3673*4543ef51SXin LI         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3674*4543ef51SXin LI         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3675*4543ef51SXin LI         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3676*4543ef51SXin LI         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3677*4543ef51SXin LI         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3678*4543ef51SXin LI         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3679*4543ef51SXin LI         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3680*4543ef51SXin LI         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3681*4543ef51SXin LI         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3682*4543ef51SXin LI         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3683*4543ef51SXin LI         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3684*4543ef51SXin LI         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3685*4543ef51SXin LI         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3686*4543ef51SXin LI         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3687*4543ef51SXin LI         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3688*4543ef51SXin LI         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3689*4543ef51SXin LI         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3690*4543ef51SXin LI         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3691*4543ef51SXin LI         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3692*4543ef51SXin LI         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3693*4543ef51SXin LI         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3694*4543ef51SXin LI         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3695*4543ef51SXin LI         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3696*4543ef51SXin LI         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3697*4543ef51SXin LI         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3698*4543ef51SXin LI         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3699*4543ef51SXin LI         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3700*4543ef51SXin LI         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3701*4543ef51SXin LI         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3702*4543ef51SXin LI         "/>"
3703*4543ef51SXin LI         "</b></a>";
3704*4543ef51SXin LI   const size_t firstChunkSizeBytes = 54;
3705*4543ef51SXin LI 
3706*4543ef51SXin LI   XML_Parser parser = XML_ParserCreate(NULL);
3707*4543ef51SXin LI   XML_SetUserData(parser, parser);
3708*4543ef51SXin LI   XML_SetCommentHandler(parser, suspending_comment_handler);
3709*4543ef51SXin LI 
3710*4543ef51SXin LI   if (XML_Parse(parser, text, (int)firstChunkSizeBytes, XML_FALSE)
3711*4543ef51SXin LI       != XML_STATUS_SUSPENDED)
3712*4543ef51SXin LI     xml_failure(parser);
3713*4543ef51SXin LI   if (XML_ResumeParser(parser) != XML_STATUS_OK)
3714*4543ef51SXin LI     xml_failure(parser);
3715*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(parser, text + firstChunkSizeBytes,
3716*4543ef51SXin LI                               (int)(strlen(text) - firstChunkSizeBytes),
3717*4543ef51SXin LI                               XML_TRUE)
3718*4543ef51SXin LI       != XML_STATUS_OK)
3719*4543ef51SXin LI     xml_failure(parser);
3720*4543ef51SXin LI   XML_ParserFree(parser);
3721*4543ef51SXin LI }
3722*4543ef51SXin LI END_TEST
3723*4543ef51SXin LI 
3724*4543ef51SXin LI /* Test syntax error is caught at parse resumption */
START_TEST(test_resume_entity_with_syntax_error)3725*4543ef51SXin LI START_TEST(test_resume_entity_with_syntax_error) {
3726*4543ef51SXin LI   const char *text = "<!DOCTYPE doc [\n"
3727*4543ef51SXin LI                      "<!ENTITY foo '<suspend>Hi</wombat>'>\n"
3728*4543ef51SXin LI                      "]>\n"
3729*4543ef51SXin LI                      "<doc>&foo;</doc>\n";
3730*4543ef51SXin LI 
3731*4543ef51SXin LI   XML_SetStartElementHandler(g_parser, start_element_suspender);
3732*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3733*4543ef51SXin LI       != XML_STATUS_SUSPENDED)
3734*4543ef51SXin LI     xml_failure(g_parser);
3735*4543ef51SXin LI   if (XML_ResumeParser(g_parser) != XML_STATUS_ERROR)
3736*4543ef51SXin LI     fail("Syntax error in entity not faulted");
3737*4543ef51SXin LI   if (XML_GetErrorCode(g_parser) != XML_ERROR_TAG_MISMATCH)
3738*4543ef51SXin LI     xml_failure(g_parser);
3739*4543ef51SXin LI }
3740*4543ef51SXin LI END_TEST
3741*4543ef51SXin LI 
3742*4543ef51SXin LI /* Test suspending and resuming in a parameter entity substitution */
START_TEST(test_suspend_resume_parameter_entity)3743*4543ef51SXin LI START_TEST(test_suspend_resume_parameter_entity) {
3744*4543ef51SXin LI   const char *text = "<!DOCTYPE doc [\n"
3745*4543ef51SXin LI                      "<!ENTITY % foo '<!ELEMENT doc (#PCDATA)*>'>\n"
3746*4543ef51SXin LI                      "%foo;\n"
3747*4543ef51SXin LI                      "]>\n"
3748*4543ef51SXin LI                      "<doc>Hello, world</doc>";
3749*4543ef51SXin LI   const XML_Char *expected = XCS("Hello, world");
3750*4543ef51SXin LI   CharData storage;
3751*4543ef51SXin LI 
3752*4543ef51SXin LI   CharData_Init(&storage);
3753*4543ef51SXin LI   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3754*4543ef51SXin LI   XML_SetElementDeclHandler(g_parser, element_decl_suspender);
3755*4543ef51SXin LI   XML_SetCharacterDataHandler(g_parser, accumulate_characters);
3756*4543ef51SXin LI   XML_SetUserData(g_parser, &storage);
3757*4543ef51SXin LI   if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
3758*4543ef51SXin LI       != XML_STATUS_SUSPENDED)
3759*4543ef51SXin LI     xml_failure(g_parser);
3760*4543ef51SXin LI   CharData_CheckXMLChars(&storage, XCS(""));
3761*4543ef51SXin LI   if (XML_ResumeParser(g_parser) != XML_STATUS_OK)
3762*4543ef51SXin LI     xml_failure(g_parser);
3763*4543ef51SXin LI   CharData_CheckXMLChars(&storage, expected);
3764*4543ef51SXin LI }
3765*4543ef51SXin LI END_TEST
3766*4543ef51SXin LI 
3767*4543ef51SXin LI /* Test attempting to use parser after an error is faulted */
START_TEST(test_restart_on_error)3768*4543ef51SXin LI START_TEST(test_restart_on_error) {
3769*4543ef51SXin LI   const char *text = "<$doc><doc></doc>";
3770*4543ef51SXin LI 
3771*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3772*4543ef51SXin LI       != XML_STATUS_ERROR)
3773*4543ef51SXin LI     fail("Invalid tag name not faulted");
3774*4543ef51SXin LI   if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN)
3775*4543ef51SXin LI     xml_failure(g_parser);
3776*4543ef51SXin LI   if (XML_Parse(g_parser, NULL, 0, XML_TRUE) != XML_STATUS_ERROR)
3777*4543ef51SXin LI     fail("Restarting invalid parse not faulted");
3778*4543ef51SXin LI   if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN)
3779*4543ef51SXin LI     xml_failure(g_parser);
3780*4543ef51SXin LI }
3781*4543ef51SXin LI END_TEST
3782*4543ef51SXin LI 
3783*4543ef51SXin LI /* Test that angle brackets in an attribute default value are faulted */
START_TEST(test_reject_lt_in_attribute_value)3784*4543ef51SXin LI START_TEST(test_reject_lt_in_attribute_value) {
3785*4543ef51SXin LI   const char *text = "<!DOCTYPE doc [<!ATTLIST doc a CDATA '<bar>'>]>\n"
3786*4543ef51SXin LI                      "<doc></doc>";
3787*4543ef51SXin LI 
3788*4543ef51SXin LI   expect_failure(text, XML_ERROR_INVALID_TOKEN,
3789*4543ef51SXin LI                  "Bad attribute default not faulted");
3790*4543ef51SXin LI }
3791*4543ef51SXin LI END_TEST
3792*4543ef51SXin LI 
START_TEST(test_reject_unfinished_param_in_att_value)3793*4543ef51SXin LI START_TEST(test_reject_unfinished_param_in_att_value) {
3794*4543ef51SXin LI   const char *text = "<!DOCTYPE doc [<!ATTLIST doc a CDATA '&foo'>]>\n"
3795*4543ef51SXin LI                      "<doc></doc>";
3796*4543ef51SXin LI 
3797*4543ef51SXin LI   expect_failure(text, XML_ERROR_INVALID_TOKEN,
3798*4543ef51SXin LI                  "Bad attribute default not faulted");
3799*4543ef51SXin LI }
3800*4543ef51SXin LI END_TEST
3801*4543ef51SXin LI 
START_TEST(test_trailing_cr_in_att_value)3802*4543ef51SXin LI START_TEST(test_trailing_cr_in_att_value) {
3803*4543ef51SXin LI   const char *text = "<doc a='value\r'/>";
3804*4543ef51SXin LI 
3805*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3806*4543ef51SXin LI       == XML_STATUS_ERROR)
3807*4543ef51SXin LI     xml_failure(g_parser);
3808*4543ef51SXin LI }
3809*4543ef51SXin LI END_TEST
3810*4543ef51SXin LI 
3811*4543ef51SXin LI /* Try parsing a general entity within a parameter entity in a
3812*4543ef51SXin LI  * standalone internal DTD.  Covers a corner case in the parser.
3813*4543ef51SXin LI  */
START_TEST(test_standalone_internal_entity)3814*4543ef51SXin LI START_TEST(test_standalone_internal_entity) {
3815*4543ef51SXin LI   const char *text = "<?xml version='1.0' standalone='yes' ?>\n"
3816*4543ef51SXin LI                      "<!DOCTYPE doc [\n"
3817*4543ef51SXin LI                      "  <!ELEMENT doc (#PCDATA)>\n"
3818*4543ef51SXin LI                      "  <!ENTITY % pe '<!ATTLIST doc att2 CDATA \"&ge;\">'>\n"
3819*4543ef51SXin LI                      "  <!ENTITY ge 'AttDefaultValue'>\n"
3820*4543ef51SXin LI                      "  %pe;\n"
3821*4543ef51SXin LI                      "]>\n"
3822*4543ef51SXin LI                      "<doc att2='any'/>";
3823*4543ef51SXin LI 
3824*4543ef51SXin LI   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3825*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3826*4543ef51SXin LI       == XML_STATUS_ERROR)
3827*4543ef51SXin LI     xml_failure(g_parser);
3828*4543ef51SXin LI }
3829*4543ef51SXin LI END_TEST
3830*4543ef51SXin LI 
3831*4543ef51SXin LI /* Test that a reference to an unknown external entity is skipped */
START_TEST(test_skipped_external_entity)3832*4543ef51SXin LI START_TEST(test_skipped_external_entity) {
3833*4543ef51SXin LI   const char *text = "<!DOCTYPE doc SYSTEM 'http://example.org/'>\n"
3834*4543ef51SXin LI                      "<doc></doc>\n";
3835*4543ef51SXin LI   ExtTest test_data = {"<!ELEMENT doc EMPTY>\n"
3836*4543ef51SXin LI                        "<!ENTITY % e2 '%e1;'>\n",
3837*4543ef51SXin LI                        NULL, NULL};
3838*4543ef51SXin LI 
3839*4543ef51SXin LI   XML_SetUserData(g_parser, &test_data);
3840*4543ef51SXin LI   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3841*4543ef51SXin LI   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
3842*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3843*4543ef51SXin LI       == XML_STATUS_ERROR)
3844*4543ef51SXin LI     xml_failure(g_parser);
3845*4543ef51SXin LI }
3846*4543ef51SXin LI END_TEST
3847*4543ef51SXin LI 
3848*4543ef51SXin LI /* Test a different form of unknown external entity */
START_TEST(test_skipped_null_loaded_ext_entity)3849*4543ef51SXin LI START_TEST(test_skipped_null_loaded_ext_entity) {
3850*4543ef51SXin LI   const char *text = "<!DOCTYPE doc SYSTEM 'http://example.org/one.ent'>\n"
3851*4543ef51SXin LI                      "<doc />";
3852*4543ef51SXin LI   ExtHdlrData test_data
3853*4543ef51SXin LI       = {"<!ENTITY % pe1 SYSTEM 'http://example.org/two.ent'>\n"
3854*4543ef51SXin LI          "<!ENTITY % pe2 '%pe1;'>\n"
3855*4543ef51SXin LI          "%pe2;\n",
3856*4543ef51SXin LI          external_entity_null_loader};
3857*4543ef51SXin LI 
3858*4543ef51SXin LI   XML_SetUserData(g_parser, &test_data);
3859*4543ef51SXin LI   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3860*4543ef51SXin LI   XML_SetExternalEntityRefHandler(g_parser, external_entity_oneshot_loader);
3861*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3862*4543ef51SXin LI       == XML_STATUS_ERROR)
3863*4543ef51SXin LI     xml_failure(g_parser);
3864*4543ef51SXin LI }
3865*4543ef51SXin LI END_TEST
3866*4543ef51SXin LI 
START_TEST(test_skipped_unloaded_ext_entity)3867*4543ef51SXin LI START_TEST(test_skipped_unloaded_ext_entity) {
3868*4543ef51SXin LI   const char *text = "<!DOCTYPE doc SYSTEM 'http://example.org/one.ent'>\n"
3869*4543ef51SXin LI                      "<doc />";
3870*4543ef51SXin LI   ExtHdlrData test_data
3871*4543ef51SXin LI       = {"<!ENTITY % pe1 SYSTEM 'http://example.org/two.ent'>\n"
3872*4543ef51SXin LI          "<!ENTITY % pe2 '%pe1;'>\n"
3873*4543ef51SXin LI          "%pe2;\n",
3874*4543ef51SXin LI          NULL};
3875*4543ef51SXin LI 
3876*4543ef51SXin LI   XML_SetUserData(g_parser, &test_data);
3877*4543ef51SXin LI   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3878*4543ef51SXin LI   XML_SetExternalEntityRefHandler(g_parser, external_entity_oneshot_loader);
3879*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3880*4543ef51SXin LI       == XML_STATUS_ERROR)
3881*4543ef51SXin LI     xml_failure(g_parser);
3882*4543ef51SXin LI }
3883*4543ef51SXin LI END_TEST
3884*4543ef51SXin LI 
3885*4543ef51SXin LI /* Test that a parameter entity value ending with a carriage return
3886*4543ef51SXin LI  * has it translated internally into a newline.
3887*4543ef51SXin LI  */
START_TEST(test_param_entity_with_trailing_cr)3888*4543ef51SXin LI START_TEST(test_param_entity_with_trailing_cr) {
3889*4543ef51SXin LI #define PARAM_ENTITY_NAME "pe"
3890*4543ef51SXin LI #define PARAM_ENTITY_CORE_VALUE "<!ATTLIST doc att CDATA \"default\">"
3891*4543ef51SXin LI   const char *text = "<!DOCTYPE doc SYSTEM 'http://example.org/'>\n"
3892*4543ef51SXin LI                      "<doc/>";
3893*4543ef51SXin LI   ExtTest test_data
3894*4543ef51SXin LI       = {"<!ENTITY % " PARAM_ENTITY_NAME " '" PARAM_ENTITY_CORE_VALUE "\r'>\n"
3895*4543ef51SXin LI          "%" PARAM_ENTITY_NAME ";\n",
3896*4543ef51SXin LI          NULL, NULL};
3897*4543ef51SXin LI 
3898*4543ef51SXin LI   XML_SetUserData(g_parser, &test_data);
3899*4543ef51SXin LI   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3900*4543ef51SXin LI   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
3901*4543ef51SXin LI   XML_SetEntityDeclHandler(g_parser, param_entity_match_handler);
3902*4543ef51SXin LI   param_entity_match_init(XCS(PARAM_ENTITY_NAME),
3903*4543ef51SXin LI                           XCS(PARAM_ENTITY_CORE_VALUE) XCS("\n"));
3904*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3905*4543ef51SXin LI       == XML_STATUS_ERROR)
3906*4543ef51SXin LI     xml_failure(g_parser);
3907*4543ef51SXin LI   int entity_match_flag = get_param_entity_match_flag();
3908*4543ef51SXin LI   if (entity_match_flag == ENTITY_MATCH_FAIL)
3909*4543ef51SXin LI     fail("Parameter entity CR->NEWLINE conversion failed");
3910*4543ef51SXin LI   else if (entity_match_flag == ENTITY_MATCH_NOT_FOUND)
3911*4543ef51SXin LI     fail("Parameter entity not parsed");
3912*4543ef51SXin LI }
3913*4543ef51SXin LI #undef PARAM_ENTITY_NAME
3914*4543ef51SXin LI #undef PARAM_ENTITY_CORE_VALUE
3915*4543ef51SXin LI END_TEST
3916*4543ef51SXin LI 
START_TEST(test_invalid_character_entity)3917*4543ef51SXin LI START_TEST(test_invalid_character_entity) {
3918*4543ef51SXin LI   const char *text = "<!DOCTYPE doc [\n"
3919*4543ef51SXin LI                      "  <!ENTITY entity '&#x110000;'>\n"
3920*4543ef51SXin LI                      "]>\n"
3921*4543ef51SXin LI                      "<doc>&entity;</doc>";
3922*4543ef51SXin LI 
3923*4543ef51SXin LI   expect_failure(text, XML_ERROR_BAD_CHAR_REF,
3924*4543ef51SXin LI                  "Out of range character reference not faulted");
3925*4543ef51SXin LI }
3926*4543ef51SXin LI END_TEST
3927*4543ef51SXin LI 
START_TEST(test_invalid_character_entity_2)3928*4543ef51SXin LI START_TEST(test_invalid_character_entity_2) {
3929*4543ef51SXin LI   const char *text = "<!DOCTYPE doc [\n"
3930*4543ef51SXin LI                      "  <!ENTITY entity '&#xg0;'>\n"
3931*4543ef51SXin LI                      "]>\n"
3932*4543ef51SXin LI                      "<doc>&entity;</doc>";
3933*4543ef51SXin LI 
3934*4543ef51SXin LI   expect_failure(text, XML_ERROR_INVALID_TOKEN,
3935*4543ef51SXin LI                  "Out of range character reference not faulted");
3936*4543ef51SXin LI }
3937*4543ef51SXin LI END_TEST
3938*4543ef51SXin LI 
START_TEST(test_invalid_character_entity_3)3939*4543ef51SXin LI START_TEST(test_invalid_character_entity_3) {
3940*4543ef51SXin LI   const char text[] =
3941*4543ef51SXin LI       /* <!DOCTYPE doc [\n */
3942*4543ef51SXin LI       "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0o\0c\0 \0[\0\n"
3943*4543ef51SXin LI       /* U+0E04 = KHO KHWAI
3944*4543ef51SXin LI        * U+0E08 = CHO CHAN */
3945*4543ef51SXin LI       /* <!ENTITY entity '&\u0e04\u0e08;'>\n */
3946*4543ef51SXin LI       "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0e\0n\0t\0i\0t\0y\0 "
3947*4543ef51SXin LI       "\0'\0&\x0e\x04\x0e\x08\0;\0'\0>\0\n"
3948*4543ef51SXin LI       /* ]>\n */
3949*4543ef51SXin LI       "\0]\0>\0\n"
3950*4543ef51SXin LI       /* <doc>&entity;</doc> */
3951*4543ef51SXin LI       "\0<\0d\0o\0c\0>\0&\0e\0n\0t\0i\0t\0y\0;\0<\0/\0d\0o\0c\0>";
3952*4543ef51SXin LI 
3953*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
3954*4543ef51SXin LI       != XML_STATUS_ERROR)
3955*4543ef51SXin LI     fail("Invalid start of entity name not faulted");
3956*4543ef51SXin LI   if (XML_GetErrorCode(g_parser) != XML_ERROR_UNDEFINED_ENTITY)
3957*4543ef51SXin LI     xml_failure(g_parser);
3958*4543ef51SXin LI }
3959*4543ef51SXin LI END_TEST
3960*4543ef51SXin LI 
START_TEST(test_invalid_character_entity_4)3961*4543ef51SXin LI START_TEST(test_invalid_character_entity_4) {
3962*4543ef51SXin LI   const char *text = "<!DOCTYPE doc [\n"
3963*4543ef51SXin LI                      "  <!ENTITY entity '&#1114112;'>\n" /* = &#x110000 */
3964*4543ef51SXin LI                      "]>\n"
3965*4543ef51SXin LI                      "<doc>&entity;</doc>";
3966*4543ef51SXin LI 
3967*4543ef51SXin LI   expect_failure(text, XML_ERROR_BAD_CHAR_REF,
3968*4543ef51SXin LI                  "Out of range character reference not faulted");
3969*4543ef51SXin LI }
3970*4543ef51SXin LI END_TEST
3971*4543ef51SXin LI 
3972*4543ef51SXin LI /* Test that processing instructions are picked up by a default handler */
START_TEST(test_pi_handled_in_default)3973*4543ef51SXin LI START_TEST(test_pi_handled_in_default) {
3974*4543ef51SXin LI   const char *text = "<?test processing instruction?>\n<doc/>";
3975*4543ef51SXin LI   const XML_Char *expected = XCS("<?test processing instruction?>\n<doc/>");
3976*4543ef51SXin LI   CharData storage;
3977*4543ef51SXin LI 
3978*4543ef51SXin LI   CharData_Init(&storage);
3979*4543ef51SXin LI   XML_SetDefaultHandler(g_parser, accumulate_characters);
3980*4543ef51SXin LI   XML_SetUserData(g_parser, &storage);
3981*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3982*4543ef51SXin LI       == XML_STATUS_ERROR)
3983*4543ef51SXin LI     xml_failure(g_parser);
3984*4543ef51SXin LI   CharData_CheckXMLChars(&storage, expected);
3985*4543ef51SXin LI }
3986*4543ef51SXin LI END_TEST
3987*4543ef51SXin LI 
3988*4543ef51SXin LI /* Test that comments are picked up by a default handler */
START_TEST(test_comment_handled_in_default)3989*4543ef51SXin LI START_TEST(test_comment_handled_in_default) {
3990*4543ef51SXin LI   const char *text = "<!-- This is a comment -->\n<doc/>";
3991*4543ef51SXin LI   const XML_Char *expected = XCS("<!-- This is a comment -->\n<doc/>");
3992*4543ef51SXin LI   CharData storage;
3993*4543ef51SXin LI 
3994*4543ef51SXin LI   CharData_Init(&storage);
3995*4543ef51SXin LI   XML_SetDefaultHandler(g_parser, accumulate_characters);
3996*4543ef51SXin LI   XML_SetUserData(g_parser, &storage);
3997*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3998*4543ef51SXin LI       == XML_STATUS_ERROR)
3999*4543ef51SXin LI     xml_failure(g_parser);
4000*4543ef51SXin LI   CharData_CheckXMLChars(&storage, expected);
4001*4543ef51SXin LI }
4002*4543ef51SXin LI END_TEST
4003*4543ef51SXin LI 
4004*4543ef51SXin LI /* Test PIs that look almost but not quite like XML declarations */
START_TEST(test_pi_yml)4005*4543ef51SXin LI START_TEST(test_pi_yml) {
4006*4543ef51SXin LI   const char *text = "<?yml something like data?><doc/>";
4007*4543ef51SXin LI   const XML_Char *expected = XCS("yml: something like data\n");
4008*4543ef51SXin LI   CharData storage;
4009*4543ef51SXin LI 
4010*4543ef51SXin LI   CharData_Init(&storage);
4011*4543ef51SXin LI   XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters);
4012*4543ef51SXin LI   XML_SetUserData(g_parser, &storage);
4013*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4014*4543ef51SXin LI       == XML_STATUS_ERROR)
4015*4543ef51SXin LI     xml_failure(g_parser);
4016*4543ef51SXin LI   CharData_CheckXMLChars(&storage, expected);
4017*4543ef51SXin LI }
4018*4543ef51SXin LI END_TEST
4019*4543ef51SXin LI 
START_TEST(test_pi_xnl)4020*4543ef51SXin LI START_TEST(test_pi_xnl) {
4021*4543ef51SXin LI   const char *text = "<?xnl nothing like data?><doc/>";
4022*4543ef51SXin LI   const XML_Char *expected = XCS("xnl: nothing like data\n");
4023*4543ef51SXin LI   CharData storage;
4024*4543ef51SXin LI 
4025*4543ef51SXin LI   CharData_Init(&storage);
4026*4543ef51SXin LI   XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters);
4027*4543ef51SXin LI   XML_SetUserData(g_parser, &storage);
4028*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4029*4543ef51SXin LI       == XML_STATUS_ERROR)
4030*4543ef51SXin LI     xml_failure(g_parser);
4031*4543ef51SXin LI   CharData_CheckXMLChars(&storage, expected);
4032*4543ef51SXin LI }
4033*4543ef51SXin LI END_TEST
4034*4543ef51SXin LI 
START_TEST(test_pi_xmm)4035*4543ef51SXin LI START_TEST(test_pi_xmm) {
4036*4543ef51SXin LI   const char *text = "<?xmm everything like data?><doc/>";
4037*4543ef51SXin LI   const XML_Char *expected = XCS("xmm: everything like data\n");
4038*4543ef51SXin LI   CharData storage;
4039*4543ef51SXin LI 
4040*4543ef51SXin LI   CharData_Init(&storage);
4041*4543ef51SXin LI   XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters);
4042*4543ef51SXin LI   XML_SetUserData(g_parser, &storage);
4043*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4044*4543ef51SXin LI       == XML_STATUS_ERROR)
4045*4543ef51SXin LI     xml_failure(g_parser);
4046*4543ef51SXin LI   CharData_CheckXMLChars(&storage, expected);
4047*4543ef51SXin LI }
4048*4543ef51SXin LI END_TEST
4049*4543ef51SXin LI 
START_TEST(test_utf16_pi)4050*4543ef51SXin LI START_TEST(test_utf16_pi) {
4051*4543ef51SXin LI   const char text[] =
4052*4543ef51SXin LI       /* <?{KHO KHWAI}{CHO CHAN}?>
4053*4543ef51SXin LI        * where {KHO KHWAI} = U+0E04
4054*4543ef51SXin LI        * and   {CHO CHAN}  = U+0E08
4055*4543ef51SXin LI        */
4056*4543ef51SXin LI       "<\0?\0\x04\x0e\x08\x0e?\0>\0"
4057*4543ef51SXin LI       /* <q/> */
4058*4543ef51SXin LI       "<\0q\0/\0>\0";
4059*4543ef51SXin LI #ifdef XML_UNICODE
4060*4543ef51SXin LI   const XML_Char *expected = XCS("\x0e04\x0e08: \n");
4061*4543ef51SXin LI #else
4062*4543ef51SXin LI   const XML_Char *expected = XCS("\xe0\xb8\x84\xe0\xb8\x88: \n");
4063*4543ef51SXin LI #endif
4064*4543ef51SXin LI   CharData storage;
4065*4543ef51SXin LI 
4066*4543ef51SXin LI   CharData_Init(&storage);
4067*4543ef51SXin LI   XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters);
4068*4543ef51SXin LI   XML_SetUserData(g_parser, &storage);
4069*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4070*4543ef51SXin LI       == XML_STATUS_ERROR)
4071*4543ef51SXin LI     xml_failure(g_parser);
4072*4543ef51SXin LI   CharData_CheckXMLChars(&storage, expected);
4073*4543ef51SXin LI }
4074*4543ef51SXin LI END_TEST
4075*4543ef51SXin LI 
START_TEST(test_utf16_be_pi)4076*4543ef51SXin LI START_TEST(test_utf16_be_pi) {
4077*4543ef51SXin LI   const char text[] =
4078*4543ef51SXin LI       /* <?{KHO KHWAI}{CHO CHAN}?>
4079*4543ef51SXin LI        * where {KHO KHWAI} = U+0E04
4080*4543ef51SXin LI        * and   {CHO CHAN}  = U+0E08
4081*4543ef51SXin LI        */
4082*4543ef51SXin LI       "\0<\0?\x0e\x04\x0e\x08\0?\0>"
4083*4543ef51SXin LI       /* <q/> */
4084*4543ef51SXin LI       "\0<\0q\0/\0>";
4085*4543ef51SXin LI #ifdef XML_UNICODE
4086*4543ef51SXin LI   const XML_Char *expected = XCS("\x0e04\x0e08: \n");
4087*4543ef51SXin LI #else
4088*4543ef51SXin LI   const XML_Char *expected = XCS("\xe0\xb8\x84\xe0\xb8\x88: \n");
4089*4543ef51SXin LI #endif
4090*4543ef51SXin LI   CharData storage;
4091*4543ef51SXin LI 
4092*4543ef51SXin LI   CharData_Init(&storage);
4093*4543ef51SXin LI   XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters);
4094*4543ef51SXin LI   XML_SetUserData(g_parser, &storage);
4095*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4096*4543ef51SXin LI       == XML_STATUS_ERROR)
4097*4543ef51SXin LI     xml_failure(g_parser);
4098*4543ef51SXin LI   CharData_CheckXMLChars(&storage, expected);
4099*4543ef51SXin LI }
4100*4543ef51SXin LI END_TEST
4101*4543ef51SXin LI 
4102*4543ef51SXin LI /* Test that comments can be picked up and translated */
START_TEST(test_utf16_be_comment)4103*4543ef51SXin LI START_TEST(test_utf16_be_comment) {
4104*4543ef51SXin LI   const char text[] =
4105*4543ef51SXin LI       /* <!-- Comment A --> */
4106*4543ef51SXin LI       "\0<\0!\0-\0-\0 \0C\0o\0m\0m\0e\0n\0t\0 \0A\0 \0-\0-\0>\0\n"
4107*4543ef51SXin LI       /* <doc/> */
4108*4543ef51SXin LI       "\0<\0d\0o\0c\0/\0>";
4109*4543ef51SXin LI   const XML_Char *expected = XCS(" Comment A ");
4110*4543ef51SXin LI   CharData storage;
4111*4543ef51SXin LI 
4112*4543ef51SXin LI   CharData_Init(&storage);
4113*4543ef51SXin LI   XML_SetCommentHandler(g_parser, accumulate_comment);
4114*4543ef51SXin LI   XML_SetUserData(g_parser, &storage);
4115*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4116*4543ef51SXin LI       == XML_STATUS_ERROR)
4117*4543ef51SXin LI     xml_failure(g_parser);
4118*4543ef51SXin LI   CharData_CheckXMLChars(&storage, expected);
4119*4543ef51SXin LI }
4120*4543ef51SXin LI END_TEST
4121*4543ef51SXin LI 
START_TEST(test_utf16_le_comment)4122*4543ef51SXin LI START_TEST(test_utf16_le_comment) {
4123*4543ef51SXin LI   const char text[] =
4124*4543ef51SXin LI       /* <!-- Comment B --> */
4125*4543ef51SXin LI       "<\0!\0-\0-\0 \0C\0o\0m\0m\0e\0n\0t\0 \0B\0 \0-\0-\0>\0\n\0"
4126*4543ef51SXin LI       /* <doc/> */
4127*4543ef51SXin LI       "<\0d\0o\0c\0/\0>\0";
4128*4543ef51SXin LI   const XML_Char *expected = XCS(" Comment B ");
4129*4543ef51SXin LI   CharData storage;
4130*4543ef51SXin LI 
4131*4543ef51SXin LI   CharData_Init(&storage);
4132*4543ef51SXin LI   XML_SetCommentHandler(g_parser, accumulate_comment);
4133*4543ef51SXin LI   XML_SetUserData(g_parser, &storage);
4134*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4135*4543ef51SXin LI       == XML_STATUS_ERROR)
4136*4543ef51SXin LI     xml_failure(g_parser);
4137*4543ef51SXin LI   CharData_CheckXMLChars(&storage, expected);
4138*4543ef51SXin LI }
4139*4543ef51SXin LI END_TEST
4140*4543ef51SXin LI 
4141*4543ef51SXin LI /* Test that the unknown encoding handler with map entries that expect
4142*4543ef51SXin LI  * conversion but no conversion function is faulted
4143*4543ef51SXin LI  */
START_TEST(test_missing_encoding_conversion_fn)4144*4543ef51SXin LI START_TEST(test_missing_encoding_conversion_fn) {
4145*4543ef51SXin LI   const char *text = "<?xml version='1.0' encoding='no-conv'?>\n"
4146*4543ef51SXin LI                      "<doc>\x81</doc>";
4147*4543ef51SXin LI 
4148*4543ef51SXin LI   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4149*4543ef51SXin LI   /* MiscEncodingHandler sets up an encoding with every top-bit-set
4150*4543ef51SXin LI    * character introducing a two-byte sequence.  For this, it
4151*4543ef51SXin LI    * requires a convert function.  The above function call doesn't
4152*4543ef51SXin LI    * pass one through, so when BadEncodingHandler actually gets
4153*4543ef51SXin LI    * called it should supply an invalid encoding.
4154*4543ef51SXin LI    */
4155*4543ef51SXin LI   expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
4156*4543ef51SXin LI                  "Encoding with missing convert() not faulted");
4157*4543ef51SXin LI }
4158*4543ef51SXin LI END_TEST
4159*4543ef51SXin LI 
START_TEST(test_failing_encoding_conversion_fn)4160*4543ef51SXin LI START_TEST(test_failing_encoding_conversion_fn) {
4161*4543ef51SXin LI   const char *text = "<?xml version='1.0' encoding='failing-conv'?>\n"
4162*4543ef51SXin LI                      "<doc>\x81</doc>";
4163*4543ef51SXin LI 
4164*4543ef51SXin LI   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4165*4543ef51SXin LI   /* BadEncodingHandler sets up an encoding with every top-bit-set
4166*4543ef51SXin LI    * character introducing a two-byte sequence.  For this, it
4167*4543ef51SXin LI    * requires a convert function.  The above function call passes
4168*4543ef51SXin LI    * one that insists all possible sequences are invalid anyway.
4169*4543ef51SXin LI    */
4170*4543ef51SXin LI   expect_failure(text, XML_ERROR_INVALID_TOKEN,
4171*4543ef51SXin LI                  "Encoding with failing convert() not faulted");
4172*4543ef51SXin LI }
4173*4543ef51SXin LI END_TEST
4174*4543ef51SXin LI 
4175*4543ef51SXin LI /* Test unknown encoding conversions */
START_TEST(test_unknown_encoding_success)4176*4543ef51SXin LI START_TEST(test_unknown_encoding_success) {
4177*4543ef51SXin LI   const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4178*4543ef51SXin LI                      /* Equivalent to <eoc>Hello, world</eoc> */
4179*4543ef51SXin LI                      "<\x81\x64\x80oc>Hello, world</\x81\x64\x80oc>";
4180*4543ef51SXin LI 
4181*4543ef51SXin LI   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4182*4543ef51SXin LI   run_character_check(text, XCS("Hello, world"));
4183*4543ef51SXin LI }
4184*4543ef51SXin LI END_TEST
4185*4543ef51SXin LI 
4186*4543ef51SXin LI /* Test bad name character in unknown encoding */
START_TEST(test_unknown_encoding_bad_name)4187*4543ef51SXin LI START_TEST(test_unknown_encoding_bad_name) {
4188*4543ef51SXin LI   const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4189*4543ef51SXin LI                      "<\xff\x64oc>Hello, world</\xff\x64oc>";
4190*4543ef51SXin LI 
4191*4543ef51SXin LI   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4192*4543ef51SXin LI   expect_failure(text, XML_ERROR_INVALID_TOKEN,
4193*4543ef51SXin LI                  "Bad name start in unknown encoding not faulted");
4194*4543ef51SXin LI }
4195*4543ef51SXin LI END_TEST
4196*4543ef51SXin LI 
4197*4543ef51SXin LI /* Test bad mid-name character in unknown encoding */
START_TEST(test_unknown_encoding_bad_name_2)4198*4543ef51SXin LI START_TEST(test_unknown_encoding_bad_name_2) {
4199*4543ef51SXin LI   const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4200*4543ef51SXin LI                      "<d\xffoc>Hello, world</d\xffoc>";
4201*4543ef51SXin LI 
4202*4543ef51SXin LI   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4203*4543ef51SXin LI   expect_failure(text, XML_ERROR_INVALID_TOKEN,
4204*4543ef51SXin LI                  "Bad name in unknown encoding not faulted");
4205*4543ef51SXin LI }
4206*4543ef51SXin LI END_TEST
4207*4543ef51SXin LI 
4208*4543ef51SXin LI /* Test element name that is long enough to fill the conversion buffer
4209*4543ef51SXin LI  * in an unknown encoding, finishing with an encoded character.
4210*4543ef51SXin LI  */
START_TEST(test_unknown_encoding_long_name_1)4211*4543ef51SXin LI START_TEST(test_unknown_encoding_long_name_1) {
4212*4543ef51SXin LI   const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4213*4543ef51SXin LI                      "<abcdefghabcdefghabcdefghijkl\x80m\x80n\x80o\x80p>"
4214*4543ef51SXin LI                      "Hi"
4215*4543ef51SXin LI                      "</abcdefghabcdefghabcdefghijkl\x80m\x80n\x80o\x80p>";
4216*4543ef51SXin LI   const XML_Char *expected = XCS("abcdefghabcdefghabcdefghijklmnop");
4217*4543ef51SXin LI   CharData storage;
4218*4543ef51SXin LI 
4219*4543ef51SXin LI   CharData_Init(&storage);
4220*4543ef51SXin LI   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4221*4543ef51SXin LI   XML_SetStartElementHandler(g_parser, record_element_start_handler);
4222*4543ef51SXin LI   XML_SetUserData(g_parser, &storage);
4223*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4224*4543ef51SXin LI       == XML_STATUS_ERROR)
4225*4543ef51SXin LI     xml_failure(g_parser);
4226*4543ef51SXin LI   CharData_CheckXMLChars(&storage, expected);
4227*4543ef51SXin LI }
4228*4543ef51SXin LI END_TEST
4229*4543ef51SXin LI 
4230*4543ef51SXin LI /* Test element name that is long enough to fill the conversion buffer
4231*4543ef51SXin LI  * in an unknown encoding, finishing with an simple character.
4232*4543ef51SXin LI  */
START_TEST(test_unknown_encoding_long_name_2)4233*4543ef51SXin LI START_TEST(test_unknown_encoding_long_name_2) {
4234*4543ef51SXin LI   const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4235*4543ef51SXin LI                      "<abcdefghabcdefghabcdefghijklmnop>"
4236*4543ef51SXin LI                      "Hi"
4237*4543ef51SXin LI                      "</abcdefghabcdefghabcdefghijklmnop>";
4238*4543ef51SXin LI   const XML_Char *expected = XCS("abcdefghabcdefghabcdefghijklmnop");
4239*4543ef51SXin LI   CharData storage;
4240*4543ef51SXin LI 
4241*4543ef51SXin LI   CharData_Init(&storage);
4242*4543ef51SXin LI   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4243*4543ef51SXin LI   XML_SetStartElementHandler(g_parser, record_element_start_handler);
4244*4543ef51SXin LI   XML_SetUserData(g_parser, &storage);
4245*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4246*4543ef51SXin LI       == XML_STATUS_ERROR)
4247*4543ef51SXin LI     xml_failure(g_parser);
4248*4543ef51SXin LI   CharData_CheckXMLChars(&storage, expected);
4249*4543ef51SXin LI }
4250*4543ef51SXin LI END_TEST
4251*4543ef51SXin LI 
START_TEST(test_invalid_unknown_encoding)4252*4543ef51SXin LI START_TEST(test_invalid_unknown_encoding) {
4253*4543ef51SXin LI   const char *text = "<?xml version='1.0' encoding='invalid-9'?>\n"
4254*4543ef51SXin LI                      "<doc>Hello world</doc>";
4255*4543ef51SXin LI 
4256*4543ef51SXin LI   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4257*4543ef51SXin LI   expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
4258*4543ef51SXin LI                  "Invalid unknown encoding not faulted");
4259*4543ef51SXin LI }
4260*4543ef51SXin LI END_TEST
4261*4543ef51SXin LI 
START_TEST(test_unknown_ascii_encoding_ok)4262*4543ef51SXin LI START_TEST(test_unknown_ascii_encoding_ok) {
4263*4543ef51SXin LI   const char *text = "<?xml version='1.0' encoding='ascii-like'?>\n"
4264*4543ef51SXin LI                      "<doc>Hello, world</doc>";
4265*4543ef51SXin LI 
4266*4543ef51SXin LI   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4267*4543ef51SXin LI   run_character_check(text, XCS("Hello, world"));
4268*4543ef51SXin LI }
4269*4543ef51SXin LI END_TEST
4270*4543ef51SXin LI 
START_TEST(test_unknown_ascii_encoding_fail)4271*4543ef51SXin LI START_TEST(test_unknown_ascii_encoding_fail) {
4272*4543ef51SXin LI   const char *text = "<?xml version='1.0' encoding='ascii-like'?>\n"
4273*4543ef51SXin LI                      "<doc>Hello, \x80 world</doc>";
4274*4543ef51SXin LI 
4275*4543ef51SXin LI   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4276*4543ef51SXin LI   expect_failure(text, XML_ERROR_INVALID_TOKEN,
4277*4543ef51SXin LI                  "Invalid character not faulted");
4278*4543ef51SXin LI }
4279*4543ef51SXin LI END_TEST
4280*4543ef51SXin LI 
START_TEST(test_unknown_encoding_invalid_length)4281*4543ef51SXin LI START_TEST(test_unknown_encoding_invalid_length) {
4282*4543ef51SXin LI   const char *text = "<?xml version='1.0' encoding='invalid-len'?>\n"
4283*4543ef51SXin LI                      "<doc>Hello, world</doc>";
4284*4543ef51SXin LI 
4285*4543ef51SXin LI   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4286*4543ef51SXin LI   expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
4287*4543ef51SXin LI                  "Invalid unknown encoding not faulted");
4288*4543ef51SXin LI }
4289*4543ef51SXin LI END_TEST
4290*4543ef51SXin LI 
START_TEST(test_unknown_encoding_invalid_topbit)4291*4543ef51SXin LI START_TEST(test_unknown_encoding_invalid_topbit) {
4292*4543ef51SXin LI   const char *text = "<?xml version='1.0' encoding='invalid-a'?>\n"
4293*4543ef51SXin LI                      "<doc>Hello, world</doc>";
4294*4543ef51SXin LI 
4295*4543ef51SXin LI   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4296*4543ef51SXin LI   expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
4297*4543ef51SXin LI                  "Invalid unknown encoding not faulted");
4298*4543ef51SXin LI }
4299*4543ef51SXin LI END_TEST
4300*4543ef51SXin LI 
START_TEST(test_unknown_encoding_invalid_surrogate)4301*4543ef51SXin LI START_TEST(test_unknown_encoding_invalid_surrogate) {
4302*4543ef51SXin LI   const char *text = "<?xml version='1.0' encoding='invalid-surrogate'?>\n"
4303*4543ef51SXin LI                      "<doc>Hello, \x82 world</doc>";
4304*4543ef51SXin LI 
4305*4543ef51SXin LI   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4306*4543ef51SXin LI   expect_failure(text, XML_ERROR_INVALID_TOKEN,
4307*4543ef51SXin LI                  "Invalid unknown encoding not faulted");
4308*4543ef51SXin LI }
4309*4543ef51SXin LI END_TEST
4310*4543ef51SXin LI 
START_TEST(test_unknown_encoding_invalid_high)4311*4543ef51SXin LI START_TEST(test_unknown_encoding_invalid_high) {
4312*4543ef51SXin LI   const char *text = "<?xml version='1.0' encoding='invalid-high'?>\n"
4313*4543ef51SXin LI                      "<doc>Hello, world</doc>";
4314*4543ef51SXin LI 
4315*4543ef51SXin LI   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4316*4543ef51SXin LI   expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
4317*4543ef51SXin LI                  "Invalid unknown encoding not faulted");
4318*4543ef51SXin LI }
4319*4543ef51SXin LI END_TEST
4320*4543ef51SXin LI 
START_TEST(test_unknown_encoding_invalid_attr_value)4321*4543ef51SXin LI START_TEST(test_unknown_encoding_invalid_attr_value) {
4322*4543ef51SXin LI   const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4323*4543ef51SXin LI                      "<doc attr='\xff\x30'/>";
4324*4543ef51SXin LI 
4325*4543ef51SXin LI   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4326*4543ef51SXin LI   expect_failure(text, XML_ERROR_INVALID_TOKEN,
4327*4543ef51SXin LI                  "Invalid attribute valid not faulted");
4328*4543ef51SXin LI }
4329*4543ef51SXin LI END_TEST
4330*4543ef51SXin LI 
4331*4543ef51SXin LI /* Test an external entity parser set to use latin-1 detects UTF-16
4332*4543ef51SXin LI  * BOMs correctly.
4333*4543ef51SXin LI  */
4334*4543ef51SXin LI /* Test that UTF-16 BOM does not select UTF-16 given explicit encoding */
START_TEST(test_ext_entity_latin1_utf16le_bom)4335*4543ef51SXin LI START_TEST(test_ext_entity_latin1_utf16le_bom) {
4336*4543ef51SXin LI   const char *text = "<!DOCTYPE doc [\n"
4337*4543ef51SXin LI                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4338*4543ef51SXin LI                      "]>\n"
4339*4543ef51SXin LI                      "<doc>&en;</doc>";
4340*4543ef51SXin LI   ExtTest2 test_data
4341*4543ef51SXin LI       = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */
4342*4543ef51SXin LI          /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn,
4343*4543ef51SXin LI           *   0x4c = L and 0x20 is a space
4344*4543ef51SXin LI           */
4345*4543ef51SXin LI          "\xff\xfe\x4c\x20", 4, XCS("iso-8859-1"), NULL};
4346*4543ef51SXin LI #ifdef XML_UNICODE
4347*4543ef51SXin LI   const XML_Char *expected = XCS("\x00ff\x00feL ");
4348*4543ef51SXin LI #else
4349*4543ef51SXin LI   /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */
4350*4543ef51SXin LI   const XML_Char *expected = XCS("\xc3\xbf\xc3\xbeL ");
4351*4543ef51SXin LI #endif
4352*4543ef51SXin LI   CharData storage;
4353*4543ef51SXin LI 
4354*4543ef51SXin LI   CharData_Init(&storage);
4355*4543ef51SXin LI   test_data.storage = &storage;
4356*4543ef51SXin LI   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4357*4543ef51SXin LI   XML_SetUserData(g_parser, &test_data);
4358*4543ef51SXin LI   XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4359*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4360*4543ef51SXin LI       == XML_STATUS_ERROR)
4361*4543ef51SXin LI     xml_failure(g_parser);
4362*4543ef51SXin LI   CharData_CheckXMLChars(&storage, expected);
4363*4543ef51SXin LI }
4364*4543ef51SXin LI END_TEST
4365*4543ef51SXin LI 
START_TEST(test_ext_entity_latin1_utf16be_bom)4366*4543ef51SXin LI START_TEST(test_ext_entity_latin1_utf16be_bom) {
4367*4543ef51SXin LI   const char *text = "<!DOCTYPE doc [\n"
4368*4543ef51SXin LI                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4369*4543ef51SXin LI                      "]>\n"
4370*4543ef51SXin LI                      "<doc>&en;</doc>";
4371*4543ef51SXin LI   ExtTest2 test_data
4372*4543ef51SXin LI       = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */
4373*4543ef51SXin LI          /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn,
4374*4543ef51SXin LI           *   0x4c = L and 0x20 is a space
4375*4543ef51SXin LI           */
4376*4543ef51SXin LI          "\xfe\xff\x20\x4c", 4, XCS("iso-8859-1"), NULL};
4377*4543ef51SXin LI #ifdef XML_UNICODE
4378*4543ef51SXin LI   const XML_Char *expected = XCS("\x00fe\x00ff L");
4379*4543ef51SXin LI #else
4380*4543ef51SXin LI   /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */
4381*4543ef51SXin LI   const XML_Char *expected = XCS("\xc3\xbe\xc3\xbf L");
4382*4543ef51SXin LI #endif
4383*4543ef51SXin LI   CharData storage;
4384*4543ef51SXin LI 
4385*4543ef51SXin LI   CharData_Init(&storage);
4386*4543ef51SXin LI   test_data.storage = &storage;
4387*4543ef51SXin LI   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4388*4543ef51SXin LI   XML_SetUserData(g_parser, &test_data);
4389*4543ef51SXin LI   XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4390*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4391*4543ef51SXin LI       == XML_STATUS_ERROR)
4392*4543ef51SXin LI     xml_failure(g_parser);
4393*4543ef51SXin LI   CharData_CheckXMLChars(&storage, expected);
4394*4543ef51SXin LI }
4395*4543ef51SXin LI END_TEST
4396*4543ef51SXin LI 
4397*4543ef51SXin LI /* Parsing the full buffer rather than a byte at a time makes a
4398*4543ef51SXin LI  * difference to the encoding scanning code, so repeat the above tests
4399*4543ef51SXin LI  * without breaking them down by byte.
4400*4543ef51SXin LI  */
START_TEST(test_ext_entity_latin1_utf16le_bom2)4401*4543ef51SXin LI START_TEST(test_ext_entity_latin1_utf16le_bom2) {
4402*4543ef51SXin LI   const char *text = "<!DOCTYPE doc [\n"
4403*4543ef51SXin LI                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4404*4543ef51SXin LI                      "]>\n"
4405*4543ef51SXin LI                      "<doc>&en;</doc>";
4406*4543ef51SXin LI   ExtTest2 test_data
4407*4543ef51SXin LI       = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */
4408*4543ef51SXin LI          /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn,
4409*4543ef51SXin LI           *   0x4c = L and 0x20 is a space
4410*4543ef51SXin LI           */
4411*4543ef51SXin LI          "\xff\xfe\x4c\x20", 4, XCS("iso-8859-1"), NULL};
4412*4543ef51SXin LI #ifdef XML_UNICODE
4413*4543ef51SXin LI   const XML_Char *expected = XCS("\x00ff\x00feL ");
4414*4543ef51SXin LI #else
4415*4543ef51SXin LI   /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */
4416*4543ef51SXin LI   const XML_Char *expected = XCS("\xc3\xbf\xc3\xbeL ");
4417*4543ef51SXin LI #endif
4418*4543ef51SXin LI   CharData storage;
4419*4543ef51SXin LI 
4420*4543ef51SXin LI   CharData_Init(&storage);
4421*4543ef51SXin LI   test_data.storage = &storage;
4422*4543ef51SXin LI   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4423*4543ef51SXin LI   XML_SetUserData(g_parser, &test_data);
4424*4543ef51SXin LI   XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4425*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4426*4543ef51SXin LI       == XML_STATUS_ERROR)
4427*4543ef51SXin LI     xml_failure(g_parser);
4428*4543ef51SXin LI   CharData_CheckXMLChars(&storage, expected);
4429*4543ef51SXin LI }
4430*4543ef51SXin LI END_TEST
4431*4543ef51SXin LI 
START_TEST(test_ext_entity_latin1_utf16be_bom2)4432*4543ef51SXin LI START_TEST(test_ext_entity_latin1_utf16be_bom2) {
4433*4543ef51SXin LI   const char *text = "<!DOCTYPE doc [\n"
4434*4543ef51SXin LI                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4435*4543ef51SXin LI                      "]>\n"
4436*4543ef51SXin LI                      "<doc>&en;</doc>";
4437*4543ef51SXin LI   ExtTest2 test_data
4438*4543ef51SXin LI       = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */
4439*4543ef51SXin LI          /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn,
4440*4543ef51SXin LI           *   0x4c = L and 0x20 is a space
4441*4543ef51SXin LI           */
4442*4543ef51SXin LI          "\xfe\xff\x20\x4c", 4, XCS("iso-8859-1"), NULL};
4443*4543ef51SXin LI #ifdef XML_UNICODE
4444*4543ef51SXin LI   const XML_Char *expected = XCS("\x00fe\x00ff L");
4445*4543ef51SXin LI #else
4446*4543ef51SXin LI   /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */
4447*4543ef51SXin LI   const XML_Char *expected = "\xc3\xbe\xc3\xbf L";
4448*4543ef51SXin LI #endif
4449*4543ef51SXin LI   CharData storage;
4450*4543ef51SXin LI 
4451*4543ef51SXin LI   CharData_Init(&storage);
4452*4543ef51SXin LI   test_data.storage = &storage;
4453*4543ef51SXin LI   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4454*4543ef51SXin LI   XML_SetUserData(g_parser, &test_data);
4455*4543ef51SXin LI   XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4456*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4457*4543ef51SXin LI       == XML_STATUS_ERROR)
4458*4543ef51SXin LI     xml_failure(g_parser);
4459*4543ef51SXin LI   CharData_CheckXMLChars(&storage, expected);
4460*4543ef51SXin LI }
4461*4543ef51SXin LI END_TEST
4462*4543ef51SXin LI 
4463*4543ef51SXin LI /* Test little-endian UTF-16 given an explicit big-endian encoding */
START_TEST(test_ext_entity_utf16_be)4464*4543ef51SXin LI START_TEST(test_ext_entity_utf16_be) {
4465*4543ef51SXin LI   const char *text = "<!DOCTYPE doc [\n"
4466*4543ef51SXin LI                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4467*4543ef51SXin LI                      "]>\n"
4468*4543ef51SXin LI                      "<doc>&en;</doc>";
4469*4543ef51SXin LI   ExtTest2 test_data = {"<\0e\0/\0>\0", 8, XCS("utf-16be"), NULL};
4470*4543ef51SXin LI #ifdef XML_UNICODE
4471*4543ef51SXin LI   const XML_Char *expected = XCS("\x3c00\x6500\x2f00\x3e00");
4472*4543ef51SXin LI #else
4473*4543ef51SXin LI   const XML_Char *expected = XCS("\xe3\xb0\x80"   /* U+3C00 */
4474*4543ef51SXin LI                                  "\xe6\x94\x80"   /* U+6500 */
4475*4543ef51SXin LI                                  "\xe2\xbc\x80"   /* U+2F00 */
4476*4543ef51SXin LI                                  "\xe3\xb8\x80"); /* U+3E00 */
4477*4543ef51SXin LI #endif
4478*4543ef51SXin LI   CharData storage;
4479*4543ef51SXin LI 
4480*4543ef51SXin LI   CharData_Init(&storage);
4481*4543ef51SXin LI   test_data.storage = &storage;
4482*4543ef51SXin LI   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4483*4543ef51SXin LI   XML_SetUserData(g_parser, &test_data);
4484*4543ef51SXin LI   XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4485*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4486*4543ef51SXin LI       == XML_STATUS_ERROR)
4487*4543ef51SXin LI     xml_failure(g_parser);
4488*4543ef51SXin LI   CharData_CheckXMLChars(&storage, expected);
4489*4543ef51SXin LI }
4490*4543ef51SXin LI END_TEST
4491*4543ef51SXin LI 
4492*4543ef51SXin LI /* Test big-endian UTF-16 given an explicit little-endian encoding */
START_TEST(test_ext_entity_utf16_le)4493*4543ef51SXin LI START_TEST(test_ext_entity_utf16_le) {
4494*4543ef51SXin LI   const char *text = "<!DOCTYPE doc [\n"
4495*4543ef51SXin LI                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4496*4543ef51SXin LI                      "]>\n"
4497*4543ef51SXin LI                      "<doc>&en;</doc>";
4498*4543ef51SXin LI   ExtTest2 test_data = {"\0<\0e\0/\0>", 8, XCS("utf-16le"), NULL};
4499*4543ef51SXin LI #ifdef XML_UNICODE
4500*4543ef51SXin LI   const XML_Char *expected = XCS("\x3c00\x6500\x2f00\x3e00");
4501*4543ef51SXin LI #else
4502*4543ef51SXin LI   const XML_Char *expected = XCS("\xe3\xb0\x80"   /* U+3C00 */
4503*4543ef51SXin LI                                  "\xe6\x94\x80"   /* U+6500 */
4504*4543ef51SXin LI                                  "\xe2\xbc\x80"   /* U+2F00 */
4505*4543ef51SXin LI                                  "\xe3\xb8\x80"); /* U+3E00 */
4506*4543ef51SXin LI #endif
4507*4543ef51SXin LI   CharData storage;
4508*4543ef51SXin LI 
4509*4543ef51SXin LI   CharData_Init(&storage);
4510*4543ef51SXin LI   test_data.storage = &storage;
4511*4543ef51SXin LI   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4512*4543ef51SXin LI   XML_SetUserData(g_parser, &test_data);
4513*4543ef51SXin LI   XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4514*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4515*4543ef51SXin LI       == XML_STATUS_ERROR)
4516*4543ef51SXin LI     xml_failure(g_parser);
4517*4543ef51SXin LI   CharData_CheckXMLChars(&storage, expected);
4518*4543ef51SXin LI }
4519*4543ef51SXin LI END_TEST
4520*4543ef51SXin LI 
4521*4543ef51SXin LI /* Test little-endian UTF-16 given no explicit encoding.
4522*4543ef51SXin LI  * The existing default encoding (UTF-8) is assumed to hold without a
4523*4543ef51SXin LI  * BOM to contradict it, so the entity value will in fact provoke an
4524*4543ef51SXin LI  * error because 0x00 is not a valid XML character.  We parse the
4525*4543ef51SXin LI  * whole buffer in one go rather than feeding it in byte by byte to
4526*4543ef51SXin LI  * exercise different code paths in the initial scanning routines.
4527*4543ef51SXin LI  */
START_TEST(test_ext_entity_utf16_unknown)4528*4543ef51SXin LI START_TEST(test_ext_entity_utf16_unknown) {
4529*4543ef51SXin LI   const char *text = "<!DOCTYPE doc [\n"
4530*4543ef51SXin LI                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4531*4543ef51SXin LI                      "]>\n"
4532*4543ef51SXin LI                      "<doc>&en;</doc>";
4533*4543ef51SXin LI   ExtFaults2 test_data
4534*4543ef51SXin LI       = {"a\0b\0c\0", 6, "Invalid character in entity not faulted", NULL,
4535*4543ef51SXin LI          XML_ERROR_INVALID_TOKEN};
4536*4543ef51SXin LI 
4537*4543ef51SXin LI   XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter2);
4538*4543ef51SXin LI   XML_SetUserData(g_parser, &test_data);
4539*4543ef51SXin LI   expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
4540*4543ef51SXin LI                  "Invalid character should not have been accepted");
4541*4543ef51SXin LI }
4542*4543ef51SXin LI END_TEST
4543*4543ef51SXin LI 
4544*4543ef51SXin LI /* Test not-quite-UTF-8 BOM (0xEF 0xBB 0xBF) */
START_TEST(test_ext_entity_utf8_non_bom)4545*4543ef51SXin LI START_TEST(test_ext_entity_utf8_non_bom) {
4546*4543ef51SXin LI   const char *text = "<!DOCTYPE doc [\n"
4547*4543ef51SXin LI                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4548*4543ef51SXin LI                      "]>\n"
4549*4543ef51SXin LI                      "<doc>&en;</doc>";
4550*4543ef51SXin LI   ExtTest2 test_data
4551*4543ef51SXin LI       = {"\xef\xbb\x80", /* Arabic letter DAD medial form, U+FEC0 */
4552*4543ef51SXin LI          3, NULL, NULL};
4553*4543ef51SXin LI #ifdef XML_UNICODE
4554*4543ef51SXin LI   const XML_Char *expected = XCS("\xfec0");
4555*4543ef51SXin LI #else
4556*4543ef51SXin LI   const XML_Char *expected = XCS("\xef\xbb\x80");
4557*4543ef51SXin LI #endif
4558*4543ef51SXin LI   CharData storage;
4559*4543ef51SXin LI 
4560*4543ef51SXin LI   CharData_Init(&storage);
4561*4543ef51SXin LI   test_data.storage = &storage;
4562*4543ef51SXin LI   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4563*4543ef51SXin LI   XML_SetUserData(g_parser, &test_data);
4564*4543ef51SXin LI   XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4565*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4566*4543ef51SXin LI       == XML_STATUS_ERROR)
4567*4543ef51SXin LI     xml_failure(g_parser);
4568*4543ef51SXin LI   CharData_CheckXMLChars(&storage, expected);
4569*4543ef51SXin LI }
4570*4543ef51SXin LI END_TEST
4571*4543ef51SXin LI 
4572*4543ef51SXin LI /* Test that UTF-8 in a CDATA section is correctly passed through */
START_TEST(test_utf8_in_cdata_section)4573*4543ef51SXin LI START_TEST(test_utf8_in_cdata_section) {
4574*4543ef51SXin LI   const char *text = "<doc><![CDATA[one \xc3\xa9 two]]></doc>";
4575*4543ef51SXin LI #ifdef XML_UNICODE
4576*4543ef51SXin LI   const XML_Char *expected = XCS("one \x00e9 two");
4577*4543ef51SXin LI #else
4578*4543ef51SXin LI   const XML_Char *expected = XCS("one \xc3\xa9 two");
4579*4543ef51SXin LI #endif
4580*4543ef51SXin LI 
4581*4543ef51SXin LI   run_character_check(text, expected);
4582*4543ef51SXin LI }
4583*4543ef51SXin LI END_TEST
4584*4543ef51SXin LI 
4585*4543ef51SXin LI /* Test that little-endian UTF-16 in a CDATA section is handled */
START_TEST(test_utf8_in_cdata_section_2)4586*4543ef51SXin LI START_TEST(test_utf8_in_cdata_section_2) {
4587*4543ef51SXin LI   const char *text = "<doc><![CDATA[\xc3\xa9]\xc3\xa9two]]></doc>";
4588*4543ef51SXin LI #ifdef XML_UNICODE
4589*4543ef51SXin LI   const XML_Char *expected = XCS("\x00e9]\x00e9two");
4590*4543ef51SXin LI #else
4591*4543ef51SXin LI   const XML_Char *expected = XCS("\xc3\xa9]\xc3\xa9two");
4592*4543ef51SXin LI #endif
4593*4543ef51SXin LI 
4594*4543ef51SXin LI   run_character_check(text, expected);
4595*4543ef51SXin LI }
4596*4543ef51SXin LI END_TEST
4597*4543ef51SXin LI 
START_TEST(test_utf8_in_start_tags)4598*4543ef51SXin LI START_TEST(test_utf8_in_start_tags) {
4599*4543ef51SXin LI   struct test_case {
4600*4543ef51SXin LI     bool goodName;
4601*4543ef51SXin LI     bool goodNameStart;
4602*4543ef51SXin LI     const char *tagName;
4603*4543ef51SXin LI   };
4604*4543ef51SXin LI 
4605*4543ef51SXin LI   // The idea with the tests below is this:
4606*4543ef51SXin LI   // We want to cover 1-, 2- and 3-byte sequences, 4-byte sequences
4607*4543ef51SXin LI   // go to isNever and are hence not a concern.
4608*4543ef51SXin LI   //
4609*4543ef51SXin LI   // We start with a character that is a valid name character
4610*4543ef51SXin LI   // (or even name-start character, see XML 1.0r4 spec) and then we flip
4611*4543ef51SXin LI   // single bits at places where (1) the result leaves the UTF-8 encoding space
4612*4543ef51SXin LI   // and (2) we stay in the same n-byte sequence family.
4613*4543ef51SXin LI   //
4614*4543ef51SXin LI   // The flipped bits are highlighted in angle brackets in comments,
4615*4543ef51SXin LI   // e.g. "[<1>011 1001]" means we had [0011 1001] but we now flipped
4616*4543ef51SXin LI   // the most significant bit to 1 to leave UTF-8 encoding space.
4617*4543ef51SXin LI   struct test_case cases[] = {
4618*4543ef51SXin LI       // 1-byte UTF-8: [0xxx xxxx]
4619*4543ef51SXin LI       {true, true, "\x3A"},   // [0011 1010] = ASCII colon ':'
4620*4543ef51SXin LI       {false, false, "\xBA"}, // [<1>011 1010]
4621*4543ef51SXin LI       {true, false, "\x39"},  // [0011 1001] = ASCII nine '9'
4622*4543ef51SXin LI       {false, false, "\xB9"}, // [<1>011 1001]
4623*4543ef51SXin LI 
4624*4543ef51SXin LI       // 2-byte UTF-8: [110x xxxx] [10xx xxxx]
4625*4543ef51SXin LI       {true, true, "\xDB\xA5"},   // [1101 1011] [1010 0101] =
4626*4543ef51SXin LI                                   // Arabic small waw U+06E5
4627*4543ef51SXin LI       {false, false, "\x9B\xA5"}, // [1<0>01 1011] [1010 0101]
4628*4543ef51SXin LI       {false, false, "\xDB\x25"}, // [1101 1011] [<0>010 0101]
4629*4543ef51SXin LI       {false, false, "\xDB\xE5"}, // [1101 1011] [1<1>10 0101]
4630*4543ef51SXin LI       {true, false, "\xCC\x81"},  // [1100 1100] [1000 0001] =
4631*4543ef51SXin LI                                   // combining char U+0301
4632*4543ef51SXin LI       {false, false, "\x8C\x81"}, // [1<0>00 1100] [1000 0001]
4633*4543ef51SXin LI       {false, false, "\xCC\x01"}, // [1100 1100] [<0>000 0001]
4634*4543ef51SXin LI       {false, false, "\xCC\xC1"}, // [1100 1100] [1<1>00 0001]
4635*4543ef51SXin LI 
4636*4543ef51SXin LI       // 3-byte UTF-8: [1110 xxxx] [10xx xxxx] [10xxxxxx]
4637*4543ef51SXin LI       {true, true, "\xE0\xA4\x85"},   // [1110 0000] [1010 0100] [1000 0101] =
4638*4543ef51SXin LI                                       // Devanagari Letter A U+0905
4639*4543ef51SXin LI       {false, false, "\xA0\xA4\x85"}, // [1<0>10 0000] [1010 0100] [1000 0101]
4640*4543ef51SXin LI       {false, false, "\xE0\x24\x85"}, // [1110 0000] [<0>010 0100] [1000 0101]
4641*4543ef51SXin LI       {false, false, "\xE0\xE4\x85"}, // [1110 0000] [1<1>10 0100] [1000 0101]
4642*4543ef51SXin LI       {false, false, "\xE0\xA4\x05"}, // [1110 0000] [1010 0100] [<0>000 0101]
4643*4543ef51SXin LI       {false, false, "\xE0\xA4\xC5"}, // [1110 0000] [1010 0100] [1<1>00 0101]
4644*4543ef51SXin LI       {true, false, "\xE0\xA4\x81"},  // [1110 0000] [1010 0100] [1000 0001] =
4645*4543ef51SXin LI                                       // combining char U+0901
4646*4543ef51SXin LI       {false, false, "\xA0\xA4\x81"}, // [1<0>10 0000] [1010 0100] [1000 0001]
4647*4543ef51SXin LI       {false, false, "\xE0\x24\x81"}, // [1110 0000] [<0>010 0100] [1000 0001]
4648*4543ef51SXin LI       {false, false, "\xE0\xE4\x81"}, // [1110 0000] [1<1>10 0100] [1000 0001]
4649*4543ef51SXin LI       {false, false, "\xE0\xA4\x01"}, // [1110 0000] [1010 0100] [<0>000 0001]
4650*4543ef51SXin LI       {false, false, "\xE0\xA4\xC1"}, // [1110 0000] [1010 0100] [1<1>00 0001]
4651*4543ef51SXin LI   };
4652*4543ef51SXin LI   const bool atNameStart[] = {true, false};
4653*4543ef51SXin LI 
4654*4543ef51SXin LI   size_t i = 0;
4655*4543ef51SXin LI   char doc[1024];
4656*4543ef51SXin LI   size_t failCount = 0;
4657*4543ef51SXin LI 
4658*4543ef51SXin LI   // we need all the bytes to be parsed, but we don't want the errors that can
4659*4543ef51SXin LI   // trigger on isFinal=XML_TRUE, so we skip the test if the heuristic is on.
4660*4543ef51SXin LI   if (g_reparseDeferralEnabledDefault) {
4661*4543ef51SXin LI     return;
4662*4543ef51SXin LI   }
4663*4543ef51SXin LI 
4664*4543ef51SXin LI   for (; i < sizeof(cases) / sizeof(cases[0]); i++) {
4665*4543ef51SXin LI     size_t j = 0;
4666*4543ef51SXin LI     for (; j < sizeof(atNameStart) / sizeof(atNameStart[0]); j++) {
4667*4543ef51SXin LI       const bool expectedSuccess
4668*4543ef51SXin LI           = atNameStart[j] ? cases[i].goodNameStart : cases[i].goodName;
4669*4543ef51SXin LI       snprintf(doc, sizeof(doc), "<%s%s><!--", atNameStart[j] ? "" : "a",
4670*4543ef51SXin LI                cases[i].tagName);
4671*4543ef51SXin LI       XML_Parser parser = XML_ParserCreate(NULL);
4672*4543ef51SXin LI 
4673*4543ef51SXin LI       const enum XML_Status status = _XML_Parse_SINGLE_BYTES(
4674*4543ef51SXin LI           parser, doc, (int)strlen(doc), /*isFinal=*/XML_FALSE);
4675*4543ef51SXin LI 
4676*4543ef51SXin LI       bool success = true;
4677*4543ef51SXin LI       if ((status == XML_STATUS_OK) != expectedSuccess) {
4678*4543ef51SXin LI         success = false;
4679*4543ef51SXin LI       }
4680*4543ef51SXin LI       if ((status == XML_STATUS_ERROR)
4681*4543ef51SXin LI           && (XML_GetErrorCode(parser) != XML_ERROR_INVALID_TOKEN)) {
4682*4543ef51SXin LI         success = false;
4683*4543ef51SXin LI       }
4684*4543ef51SXin LI 
4685*4543ef51SXin LI       if (! success) {
4686*4543ef51SXin LI         fprintf(
4687*4543ef51SXin LI             stderr,
4688*4543ef51SXin LI             "FAIL case %2u (%sat name start, %u-byte sequence, error code %d)\n",
4689*4543ef51SXin LI             (unsigned)i + 1u, atNameStart[j] ? "    " : "not ",
4690*4543ef51SXin LI             (unsigned)strlen(cases[i].tagName), XML_GetErrorCode(parser));
4691*4543ef51SXin LI         failCount++;
4692*4543ef51SXin LI       }
4693*4543ef51SXin LI 
4694*4543ef51SXin LI       XML_ParserFree(parser);
4695*4543ef51SXin LI     }
4696*4543ef51SXin LI   }
4697*4543ef51SXin LI 
4698*4543ef51SXin LI   if (failCount > 0) {
4699*4543ef51SXin LI     fail("UTF-8 regression detected");
4700*4543ef51SXin LI   }
4701*4543ef51SXin LI }
4702*4543ef51SXin LI END_TEST
4703*4543ef51SXin LI 
4704*4543ef51SXin LI /* Test trailing spaces in elements are accepted */
START_TEST(test_trailing_spaces_in_elements)4705*4543ef51SXin LI START_TEST(test_trailing_spaces_in_elements) {
4706*4543ef51SXin LI   const char *text = "<doc   >Hi</doc >";
4707*4543ef51SXin LI   const XML_Char *expected = XCS("doc/doc");
4708*4543ef51SXin LI   CharData storage;
4709*4543ef51SXin LI 
4710*4543ef51SXin LI   CharData_Init(&storage);
4711*4543ef51SXin LI   XML_SetElementHandler(g_parser, record_element_start_handler,
4712*4543ef51SXin LI                         record_element_end_handler);
4713*4543ef51SXin LI   XML_SetUserData(g_parser, &storage);
4714*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4715*4543ef51SXin LI       == XML_STATUS_ERROR)
4716*4543ef51SXin LI     xml_failure(g_parser);
4717*4543ef51SXin LI   CharData_CheckXMLChars(&storage, expected);
4718*4543ef51SXin LI }
4719*4543ef51SXin LI END_TEST
4720*4543ef51SXin LI 
START_TEST(test_utf16_attribute)4721*4543ef51SXin LI START_TEST(test_utf16_attribute) {
4722*4543ef51SXin LI   const char text[] =
4723*4543ef51SXin LI       /* <d {KHO KHWAI}{CHO CHAN}='a'/>
4724*4543ef51SXin LI        * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8
4725*4543ef51SXin LI        * and   {CHO CHAN}  = U+0E08 = 0xe0 0xb8 0x88 in UTF-8
4726*4543ef51SXin LI        */
4727*4543ef51SXin LI       "<\0d\0 \0\x04\x0e\x08\x0e=\0'\0a\0'\0/\0>\0";
4728*4543ef51SXin LI   const XML_Char *expected = XCS("a");
4729*4543ef51SXin LI   CharData storage;
4730*4543ef51SXin LI 
4731*4543ef51SXin LI   CharData_Init(&storage);
4732*4543ef51SXin LI   XML_SetStartElementHandler(g_parser, accumulate_attribute);
4733*4543ef51SXin LI   XML_SetUserData(g_parser, &storage);
4734*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4735*4543ef51SXin LI       == XML_STATUS_ERROR)
4736*4543ef51SXin LI     xml_failure(g_parser);
4737*4543ef51SXin LI   CharData_CheckXMLChars(&storage, expected);
4738*4543ef51SXin LI }
4739*4543ef51SXin LI END_TEST
4740*4543ef51SXin LI 
START_TEST(test_utf16_second_attr)4741*4543ef51SXin LI START_TEST(test_utf16_second_attr) {
4742*4543ef51SXin LI   /* <d a='1' {KHO KHWAI}{CHO CHAN}='2'/>
4743*4543ef51SXin LI    * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8
4744*4543ef51SXin LI    * and   {CHO CHAN}  = U+0E08 = 0xe0 0xb8 0x88 in UTF-8
4745*4543ef51SXin LI    */
4746*4543ef51SXin LI   const char text[] = "<\0d\0 \0a\0=\0'\0\x31\0'\0 \0"
4747*4543ef51SXin LI                       "\x04\x0e\x08\x0e=\0'\0\x32\0'\0/\0>\0";
4748*4543ef51SXin LI   const XML_Char *expected = XCS("1");
4749*4543ef51SXin LI   CharData storage;
4750*4543ef51SXin LI 
4751*4543ef51SXin LI   CharData_Init(&storage);
4752*4543ef51SXin LI   XML_SetStartElementHandler(g_parser, accumulate_attribute);
4753*4543ef51SXin LI   XML_SetUserData(g_parser, &storage);
4754*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4755*4543ef51SXin LI       == XML_STATUS_ERROR)
4756*4543ef51SXin LI     xml_failure(g_parser);
4757*4543ef51SXin LI   CharData_CheckXMLChars(&storage, expected);
4758*4543ef51SXin LI }
4759*4543ef51SXin LI END_TEST
4760*4543ef51SXin LI 
START_TEST(test_attr_after_solidus)4761*4543ef51SXin LI START_TEST(test_attr_after_solidus) {
4762*4543ef51SXin LI   const char *text = "<doc attr1='a' / attr2='b'>";
4763*4543ef51SXin LI 
4764*4543ef51SXin LI   expect_failure(text, XML_ERROR_INVALID_TOKEN, "Misplaced / not faulted");
4765*4543ef51SXin LI }
4766*4543ef51SXin LI END_TEST
4767*4543ef51SXin LI 
START_TEST(test_utf16_pe)4768*4543ef51SXin LI START_TEST(test_utf16_pe) {
4769*4543ef51SXin LI   /* <!DOCTYPE doc [
4770*4543ef51SXin LI    * <!ENTITY % {KHO KHWAI}{CHO CHAN} '<!ELEMENT doc (#PCDATA)>'>
4771*4543ef51SXin LI    * %{KHO KHWAI}{CHO CHAN};
4772*4543ef51SXin LI    * ]>
4773*4543ef51SXin LI    * <doc></doc>
4774*4543ef51SXin LI    *
4775*4543ef51SXin LI    * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8
4776*4543ef51SXin LI    * and   {CHO CHAN}  = U+0E08 = 0xe0 0xb8 0x88 in UTF-8
4777*4543ef51SXin LI    */
4778*4543ef51SXin LI   const char text[] = "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0o\0c\0 \0[\0\n"
4779*4543ef51SXin LI                       "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0%\0 \x0e\x04\x0e\x08\0 "
4780*4543ef51SXin LI                       "\0'\0<\0!\0E\0L\0E\0M\0E\0N\0T\0 "
4781*4543ef51SXin LI                       "\0d\0o\0c\0 \0(\0#\0P\0C\0D\0A\0T\0A\0)\0>\0'\0>\0\n"
4782*4543ef51SXin LI                       "\0%\x0e\x04\x0e\x08\0;\0\n"
4783*4543ef51SXin LI                       "\0]\0>\0\n"
4784*4543ef51SXin LI                       "\0<\0d\0o\0c\0>\0<\0/\0d\0o\0c\0>";
4785*4543ef51SXin LI #ifdef XML_UNICODE
4786*4543ef51SXin LI   const XML_Char *expected = XCS("\x0e04\x0e08=<!ELEMENT doc (#PCDATA)>\n");
4787*4543ef51SXin LI #else
4788*4543ef51SXin LI   const XML_Char *expected
4789*4543ef51SXin LI       = XCS("\xe0\xb8\x84\xe0\xb8\x88=<!ELEMENT doc (#PCDATA)>\n");
4790*4543ef51SXin LI #endif
4791*4543ef51SXin LI   CharData storage;
4792*4543ef51SXin LI 
4793*4543ef51SXin LI   CharData_Init(&storage);
4794*4543ef51SXin LI   XML_SetUserData(g_parser, &storage);
4795*4543ef51SXin LI   XML_SetEntityDeclHandler(g_parser, accumulate_entity_decl);
4796*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4797*4543ef51SXin LI       == XML_STATUS_ERROR)
4798*4543ef51SXin LI     xml_failure(g_parser);
4799*4543ef51SXin LI   CharData_CheckXMLChars(&storage, expected);
4800*4543ef51SXin LI }
4801*4543ef51SXin LI END_TEST
4802*4543ef51SXin LI 
/* Test that duff attribute description keywords are rejected */
START_TEST(test_bad_attr_desc_keyword)4804*4543ef51SXin LI START_TEST(test_bad_attr_desc_keyword) {
4805*4543ef51SXin LI   const char *text = "<!DOCTYPE doc [\n"
4806*4543ef51SXin LI                      "  <!ATTLIST doc attr CDATA #!IMPLIED>\n"
4807*4543ef51SXin LI                      "]>\n"
4808*4543ef51SXin LI                      "<doc />";
4809*4543ef51SXin LI 
4810*4543ef51SXin LI   expect_failure(text, XML_ERROR_INVALID_TOKEN,
4811*4543ef51SXin LI                  "Bad keyword !IMPLIED not faulted");
4812*4543ef51SXin LI }
4813*4543ef51SXin LI END_TEST
4814*4543ef51SXin LI 
/* Test that an invalid attribute description keyword consisting of
 * UTF-16 characters with their top bytes non-zero is correctly
 * faulted
 */
START_TEST(test_bad_attr_desc_keyword_utf16)4819*4543ef51SXin LI START_TEST(test_bad_attr_desc_keyword_utf16) {
4820*4543ef51SXin LI   /* <!DOCTYPE d [
4821*4543ef51SXin LI    * <!ATTLIST d a CDATA #{KHO KHWAI}{CHO CHAN}>
4822*4543ef51SXin LI    * ]><d/>
4823*4543ef51SXin LI    *
4824*4543ef51SXin LI    * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8
4825*4543ef51SXin LI    * and   {CHO CHAN}  = U+0E08 = 0xe0 0xb8 0x88 in UTF-8
4826*4543ef51SXin LI    */
4827*4543ef51SXin LI   const char text[]
4828*4543ef51SXin LI       = "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 \0[\0\n"
4829*4543ef51SXin LI         "\0<\0!\0A\0T\0T\0L\0I\0S\0T\0 \0d\0 \0a\0 \0C\0D\0A\0T\0A\0 "
4830*4543ef51SXin LI         "\0#\x0e\x04\x0e\x08\0>\0\n"
4831*4543ef51SXin LI         "\0]\0>\0<\0d\0/\0>";
4832*4543ef51SXin LI 
4833*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4834*4543ef51SXin LI       != XML_STATUS_ERROR)
4835*4543ef51SXin LI     fail("Invalid UTF16 attribute keyword not faulted");
4836*4543ef51SXin LI   if (XML_GetErrorCode(g_parser) != XML_ERROR_SYNTAX)
4837*4543ef51SXin LI     xml_failure(g_parser);
4838*4543ef51SXin LI }
4839*4543ef51SXin LI END_TEST
4840*4543ef51SXin LI 
/* Test that invalid syntax in a <!DOCTYPE> is rejected.  Do this
 * using prefix-encoding (see above) to trigger specific code paths
 */
START_TEST(test_bad_doctype)4844*4543ef51SXin LI START_TEST(test_bad_doctype) {
4845*4543ef51SXin LI   const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4846*4543ef51SXin LI                      "<!DOCTYPE doc [ \x80\x44 ]><doc/>";
4847*4543ef51SXin LI 
4848*4543ef51SXin LI   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4849*4543ef51SXin LI   expect_failure(text, XML_ERROR_SYNTAX,
4850*4543ef51SXin LI                  "Invalid bytes in DOCTYPE not faulted");
4851*4543ef51SXin LI }
4852*4543ef51SXin LI END_TEST
4853*4543ef51SXin LI 
START_TEST(test_bad_doctype_utf8)4854*4543ef51SXin LI START_TEST(test_bad_doctype_utf8) {
4855*4543ef51SXin LI   const char *text = "<!DOCTYPE \xDB\x25"
4856*4543ef51SXin LI                      "doc><doc/>"; // [1101 1011] [<0>010 0101]
4857*4543ef51SXin LI   expect_failure(text, XML_ERROR_INVALID_TOKEN,
4858*4543ef51SXin LI                  "Invalid UTF-8 in DOCTYPE not faulted");
4859*4543ef51SXin LI }
4860*4543ef51SXin LI END_TEST
4861*4543ef51SXin LI 
START_TEST(test_bad_doctype_utf16)4862*4543ef51SXin LI START_TEST(test_bad_doctype_utf16) {
4863*4543ef51SXin LI   const char text[] =
4864*4543ef51SXin LI       /* <!DOCTYPE doc [ \x06f2 ]><doc/>
4865*4543ef51SXin LI        *
4866*4543ef51SXin LI        * U+06F2 = EXTENDED ARABIC-INDIC DIGIT TWO, a valid number
4867*4543ef51SXin LI        * (name character) but not a valid letter (name start character)
4868*4543ef51SXin LI        */
4869*4543ef51SXin LI       "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0o\0c\0 \0[\0 "
4870*4543ef51SXin LI       "\x06\xf2"
4871*4543ef51SXin LI       "\0 \0]\0>\0<\0d\0o\0c\0/\0>";
4872*4543ef51SXin LI 
4873*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4874*4543ef51SXin LI       != XML_STATUS_ERROR)
4875*4543ef51SXin LI     fail("Invalid bytes in DOCTYPE not faulted");
4876*4543ef51SXin LI   if (XML_GetErrorCode(g_parser) != XML_ERROR_SYNTAX)
4877*4543ef51SXin LI     xml_failure(g_parser);
4878*4543ef51SXin LI }
4879*4543ef51SXin LI END_TEST
4880*4543ef51SXin LI 
START_TEST(test_bad_doctype_plus)4881*4543ef51SXin LI START_TEST(test_bad_doctype_plus) {
4882*4543ef51SXin LI   const char *text = "<!DOCTYPE 1+ [ <!ENTITY foo 'bar'> ]>\n"
4883*4543ef51SXin LI                      "<1+>&foo;</1+>";
4884*4543ef51SXin LI 
4885*4543ef51SXin LI   expect_failure(text, XML_ERROR_INVALID_TOKEN,
4886*4543ef51SXin LI                  "'+' in document name not faulted");
4887*4543ef51SXin LI }
4888*4543ef51SXin LI END_TEST
4889*4543ef51SXin LI 
START_TEST(test_bad_doctype_star)4890*4543ef51SXin LI START_TEST(test_bad_doctype_star) {
4891*4543ef51SXin LI   const char *text = "<!DOCTYPE 1* [ <!ENTITY foo 'bar'> ]>\n"
4892*4543ef51SXin LI                      "<1*>&foo;</1*>";
4893*4543ef51SXin LI 
4894*4543ef51SXin LI   expect_failure(text, XML_ERROR_INVALID_TOKEN,
4895*4543ef51SXin LI                  "'*' in document name not faulted");
4896*4543ef51SXin LI }
4897*4543ef51SXin LI END_TEST
4898*4543ef51SXin LI 
START_TEST(test_bad_doctype_query)4899*4543ef51SXin LI START_TEST(test_bad_doctype_query) {
4900*4543ef51SXin LI   const char *text = "<!DOCTYPE 1? [ <!ENTITY foo 'bar'> ]>\n"
4901*4543ef51SXin LI                      "<1?>&foo;</1?>";
4902*4543ef51SXin LI 
4903*4543ef51SXin LI   expect_failure(text, XML_ERROR_INVALID_TOKEN,
4904*4543ef51SXin LI                  "'?' in document name not faulted");
4905*4543ef51SXin LI }
4906*4543ef51SXin LI END_TEST
4907*4543ef51SXin LI 
START_TEST(test_unknown_encoding_bad_ignore)4908*4543ef51SXin LI START_TEST(test_unknown_encoding_bad_ignore) {
4909*4543ef51SXin LI   const char *text = "<?xml version='1.0' encoding='prefix-conv'?>"
4910*4543ef51SXin LI                      "<!DOCTYPE doc SYSTEM 'foo'>"
4911*4543ef51SXin LI                      "<doc><e>&entity;</e></doc>";
4912*4543ef51SXin LI   ExtFaults fault = {"<![IGNORE[<!ELEMENT \xffG (#PCDATA)*>]]>",
4913*4543ef51SXin LI                      "Invalid character not faulted", XCS("prefix-conv"),
4914*4543ef51SXin LI                      XML_ERROR_INVALID_TOKEN};
4915*4543ef51SXin LI 
4916*4543ef51SXin LI   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4917*4543ef51SXin LI   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
4918*4543ef51SXin LI   XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
4919*4543ef51SXin LI   XML_SetUserData(g_parser, &fault);
4920*4543ef51SXin LI   expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
4921*4543ef51SXin LI                  "Bad IGNORE section with unknown encoding not failed");
4922*4543ef51SXin LI }
4923*4543ef51SXin LI END_TEST
4924*4543ef51SXin LI 
START_TEST(test_entity_in_utf16_be_attr)4925*4543ef51SXin LI START_TEST(test_entity_in_utf16_be_attr) {
4926*4543ef51SXin LI   const char text[] =
4927*4543ef51SXin LI       /* <e a='&#228; &#x00E4;'></e> */
4928*4543ef51SXin LI       "\0<\0e\0 \0a\0=\0'\0&\0#\0\x32\0\x32\0\x38\0;\0 "
4929*4543ef51SXin LI       "\0&\0#\0x\0\x30\0\x30\0E\0\x34\0;\0'\0>\0<\0/\0e\0>";
4930*4543ef51SXin LI #ifdef XML_UNICODE
4931*4543ef51SXin LI   const XML_Char *expected = XCS("\x00e4 \x00e4");
4932*4543ef51SXin LI #else
4933*4543ef51SXin LI   const XML_Char *expected = XCS("\xc3\xa4 \xc3\xa4");
4934*4543ef51SXin LI #endif
4935*4543ef51SXin LI   CharData storage;
4936*4543ef51SXin LI 
4937*4543ef51SXin LI   CharData_Init(&storage);
4938*4543ef51SXin LI   XML_SetUserData(g_parser, &storage);
4939*4543ef51SXin LI   XML_SetStartElementHandler(g_parser, accumulate_attribute);
4940*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4941*4543ef51SXin LI       == XML_STATUS_ERROR)
4942*4543ef51SXin LI     xml_failure(g_parser);
4943*4543ef51SXin LI   CharData_CheckXMLChars(&storage, expected);
4944*4543ef51SXin LI }
4945*4543ef51SXin LI END_TEST
4946*4543ef51SXin LI 
START_TEST(test_entity_in_utf16_le_attr)4947*4543ef51SXin LI START_TEST(test_entity_in_utf16_le_attr) {
4948*4543ef51SXin LI   const char text[] =
4949*4543ef51SXin LI       /* <e a='&#228; &#x00E4;'></e> */
4950*4543ef51SXin LI       "<\0e\0 \0a\0=\0'\0&\0#\0\x32\0\x32\0\x38\0;\0 \0"
4951*4543ef51SXin LI       "&\0#\0x\0\x30\0\x30\0E\0\x34\0;\0'\0>\0<\0/\0e\0>\0";
4952*4543ef51SXin LI #ifdef XML_UNICODE
4953*4543ef51SXin LI   const XML_Char *expected = XCS("\x00e4 \x00e4");
4954*4543ef51SXin LI #else
4955*4543ef51SXin LI   const XML_Char *expected = XCS("\xc3\xa4 \xc3\xa4");
4956*4543ef51SXin LI #endif
4957*4543ef51SXin LI   CharData storage;
4958*4543ef51SXin LI 
4959*4543ef51SXin LI   CharData_Init(&storage);
4960*4543ef51SXin LI   XML_SetUserData(g_parser, &storage);
4961*4543ef51SXin LI   XML_SetStartElementHandler(g_parser, accumulate_attribute);
4962*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4963*4543ef51SXin LI       == XML_STATUS_ERROR)
4964*4543ef51SXin LI     xml_failure(g_parser);
4965*4543ef51SXin LI   CharData_CheckXMLChars(&storage, expected);
4966*4543ef51SXin LI }
4967*4543ef51SXin LI END_TEST
4968*4543ef51SXin LI 
START_TEST(test_entity_public_utf16_be)4969*4543ef51SXin LI START_TEST(test_entity_public_utf16_be) {
4970*4543ef51SXin LI   const char text[] =
4971*4543ef51SXin LI       /* <!DOCTYPE d [ */
4972*4543ef51SXin LI       "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 \0[\0\n"
4973*4543ef51SXin LI       /* <!ENTITY % e PUBLIC 'foo' 'bar.ent'> */
4974*4543ef51SXin LI       "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0%\0 \0e\0 \0P\0U\0B\0L\0I\0C\0 "
4975*4543ef51SXin LI       "\0'\0f\0o\0o\0'\0 \0'\0b\0a\0r\0.\0e\0n\0t\0'\0>\0\n"
4976*4543ef51SXin LI       /* %e; */
4977*4543ef51SXin LI       "\0%\0e\0;\0\n"
4978*4543ef51SXin LI       /* ]> */
4979*4543ef51SXin LI       "\0]\0>\0\n"
4980*4543ef51SXin LI       /* <d>&j;</d> */
4981*4543ef51SXin LI       "\0<\0d\0>\0&\0j\0;\0<\0/\0d\0>";
4982*4543ef51SXin LI   ExtTest2 test_data
4983*4543ef51SXin LI       = {/* <!ENTITY j 'baz'> */
4984*4543ef51SXin LI          "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0j\0 \0'\0b\0a\0z\0'\0>", 34, NULL, NULL};
4985*4543ef51SXin LI   const XML_Char *expected = XCS("baz");
4986*4543ef51SXin LI   CharData storage;
4987*4543ef51SXin LI 
4988*4543ef51SXin LI   CharData_Init(&storage);
4989*4543ef51SXin LI   test_data.storage = &storage;
4990*4543ef51SXin LI   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
4991*4543ef51SXin LI   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4992*4543ef51SXin LI   XML_SetUserData(g_parser, &test_data);
4993*4543ef51SXin LI   XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4994*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4995*4543ef51SXin LI       == XML_STATUS_ERROR)
4996*4543ef51SXin LI     xml_failure(g_parser);
4997*4543ef51SXin LI   CharData_CheckXMLChars(&storage, expected);
4998*4543ef51SXin LI }
4999*4543ef51SXin LI END_TEST
5000*4543ef51SXin LI 
START_TEST(test_entity_public_utf16_le)5001*4543ef51SXin LI START_TEST(test_entity_public_utf16_le) {
5002*4543ef51SXin LI   const char text[] =
5003*4543ef51SXin LI       /* <!DOCTYPE d [ */
5004*4543ef51SXin LI       "<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 \0[\0\n\0"
5005*4543ef51SXin LI       /* <!ENTITY % e PUBLIC 'foo' 'bar.ent'> */
5006*4543ef51SXin LI       "<\0!\0E\0N\0T\0I\0T\0Y\0 \0%\0 \0e\0 \0P\0U\0B\0L\0I\0C\0 \0"
5007*4543ef51SXin LI       "'\0f\0o\0o\0'\0 \0'\0b\0a\0r\0.\0e\0n\0t\0'\0>\0\n\0"
5008*4543ef51SXin LI       /* %e; */
5009*4543ef51SXin LI       "%\0e\0;\0\n\0"
5010*4543ef51SXin LI       /* ]> */
5011*4543ef51SXin LI       "]\0>\0\n\0"
5012*4543ef51SXin LI       /* <d>&j;</d> */
5013*4543ef51SXin LI       "<\0d\0>\0&\0j\0;\0<\0/\0d\0>\0";
5014*4543ef51SXin LI   ExtTest2 test_data
5015*4543ef51SXin LI       = {/* <!ENTITY j 'baz'> */
5016*4543ef51SXin LI          "<\0!\0E\0N\0T\0I\0T\0Y\0 \0j\0 \0'\0b\0a\0z\0'\0>\0", 34, NULL, NULL};
5017*4543ef51SXin LI   const XML_Char *expected = XCS("baz");
5018*4543ef51SXin LI   CharData storage;
5019*4543ef51SXin LI 
5020*4543ef51SXin LI   CharData_Init(&storage);
5021*4543ef51SXin LI   test_data.storage = &storage;
5022*4543ef51SXin LI   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
5023*4543ef51SXin LI   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
5024*4543ef51SXin LI   XML_SetUserData(g_parser, &test_data);
5025*4543ef51SXin LI   XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
5026*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
5027*4543ef51SXin LI       == XML_STATUS_ERROR)
5028*4543ef51SXin LI     xml_failure(g_parser);
5029*4543ef51SXin LI   CharData_CheckXMLChars(&storage, expected);
5030*4543ef51SXin LI }
5031*4543ef51SXin LI END_TEST
5032*4543ef51SXin LI 
/* Test that a doctype with neither an internal nor external subset is
 * faulted
 */
START_TEST(test_short_doctype)5036*4543ef51SXin LI START_TEST(test_short_doctype) {
5037*4543ef51SXin LI   const char *text = "<!DOCTYPE doc></doc>";
5038*4543ef51SXin LI   expect_failure(text, XML_ERROR_INVALID_TOKEN,
5039*4543ef51SXin LI                  "DOCTYPE without subset not rejected");
5040*4543ef51SXin LI }
5041*4543ef51SXin LI END_TEST
5042*4543ef51SXin LI 
START_TEST(test_short_doctype_2)5043*4543ef51SXin LI START_TEST(test_short_doctype_2) {
5044*4543ef51SXin LI   const char *text = "<!DOCTYPE doc PUBLIC></doc>";
5045*4543ef51SXin LI   expect_failure(text, XML_ERROR_SYNTAX,
5046*4543ef51SXin LI                  "DOCTYPE without Public ID not rejected");
5047*4543ef51SXin LI }
5048*4543ef51SXin LI END_TEST
5049*4543ef51SXin LI 
START_TEST(test_short_doctype_3)5050*4543ef51SXin LI START_TEST(test_short_doctype_3) {
5051*4543ef51SXin LI   const char *text = "<!DOCTYPE doc SYSTEM></doc>";
5052*4543ef51SXin LI   expect_failure(text, XML_ERROR_SYNTAX,
5053*4543ef51SXin LI                  "DOCTYPE without System ID not rejected");
5054*4543ef51SXin LI }
5055*4543ef51SXin LI END_TEST
5056*4543ef51SXin LI 
START_TEST(test_long_doctype)5057*4543ef51SXin LI START_TEST(test_long_doctype) {
5058*4543ef51SXin LI   const char *text = "<!DOCTYPE doc PUBLIC 'foo' 'bar' 'baz'></doc>";
5059*4543ef51SXin LI   expect_failure(text, XML_ERROR_SYNTAX, "DOCTYPE with extra ID not rejected");
5060*4543ef51SXin LI }
5061*4543ef51SXin LI END_TEST
5062*4543ef51SXin LI 
START_TEST(test_bad_entity)5063*4543ef51SXin LI START_TEST(test_bad_entity) {
5064*4543ef51SXin LI   const char *text = "<!DOCTYPE doc [\n"
5065*4543ef51SXin LI                      "  <!ENTITY foo PUBLIC>\n"
5066*4543ef51SXin LI                      "]>\n"
5067*4543ef51SXin LI                      "<doc/>";
5068*4543ef51SXin LI   expect_failure(text, XML_ERROR_SYNTAX,
5069*4543ef51SXin LI                  "ENTITY without Public ID is not rejected");
5070*4543ef51SXin LI }
5071*4543ef51SXin LI END_TEST
5072*4543ef51SXin LI 
/* Test that an unquoted entity value is faulted */
START_TEST(test_bad_entity_2)5074*4543ef51SXin LI START_TEST(test_bad_entity_2) {
5075*4543ef51SXin LI   const char *text = "<!DOCTYPE doc [\n"
5076*4543ef51SXin LI                      "  <!ENTITY % foo bar>\n"
5077*4543ef51SXin LI                      "]>\n"
5078*4543ef51SXin LI                      "<doc/>";
5079*4543ef51SXin LI   expect_failure(text, XML_ERROR_SYNTAX,
5080*4543ef51SXin LI                  "ENTITY without Public ID is not rejected");
5081*4543ef51SXin LI }
5082*4543ef51SXin LI END_TEST
5083*4543ef51SXin LI 
START_TEST(test_bad_entity_3)5084*4543ef51SXin LI START_TEST(test_bad_entity_3) {
5085*4543ef51SXin LI   const char *text = "<!DOCTYPE doc [\n"
5086*4543ef51SXin LI                      "  <!ENTITY % foo PUBLIC>\n"
5087*4543ef51SXin LI                      "]>\n"
5088*4543ef51SXin LI                      "<doc/>";
5089*4543ef51SXin LI   expect_failure(text, XML_ERROR_SYNTAX,
5090*4543ef51SXin LI                  "Parameter ENTITY without Public ID is not rejected");
5091*4543ef51SXin LI }
5092*4543ef51SXin LI END_TEST
5093*4543ef51SXin LI 
START_TEST(test_bad_entity_4)5094*4543ef51SXin LI START_TEST(test_bad_entity_4) {
5095*4543ef51SXin LI   const char *text = "<!DOCTYPE doc [\n"
5096*4543ef51SXin LI                      "  <!ENTITY % foo SYSTEM>\n"
5097*4543ef51SXin LI                      "]>\n"
5098*4543ef51SXin LI                      "<doc/>";
5099*4543ef51SXin LI   expect_failure(text, XML_ERROR_SYNTAX,
5100*4543ef51SXin LI                  "Parameter ENTITY without Public ID is not rejected");
5101*4543ef51SXin LI }
5102*4543ef51SXin LI END_TEST
5103*4543ef51SXin LI 
START_TEST(test_bad_notation)5104*4543ef51SXin LI START_TEST(test_bad_notation) {
5105*4543ef51SXin LI   const char *text = "<!DOCTYPE doc [\n"
5106*4543ef51SXin LI                      "  <!NOTATION n SYSTEM>\n"
5107*4543ef51SXin LI                      "]>\n"
5108*4543ef51SXin LI                      "<doc/>";
5109*4543ef51SXin LI   expect_failure(text, XML_ERROR_SYNTAX,
5110*4543ef51SXin LI                  "Notation without System ID is not rejected");
5111*4543ef51SXin LI }
5112*4543ef51SXin LI END_TEST
5113*4543ef51SXin LI 
/* Test for issue #11, wrongly suppressed default handler */
START_TEST(test_default_doctype_handler)5115*4543ef51SXin LI START_TEST(test_default_doctype_handler) {
5116*4543ef51SXin LI   const char *text = "<!DOCTYPE doc PUBLIC 'pubname' 'test.dtd' [\n"
5117*4543ef51SXin LI                      "  <!ENTITY foo 'bar'>\n"
5118*4543ef51SXin LI                      "]>\n"
5119*4543ef51SXin LI                      "<doc>&foo;</doc>";
5120*4543ef51SXin LI   DefaultCheck test_data[] = {{XCS("'pubname'"), 9, XML_FALSE},
5121*4543ef51SXin LI                               {XCS("'test.dtd'"), 10, XML_FALSE},
5122*4543ef51SXin LI                               {NULL, 0, XML_FALSE}};
5123*4543ef51SXin LI   int i;
5124*4543ef51SXin LI 
5125*4543ef51SXin LI   XML_SetUserData(g_parser, &test_data);
5126*4543ef51SXin LI   XML_SetDefaultHandler(g_parser, checking_default_handler);
5127*4543ef51SXin LI   XML_SetEntityDeclHandler(g_parser, dummy_entity_decl_handler);
5128*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
5129*4543ef51SXin LI       == XML_STATUS_ERROR)
5130*4543ef51SXin LI     xml_failure(g_parser);
5131*4543ef51SXin LI   for (i = 0; test_data[i].expected != NULL; i++)
5132*4543ef51SXin LI     if (! test_data[i].seen)
5133*4543ef51SXin LI       fail("Default handler not run for public !DOCTYPE");
5134*4543ef51SXin LI }
5135*4543ef51SXin LI END_TEST
5136*4543ef51SXin LI 
START_TEST(test_empty_element_abort)5137*4543ef51SXin LI START_TEST(test_empty_element_abort) {
5138*4543ef51SXin LI   const char *text = "<abort/>";
5139*4543ef51SXin LI 
5140*4543ef51SXin LI   XML_SetStartElementHandler(g_parser, start_element_suspender);
5141*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
5142*4543ef51SXin LI       != XML_STATUS_ERROR)
5143*4543ef51SXin LI     fail("Expected to error on abort");
5144*4543ef51SXin LI }
5145*4543ef51SXin LI END_TEST
5146*4543ef51SXin LI 
/* Regression test for GH issue #612: unfinished m_declAttributeType
 * allocation in ->m_tempPool can corrupt following allocation.
 */
START_TEST(test_pool_integrity_with_unfinished_attr)5150*4543ef51SXin LI START_TEST(test_pool_integrity_with_unfinished_attr) {
5151*4543ef51SXin LI   const char *text = "<?xml version='1.0' encoding='UTF-8'?>\n"
5152*4543ef51SXin LI                      "<!DOCTYPE foo [\n"
5153*4543ef51SXin LI                      "<!ELEMENT foo ANY>\n"
5154*4543ef51SXin LI                      "<!ENTITY % entp SYSTEM \"external.dtd\">\n"
5155*4543ef51SXin LI                      "%entp;\n"
5156*4543ef51SXin LI                      "]>\n"
5157*4543ef51SXin LI                      "<a></a>\n";
5158*4543ef51SXin LI   const XML_Char *expected = XCS("COMMENT");
5159*4543ef51SXin LI   CharData storage;
5160*4543ef51SXin LI 
5161*4543ef51SXin LI   CharData_Init(&storage);
5162*4543ef51SXin LI   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
5163*4543ef51SXin LI   XML_SetExternalEntityRefHandler(g_parser, external_entity_unfinished_attlist);
5164*4543ef51SXin LI   XML_SetAttlistDeclHandler(g_parser, dummy_attlist_decl_handler);
5165*4543ef51SXin LI   XML_SetCommentHandler(g_parser, accumulate_comment);
5166*4543ef51SXin LI   XML_SetUserData(g_parser, &storage);
5167*4543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
5168*4543ef51SXin LI       == XML_STATUS_ERROR)
5169*4543ef51SXin LI     xml_failure(g_parser);
5170*4543ef51SXin LI   CharData_CheckXMLChars(&storage, expected);
5171*4543ef51SXin LI }
5172*4543ef51SXin LI END_TEST
5173*4543ef51SXin LI 
START_TEST(test_nested_entity_suspend)5174*4543ef51SXin LI START_TEST(test_nested_entity_suspend) {
5175*4543ef51SXin LI   const char *const text = "<!DOCTYPE a [\n"
5176*4543ef51SXin LI                            "  <!ENTITY e1 '<!--e1-->'>\n"
5177*4543ef51SXin LI                            "  <!ENTITY e2 '<!--e2 head-->&e1;<!--e2 tail-->'>\n"
5178*4543ef51SXin LI                            "  <!ENTITY e3 '<!--e3 head-->&e2;<!--e3 tail-->'>\n"
5179*4543ef51SXin LI                            "]>\n"
5180*4543ef51SXin LI                            "<a><!--start-->&e3;<!--end--></a>";
5181*4543ef51SXin LI   const XML_Char *const expected = XCS("start") XCS("e3 head") XCS("e2 head")
5182*4543ef51SXin LI       XCS("e1") XCS("e2 tail") XCS("e3 tail") XCS("end");
5183*4543ef51SXin LI   CharData storage;
5184*4543ef51SXin LI   CharData_Init(&storage);
5185*4543ef51SXin LI   XML_Parser parser = XML_ParserCreate(NULL);
5186*4543ef51SXin LI   ParserPlusStorage parserPlusStorage = {parser, &storage};
5187*4543ef51SXin LI 
5188*4543ef51SXin LI   XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
5189*4543ef51SXin LI   XML_SetCommentHandler(parser, accumulate_and_suspend_comment_handler);
5190*4543ef51SXin LI   XML_SetUserData(parser, &parserPlusStorage);
5191*4543ef51SXin LI 
5192*4543ef51SXin LI   enum XML_Status status = XML_Parse(parser, text, (int)strlen(text), XML_TRUE);
5193*4543ef51SXin LI   while (status == XML_STATUS_SUSPENDED) {
5194*4543ef51SXin LI     status = XML_ResumeParser(parser);
5195*4543ef51SXin LI   }
5196*4543ef51SXin LI   if (status != XML_STATUS_OK)
5197*4543ef51SXin LI     xml_failure(parser);
5198*4543ef51SXin LI 
5199*4543ef51SXin LI   CharData_CheckXMLChars(&storage, expected);
5200*4543ef51SXin LI   XML_ParserFree(parser);
5201*4543ef51SXin LI }
5202*4543ef51SXin LI END_TEST
5203*4543ef51SXin LI 
/* Regression test for quadratic parsing on large tokens */
START_TEST(test_big_tokens_take_linear_time)5205*4543ef51SXin LI START_TEST(test_big_tokens_take_linear_time) {
5206*4543ef51SXin LI   const char *const too_slow_failure_message
5207*4543ef51SXin LI       = "Compared to the baseline runtime of the first test, this test has a "
5208*4543ef51SXin LI         "slowdown of more than <max_slowdown>. "
5209*4543ef51SXin LI         "Please keep increasing the value by 1 until it reliably passes the "
5210*4543ef51SXin LI         "test on your hardware and open a bug sharing that number with us. "
5211*4543ef51SXin LI         "Thanks in advance!";
5212*4543ef51SXin LI   const struct {
5213*4543ef51SXin LI     const char *pre;
5214*4543ef51SXin LI     const char *post;
5215*4543ef51SXin LI   } text[] = {
5216*4543ef51SXin LI       {"<a>", "</a>"},                      // assumed good, used as baseline
5217*4543ef51SXin LI       {"<b><![CDATA[ value: ", " ]]></b>"}, // CDATA, performed OK before patch
5218*4543ef51SXin LI       {"<c attr='", "'></c>"},              // big attribute, used to be O(N²)
5219*4543ef51SXin LI       {"<d><!-- ", " --></d>"},             // long comment, used to be O(N²)
5220*4543ef51SXin LI       {"<e><", "/></e>"},                   // big elem name, used to be O(N²)
5221*4543ef51SXin LI   };
5222*4543ef51SXin LI   const int num_cases = sizeof(text) / sizeof(text[0]);
5223*4543ef51SXin LI   // For the test we need a <max_slowdown> value that is:
5224*4543ef51SXin LI   // (1) big enough that the test passes reliably (avoiding flaky tests), and
5225*4543ef51SXin LI   // (2) small enough that the test actually catches regressions.
5226*4543ef51SXin LI   const int max_slowdown = 15;
5227*4543ef51SXin LI   char aaaaaa[4096];
5228*4543ef51SXin LI   const int fillsize = (int)sizeof(aaaaaa);
5229*4543ef51SXin LI   const int fillcount = 100;
5230*4543ef51SXin LI 
5231*4543ef51SXin LI   memset(aaaaaa, 'a', fillsize);
5232*4543ef51SXin LI 
5233*4543ef51SXin LI   if (! g_reparseDeferralEnabledDefault) {
5234*4543ef51SXin LI     return; // heuristic is disabled; we would get O(n^2) and fail.
5235*4543ef51SXin LI   }
5236*4543ef51SXin LI #if ! defined(__linux__)
5237*4543ef51SXin LI   if (CLOCKS_PER_SEC < 100000) {
5238*4543ef51SXin LI     // Skip this test if clock() doesn't have reasonably good resolution.
5239*4543ef51SXin LI     // This workaround is primarily targeting Windows and FreeBSD, since
5240*4543ef51SXin LI     // XSI requires the value to be 1.000.000 (10x the condition here), and
5241*4543ef51SXin LI     // we want to be very sure that at least one platform in CI can catch
5242*4543ef51SXin LI     // regressions (through a failing test).
5243*4543ef51SXin LI     return;
5244*4543ef51SXin LI   }
5245*4543ef51SXin LI #endif
5246*4543ef51SXin LI 
5247*4543ef51SXin LI   clock_t baseline = 0;
5248*4543ef51SXin LI   for (int i = 0; i < num_cases; ++i) {
5249*4543ef51SXin LI     XML_Parser parser = XML_ParserCreate(NULL);
5250*4543ef51SXin LI     assert_true(parser != NULL);
5251*4543ef51SXin LI     enum XML_Status status;
5252*4543ef51SXin LI     set_subtest("max_slowdown=%d text=\"%saaaaaa%s\"", max_slowdown,
5253*4543ef51SXin LI                 text[i].pre, text[i].post);
5254*4543ef51SXin LI     const clock_t start = clock();
5255*4543ef51SXin LI 
5256*4543ef51SXin LI     // parse the start text
5257*4543ef51SXin LI     status = _XML_Parse_SINGLE_BYTES(parser, text[i].pre,
5258*4543ef51SXin LI                                      (int)strlen(text[i].pre), XML_FALSE);
5259*4543ef51SXin LI     if (status != XML_STATUS_OK) {
5260*4543ef51SXin LI       xml_failure(parser);
5261*4543ef51SXin LI     }
5262*4543ef51SXin LI     // parse lots of 'a', failing the test early if it takes too long
5263*4543ef51SXin LI     for (int f = 0; f < fillcount; ++f) {
5264*4543ef51SXin LI       status = _XML_Parse_SINGLE_BYTES(parser, aaaaaa, fillsize, XML_FALSE);
5265*4543ef51SXin LI       if (status != XML_STATUS_OK) {
5266*4543ef51SXin LI         xml_failure(parser);
5267*4543ef51SXin LI       }
5268*4543ef51SXin LI       // i == 0 means we're still calculating the baseline value
5269*4543ef51SXin LI       if (i > 0) {
5270*4543ef51SXin LI         const clock_t now = clock();
5271*4543ef51SXin LI         const clock_t clocks_so_far = now - start;
5272*4543ef51SXin LI         const int slowdown = clocks_so_far / baseline;
5273*4543ef51SXin LI         if (slowdown >= max_slowdown) {
5274*4543ef51SXin LI           fprintf(
5275*4543ef51SXin LI               stderr,
5276*4543ef51SXin LI               "fill#%d: clocks_so_far=%d baseline=%d slowdown=%d max_slowdown=%d\n",
5277*4543ef51SXin LI               f, (int)clocks_so_far, (int)baseline, slowdown, max_slowdown);
5278*4543ef51SXin LI           fail(too_slow_failure_message);
5279*4543ef51SXin LI         }
5280*4543ef51SXin LI       }
5281*4543ef51SXin LI     }
5282*4543ef51SXin LI     // parse the end text
5283*4543ef51SXin LI     status = _XML_Parse_SINGLE_BYTES(parser, text[i].post,
5284*4543ef51SXin LI                                      (int)strlen(text[i].post), XML_TRUE);
5285*4543ef51SXin LI     if (status != XML_STATUS_OK) {
5286*4543ef51SXin LI       xml_failure(parser);
5287*4543ef51SXin LI     }
5288*4543ef51SXin LI 
5289*4543ef51SXin LI     // how long did it take in total?
5290*4543ef51SXin LI     const clock_t end = clock();
5291*4543ef51SXin LI     const clock_t taken = end - start;
5292*4543ef51SXin LI     if (i == 0) {
5293*4543ef51SXin LI       assert_true(taken > 0); // just to make sure we don't div-by-0 later
5294*4543ef51SXin LI       baseline = taken;
5295*4543ef51SXin LI     }
5296*4543ef51SXin LI     const int slowdown = taken / baseline;
5297*4543ef51SXin LI     if (slowdown >= max_slowdown) {
5298*4543ef51SXin LI       fprintf(stderr, "taken=%d baseline=%d slowdown=%d max_slowdown=%d\n",
5299*4543ef51SXin LI               (int)taken, (int)baseline, slowdown, max_slowdown);
5300*4543ef51SXin LI       fail(too_slow_failure_message);
5301*4543ef51SXin LI     }
5302*4543ef51SXin LI 
5303*4543ef51SXin LI     XML_ParserFree(parser);
5304*4543ef51SXin LI   }
5305*4543ef51SXin LI }
5306*4543ef51SXin LI END_TEST
5307*4543ef51SXin LI 
START_TEST(test_set_reparse_deferral)5308*4543ef51SXin LI START_TEST(test_set_reparse_deferral) {
5309*4543ef51SXin LI   const char *const pre = "<d>";
5310*4543ef51SXin LI   const char *const start = "<x attr='";
5311*4543ef51SXin LI   const char *const end = "'></x>";
5312*4543ef51SXin LI   char eeeeee[100];
5313*4543ef51SXin LI   const int fillsize = (int)sizeof(eeeeee);
5314*4543ef51SXin LI   memset(eeeeee, 'e', fillsize);
5315*4543ef51SXin LI 
5316*4543ef51SXin LI   for (int enabled = 0; enabled <= 1; enabled += 1) {
5317*4543ef51SXin LI     set_subtest("deferral=%d", enabled);
5318*4543ef51SXin LI 
5319*4543ef51SXin LI     XML_Parser parser = XML_ParserCreate(NULL);
5320*4543ef51SXin LI     assert_true(parser != NULL);
5321*4543ef51SXin LI     assert_true(XML_SetReparseDeferralEnabled(parser, enabled));
5322*4543ef51SXin LI     // pre-grow the buffer to avoid reparsing due to almost-fullness
5323*4543ef51SXin LI     assert_true(XML_GetBuffer(parser, fillsize * 10103) != NULL);
5324*4543ef51SXin LI 
5325*4543ef51SXin LI     CharData storage;
5326*4543ef51SXin LI     CharData_Init(&storage);
5327*4543ef51SXin LI     XML_SetUserData(parser, &storage);
5328*4543ef51SXin LI     XML_SetStartElementHandler(parser, start_element_event_handler);
5329*4543ef51SXin LI 
5330*4543ef51SXin LI     enum XML_Status status;
5331*4543ef51SXin LI     // parse the start text
5332*4543ef51SXin LI     status = XML_Parse(parser, pre, (int)strlen(pre), XML_FALSE);
5333*4543ef51SXin LI     if (status != XML_STATUS_OK) {
5334*4543ef51SXin LI       xml_failure(parser);
5335*4543ef51SXin LI     }
5336*4543ef51SXin LI     CharData_CheckXMLChars(&storage, XCS("d")); // first element should be done
5337*4543ef51SXin LI 
5338*4543ef51SXin LI     // ..and the start of the token
5339*4543ef51SXin LI     status = XML_Parse(parser, start, (int)strlen(start), XML_FALSE);
5340*4543ef51SXin LI     if (status != XML_STATUS_OK) {
5341*4543ef51SXin LI       xml_failure(parser);
5342*4543ef51SXin LI     }
5343*4543ef51SXin LI     CharData_CheckXMLChars(&storage, XCS("d")); // still just the first one
5344*4543ef51SXin LI 
5345*4543ef51SXin LI     // try to parse lots of 'e', but the token isn't finished
5346*4543ef51SXin LI     for (int c = 0; c < 100; ++c) {
5347*4543ef51SXin LI       status = XML_Parse(parser, eeeeee, fillsize, XML_FALSE);
5348*4543ef51SXin LI       if (status != XML_STATUS_OK) {
5349*4543ef51SXin LI         xml_failure(parser);
5350*4543ef51SXin LI       }
5351*4543ef51SXin LI     }
5352*4543ef51SXin LI     CharData_CheckXMLChars(&storage, XCS("d")); // *still* just the first one
5353*4543ef51SXin LI 
5354*4543ef51SXin LI     // end the <x> token.
5355*4543ef51SXin LI     status = XML_Parse(parser, end, (int)strlen(end), XML_FALSE);
5356*4543ef51SXin LI     if (status != XML_STATUS_OK) {
5357*4543ef51SXin LI       xml_failure(parser);
5358*4543ef51SXin LI     }
5359*4543ef51SXin LI 
5360*4543ef51SXin LI     if (enabled) {
5361*4543ef51SXin LI       // In general, we may need to push more data to trigger a reparse attempt,
5362*4543ef51SXin LI       // but in this test, the data is constructed to always require it.
5363*4543ef51SXin LI       CharData_CheckXMLChars(&storage, XCS("d")); // or the test is incorrect
5364*4543ef51SXin LI       // 2x the token length should suffice; the +1 covers the start and end.
5365*4543ef51SXin LI       for (int c = 0; c < 101; ++c) {
5366*4543ef51SXin LI         status = XML_Parse(parser, eeeeee, fillsize, XML_FALSE);
5367*4543ef51SXin LI         if (status != XML_STATUS_OK) {
5368*4543ef51SXin LI           xml_failure(parser);
5369*4543ef51SXin LI         }
5370*4543ef51SXin LI       }
5371*4543ef51SXin LI     }
5372*4543ef51SXin LI     CharData_CheckXMLChars(&storage, XCS("dx")); // the <x> should be done
5373*4543ef51SXin LI 
5374*4543ef51SXin LI     XML_ParserFree(parser);
5375*4543ef51SXin LI   }
5376*4543ef51SXin LI }
5377*4543ef51SXin LI END_TEST
5378*4543ef51SXin LI 
5379*4543ef51SXin LI struct element_decl_data {
5380*4543ef51SXin LI   XML_Parser parser;
5381*4543ef51SXin LI   int count;
5382*4543ef51SXin LI };
5383*4543ef51SXin LI 
5384*4543ef51SXin LI static void
element_decl_counter(void * userData,const XML_Char * name,XML_Content * model)5385*4543ef51SXin LI element_decl_counter(void *userData, const XML_Char *name, XML_Content *model) {
5386*4543ef51SXin LI   UNUSED_P(name);
5387*4543ef51SXin LI   struct element_decl_data *testdata = (struct element_decl_data *)userData;
5388*4543ef51SXin LI   testdata->count += 1;
5389*4543ef51SXin LI   XML_FreeContentModel(testdata->parser, model);
5390*4543ef51SXin LI }
5391*4543ef51SXin LI 
5392*4543ef51SXin LI static int
external_inherited_parser(XML_Parser p,const XML_Char * context,const XML_Char * base,const XML_Char * systemId,const XML_Char * publicId)5393*4543ef51SXin LI external_inherited_parser(XML_Parser p, const XML_Char *context,
5394*4543ef51SXin LI                           const XML_Char *base, const XML_Char *systemId,
5395*4543ef51SXin LI                           const XML_Char *publicId) {
5396*4543ef51SXin LI   UNUSED_P(base);
5397*4543ef51SXin LI   UNUSED_P(systemId);
5398*4543ef51SXin LI   UNUSED_P(publicId);
5399*4543ef51SXin LI   const char *const pre = "<!ELEMENT document ANY>\n";
5400*4543ef51SXin LI   const char *const start = "<!ELEMENT ";
5401*4543ef51SXin LI   const char *const end = " ANY>\n";
5402*4543ef51SXin LI   const char *const post = "<!ELEMENT xyz ANY>\n";
5403*4543ef51SXin LI   const int enabled = *(int *)XML_GetUserData(p);
5404*4543ef51SXin LI   char eeeeee[100];
5405*4543ef51SXin LI   char spaces[100];
5406*4543ef51SXin LI   const int fillsize = (int)sizeof(eeeeee);
5407*4543ef51SXin LI   assert_true(fillsize == (int)sizeof(spaces));
5408*4543ef51SXin LI   memset(eeeeee, 'e', fillsize);
5409*4543ef51SXin LI   memset(spaces, ' ', fillsize);
5410*4543ef51SXin LI 
5411*4543ef51SXin LI   XML_Parser parser = XML_ExternalEntityParserCreate(p, context, NULL);
5412*4543ef51SXin LI   assert_true(parser != NULL);
5413*4543ef51SXin LI   // pre-grow the buffer to avoid reparsing due to almost-fullness
5414*4543ef51SXin LI   assert_true(XML_GetBuffer(parser, fillsize * 10103) != NULL);
5415*4543ef51SXin LI 
5416*4543ef51SXin LI   struct element_decl_data testdata;
5417*4543ef51SXin LI   testdata.parser = parser;
5418*4543ef51SXin LI   testdata.count = 0;
5419*4543ef51SXin LI   XML_SetUserData(parser, &testdata);
5420*4543ef51SXin LI   XML_SetElementDeclHandler(parser, element_decl_counter);
5421*4543ef51SXin LI 
5422*4543ef51SXin LI   enum XML_Status status;
5423*4543ef51SXin LI   // parse the initial text
5424*4543ef51SXin LI   status = XML_Parse(parser, pre, (int)strlen(pre), XML_FALSE);
5425*4543ef51SXin LI   if (status != XML_STATUS_OK) {
5426*4543ef51SXin LI     xml_failure(parser);
5427*4543ef51SXin LI   }
5428*4543ef51SXin LI   assert_true(testdata.count == 1); // first element should be done
5429*4543ef51SXin LI 
5430*4543ef51SXin LI   // ..and the start of the big token
5431*4543ef51SXin LI   status = XML_Parse(parser, start, (int)strlen(start), XML_FALSE);
5432*4543ef51SXin LI   if (status != XML_STATUS_OK) {
5433*4543ef51SXin LI     xml_failure(parser);
5434*4543ef51SXin LI   }
5435*4543ef51SXin LI   assert_true(testdata.count == 1); // still just the first one
5436*4543ef51SXin LI 
5437*4543ef51SXin LI   // try to parse lots of 'e', but the token isn't finished
5438*4543ef51SXin LI   for (int c = 0; c < 100; ++c) {
5439*4543ef51SXin LI     status = XML_Parse(parser, eeeeee, fillsize, XML_FALSE);
5440*4543ef51SXin LI     if (status != XML_STATUS_OK) {
5441*4543ef51SXin LI       xml_failure(parser);
5442*4543ef51SXin LI     }
5443*4543ef51SXin LI   }
5444*4543ef51SXin LI   assert_true(testdata.count == 1); // *still* just the first one
5445*4543ef51SXin LI 
5446*4543ef51SXin LI   // end the big token.
5447*4543ef51SXin LI   status = XML_Parse(parser, end, (int)strlen(end), XML_FALSE);
5448*4543ef51SXin LI   if (status != XML_STATUS_OK) {
5449*4543ef51SXin LI     xml_failure(parser);
5450*4543ef51SXin LI   }
5451*4543ef51SXin LI 
5452*4543ef51SXin LI   if (enabled) {
5453*4543ef51SXin LI     // In general, we may need to push more data to trigger a reparse attempt,
5454*4543ef51SXin LI     // but in this test, the data is constructed to always require it.
5455*4543ef51SXin LI     assert_true(testdata.count == 1); // or the test is incorrect
5456*4543ef51SXin LI     // 2x the token length should suffice; the +1 covers the start and end.
5457*4543ef51SXin LI     for (int c = 0; c < 101; ++c) {
5458*4543ef51SXin LI       status = XML_Parse(parser, spaces, fillsize, XML_FALSE);
5459*4543ef51SXin LI       if (status != XML_STATUS_OK) {
5460*4543ef51SXin LI         xml_failure(parser);
5461*4543ef51SXin LI       }
5462*4543ef51SXin LI     }
5463*4543ef51SXin LI   }
5464*4543ef51SXin LI   assert_true(testdata.count == 2); // the big token should be done
5465*4543ef51SXin LI 
5466*4543ef51SXin LI   // parse the final text
5467*4543ef51SXin LI   status = XML_Parse(parser, post, (int)strlen(post), XML_TRUE);
5468*4543ef51SXin LI   if (status != XML_STATUS_OK) {
5469*4543ef51SXin LI     xml_failure(parser);
5470*4543ef51SXin LI   }
5471*4543ef51SXin LI   assert_true(testdata.count == 3); // after isFinal=XML_TRUE, all must be done
5472*4543ef51SXin LI 
5473*4543ef51SXin LI   XML_ParserFree(parser);
5474*4543ef51SXin LI   return XML_STATUS_OK;
5475*4543ef51SXin LI }
5476*4543ef51SXin LI 
START_TEST(test_reparse_deferral_is_inherited)5477*4543ef51SXin LI START_TEST(test_reparse_deferral_is_inherited) {
5478*4543ef51SXin LI   const char *const text
5479*4543ef51SXin LI       = "<!DOCTYPE document SYSTEM 'something.ext'><document/>";
5480*4543ef51SXin LI   for (int enabled = 0; enabled <= 1; ++enabled) {
5481*4543ef51SXin LI     set_subtest("deferral=%d", enabled);
5482*4543ef51SXin LI 
5483*4543ef51SXin LI     XML_Parser parser = XML_ParserCreate(NULL);
5484*4543ef51SXin LI     assert_true(parser != NULL);
5485*4543ef51SXin LI     XML_SetUserData(parser, (void *)&enabled);
5486*4543ef51SXin LI     XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
5487*4543ef51SXin LI     // this handler creates a sub-parser and checks that its deferral behavior
5488*4543ef51SXin LI     // is what we expected, based on the value of `enabled` (in userdata).
5489*4543ef51SXin LI     XML_SetExternalEntityRefHandler(parser, external_inherited_parser);
5490*4543ef51SXin LI     assert_true(XML_SetReparseDeferralEnabled(parser, enabled));
5491*4543ef51SXin LI     if (XML_Parse(parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_OK)
5492*4543ef51SXin LI       xml_failure(parser);
5493*4543ef51SXin LI 
5494*4543ef51SXin LI     XML_ParserFree(parser);
5495*4543ef51SXin LI   }
5496*4543ef51SXin LI }
5497*4543ef51SXin LI END_TEST
5498*4543ef51SXin LI 
START_TEST(test_set_reparse_deferral_on_null_parser)5499*4543ef51SXin LI START_TEST(test_set_reparse_deferral_on_null_parser) {
5500*4543ef51SXin LI   assert_true(XML_SetReparseDeferralEnabled(NULL, 0) == XML_FALSE);
5501*4543ef51SXin LI   assert_true(XML_SetReparseDeferralEnabled(NULL, 1) == XML_FALSE);
5502*4543ef51SXin LI   assert_true(XML_SetReparseDeferralEnabled(NULL, 10) == XML_FALSE);
5503*4543ef51SXin LI   assert_true(XML_SetReparseDeferralEnabled(NULL, 100) == XML_FALSE);
5504*4543ef51SXin LI   assert_true(XML_SetReparseDeferralEnabled(NULL, (XML_Bool)INT_MIN)
5505*4543ef51SXin LI               == XML_FALSE);
5506*4543ef51SXin LI   assert_true(XML_SetReparseDeferralEnabled(NULL, (XML_Bool)INT_MAX)
5507*4543ef51SXin LI               == XML_FALSE);
5508*4543ef51SXin LI }
5509*4543ef51SXin LI END_TEST
5510*4543ef51SXin LI 
START_TEST(test_set_reparse_deferral_on_the_fly)5511*4543ef51SXin LI START_TEST(test_set_reparse_deferral_on_the_fly) {
5512*4543ef51SXin LI   const char *const pre = "<d><x attr='";
5513*4543ef51SXin LI   const char *const end = "'></x>";
5514*4543ef51SXin LI   char iiiiii[100];
5515*4543ef51SXin LI   const int fillsize = (int)sizeof(iiiiii);
5516*4543ef51SXin LI   memset(iiiiii, 'i', fillsize);
5517*4543ef51SXin LI 
5518*4543ef51SXin LI   XML_Parser parser = XML_ParserCreate(NULL);
5519*4543ef51SXin LI   assert_true(parser != NULL);
5520*4543ef51SXin LI   assert_true(XML_SetReparseDeferralEnabled(parser, XML_TRUE));
5521*4543ef51SXin LI 
5522*4543ef51SXin LI   CharData storage;
5523*4543ef51SXin LI   CharData_Init(&storage);
5524*4543ef51SXin LI   XML_SetUserData(parser, &storage);
5525*4543ef51SXin LI   XML_SetStartElementHandler(parser, start_element_event_handler);
5526*4543ef51SXin LI 
5527*4543ef51SXin LI   enum XML_Status status;
5528*4543ef51SXin LI   // parse the start text
5529*4543ef51SXin LI   status = XML_Parse(parser, pre, (int)strlen(pre), XML_FALSE);
5530*4543ef51SXin LI   if (status != XML_STATUS_OK) {
5531*4543ef51SXin LI     xml_failure(parser);
5532*4543ef51SXin LI   }
5533*4543ef51SXin LI   CharData_CheckXMLChars(&storage, XCS("d")); // first element should be done
5534*4543ef51SXin LI 
5535*4543ef51SXin LI   // try to parse some 'i', but the token isn't finished
5536*4543ef51SXin LI   status = XML_Parse(parser, iiiiii, fillsize, XML_FALSE);
5537*4543ef51SXin LI   if (status != XML_STATUS_OK) {
5538*4543ef51SXin LI     xml_failure(parser);
5539*4543ef51SXin LI   }
5540*4543ef51SXin LI   CharData_CheckXMLChars(&storage, XCS("d")); // *still* just the first one
5541*4543ef51SXin LI 
5542*4543ef51SXin LI   // end the <x> token.
5543*4543ef51SXin LI   status = XML_Parse(parser, end, (int)strlen(end), XML_FALSE);
5544*4543ef51SXin LI   if (status != XML_STATUS_OK) {
5545*4543ef51SXin LI     xml_failure(parser);
5546*4543ef51SXin LI   }
5547*4543ef51SXin LI   CharData_CheckXMLChars(&storage, XCS("d")); // not yet.
5548*4543ef51SXin LI 
5549*4543ef51SXin LI   // now change the heuristic setting and add *no* data
5550*4543ef51SXin LI   assert_true(XML_SetReparseDeferralEnabled(parser, XML_FALSE));
5551*4543ef51SXin LI   // we avoid isFinal=XML_TRUE, because that would force-bypass the heuristic.
5552*4543ef51SXin LI   status = XML_Parse(parser, "", 0, XML_FALSE);
5553*4543ef51SXin LI   if (status != XML_STATUS_OK) {
5554*4543ef51SXin LI     xml_failure(parser);
5555*4543ef51SXin LI   }
5556*4543ef51SXin LI   CharData_CheckXMLChars(&storage, XCS("dx"));
5557*4543ef51SXin LI 
5558*4543ef51SXin LI   XML_ParserFree(parser);
5559*4543ef51SXin LI }
5560*4543ef51SXin LI END_TEST
5561*4543ef51SXin LI 
START_TEST(test_set_bad_reparse_option)5562*4543ef51SXin LI START_TEST(test_set_bad_reparse_option) {
5563*4543ef51SXin LI   XML_Parser parser = XML_ParserCreate(NULL);
5564*4543ef51SXin LI   assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 2));
5565*4543ef51SXin LI   assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 3));
5566*4543ef51SXin LI   assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 99));
5567*4543ef51SXin LI   assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 127));
5568*4543ef51SXin LI   assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 128));
5569*4543ef51SXin LI   assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 129));
5570*4543ef51SXin LI   assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 255));
5571*4543ef51SXin LI   assert_true(XML_TRUE == XML_SetReparseDeferralEnabled(parser, 0));
5572*4543ef51SXin LI   assert_true(XML_TRUE == XML_SetReparseDeferralEnabled(parser, 1));
5573*4543ef51SXin LI   XML_ParserFree(parser);
5574*4543ef51SXin LI }
5575*4543ef51SXin LI END_TEST
5576*4543ef51SXin LI 
/* Allocation statistics kept by counting_realloc() and counting_malloc().
   Tests reset these to 0 before the section they want to measure. */
static size_t g_totalAlloc = 0;   // sum of all requested sizes
static size_t g_biggestAlloc = 0; // largest single requested size

/* realloc() wrapper that records the requested size before delegating.

   Adds `size` to g_totalAlloc and raises g_biggestAlloc if `size` exceeds
   it.  The request is counted whether or not realloc() succeeds.  Returns
   whatever realloc(ptr, size) returns. */
static void *
counting_realloc(void *ptr, size_t size) {
  g_totalAlloc += size;
  if (size > g_biggestAlloc) {
    g_biggestAlloc = size;
  }
  return realloc(ptr, size);
}

/* malloc() replacement with the same bookkeeping as counting_realloc();
   implemented as a reallocation of NULL. */
static void *
counting_malloc(size_t size) {
  return counting_realloc(NULL, size);
}
5593*4543ef51SXin LI 
START_TEST(test_bypass_heuristic_when_close_to_bufsize)5594*4543ef51SXin LI START_TEST(test_bypass_heuristic_when_close_to_bufsize) {
5595*4543ef51SXin LI   if (g_chunkSize != 0) {
5596*4543ef51SXin LI     // this test does not use SINGLE_BYTES, because it depends on very precise
5597*4543ef51SXin LI     // buffer fills.
5598*4543ef51SXin LI     return;
5599*4543ef51SXin LI   }
5600*4543ef51SXin LI   if (! g_reparseDeferralEnabledDefault) {
5601*4543ef51SXin LI     return; // this test is irrelevant when the deferral heuristic is disabled.
5602*4543ef51SXin LI   }
5603*4543ef51SXin LI 
5604*4543ef51SXin LI   const int document_length = 65536;
5605*4543ef51SXin LI   char *const document = (char *)malloc(document_length);
5606*4543ef51SXin LI 
5607*4543ef51SXin LI   const XML_Memory_Handling_Suite memfuncs = {
5608*4543ef51SXin LI       counting_malloc,
5609*4543ef51SXin LI       counting_realloc,
5610*4543ef51SXin LI       free,
5611*4543ef51SXin LI   };
5612*4543ef51SXin LI 
5613*4543ef51SXin LI   const int leading_list[] = {0, 3, 61, 96, 400, 401, 4000, 4010, 4099, -1};
5614*4543ef51SXin LI   const int bigtoken_list[] = {3000, 4000, 4001, 4096, 4099, 5000, 20000, -1};
5615*4543ef51SXin LI   const int fillsize_list[] = {131, 256, 399, 400, 401, 1025, 4099, 4321, -1};
5616*4543ef51SXin LI 
5617*4543ef51SXin LI   for (const int *leading = leading_list; *leading >= 0; leading++) {
5618*4543ef51SXin LI     for (const int *bigtoken = bigtoken_list; *bigtoken >= 0; bigtoken++) {
5619*4543ef51SXin LI       for (const int *fillsize = fillsize_list; *fillsize >= 0; fillsize++) {
5620*4543ef51SXin LI         set_subtest("leading=%d bigtoken=%d fillsize=%d", *leading, *bigtoken,
5621*4543ef51SXin LI                     *fillsize);
5622*4543ef51SXin LI         // start by checking that the test looks reasonably valid
5623*4543ef51SXin LI         assert_true(*leading + *bigtoken <= document_length);
5624*4543ef51SXin LI 
5625*4543ef51SXin LI         // put 'x' everywhere; some will be overwritten by elements.
5626*4543ef51SXin LI         memset(document, 'x', document_length);
5627*4543ef51SXin LI         // maybe add an initial tag
5628*4543ef51SXin LI         if (*leading) {
5629*4543ef51SXin LI           assert_true(*leading >= 3); // or the test case is invalid
5630*4543ef51SXin LI           memcpy(document, "<a>", 3);
5631*4543ef51SXin LI         }
5632*4543ef51SXin LI         // add the large token
5633*4543ef51SXin LI         document[*leading + 0] = '<';
5634*4543ef51SXin LI         document[*leading + 1] = 'b';
5635*4543ef51SXin LI         memset(&document[*leading + 2], ' ', *bigtoken - 2); // a spacy token
5636*4543ef51SXin LI         document[*leading + *bigtoken - 1] = '>';
5637*4543ef51SXin LI 
5638*4543ef51SXin LI         // 1 for 'b', plus 1 or 0 depending on the presence of 'a'
5639*4543ef51SXin LI         const int expected_elem_total = 1 + (*leading ? 1 : 0);
5640*4543ef51SXin LI 
5641*4543ef51SXin LI         XML_Parser parser = XML_ParserCreate_MM(NULL, &memfuncs, NULL);
5642*4543ef51SXin LI         assert_true(parser != NULL);
5643*4543ef51SXin LI 
5644*4543ef51SXin LI         CharData storage;
5645*4543ef51SXin LI         CharData_Init(&storage);
5646*4543ef51SXin LI         XML_SetUserData(parser, &storage);
5647*4543ef51SXin LI         XML_SetStartElementHandler(parser, start_element_event_handler);
5648*4543ef51SXin LI 
5649*4543ef51SXin LI         g_biggestAlloc = 0;
5650*4543ef51SXin LI         g_totalAlloc = 0;
5651*4543ef51SXin LI         int offset = 0;
5652*4543ef51SXin LI         // fill data until the big token is covered (but not necessarily parsed)
5653*4543ef51SXin LI         while (offset < *leading + *bigtoken) {
5654*4543ef51SXin LI           assert_true(offset + *fillsize <= document_length);
5655*4543ef51SXin LI           const enum XML_Status status
5656*4543ef51SXin LI               = XML_Parse(parser, &document[offset], *fillsize, XML_FALSE);
5657*4543ef51SXin LI           if (status != XML_STATUS_OK) {
5658*4543ef51SXin LI             xml_failure(parser);
5659*4543ef51SXin LI           }
5660*4543ef51SXin LI           offset += *fillsize;
5661*4543ef51SXin LI         }
5662*4543ef51SXin LI         // Now, check that we've had a buffer allocation that could fit the
5663*4543ef51SXin LI         // context bytes and our big token. In order to detect a special case,
5664*4543ef51SXin LI         // we need to know how many bytes of our big token were included in the
5665*4543ef51SXin LI         // first push that contained _any_ bytes of the big token:
5666*4543ef51SXin LI         const int bigtok_first_chunk_bytes = *fillsize - (*leading % *fillsize);
5667*4543ef51SXin LI         if (bigtok_first_chunk_bytes >= *bigtoken && XML_CONTEXT_BYTES == 0) {
5668*4543ef51SXin LI           // Special case: we aren't saving any context, and the whole big token
5669*4543ef51SXin LI           // was covered by a single fill, so Expat may have parsed directly
5670*4543ef51SXin LI           // from our input pointer, without allocating an internal buffer.
5671*4543ef51SXin LI         } else if (*leading < XML_CONTEXT_BYTES) {
5672*4543ef51SXin LI           assert_true(g_biggestAlloc >= *leading + (size_t)*bigtoken);
5673*4543ef51SXin LI         } else {
5674*4543ef51SXin LI           assert_true(g_biggestAlloc >= XML_CONTEXT_BYTES + (size_t)*bigtoken);
5675*4543ef51SXin LI         }
5676*4543ef51SXin LI         // fill data until the big token is actually parsed
5677*4543ef51SXin LI         while (storage.count < expected_elem_total) {
5678*4543ef51SXin LI           const size_t alloc_before = g_totalAlloc;
5679*4543ef51SXin LI           assert_true(offset + *fillsize <= document_length);
5680*4543ef51SXin LI           const enum XML_Status status
5681*4543ef51SXin LI               = XML_Parse(parser, &document[offset], *fillsize, XML_FALSE);
5682*4543ef51SXin LI           if (status != XML_STATUS_OK) {
5683*4543ef51SXin LI             xml_failure(parser);
5684*4543ef51SXin LI           }
5685*4543ef51SXin LI           offset += *fillsize;
5686*4543ef51SXin LI           // since all the bytes of the big token are already in the buffer,
5687*4543ef51SXin LI           // the bufsize ceiling should make us finish its parsing without any
5688*4543ef51SXin LI           // further buffer allocations. We assume that there will be no other
5689*4543ef51SXin LI           // large allocations in this test.
5690*4543ef51SXin LI           assert_true(g_totalAlloc - alloc_before < 4096);
5691*4543ef51SXin LI         }
5692*4543ef51SXin LI         // test-the-test: was our alloc even called?
5693*4543ef51SXin LI         assert_true(g_totalAlloc > 0);
5694*4543ef51SXin LI         // test-the-test: there shouldn't be any extra start elements
5695*4543ef51SXin LI         assert_true(storage.count == expected_elem_total);
5696*4543ef51SXin LI 
5697*4543ef51SXin LI         XML_ParserFree(parser);
5698*4543ef51SXin LI       }
5699*4543ef51SXin LI     }
5700*4543ef51SXin LI   }
5701*4543ef51SXin LI   free(document);
5702*4543ef51SXin LI }
5703*4543ef51SXin LI END_TEST
5704*4543ef51SXin LI 
START_TEST(test_varying_buffer_fills)5705*4543ef51SXin LI START_TEST(test_varying_buffer_fills) {
5706*4543ef51SXin LI   const int KiB = 1024;
5707*4543ef51SXin LI   const int MiB = 1024 * KiB;
5708*4543ef51SXin LI   const int document_length = 16 * MiB;
5709*4543ef51SXin LI   const int big = 7654321; // arbitrarily chosen between 4 and 8 MiB
5710*4543ef51SXin LI 
5711*4543ef51SXin LI   if (g_chunkSize != 0) {
5712*4543ef51SXin LI     return; // this test is slow, and doesn't use _XML_Parse_SINGLE_BYTES().
5713*4543ef51SXin LI   }
5714*4543ef51SXin LI 
5715*4543ef51SXin LI   char *const document = (char *)malloc(document_length);
5716*4543ef51SXin LI   assert_true(document != NULL);
5717*4543ef51SXin LI   memset(document, 'x', document_length);
5718*4543ef51SXin LI   document[0] = '<';
5719*4543ef51SXin LI   document[1] = 't';
5720*4543ef51SXin LI   memset(&document[2], ' ', big - 2); // a very spacy token
5721*4543ef51SXin LI   document[big - 1] = '>';
5722*4543ef51SXin LI 
5723*4543ef51SXin LI   // Each testcase is a list of buffer fill sizes, terminated by a value < 0.
5724*4543ef51SXin LI   // When reparse deferral is enabled, the final (negated) value is the expected
5725*4543ef51SXin LI   // maximum number of bytes scanned in parse attempts.
5726*4543ef51SXin LI   const int testcases[][30] = {
5727*4543ef51SXin LI       {8 * MiB, -8 * MiB},
5728*4543ef51SXin LI       {4 * MiB, 4 * MiB, -12 * MiB}, // try at 4MB, then 8MB = 12 MB total
5729*4543ef51SXin LI       // zero-size fills shouldn't trigger the bypass
5730*4543ef51SXin LI       {4 * MiB, 0, 4 * MiB, -12 * MiB},
5731*4543ef51SXin LI       {4 * MiB, 0, 0, 4 * MiB, -12 * MiB},
5732*4543ef51SXin LI       {4 * MiB, 0, 1 * MiB, 0, 3 * MiB, -12 * MiB},
5733*4543ef51SXin LI       // try to hit the buffer ceiling only once (at the end)
5734*4543ef51SXin LI       {4 * MiB, 2 * MiB, 1 * MiB, 512 * KiB, 256 * KiB, 256 * KiB, -12 * MiB},
5735*4543ef51SXin LI       // try to hit the same buffer ceiling multiple times
5736*4543ef51SXin LI       {4 * MiB + 1, 2 * MiB, 1 * MiB, 512 * KiB, -25 * MiB},
5737*4543ef51SXin LI 
5738*4543ef51SXin LI       // try to hit every ceiling, by always landing 1K shy of the buffer size
5739*4543ef51SXin LI       {1 * KiB, 2 * KiB, 4 * KiB, 8 * KiB, 16 * KiB, 32 * KiB, 64 * KiB,
5740*4543ef51SXin LI        128 * KiB, 256 * KiB, 512 * KiB, 1 * MiB, 2 * MiB, 4 * MiB, -16 * MiB},
5741*4543ef51SXin LI 
5742*4543ef51SXin LI       // try to avoid every ceiling, by always landing 1B past the buffer size
5743*4543ef51SXin LI       // the normal 2x heuristic threshold still forces parse attempts.
5744*4543ef51SXin LI       {2 * KiB + 1,          // will attempt 2KiB + 1 ==> total 2KiB + 1
5745*4543ef51SXin LI        2 * KiB, 4 * KiB,     // will attempt 8KiB + 1 ==> total 10KiB + 2
5746*4543ef51SXin LI        8 * KiB, 16 * KiB,    // will attempt 32KiB + 1 ==> total 42KiB + 3
5747*4543ef51SXin LI        32 * KiB, 64 * KiB,   // will attempt 128KiB + 1 ==> total 170KiB + 4
5748*4543ef51SXin LI        128 * KiB, 256 * KiB, // will attempt 512KiB + 1 ==> total 682KiB + 5
5749*4543ef51SXin LI        512 * KiB, 1 * MiB,   // will attempt 2MiB + 1 ==> total 2M + 682K + 6
5750*4543ef51SXin LI        2 * MiB, 4 * MiB,     // will attempt 8MiB + 1 ==> total 10M + 682K + 7
5751*4543ef51SXin LI        -(10 * MiB + 682 * KiB + 7)},
5752*4543ef51SXin LI       // try to avoid every ceiling again, except on our last fill.
5753*4543ef51SXin LI       {2 * KiB + 1,          // will attempt 2KiB + 1 ==> total 2KiB + 1
5754*4543ef51SXin LI        2 * KiB, 4 * KiB,     // will attempt 8KiB + 1 ==> total 10KiB + 2
5755*4543ef51SXin LI        8 * KiB, 16 * KiB,    // will attempt 32KiB + 1 ==> total 42KiB + 3
5756*4543ef51SXin LI        32 * KiB, 64 * KiB,   // will attempt 128KiB + 1 ==> total 170KiB + 4
5757*4543ef51SXin LI        128 * KiB, 256 * KiB, // will attempt 512KiB + 1 ==> total 682KiB + 5
5758*4543ef51SXin LI        512 * KiB, 1 * MiB,   // will attempt 2MiB + 1 ==> total 2M + 682K + 6
5759*4543ef51SXin LI        2 * MiB, 4 * MiB - 1, // will attempt 8MiB ==> total 10M + 682K + 6
5760*4543ef51SXin LI        -(10 * MiB + 682 * KiB + 6)},
5761*4543ef51SXin LI 
5762*4543ef51SXin LI       // try to hit ceilings on the way multiple times
5763*4543ef51SXin LI       {512 * KiB + 1, 256 * KiB, 128 * KiB, 128 * KiB - 1, // 1 MiB buffer
5764*4543ef51SXin LI        512 * KiB + 1, 256 * KiB, 128 * KiB, 128 * KiB - 1, // 2 MiB buffer
5765*4543ef51SXin LI        1 * MiB + 1, 512 * KiB, 256 * KiB, 256 * KiB - 1,   // 4 MiB buffer
5766*4543ef51SXin LI        2 * MiB + 1, 1 * MiB, 512 * KiB,                    // 8 MiB buffer
5767*4543ef51SXin LI        // we'll make a parse attempt at every parse call
5768*4543ef51SXin LI        -(45 * MiB + 12)},
5769*4543ef51SXin LI   };
5770*4543ef51SXin LI   const int testcount = sizeof(testcases) / sizeof(testcases[0]);
5771*4543ef51SXin LI   for (int test_i = 0; test_i < testcount; test_i++) {
5772*4543ef51SXin LI     const int *fillsize = testcases[test_i];
5773*4543ef51SXin LI     set_subtest("#%d {%d %d %d %d ...}", test_i, fillsize[0], fillsize[1],
5774*4543ef51SXin LI                 fillsize[2], fillsize[3]);
5775*4543ef51SXin LI     XML_Parser parser = XML_ParserCreate(NULL);
5776*4543ef51SXin LI     assert_true(parser != NULL);
5777*4543ef51SXin LI     g_parseAttempts = 0;
5778*4543ef51SXin LI 
5779*4543ef51SXin LI     CharData storage;
5780*4543ef51SXin LI     CharData_Init(&storage);
5781*4543ef51SXin LI     XML_SetUserData(parser, &storage);
5782*4543ef51SXin LI     XML_SetStartElementHandler(parser, start_element_event_handler);
5783*4543ef51SXin LI 
5784*4543ef51SXin LI     int worstcase_bytes = 0; // sum of (buffered bytes at each XML_Parse call)
5785*4543ef51SXin LI     int scanned_bytes = 0;   // sum of (buffered bytes at each actual parse)
5786*4543ef51SXin LI     int offset = 0;
5787*4543ef51SXin LI     while (*fillsize >= 0) {
5788*4543ef51SXin LI       assert_true(offset + *fillsize <= document_length); // or test is invalid
5789*4543ef51SXin LI       const unsigned attempts_before = g_parseAttempts;
5790*4543ef51SXin LI       const enum XML_Status status
5791*4543ef51SXin LI           = XML_Parse(parser, &document[offset], *fillsize, XML_FALSE);
5792*4543ef51SXin LI       if (status != XML_STATUS_OK) {
5793*4543ef51SXin LI         xml_failure(parser);
5794*4543ef51SXin LI       }
5795*4543ef51SXin LI       offset += *fillsize;
5796*4543ef51SXin LI       fillsize++;
5797*4543ef51SXin LI       assert_true(offset <= INT_MAX - worstcase_bytes); // avoid overflow
5798*4543ef51SXin LI       worstcase_bytes += offset; // we might've tried to parse all pending bytes
5799*4543ef51SXin LI       if (g_parseAttempts != attempts_before) {
5800*4543ef51SXin LI         assert_true(g_parseAttempts == attempts_before + 1); // max 1/XML_Parse
5801*4543ef51SXin LI         assert_true(offset <= INT_MAX - scanned_bytes);      // avoid overflow
5802*4543ef51SXin LI         scanned_bytes += offset; // we *did* try to parse all pending bytes
5803*4543ef51SXin LI       }
5804*4543ef51SXin LI     }
5805*4543ef51SXin LI     assert_true(storage.count == 1); // the big token should've been parsed
5806*4543ef51SXin LI     assert_true(scanned_bytes > 0);  // test-the-test: does our counter work?
5807*4543ef51SXin LI     if (g_reparseDeferralEnabledDefault) {
5808*4543ef51SXin LI       // heuristic is enabled; some XML_Parse calls may have deferred reparsing
5809*4543ef51SXin LI       const int max_bytes_scanned = -*fillsize;
5810*4543ef51SXin LI       if (scanned_bytes > max_bytes_scanned) {
5811*4543ef51SXin LI         fprintf(stderr,
5812*4543ef51SXin LI                 "bytes scanned in parse attempts: actual=%d limit=%d \n",
5813*4543ef51SXin LI                 scanned_bytes, max_bytes_scanned);
5814*4543ef51SXin LI         fail("too many bytes scanned in parse attempts");
5815*4543ef51SXin LI       }
5816*4543ef51SXin LI       assert_true(scanned_bytes <= worstcase_bytes);
5817*4543ef51SXin LI     } else {
5818*4543ef51SXin LI       // heuristic is disabled; every XML_Parse() will have reparsed
5819*4543ef51SXin LI       assert_true(scanned_bytes == worstcase_bytes);
5820*4543ef51SXin LI     }
5821*4543ef51SXin LI 
5822*4543ef51SXin LI     XML_ParserFree(parser);
5823*4543ef51SXin LI   }
5824*4543ef51SXin LI   free(document);
5825*4543ef51SXin LI }
5826*4543ef51SXin LI END_TEST
5827*4543ef51SXin LI 
5828*4543ef51SXin LI void
make_basic_test_case(Suite * s)5829*4543ef51SXin LI make_basic_test_case(Suite *s) {
5830*4543ef51SXin LI   TCase *tc_basic = tcase_create("basic tests");
5831*4543ef51SXin LI 
5832*4543ef51SXin LI   suite_add_tcase(s, tc_basic);
5833*4543ef51SXin LI   tcase_add_checked_fixture(tc_basic, basic_setup, basic_teardown);
5834*4543ef51SXin LI 
5835*4543ef51SXin LI   tcase_add_test(tc_basic, test_nul_byte);
5836*4543ef51SXin LI   tcase_add_test(tc_basic, test_u0000_char);
5837*4543ef51SXin LI   tcase_add_test(tc_basic, test_siphash_self);
5838*4543ef51SXin LI   tcase_add_test(tc_basic, test_siphash_spec);
5839*4543ef51SXin LI   tcase_add_test(tc_basic, test_bom_utf8);
5840*4543ef51SXin LI   tcase_add_test(tc_basic, test_bom_utf16_be);
5841*4543ef51SXin LI   tcase_add_test(tc_basic, test_bom_utf16_le);
5842*4543ef51SXin LI   tcase_add_test(tc_basic, test_nobom_utf16_le);
5843*4543ef51SXin LI   tcase_add_test(tc_basic, test_hash_collision);
5844*4543ef51SXin LI   tcase_add_test(tc_basic, test_illegal_utf8);
5845*4543ef51SXin LI   tcase_add_test(tc_basic, test_utf8_auto_align);
5846*4543ef51SXin LI   tcase_add_test(tc_basic, test_utf16);
5847*4543ef51SXin LI   tcase_add_test(tc_basic, test_utf16_le_epilog_newline);
5848*4543ef51SXin LI   tcase_add_test(tc_basic, test_not_utf16);
5849*4543ef51SXin LI   tcase_add_test(tc_basic, test_bad_encoding);
5850*4543ef51SXin LI   tcase_add_test(tc_basic, test_latin1_umlauts);
5851*4543ef51SXin LI   tcase_add_test(tc_basic, test_long_utf8_character);
5852*4543ef51SXin LI   tcase_add_test(tc_basic, test_long_latin1_attribute);
5853*4543ef51SXin LI   tcase_add_test(tc_basic, test_long_ascii_attribute);
5854*4543ef51SXin LI   /* Regression test for SF bug #491986. */
5855*4543ef51SXin LI   tcase_add_test(tc_basic, test_danish_latin1);
5856*4543ef51SXin LI   /* Regression test for SF bug #514281. */
5857*4543ef51SXin LI   tcase_add_test(tc_basic, test_french_charref_hexidecimal);
5858*4543ef51SXin LI   tcase_add_test(tc_basic, test_french_charref_decimal);
5859*4543ef51SXin LI   tcase_add_test(tc_basic, test_french_latin1);
5860*4543ef51SXin LI   tcase_add_test(tc_basic, test_french_utf8);
5861*4543ef51SXin LI   tcase_add_test(tc_basic, test_utf8_false_rejection);
5862*4543ef51SXin LI   tcase_add_test(tc_basic, test_line_number_after_parse);
5863*4543ef51SXin LI   tcase_add_test(tc_basic, test_column_number_after_parse);
5864*4543ef51SXin LI   tcase_add_test(tc_basic, test_line_and_column_numbers_inside_handlers);
5865*4543ef51SXin LI   tcase_add_test(tc_basic, test_line_number_after_error);
5866*4543ef51SXin LI   tcase_add_test(tc_basic, test_column_number_after_error);
5867*4543ef51SXin LI   tcase_add_test(tc_basic, test_really_long_lines);
5868*4543ef51SXin LI   tcase_add_test(tc_basic, test_really_long_encoded_lines);
5869*4543ef51SXin LI   tcase_add_test(tc_basic, test_end_element_events);
5870*4543ef51SXin LI   tcase_add_test(tc_basic, test_helper_is_whitespace_normalized);
5871*4543ef51SXin LI   tcase_add_test(tc_basic, test_attr_whitespace_normalization);
5872*4543ef51SXin LI   tcase_add_test(tc_basic, test_xmldecl_misplaced);
5873*4543ef51SXin LI   tcase_add_test(tc_basic, test_xmldecl_invalid);
5874*4543ef51SXin LI   tcase_add_test(tc_basic, test_xmldecl_missing_attr);
5875*4543ef51SXin LI   tcase_add_test(tc_basic, test_xmldecl_missing_value);
5876*4543ef51SXin LI   tcase_add_test__if_xml_ge(tc_basic, test_unknown_encoding_internal_entity);
5877*4543ef51SXin LI   tcase_add_test(tc_basic, test_unrecognised_encoding_internal_entity);
5878*4543ef51SXin LI   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_set_encoding);
5879*4543ef51SXin LI   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_no_handler);
5880*4543ef51SXin LI   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_set_bom);
5881*4543ef51SXin LI   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_bad_encoding);
5882*4543ef51SXin LI   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_bad_encoding_2);
5883*4543ef51SXin LI   tcase_add_test(tc_basic, test_wfc_undeclared_entity_unread_external_subset);
5884*4543ef51SXin LI   tcase_add_test(tc_basic, test_wfc_undeclared_entity_no_external_subset);
5885*4543ef51SXin LI   tcase_add_test(tc_basic, test_wfc_undeclared_entity_standalone);
5886*4543ef51SXin LI   tcase_add_test(tc_basic,
5887*4543ef51SXin LI                  test_wfc_undeclared_entity_with_external_subset_standalone);
5888*4543ef51SXin LI   tcase_add_test(tc_basic, test_entity_with_external_subset_unless_standalone);
5889*4543ef51SXin LI   tcase_add_test(tc_basic, test_wfc_undeclared_entity_with_external_subset);
5890*4543ef51SXin LI   tcase_add_test(tc_basic, test_not_standalone_handler_reject);
5891*4543ef51SXin LI   tcase_add_test(tc_basic, test_not_standalone_handler_accept);
5892*4543ef51SXin LI   tcase_add_test__if_xml_ge(tc_basic, test_wfc_no_recursive_entity_refs);
5893*4543ef51SXin LI   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_invalid_parse);
5894*4543ef51SXin LI   tcase_add_test__if_xml_ge(tc_basic, test_dtd_default_handling);
5895*4543ef51SXin LI   tcase_add_test(tc_basic, test_dtd_attr_handling);
5896*4543ef51SXin LI   tcase_add_test(tc_basic, test_empty_ns_without_namespaces);
5897*4543ef51SXin LI   tcase_add_test(tc_basic, test_ns_in_attribute_default_without_namespaces);
5898*4543ef51SXin LI   tcase_add_test(tc_basic, test_stop_parser_between_char_data_calls);
5899*4543ef51SXin LI   tcase_add_test(tc_basic, test_suspend_parser_between_char_data_calls);
5900*4543ef51SXin LI   tcase_add_test(tc_basic, test_repeated_stop_parser_between_char_data_calls);
5901*4543ef51SXin LI   tcase_add_test(tc_basic, test_good_cdata_ascii);
5902*4543ef51SXin LI   tcase_add_test(tc_basic, test_good_cdata_utf16);
5903*4543ef51SXin LI   tcase_add_test(tc_basic, test_good_cdata_utf16_le);
5904*4543ef51SXin LI   tcase_add_test(tc_basic, test_long_cdata_utf16);
5905*4543ef51SXin LI   tcase_add_test(tc_basic, test_multichar_cdata_utf16);
5906*4543ef51SXin LI   tcase_add_test(tc_basic, test_utf16_bad_surrogate_pair);
5907*4543ef51SXin LI   tcase_add_test(tc_basic, test_bad_cdata);
5908*4543ef51SXin LI   tcase_add_test(tc_basic, test_bad_cdata_utf16);
5909*4543ef51SXin LI   tcase_add_test(tc_basic, test_stop_parser_between_cdata_calls);
5910*4543ef51SXin LI   tcase_add_test(tc_basic, test_suspend_parser_between_cdata_calls);
5911*4543ef51SXin LI   tcase_add_test(tc_basic, test_memory_allocation);
5912*4543ef51SXin LI   tcase_add_test__if_xml_ge(tc_basic, test_default_current);
5913*4543ef51SXin LI   tcase_add_test(tc_basic, test_dtd_elements);
5914*4543ef51SXin LI   tcase_add_test(tc_basic, test_dtd_elements_nesting);
5915*4543ef51SXin LI   tcase_add_test__ifdef_xml_dtd(tc_basic, test_set_foreign_dtd);
5916*4543ef51SXin LI   tcase_add_test__ifdef_xml_dtd(tc_basic, test_foreign_dtd_not_standalone);
5917*4543ef51SXin LI   tcase_add_test__ifdef_xml_dtd(tc_basic, test_invalid_foreign_dtd);
5918*4543ef51SXin LI   tcase_add_test__ifdef_xml_dtd(tc_basic, test_foreign_dtd_with_doctype);
5919*4543ef51SXin LI   tcase_add_test__ifdef_xml_dtd(tc_basic,
5920*4543ef51SXin LI                                 test_foreign_dtd_without_external_subset);
5921*4543ef51SXin LI   tcase_add_test__ifdef_xml_dtd(tc_basic, test_empty_foreign_dtd);
5922*4543ef51SXin LI   tcase_add_test(tc_basic, test_set_base);
5923*4543ef51SXin LI   tcase_add_test(tc_basic, test_attributes);
5924*4543ef51SXin LI   tcase_add_test__if_xml_ge(tc_basic, test_reset_in_entity);
5925*4543ef51SXin LI   tcase_add_test(tc_basic, test_resume_invalid_parse);
5926*4543ef51SXin LI   tcase_add_test(tc_basic, test_resume_resuspended);
5927*4543ef51SXin LI   tcase_add_test(tc_basic, test_cdata_default);
5928*4543ef51SXin LI   tcase_add_test(tc_basic, test_subordinate_reset);
5929*4543ef51SXin LI   tcase_add_test(tc_basic, test_subordinate_suspend);
5930*4543ef51SXin LI   tcase_add_test__if_xml_ge(tc_basic, test_subordinate_xdecl_suspend);
5931*4543ef51SXin LI   tcase_add_test__if_xml_ge(tc_basic, test_subordinate_xdecl_abort);
5932*4543ef51SXin LI   tcase_add_test__ifdef_xml_dtd(tc_basic,
5933*4543ef51SXin LI                                 test_ext_entity_invalid_suspended_parse);
5934*4543ef51SXin LI   tcase_add_test(tc_basic, test_explicit_encoding);
5935*4543ef51SXin LI   tcase_add_test(tc_basic, test_trailing_cr);
5936*4543ef51SXin LI   tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_trailing_cr);
5937*4543ef51SXin LI   tcase_add_test(tc_basic, test_trailing_rsqb);
5938*4543ef51SXin LI   tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_trailing_rsqb);
5939*4543ef51SXin LI   tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_good_cdata);
5940*4543ef51SXin LI   tcase_add_test__ifdef_xml_dtd(tc_basic, test_user_parameters);
5941*4543ef51SXin LI   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_ref_parameter);
5942*4543ef51SXin LI   tcase_add_test(tc_basic, test_empty_parse);
5943*4543ef51SXin LI   tcase_add_test(tc_basic, test_get_buffer_1);
5944*4543ef51SXin LI   tcase_add_test(tc_basic, test_get_buffer_2);
5945*4543ef51SXin LI #if XML_CONTEXT_BYTES > 0
5946*4543ef51SXin LI   tcase_add_test(tc_basic, test_get_buffer_3_overflow);
5947*4543ef51SXin LI #endif
5948*4543ef51SXin LI   tcase_add_test(tc_basic, test_buffer_can_grow_to_max);
5949*4543ef51SXin LI   tcase_add_test(tc_basic, test_getbuffer_allocates_on_zero_len);
5950*4543ef51SXin LI   tcase_add_test(tc_basic, test_byte_info_at_end);
5951*4543ef51SXin LI   tcase_add_test(tc_basic, test_byte_info_at_error);
5952*4543ef51SXin LI   tcase_add_test(tc_basic, test_byte_info_at_cdata);
5953*4543ef51SXin LI   tcase_add_test(tc_basic, test_predefined_entities);
5954*4543ef51SXin LI   tcase_add_test__ifdef_xml_dtd(tc_basic, test_invalid_tag_in_dtd);
5955*4543ef51SXin LI   tcase_add_test(tc_basic, test_not_predefined_entities);
5956*4543ef51SXin LI   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ignore_section);
5957*4543ef51SXin LI   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ignore_section_utf16);
5958*4543ef51SXin LI   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ignore_section_utf16_be);
5959*4543ef51SXin LI   tcase_add_test__ifdef_xml_dtd(tc_basic, test_bad_ignore_section);
5960*4543ef51SXin LI   tcase_add_test__ifdef_xml_dtd(tc_basic, test_external_bom_consumed);
5961*4543ef51SXin LI   tcase_add_test__ifdef_xml_dtd(tc_basic, test_external_entity_values);
5962*4543ef51SXin LI   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_not_standalone);
5963*4543ef51SXin LI   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_value_abort);
5964*4543ef51SXin LI   tcase_add_test(tc_basic, test_bad_public_doctype);
5965*4543ef51SXin LI   tcase_add_test(tc_basic, test_attribute_enum_value);
5966*4543ef51SXin LI   tcase_add_test(tc_basic, test_predefined_entity_redefinition);
5967*4543ef51SXin LI   tcase_add_test__ifdef_xml_dtd(tc_basic, test_dtd_stop_processing);
5968*4543ef51SXin LI   tcase_add_test(tc_basic, test_public_notation_no_sysid);
5969*4543ef51SXin LI   tcase_add_test(tc_basic, test_nested_groups);
5970*4543ef51SXin LI   tcase_add_test(tc_basic, test_group_choice);
5971*4543ef51SXin LI   tcase_add_test(tc_basic, test_standalone_parameter_entity);
5972*4543ef51SXin LI   tcase_add_test__ifdef_xml_dtd(tc_basic, test_skipped_parameter_entity);
5973*4543ef51SXin LI   tcase_add_test__ifdef_xml_dtd(tc_basic,
5974*4543ef51SXin LI                                 test_recursive_external_parameter_entity);
5975*4543ef51SXin LI   tcase_add_test(tc_basic, test_undefined_ext_entity_in_external_dtd);
5976*4543ef51SXin LI   tcase_add_test(tc_basic, test_suspend_xdecl);
5977*4543ef51SXin LI   tcase_add_test(tc_basic, test_abort_epilog);
5978*4543ef51SXin LI   tcase_add_test(tc_basic, test_abort_epilog_2);
5979*4543ef51SXin LI   tcase_add_test(tc_basic, test_suspend_epilog);
5980*4543ef51SXin LI   tcase_add_test(tc_basic, test_suspend_in_sole_empty_tag);
5981*4543ef51SXin LI   tcase_add_test(tc_basic, test_unfinished_epilog);
5982*4543ef51SXin LI   tcase_add_test(tc_basic, test_partial_char_in_epilog);
5983*4543ef51SXin LI   tcase_add_test__ifdef_xml_dtd(tc_basic, test_suspend_resume_internal_entity);
5984*4543ef51SXin LI   tcase_add_test__ifdef_xml_dtd(tc_basic,
5985*4543ef51SXin LI                                 test_suspend_resume_internal_entity_issue_629);
5986*4543ef51SXin LI   tcase_add_test__ifdef_xml_dtd(tc_basic, test_resume_entity_with_syntax_error);
5987*4543ef51SXin LI   tcase_add_test__ifdef_xml_dtd(tc_basic, test_suspend_resume_parameter_entity);
5988*4543ef51SXin LI   tcase_add_test(tc_basic, test_restart_on_error);
5989*4543ef51SXin LI   tcase_add_test(tc_basic, test_reject_lt_in_attribute_value);
5990*4543ef51SXin LI   tcase_add_test(tc_basic, test_reject_unfinished_param_in_att_value);
5991*4543ef51SXin LI   tcase_add_test(tc_basic, test_trailing_cr_in_att_value);
5992*4543ef51SXin LI   tcase_add_test(tc_basic, test_standalone_internal_entity);
5993*4543ef51SXin LI   tcase_add_test(tc_basic, test_skipped_external_entity);
5994*4543ef51SXin LI   tcase_add_test(tc_basic, test_skipped_null_loaded_ext_entity);
5995*4543ef51SXin LI   tcase_add_test(tc_basic, test_skipped_unloaded_ext_entity);
5996*4543ef51SXin LI   tcase_add_test__ifdef_xml_dtd(tc_basic, test_param_entity_with_trailing_cr);
5997*4543ef51SXin LI   tcase_add_test__if_xml_ge(tc_basic, test_invalid_character_entity);
5998*4543ef51SXin LI   tcase_add_test__if_xml_ge(tc_basic, test_invalid_character_entity_2);
5999*4543ef51SXin LI   tcase_add_test__if_xml_ge(tc_basic, test_invalid_character_entity_3);
6000*4543ef51SXin LI   tcase_add_test__if_xml_ge(tc_basic, test_invalid_character_entity_4);
6001*4543ef51SXin LI   tcase_add_test(tc_basic, test_pi_handled_in_default);
6002*4543ef51SXin LI   tcase_add_test(tc_basic, test_comment_handled_in_default);
6003*4543ef51SXin LI   tcase_add_test(tc_basic, test_pi_yml);
6004*4543ef51SXin LI   tcase_add_test(tc_basic, test_pi_xnl);
6005*4543ef51SXin LI   tcase_add_test(tc_basic, test_pi_xmm);
6006*4543ef51SXin LI   tcase_add_test(tc_basic, test_utf16_pi);
6007*4543ef51SXin LI   tcase_add_test(tc_basic, test_utf16_be_pi);
6008*4543ef51SXin LI   tcase_add_test(tc_basic, test_utf16_be_comment);
6009*4543ef51SXin LI   tcase_add_test(tc_basic, test_utf16_le_comment);
6010*4543ef51SXin LI   tcase_add_test(tc_basic, test_missing_encoding_conversion_fn);
6011*4543ef51SXin LI   tcase_add_test(tc_basic, test_failing_encoding_conversion_fn);
6012*4543ef51SXin LI   tcase_add_test(tc_basic, test_unknown_encoding_success);
6013*4543ef51SXin LI   tcase_add_test(tc_basic, test_unknown_encoding_bad_name);
6014*4543ef51SXin LI   tcase_add_test(tc_basic, test_unknown_encoding_bad_name_2);
6015*4543ef51SXin LI   tcase_add_test(tc_basic, test_unknown_encoding_long_name_1);
6016*4543ef51SXin LI   tcase_add_test(tc_basic, test_unknown_encoding_long_name_2);
6017*4543ef51SXin LI   tcase_add_test(tc_basic, test_invalid_unknown_encoding);
6018*4543ef51SXin LI   tcase_add_test(tc_basic, test_unknown_ascii_encoding_ok);
6019*4543ef51SXin LI   tcase_add_test(tc_basic, test_unknown_ascii_encoding_fail);
6020*4543ef51SXin LI   tcase_add_test(tc_basic, test_unknown_encoding_invalid_length);
6021*4543ef51SXin LI   tcase_add_test(tc_basic, test_unknown_encoding_invalid_topbit);
6022*4543ef51SXin LI   tcase_add_test(tc_basic, test_unknown_encoding_invalid_surrogate);
6023*4543ef51SXin LI   tcase_add_test(tc_basic, test_unknown_encoding_invalid_high);
6024*4543ef51SXin LI   tcase_add_test(tc_basic, test_unknown_encoding_invalid_attr_value);
6025*4543ef51SXin LI   tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_latin1_utf16le_bom);
6026*4543ef51SXin LI   tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_latin1_utf16be_bom);
6027*4543ef51SXin LI   tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_latin1_utf16le_bom2);
6028*4543ef51SXin LI   tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_latin1_utf16be_bom2);
6029*4543ef51SXin LI   tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_utf16_be);
6030*4543ef51SXin LI   tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_utf16_le);
6031*4543ef51SXin LI   tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_utf16_unknown);
6032*4543ef51SXin LI   tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_utf8_non_bom);
6033*4543ef51SXin LI   tcase_add_test(tc_basic, test_utf8_in_cdata_section);
6034*4543ef51SXin LI   tcase_add_test(tc_basic, test_utf8_in_cdata_section_2);
6035*4543ef51SXin LI   tcase_add_test(tc_basic, test_utf8_in_start_tags);
6036*4543ef51SXin LI   tcase_add_test(tc_basic, test_trailing_spaces_in_elements);
6037*4543ef51SXin LI   tcase_add_test(tc_basic, test_utf16_attribute);
6038*4543ef51SXin LI   tcase_add_test(tc_basic, test_utf16_second_attr);
6039*4543ef51SXin LI   tcase_add_test(tc_basic, test_attr_after_solidus);
6040*4543ef51SXin LI   tcase_add_test__ifdef_xml_dtd(tc_basic, test_utf16_pe);
6041*4543ef51SXin LI   tcase_add_test(tc_basic, test_bad_attr_desc_keyword);
6042*4543ef51SXin LI   tcase_add_test(tc_basic, test_bad_attr_desc_keyword_utf16);
6043*4543ef51SXin LI   tcase_add_test(tc_basic, test_bad_doctype);
6044*4543ef51SXin LI   tcase_add_test(tc_basic, test_bad_doctype_utf8);
6045*4543ef51SXin LI   tcase_add_test(tc_basic, test_bad_doctype_utf16);
6046*4543ef51SXin LI   tcase_add_test(tc_basic, test_bad_doctype_plus);
6047*4543ef51SXin LI   tcase_add_test(tc_basic, test_bad_doctype_star);
6048*4543ef51SXin LI   tcase_add_test(tc_basic, test_bad_doctype_query);
6049*4543ef51SXin LI   tcase_add_test__ifdef_xml_dtd(tc_basic, test_unknown_encoding_bad_ignore);
6050*4543ef51SXin LI   tcase_add_test(tc_basic, test_entity_in_utf16_be_attr);
6051*4543ef51SXin LI   tcase_add_test(tc_basic, test_entity_in_utf16_le_attr);
6052*4543ef51SXin LI   tcase_add_test__ifdef_xml_dtd(tc_basic, test_entity_public_utf16_be);
6053*4543ef51SXin LI   tcase_add_test__ifdef_xml_dtd(tc_basic, test_entity_public_utf16_le);
6054*4543ef51SXin LI   tcase_add_test(tc_basic, test_short_doctype);
6055*4543ef51SXin LI   tcase_add_test(tc_basic, test_short_doctype_2);
6056*4543ef51SXin LI   tcase_add_test(tc_basic, test_short_doctype_3);
6057*4543ef51SXin LI   tcase_add_test(tc_basic, test_long_doctype);
6058*4543ef51SXin LI   tcase_add_test(tc_basic, test_bad_entity);
6059*4543ef51SXin LI   tcase_add_test(tc_basic, test_bad_entity_2);
6060*4543ef51SXin LI   tcase_add_test(tc_basic, test_bad_entity_3);
6061*4543ef51SXin LI   tcase_add_test(tc_basic, test_bad_entity_4);
6062*4543ef51SXin LI   tcase_add_test(tc_basic, test_bad_notation);
6063*4543ef51SXin LI   tcase_add_test(tc_basic, test_default_doctype_handler);
6064*4543ef51SXin LI   tcase_add_test(tc_basic, test_empty_element_abort);
6065*4543ef51SXin LI   tcase_add_test__ifdef_xml_dtd(tc_basic,
6066*4543ef51SXin LI                                 test_pool_integrity_with_unfinished_attr);
6067*4543ef51SXin LI   tcase_add_test__if_xml_ge(tc_basic, test_nested_entity_suspend);
6068*4543ef51SXin LI   tcase_add_test(tc_basic, test_big_tokens_take_linear_time);
6069*4543ef51SXin LI   tcase_add_test(tc_basic, test_set_reparse_deferral);
6070*4543ef51SXin LI   tcase_add_test(tc_basic, test_reparse_deferral_is_inherited);
6071*4543ef51SXin LI   tcase_add_test(tc_basic, test_set_reparse_deferral_on_null_parser);
6072*4543ef51SXin LI   tcase_add_test(tc_basic, test_set_reparse_deferral_on_the_fly);
6073*4543ef51SXin LI   tcase_add_test(tc_basic, test_set_bad_reparse_option);
6074*4543ef51SXin LI   tcase_add_test(tc_basic, test_bypass_heuristic_when_close_to_bufsize);
6075*4543ef51SXin LI   tcase_add_test(tc_basic, test_varying_buffer_fills);
6076*4543ef51SXin LI }
6077