1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Main Library written by Philip Hazel
9 Copyright (c) 1997-2012 University of Cambridge
10
11 This JIT compiler regression test program was written by Zoltan Herczeg
12 Copyright (c) 2010-2012
13
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
17
18 * Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
20
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
24
25 * Neither the name of the University of Cambridge nor the names of its
26 contributors may be used to endorse or promote products derived from
27 this software without specific prior written permission.
28
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
41 */
42
43 #ifdef HAVE_CONFIG_H
44 #include "config.h"
45 #endif
46
47 #include <stdio.h>
48 #include <string.h>
49 #include "pcre.h"
50
51
52 #include "pcre_internal.h"
53
54 /*
55 Letter characters:
56 \xe6\x92\xad = 0x64ad = 25773 (kanji)
57 Non-letter characters:
58 \xc2\xa1 = 0xa1 = (Inverted Exclamation Mark)
59 \xf3\xa9\xb7\x80 = 0xe9dc0 = 957888
60 \xed\xa0\x80 = 55296 = 0xd800 (Invalid UTF character)
61 \xed\xb0\x80 = 56320 = 0xdc00 (Invalid UTF character)
62 Newlines:
63 \xc2\x85 = 0x85 = 133 (NExt Line = NEL)
64 \xe2\x80\xa8 = 0x2028 = 8232 (Line Separator)
65 Othercase pairs:
66 \xc3\xa9 = 0xe9 = 233 (e')
67 \xc3\x89 = 0xc9 = 201 (E')
68 \xc3\xa1 = 0xe1 = 225 (a')
69 \xc3\x81 = 0xc1 = 193 (A')
70 \x53 = 0x53 = S
71 \x73 = 0x73 = s
72 \xc5\xbf = 0x17f = 383 (long S)
73 \xc8\xba = 0x23a = 570
74 \xe2\xb1\xa5 = 0x2c65 = 11365
75 \xe1\xbd\xb8 = 0x1f78 = 8056
76 \xe1\xbf\xb8 = 0x1ff8 = 8184
77 \xf0\x90\x90\x80 = 0x10400 = 66560
78 \xf0\x90\x90\xa8 = 0x10428 = 66600
79 \xc7\x84 = 0x1c4 = 452
80 \xc7\x85 = 0x1c5 = 453
81 \xc7\x86 = 0x1c6 = 454
82 Caseless sets:
83 ucp_Armenian - \x{531}-\x{556} -> \x{561}-\x{586}
84 ucp_Coptic - \x{2c80}-\x{2ce3} -> caseless: XOR 0x1
85 ucp_Latin - \x{ff21}-\x{ff3a} -> \x{ff41}-\x{ff5a}
86
87 Mark property:
88 \xcc\x8d = 0x30d = 781
89 Special:
90 \xc2\x80 = 0x80 = 128 (lowest 2 byte character)
91 \xdf\xbf = 0x7ff = 2047 (highest 2 byte character)
92 \xe0\xa0\x80 = 0x800 = 2048 (lowest 3 byte character)
93 \xef\xbf\xbf = 0xffff = 65535 (highest 3 byte character)
94 \xf0\x90\x80\x80 = 0x10000 = 65536 (lowest 4 byte character)
95 \xf4\x8f\xbf\xbf = 0x10ffff = 1114111 (highest allowed utf character)
96 */
97
98 static int regression_tests(void);
99
/*
 * Entry point of the JIT regression test program.
 *
 * Asks the library whether JIT support is available, using the first of
 * the 8-, 16- or 32-bit interfaces that is compiled in (checked in that
 * order). If JIT is not reported as available, a message is printed and
 * 1 is returned; otherwise the result of regression_tests() is returned
 * unchanged.
 */
int main(void)
{
	/* Stays 0 unless the config call below reports JIT support, so a
	   build with no interface selected also ends in the error path. */
	int jit = 0;

#if defined SUPPORT_PCRE8
	pcre_config(PCRE_CONFIG_JIT, &jit);
#elif defined SUPPORT_PCRE16
	pcre16_config(PCRE_CONFIG_JIT, &jit);
#elif defined SUPPORT_PCRE32
	pcre32_config(PCRE_CONFIG_JIT, &jit);
#endif
	if (!jit) {
		printf("JIT must be enabled to run pcre_jit_test\n");
		return 1;
	}
	return regression_tests();
}
116
117 /* --------------------------------------------------------------------------------------- */
118
119 #if !(defined SUPPORT_PCRE8) && !(defined SUPPORT_PCRE16) && !(defined SUPPORT_PCRE32)
120 #error SUPPORT_PCRE8 or SUPPORT_PCRE16 or SUPPORT_PCRE32 must be defined
121 #endif
122
/* Shorthand names for the option combinations used in the test table.
   Each letter adds one option: C = PCRE_CASELESS, M = PCRE_MULTILINE,
   U = PCRE_UTF8, A = PCRE_NEWLINE_ANYCRLF, P = PCRE_UCP. */
#define MA	(PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF)
#define MAP	(MA | PCRE_UCP)
#define CMA	(PCRE_CASELESS | MA)
#define MUA	(MA | PCRE_UTF8)
#define MUAP	(MUA | PCRE_UCP)
#define CMUA	(PCRE_CASELESS | MUA)
#define CMUAP	(PCRE_CASELESS | MUAP)

/* The second member of each test case combines a small start offset with
   the F_* control bits below (the table writes it as "offset | F_...").
   All F_* bits lie above OFFSET_MASK; presumably the mask is used to
   recover the offset -- the consuming code is not in view here. */
#define OFFSET_MASK	0x0000ffff
#define F_NO8		0x00010000
#define F_NO16		0x00020000
/* NOTE(review): F_NO32 has the same value as F_NO16 -- confirm that the
   two flags are meant to share one bit. */
#define F_NO32		0x00020000
#define F_NOMATCH	0x00040000
#define F_DIFF		0x00080000
#define F_FORCECONV	0x00100000
#define F_PROPERTY	0x00200000
#define F_STUDY		0x00400000
140
/* One row of the regression test table. Rows are written as positional
   initializers, so the order of the members must not change. */
struct regression_test_case {
	/* Option bits for this case (e.g. MUA, CMA, PCRE_CASELESS). */
	int flags;
	/* Start offset in the low bits, OR'ed with F_* control bits. */
	int start_offset;
	/* The regular expression, as a C string. */
	const char *pattern;
	/* The text paired with the pattern (presumably the match subject). */
	const char *input;
};
147
148 static struct regression_test_case regression_test_cases[] = {
149 /* Constant strings. */
150 { MUA, 0, "AbC", "AbAbC" },
151 { MUA, 0, "ACCEPT", "AACACCACCEACCEPACCEPTACCEPTT" },
152 { CMUA, 0, "aA#\xc3\xa9\xc3\x81", "aA#Aa#\xc3\x89\xc3\xa1" },
153 { MA, 0, "[^a]", "aAbB" },
154 { CMA, 0, "[^m]", "mMnN" },
155 { MA, 0, "a[^b][^#]", "abacd" },
156 { CMA, 0, "A[^B][^E]", "abacd" },
157 { CMUA, 0, "[^x][^#]", "XxBll" },
158 { MUA, 0, "[^a]", "aaa\xc3\xa1#Ab" },
159 { CMUA, 0, "[^A]", "aA\xe6\x92\xad" },
160 { MUA, 0, "\\W(\\W)?\\w", "\r\n+bc" },
161 { MUA, 0, "\\W(\\W)?\\w", "\n\r+bc" },
162 { MUA, 0, "\\W(\\W)?\\w", "\r\r+bc" },
163 { MUA, 0, "\\W(\\W)?\\w", "\n\n+bc" },
164 { MUA, 0, "[axd]", "sAXd" },
165 { CMUA, 0, "[axd]", "sAXd" },
166 { CMUA, 0 | F_NOMATCH, "[^axd]", "DxA" },
167 { MUA, 0, "[a-dA-C]", "\xe6\x92\xad\xc3\xa9.B" },
168 { MUA, 0, "[^a-dA-C]", "\xe6\x92\xad\xc3\xa9" },
169 { CMUA, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
170 { MUA, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
171 { MUA, 0, "[^a]", "\xc2\x80[]" },
172 { CMUA, 0, "\xf0\x90\x90\xa7", "\xf0\x90\x91\x8f" },
173 { CMA, 0, "1a2b3c4", "1a2B3c51A2B3C4" },
174 { PCRE_CASELESS, 0, "\xff#a", "\xff#\xff\xfe##\xff#A" },
175 { PCRE_CASELESS, 0, "\xfe", "\xff\xfc#\xfe\xfe" },
176 { PCRE_CASELESS, 0, "a1", "Aa1" },
177 { MA, 0, "\\Ca", "cda" },
178 { CMA, 0, "\\Ca", "CDA" },
179 { MA, 0 | F_NOMATCH, "\\Cx", "cda" },
180 { CMA, 0 | F_NOMATCH, "\\Cx", "CDA" },
181 { CMUAP, 0, "\xf0\x90\x90\x80\xf0\x90\x90\xa8", "\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
182 { CMUAP, 0, "\xf0\x90\x90\x80{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
183 { CMUAP, 0, "\xf0\x90\x90\xa8{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
184 { CMUAP, 0, "\xe1\xbd\xb8\xe1\xbf\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" },
185 { MA, 0, "[3-57-9]", "5" },
186
187 /* Assertions. */
188 { MUA, 0, "\\b[^A]", "A_B#" },
189 { MA, 0 | F_NOMATCH, "\\b\\W", "\n*" },
190 { MUA, 0, "\\B[^,]\\b[^s]\\b", "#X" },
191 { MAP, 0, "\\B", "_\xa1" },
192 { MAP, 0, "\\b_\\b[,A]\\B", "_," },
193 { MUAP, 0, "\\b", "\xe6\x92\xad!" },
194 { MUAP, 0, "\\B", "_\xc2\xa1\xc3\xa1\xc2\x85" },
195 { MUAP, 0, "\\b[^A]\\B[^c]\\b[^_]\\B", "_\xc3\xa1\xe2\x80\xa8" },
196 { MUAP, 0, "\\b\\w+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
197 { MUA, 0 | F_NOMATCH, "\\b.", "\xcd\xbe" },
198 { CMUAP, 0, "\\By", "\xf0\x90\x90\xa8y" },
199 { MA, 0 | F_NOMATCH, "\\R^", "\n" },
200 { MA, 1 | F_NOMATCH, "^", "\n" },
201 { 0, 0, "^ab", "ab" },
202 { 0, 0 | F_NOMATCH, "^ab", "aab" },
203 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "^a", "\r\raa\n\naa\r\naa" },
204 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF, 0, "^-", "\xe2\x80\xa8--\xc2\x85-\r\n-" },
205 { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "^-", "a--b--\x85--" },
206 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "^-", "a--\xe2\x80\xa8--" },
207 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "^-", "a--\xc2\x85--" },
208 { 0, 0, "ab$", "ab" },
209 { 0, 0 | F_NOMATCH, "ab$", "abab\n\n" },
210 { PCRE_DOLLAR_ENDONLY, 0 | F_NOMATCH, "ab$", "abab\r\n" },
211 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "a$", "\r\raa\n\naa\r\naa" },
212 { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "a$", "aaa" },
213 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF, 0, "#$", "#\xc2\x85###\r#" },
214 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "#$", "#\xe2\x80\xa9" },
215 { PCRE_NOTBOL | PCRE_NEWLINE_ANY, 0 | F_NOMATCH, "^a", "aa\naa" },
216 { PCRE_NOTBOL | PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "^a", "aa\naa" },
217 { PCRE_NOTEOL | PCRE_NEWLINE_ANY, 0 | F_NOMATCH, "a$", "aa\naa" },
218 { PCRE_NOTEOL | PCRE_NEWLINE_ANY, 0 | F_NOMATCH, "a$", "aa\r\n" },
219 { PCRE_UTF8 | PCRE_DOLLAR_ENDONLY | PCRE_NEWLINE_ANY, 0 | F_PROPERTY, "\\p{Any}{2,}$", "aa\r\n" },
220 { PCRE_NOTEOL | PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "a$", "aa\naa" },
221 { PCRE_NEWLINE_CR, 0, ".\\Z", "aaa" },
222 { PCRE_NEWLINE_CR | PCRE_UTF8, 0, "a\\Z", "aaa\r" },
223 { PCRE_NEWLINE_CR, 0, ".\\Z", "aaa\n" },
224 { PCRE_NEWLINE_CRLF, 0, ".\\Z", "aaa\r" },
225 { PCRE_NEWLINE_CRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
226 { PCRE_NEWLINE_CRLF, 0, ".\\Z", "aaa\r\n" },
227 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa" },
228 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r" },
229 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
230 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r\n" },
231 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\xe2\x80\xa8" },
232 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa" },
233 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r" },
234 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
235 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r\n" },
236 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, ".\\Z", "aaa\xc2\x85" },
237 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, ".\\Z", "aaa\xe2\x80\xa8" },
238 { MA, 0, "\\Aa", "aaa" },
239 { MA, 1 | F_NOMATCH, "\\Aa", "aaa" },
240 { MA, 1, "\\Ga", "aaa" },
241 { MA, 1 | F_NOMATCH, "\\Ga", "aba" },
242 { MA, 0, "a\\z", "aaa" },
243 { MA, 0 | F_NOMATCH, "a\\z", "aab" },
244
245 /* Brackets and alternatives. */
246 { MUA, 0, "(ab|bb|cd)", "bacde" },
247 { MUA, 0, "(?:ab|a)(bc|c)", "ababc" },
248 { MUA, 0, "((ab|(cc))|(bb)|(?:cd|efg))", "abac" },
249 { CMUA, 0, "((aB|(Cc))|(bB)|(?:cd|EFg))", "AcCe" },
250 { MUA, 0, "((ab|(cc))|(bb)|(?:cd|ebg))", "acebebg" },
251 { MUA, 0, "(?:(a)|(?:b))(cc|(?:d|e))(a|b)k", "accabdbbccbk" },
252 { MUA, 0, "\xc7\x82|\xc6\x82", "\xf1\x83\x82\x82\xc7\x82\xc7\x83" },
253 { MUA, 0, "=\xc7\x82|#\xc6\x82", "\xf1\x83\x82\x82=\xc7\x82\xc7\x83" },
254 { MUA, 0, "\xc7\x82\xc7\x83|\xc6\x82\xc6\x82", "\xf1\x83\x82\x82\xc7\x82\xc7\x83" },
255 { MUA, 0, "\xc6\x82\xc6\x82|\xc7\x83\xc7\x83|\xc8\x84\xc8\x84", "\xf1\x83\x82\x82\xc8\x84\xc8\x84" },
256
257 /* Greedy and non-greedy ? operators. */
258 { MUA, 0, "(?:a)?a", "laab" },
259 { CMUA, 0, "(A)?A", "llaab" },
260 { MUA, 0, "(a)?\?a", "aab" }, /* ?? is the prefix of trygraphs in GCC. */
261 { MUA, 0, "(a)?a", "manm" },
262 { CMUA, 0, "(a|b)?\?d((?:e)?)", "ABABdx" },
263 { MUA, 0, "(a|b)?\?d((?:e)?)", "abcde" },
264 { MUA, 0, "((?:ab)?\?g|b(?:g(nn|d)?\?)?)?\?(?:n)?m", "abgnbgnnbgdnmm" },
265
266 /* Greedy and non-greedy + operators */
267 { MUA, 0, "(aa)+aa", "aaaaaaa" },
268 { MUA, 0, "(aa)+?aa", "aaaaaaa" },
269 { MUA, 0, "(?:aba|ab|a)+l", "ababamababal" },
270 { MUA, 0, "(?:aba|ab|a)+?l", "ababamababal" },
271 { MUA, 0, "(a(?:bc|cb|b|c)+?|ss)+e", "accssabccbcacbccbbXaccssabccbcacbccbbe" },
272 { MUA, 0, "(a(?:bc|cb|b|c)+|ss)+?e", "accssabccbcacbccbbXaccssabccbcacbccbbe" },
273 { MUA, 0, "(?:(b(c)+?)+)?\?(?:(bc)+|(cb)+)+(?:m)+", "bccbcccbcbccbcbPbccbcccbcbccbcbmmn" },
274
275 /* Greedy and non-greedy * operators */
276 { CMUA, 0, "(?:AA)*AB", "aaaaaaamaaaaaaab" },
277 { MUA, 0, "(?:aa)*?ab", "aaaaaaamaaaaaaab" },
278 { MUA, 0, "(aa|ab)*ab", "aaabaaab" },
279 { CMUA, 0, "(aa|Ab)*?aB", "aaabaaab" },
280 { MUA, 0, "(a|b)*(?:a)*(?:b)*m", "abbbaaababanabbbaaababamm" },
281 { MUA, 0, "(a|b)*?(?:a)*?(?:b)*?m", "abbbaaababanabbbaaababamm" },
282 { MA, 0, "a(a(\\1*)a|(b)b+){0}a", "aa" },
283 { MA, 0, "((?:a|)*){0}a", "a" },
284
285 /* Combining ? + * operators */
286 { MUA, 0, "((bm)+)?\?(?:a)*(bm)+n|((am)+?)?(?:a)+(am)*n", "bmbmabmamaaamambmaman" },
287 { MUA, 0, "(((ab)?cd)*ef)+g", "abcdcdefcdefefmabcdcdefcdefefgg" },
288 { MUA, 0, "(((ab)?\?cd)*?ef)+?g", "abcdcdefcdefefmabcdcdefcdefefgg" },
289 { MUA, 0, "(?:(ab)?c|(?:ab)+?d)*g", "ababcdccababddg" },
290 { MUA, 0, "(?:(?:ab)?\?c|(ab)+d)*?g", "ababcdccababddg" },
291
292 /* Single character iterators. */
293 { MUA, 0, "(a+aab)+aaaab", "aaaabcaaaabaabcaabcaaabaaaab" },
294 { MUA, 0, "(a*a*aab)+x", "aaaaabaabaaabmaabx" },
295 { MUA, 0, "(a*?(b|ab)a*?)+x", "aaaabcxbbaabaacbaaabaabax" },
296 { MUA, 0, "(a+(ab|ad)a+)+x", "aaabaaaadaabaaabaaaadaaax" },
297 { MUA, 0, "(a?(a)a?)+(aaa)", "abaaabaaaaaaaa" },
298 { MUA, 0, "(a?\?(a)a?\?)+(b)", "aaaacaaacaacacbaaab" },
299 { MUA, 0, "(a{0,4}(b))+d", "aaaaaabaabcaaaaabaaaaabd" },
300 { MUA, 0, "(a{0,4}?[^b])+d+(a{0,4}[^b])d+", "aaaaadaaaacaadddaaddd" },
301 { MUA, 0, "(ba{2})+c", "baabaaabacbaabaac" },
302 { MUA, 0, "(a*+bc++)+", "aaabbcaaabcccab" },
303 { MUA, 0, "(a?+[^b])+", "babaacacb" },
304 { MUA, 0, "(a{0,3}+b)(a{0,3}+b)(a{0,3}+)[^c]", "abaabaaacbaabaaaac" },
305 { CMUA, 0, "([a-c]+[d-f]+?)+?g", "aBdacdehAbDaFgA" },
306 { CMUA, 0, "[c-f]+k", "DemmFke" },
307 { MUA, 0, "([DGH]{0,4}M)+", "GGDGHDGMMHMDHHGHM" },
308 { MUA, 0, "([a-c]{4,}s)+", "abasabbasbbaabsbba" },
309 { CMUA, 0, "[ace]{3,7}", "AcbDAcEEcEd" },
310 { CMUA, 0, "[ace]{3,7}?", "AcbDAcEEcEd" },
311 { CMUA, 0, "[ace]{3,}", "AcbDAcEEcEd" },
312 { CMUA, 0, "[ace]{3,}?", "AcbDAcEEcEd" },
313 { MUA, 0, "[ckl]{2,}?g", "cdkkmlglglkcg" },
314 { CMUA, 0, "[ace]{5}?", "AcCebDAcEEcEd" },
315 { MUA, 0, "([AbC]{3,5}?d)+", "BACaAbbAEAACCbdCCbdCCAAbb" },
316 { MUA, 0, "([^ab]{0,}s){2}", "abaabcdsABamsDDs" },
317 { MUA, 0, "\\b\\w+\\B", "x,a_cd" },
318 { MUAP, 0, "\\b[^\xc2\xa1]+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
319 { CMUA, 0, "[^b]+(a*)([^c]?d{3})", "aaaaddd" },
320 { CMUAP, 0, "\xe1\xbd\xb8{2}", "\xe1\xbf\xb8#\xe1\xbf\xb8\xe1\xbd\xb8" },
321 { CMUA, 0, "[^\xf0\x90\x90\x80]{2,4}@", "\xf0\x90\x90\xa8\xf0\x90\x90\x80###\xf0\x90\x90\x80@@@" },
322 { CMUA, 0, "[^\xe1\xbd\xb8][^\xc3\xa9]", "\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" },
323 { MUA, 0, "[^\xe1\xbd\xb8][^\xc3\xa9]", "\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" },
324 { MUA, 0, "[^\xe1\xbd\xb8]{3,}?", "##\xe1\xbd\xb8#\xe1\xbd\xb8#\xc3\x89#\xe1\xbd\xb8" },
325 { MUA, 0, "\\d+123", "987654321,01234" },
326 { MUA, 0, "abcd*|\\w+xy", "aaaaa,abxyz" },
327 { MUA, 0, "(?:abc|((?:amc|\\b\\w*xy)))", "aaaaa,abxyz" },
328 { MUA, 0, "a(?R)|([a-z]++)#", ".abcd.abcd#."},
329 { MUA, 0, "a(?R)|([a-z]++)#", ".abcd.mbcd#."},
330 { MUA, 0, ".[ab]*.", "xx" },
331 { MUA, 0, ".[ab]*a", "xxa" },
332 { MUA, 0, ".[ab]?.", "xx" },
333
334 /* Bracket repeats with limit. */
335 { MUA, 0, "(?:(ab){2}){5}M", "abababababababababababM" },
336 { MUA, 0, "(?:ab|abab){1,5}M", "abababababababababababM" },
337 { MUA, 0, "(?>ab|abab){1,5}M", "abababababababababababM" },
338 { MUA, 0, "(?:ab|abab){1,5}?M", "abababababababababababM" },
339 { MUA, 0, "(?>ab|abab){1,5}?M", "abababababababababababM" },
340 { MUA, 0, "(?:(ab){1,4}?){1,3}?M", "abababababababababababababM" },
341 { MUA, 0, "(?:(ab){1,4}){1,3}abababababababababababM", "ababababababababababababM" },
342 { MUA, 0 | F_NOMATCH, "(?:(ab){1,4}){1,3}abababababababababababM", "abababababababababababM" },
343 { MUA, 0, "(ab){4,6}?M", "abababababababM" },
344
345 /* Basic character sets. */
346 { MUA, 0, "(?:\\s)+(?:\\S)+", "ab \t\xc3\xa9\xe6\x92\xad " },
347 { MUA, 0, "(\\w)*(k)(\\W)?\?", "abcdef abck11" },
348 { MUA, 0, "\\((\\d)+\\)\\D", "a() (83 (8)2 (9)ab" },
349 { MUA, 0, "\\w(\\s|(?:\\d)*,)+\\w\\wb", "a 5, 4,, bb 5, 4,, aab" },
350 { MUA, 0, "(\\v+)(\\V+)", "\x0e\xc2\x85\xe2\x80\xa8\x0b\x09\xe2\x80\xa9" },
351 { MUA, 0, "(\\h+)(\\H+)", "\xe2\x80\xa8\xe2\x80\x80\x20\xe2\x80\x8a\xe2\x81\x9f\xe3\x80\x80\x09\x20\xc2\xa0\x0a" },
352 { MUA, 0, "x[bcef]+", "xaxdxecbfg" },
353 { MUA, 0, "x[bcdghij]+", "xaxexfxdgbjk" },
354 { MUA, 0, "x[^befg]+", "xbxexacdhg" },
355 { MUA, 0, "x[^bcdl]+", "xlxbxaekmd" },
356 { MUA, 0, "x[^bcdghi]+", "xbxdxgxaefji" },
357 { MUA, 0, "x[B-Fb-f]+", "xaxAxgxbfBFG" },
358 { CMUA, 0, "\\x{e9}+", "#\xf0\x90\x90\xa8\xc3\xa8\xc3\xa9\xc3\x89\xc3\x88" },
359 { CMUA, 0, "[^\\x{e9}]+", "\xc3\xa9#\xf0\x90\x90\xa8\xc3\xa8\xc3\x88\xc3\x89" },
360 { MUA, 0, "[\\x02\\x7e]+", "\xc3\x81\xe1\xbf\xb8\xf0\x90\x90\xa8\x01\x02\x7e\x7f" },
361 { MUA, 0, "[^\\x02\\x7e]+", "\x02\xc3\x81\xe1\xbf\xb8\xf0\x90\x90\xa8\x01\x7f\x7e" },
362 { MUA, 0, "[\\x{81}-\\x{7fe}]+", "#\xe1\xbf\xb8\xf0\x90\x90\xa8\xc2\x80\xc2\x81\xdf\xbe\xdf\xbf" },
363 { MUA, 0, "[^\\x{81}-\\x{7fe}]+", "\xc2\x81#\xe1\xbf\xb8\xf0\x90\x90\xa8\xc2\x80\xdf\xbf\xdf\xbe" },
364 { MUA, 0, "[\\x{801}-\\x{fffe}]+", "#\xc3\xa9\xf0\x90\x90\x80\xe0\xa0\x80\xe0\xa0\x81\xef\xbf\xbe\xef\xbf\xbf" },
365 { MUA, 0, "[^\\x{801}-\\x{fffe}]+", "\xe0\xa0\x81#\xc3\xa9\xf0\x90\x90\x80\xe0\xa0\x80\xef\xbf\xbf\xef\xbf\xbe" },
366 { MUA, 0, "[\\x{10001}-\\x{10fffe}]+", "#\xc3\xa9\xe2\xb1\xa5\xf0\x90\x80\x80\xf0\x90\x80\x81\xf4\x8f\xbf\xbe\xf4\x8f\xbf\xbf" },
367 { MUA, 0, "[^\\x{10001}-\\x{10fffe}]+", "\xf0\x90\x80\x81#\xc3\xa9\xe2\xb1\xa5\xf0\x90\x80\x80\xf4\x8f\xbf\xbf\xf4\x8f\xbf\xbe" },
368
369 /* Unicode properties. */
370 { MUAP, 0, "[1-5\xc3\xa9\\w]", "\xc3\xa1_" },
371 { MUAP, 0 | F_PROPERTY, "[\xc3\x81\\p{Ll}]", "A_\xc3\x89\xc3\xa1" },
372 { MUAP, 0, "[\\Wd-h_x-z]+", "a\xc2\xa1#_yhzdxi" },
373 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[\\P{Any}]", "abc" },
374 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[^\\p{Any}]", "abc" },
375 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[\\P{Any}\xc3\xa1-\xc3\xa8]", "abc" },
376 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[^\\p{Any}\xc3\xa1-\xc3\xa8]", "abc" },
377 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
378 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[^\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
379 { MUAP, 0 | F_PROPERTY, "[\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
380 { MUAP, 0 | F_PROPERTY, "[^\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
381 { MUAP, 0, "[b-\xc3\xa9\\s]", "a\xc\xe6\x92\xad" },
382 { CMUAP, 0, "[\xc2\x85-\xc2\x89\xc3\x89]", "\xc2\x84\xc3\xa9" },
383 { MUAP, 0, "[^b-d^&\\s]{3,}", "db^ !a\xe2\x80\xa8_ae" },
384 { MUAP, 0 | F_PROPERTY, "[^\\S\\P{Any}][\\sN]{1,3}[\\P{N}]{4}", "\xe2\x80\xaa\xa N\x9\xc3\xa9_0" },
385 { MUA, 0 | F_PROPERTY, "[^\\P{L}\x9!D-F\xa]{2,3}", "\x9,.DF\xa.CG\xc3\x81" },
386 { CMUAP, 0, "[\xc3\xa1-\xc3\xa9_\xe2\x80\xa0-\xe2\x80\xaf]{1,5}[^\xe2\x80\xa0-\xe2\x80\xaf]", "\xc2\xa1\xc3\x89\xc3\x89\xe2\x80\xaf_\xe2\x80\xa0" },
387 { MUAP, 0 | F_PROPERTY, "[\xc3\xa2-\xc3\xa6\xc3\x81-\xc3\x84\xe2\x80\xa8-\xe2\x80\xa9\xe6\x92\xad\\p{Zs}]{2,}", "\xe2\x80\xa7\xe2\x80\xa9\xe6\x92\xad \xe6\x92\xae" },
388 { MUAP, 0 | F_PROPERTY, "[\\P{L&}]{2}[^\xc2\x85-\xc2\x89\\p{Ll}\\p{Lu}]{2}", "\xc3\xa9\xe6\x92\xad.a\xe6\x92\xad|\xc2\x8a#" },
389 { PCRE_UCP, 0, "[a-b\\s]{2,5}[^a]", "AB baaa" },
390
391 /* Possible empty brackets. */
392 { MUA, 0, "(?:|ab||bc|a)+d", "abcxabcabd" },
393 { MUA, 0, "(|ab||bc|a)+d", "abcxabcabd" },
394 { MUA, 0, "(?:|ab||bc|a)*d", "abcxabcabd" },
395 { MUA, 0, "(|ab||bc|a)*d", "abcxabcabd" },
396 { MUA, 0, "(?:|ab||bc|a)+?d", "abcxabcabd" },
397 { MUA, 0, "(|ab||bc|a)+?d", "abcxabcabd" },
398 { MUA, 0, "(?:|ab||bc|a)*?d", "abcxabcabd" },
399 { MUA, 0, "(|ab||bc|a)*?d", "abcxabcabd" },
400 { MUA, 0, "(((a)*?|(?:ba)+)+?|(?:|c|ca)*)*m", "abaacaccabacabalabaacaccabacabamm" },
401 { MUA, 0, "(?:((?:a)*|(ba)+?)+|(|c|ca)*?)*?m", "abaacaccabacabalabaacaccabacabamm" },
402
403 /* Start offset. */
404 { MUA, 3, "(\\d|(?:\\w)*\\w)+", "0ac01Hb" },
405 { MUA, 4 | F_NOMATCH, "(\\w\\W\\w)+", "ab#d" },
406 { MUA, 2 | F_NOMATCH, "(\\w\\W\\w)+", "ab#d" },
407 { MUA, 1, "(\\w\\W\\w)+", "ab#d" },
408
409 /* Newline. */
410 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
411 { PCRE_MULTILINE | PCRE_NEWLINE_CR, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
412 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "\\W{1,3}[^#]", "\r\n##...." },
413 { MUA | PCRE_NO_UTF8_CHECK, 1, "^.a", "\n\x80\nxa" },
414 { MUA, 1, "^", "\r\n" },
415 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 1 | F_NOMATCH, "^", "\r\n" },
416 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 1, "^", "\r\na" },
417
418 /* Any character except newline or any newline. */
419 { PCRE_NEWLINE_CRLF, 0, ".", "\r" },
420 { PCRE_NEWLINE_CRLF | PCRE_UTF8, 0, ".(.).", "a\xc3\xa1\r\n\n\r\r" },
421 { PCRE_NEWLINE_ANYCRLF, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
422 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
423 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, "(.).", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa9$de" },
424 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0 | F_NOMATCH, ".(.).", "\xe2\x80\xa8\nb\r" },
425 { PCRE_NEWLINE_ANY, 0, "(.)(.)", "#\x85#\r#\n#\r\n#\x84" },
426 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, "(.+)#", "#\rMn\xc2\x85#\n###" },
427 { PCRE_BSR_ANYCRLF, 0, "\\R", "\r" },
428 { PCRE_BSR_ANYCRLF, 0, "\\R", "\x85#\r\n#" },
429 { PCRE_BSR_UNICODE | PCRE_UTF8, 0, "\\R", "ab\xe2\x80\xa8#c" },
430 { PCRE_BSR_UNICODE | PCRE_UTF8, 0, "\\R", "ab\r\nc" },
431 { PCRE_NEWLINE_CRLF | PCRE_BSR_UNICODE | PCRE_UTF8, 0, "(\\R.)+", "\xc2\x85\r\n#\xe2\x80\xa8\n\r\n\r" },
432 { MUA, 0 | F_NOMATCH, "\\R+", "ab" },
433 { MUA, 0, "\\R+", "ab\r\n\r" },
434 { MUA, 0, "\\R*", "ab\r\n\r" },
435 { MUA, 0, "\\R*", "\r\n\r" },
436 { MUA, 0, "\\R{2,4}", "\r\nab\r\r" },
437 { MUA, 0, "\\R{2,4}", "\r\nab\n\n\n\r\r\r" },
438 { MUA, 0, "\\R{2,}", "\r\nab\n\n\n\r\r\r" },
439 { MUA, 0, "\\R{0,3}", "\r\n\r\n\r\n\r\n\r\n" },
440 { MUA, 0 | F_NOMATCH, "\\R+\\R\\R", "\r\n\r\n" },
441 { MUA, 0, "\\R+\\R\\R", "\r\r\r" },
442 { MUA, 0, "\\R*\\R\\R", "\n\r" },
443 { MUA, 0 | F_NOMATCH, "\\R{2,4}\\R\\R", "\r\r\r" },
444 { MUA, 0, "\\R{2,4}\\R\\R", "\r\r\r\r" },
445
446 /* Atomic groups (no fallback from "next" direction). */
447 { MUA, 0 | F_NOMATCH, "(?>ab)ab", "bab" },
448 { MUA, 0 | F_NOMATCH, "(?>(ab))ab", "bab" },
449 { MUA, 0, "(?>ab)+abc(?>de)*def(?>gh)?ghe(?>ij)+?k(?>lm)*?n(?>op)?\?op",
450 "bababcdedefgheijijklmlmnop" },
451 { MUA, 0, "(?>a(b)+a|(ab)?\?(b))an", "abban" },
452 { MUA, 0, "(?>ab+a|(?:ab)?\?b)an", "abban" },
453 { MUA, 0, "((?>ab|ad|)*?)(?>|c)*abad", "abababcababad" },
454 { MUA, 0, "(?>(aa|b|)*+(?>(##)|###)*d|(aa)(?>(baa)?)m)", "aabaa#####da" },
455 { MUA, 0, "((?>a|)+?)b", "aaacaaab" },
456 { MUA, 0, "(?>x|)*$", "aaa" },
457 { MUA, 0, "(?>(x)|)*$", "aaa" },
458 { MUA, 0, "(?>x|())*$", "aaa" },
459 { MUA, 0, "((?>[cxy]a|[a-d])*?)b", "aaa+ aaab" },
460 { MUA, 0, "((?>[cxy](a)|[a-d])*?)b", "aaa+ aaab" },
461 { MUA, 0, "(?>((?>(a+))))bab|(?>((?>(a+))))bb", "aaaabaaabaabab" },
462 { MUA, 0, "(?>(?>a+))bab|(?>(?>a+))bb", "aaaabaaabaabab" },
463 { MUA, 0, "(?>(a)c|(?>(c)|(a))a)b*?bab", "aaaabaaabaabab" },
464 { MUA, 0, "(?>ac|(?>c|a)a)b*?bab", "aaaabaaabaabab" },
465 { MUA, 0, "(?>(b)b|(a))*b(?>(c)|d)?x", "ababcaaabdbx" },
466 { MUA, 0, "(?>bb|a)*b(?>c|d)?x", "ababcaaabdbx" },
467 { MUA, 0, "(?>(bb)|a)*b(?>c|(d))?x", "ababcaaabdbx" },
468 { MUA, 0, "(?>(a))*?(?>(a))+?(?>(a))??x", "aaaaaacccaaaaabax" },
469 { MUA, 0, "(?>a)*?(?>a)+?(?>a)??x", "aaaaaacccaaaaabax" },
470 { MUA, 0, "(?>(a)|)*?(?>(a)|)+?(?>(a)|)??x", "aaaaaacccaaaaabax" },
471 { MUA, 0, "(?>a|)*?(?>a|)+?(?>a|)??x", "aaaaaacccaaaaabax" },
472 { MUA, 0, "(?>a(?>(a{0,2}))*?b|aac)+b", "aaaaaaacaaaabaaaaacaaaabaacaaabb" },
473 { CMA, 0, "(?>((?>a{32}|b+|(a*))?(?>c+|d*)?\?)+e)+?f", "aaccebbdde bbdaaaccebbdee bbdaaaccebbdeef" },
474 { MUA, 0, "(?>(?:(?>aa|a||x)+?b|(?>aa|a||(x))+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
475 { MUA, 0, "(?>(?:(?>aa|a||(x))+?b|(?>aa|a||x)+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
476 { MUA, 0 | F_PROPERTY, "\\X", "\xcc\x8d\xcc\x8d" },
477 { MUA, 0 | F_PROPERTY, "\\X", "\xcc\x8d\xcc\x8d#\xcc\x8d\xcc\x8d" },
478 { MUA, 0 | F_PROPERTY, "\\X+..", "\xcc\x8d#\xcc\x8d#\xcc\x8d\xcc\x8d" },
479 { MUA, 0 | F_PROPERTY, "\\X{2,4}", "abcdef" },
480 { MUA, 0 | F_PROPERTY, "\\X{2,4}?", "abcdef" },
481 { MUA, 0 | F_NOMATCH | F_PROPERTY, "\\X{2,4}..", "#\xcc\x8d##" },
482 { MUA, 0 | F_PROPERTY, "\\X{2,4}..", "#\xcc\x8d#\xcc\x8d##" },
483 { MUA, 0, "(c(ab)?+ab)+", "cabcababcab" },
484 { MUA, 0, "(?>(a+)b)+aabab", "aaaabaaabaabab" },
485
486 /* Possessive quantifiers. */
487 { MUA, 0, "(?:a|b)++m", "mababbaaxababbaam" },
488 { MUA, 0, "(?:a|b)*+m", "mababbaaxababbaam" },
489 { MUA, 0, "(?:a|b)*+m", "ababbaaxababbaam" },
490 { MUA, 0, "(a|b)++m", "mababbaaxababbaam" },
491 { MUA, 0, "(a|b)*+m", "mababbaaxababbaam" },
492 { MUA, 0, "(a|b)*+m", "ababbaaxababbaam" },
493 { MUA, 0, "(a|b(*ACCEPT))++m", "maaxab" },
494 { MUA, 0, "(?:b*)++m", "bxbbxbbbxm" },
495 { MUA, 0, "(?:b*)++m", "bxbbxbbbxbbm" },
496 { MUA, 0, "(?:b*)*+m", "bxbbxbbbxm" },
497 { MUA, 0, "(?:b*)*+m", "bxbbxbbbxbbm" },
498 { MUA, 0, "(b*)++m", "bxbbxbbbxm" },
499 { MUA, 0, "(b*)++m", "bxbbxbbbxbbm" },
500 { MUA, 0, "(b*)*+m", "bxbbxbbbxm" },
501 { MUA, 0, "(b*)*+m", "bxbbxbbbxbbm" },
502 { MUA, 0, "(?:a|(b))++m", "mababbaaxababbaam" },
503 { MUA, 0, "(?:(a)|b)*+m", "mababbaaxababbaam" },
504 { MUA, 0, "(?:(a)|(b))*+m", "ababbaaxababbaam" },
505 { MUA, 0, "(a|(b))++m", "mababbaaxababbaam" },
506 { MUA, 0, "((a)|b)*+m", "mababbaaxababbaam" },
507 { MUA, 0, "((a)|(b))*+m", "ababbaaxababbaam" },
508 { MUA, 0, "(a|(b)(*ACCEPT))++m", "maaxab" },
509 { MUA, 0, "(?:(b*))++m", "bxbbxbbbxm" },
510 { MUA, 0, "(?:(b*))++m", "bxbbxbbbxbbm" },
511 { MUA, 0, "(?:(b*))*+m", "bxbbxbbbxm" },
512 { MUA, 0, "(?:(b*))*+m", "bxbbxbbbxbbm" },
513 { MUA, 0, "((b*))++m", "bxbbxbbbxm" },
514 { MUA, 0, "((b*))++m", "bxbbxbbbxbbm" },
515 { MUA, 0, "((b*))*+m", "bxbbxbbbxm" },
516 { MUA, 0, "((b*))*+m", "bxbbxbbbxbbm" },
517 { MUA, 0 | F_NOMATCH, "(?>(b{2,4}))(?:(?:(aa|c))++m|(?:(aa|c))+n)", "bbaacaaccaaaacxbbbmbn" },
518 { MUA, 0, "((?:b)++a)+(cd)*+m", "bbababbacdcdnbbababbacdcdm" },
519 { MUA, 0, "((?:(b))++a)+((c)d)*+m", "bbababbacdcdnbbababbacdcdm" },
520 { MUA, 0, "(?:(?:(?:ab)*+k)++(?:n(?:cd)++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
521 { MUA, 0, "(?:((ab)*+(k))++(n(?:c(d))++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
522
523 /* Back references. */
524 { MUA, 0, "(aa|bb)(\\1*)(ll|)(\\3*)bbbbbbc", "aaaaaabbbbbbbbc" },
525 { CMUA, 0, "(aa|bb)(\\1+)(ll|)(\\3+)bbbbbbc", "bBbbBbCbBbbbBbbcbbBbbbBBbbC" },
526 { CMA, 0, "(a{2,4})\\1", "AaAaaAaA" },
527 { MUA, 0, "(aa|bb)(\\1?)aa(\\1?)(ll|)(\\4+)bbc", "aaaaaaaabbaabbbbaabbbbc" },
528 { MUA, 0, "(aa|bb)(\\1{0,5})(ll|)(\\3{0,5})cc", "bbxxbbbbxxaaaaaaaaaaaaaaaacc" },
529 { MUA, 0, "(aa|bb)(\\1{3,5})(ll|)(\\3{3,5})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
530 { MUA, 0, "(aa|bb)(\\1{3,})(ll|)(\\3{3,})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
531 { MUA, 0, "(\\w+)b(\\1+)c", "GabGaGaDbGaDGaDc" },
532 { MUA, 0, "(?:(aa)|b)\\1?b", "bb" },
533 { CMUA, 0, "(aa|bb)(\\1*?)aa(\\1+?)", "bBBbaaAAaaAAaa" },
534 { MUA, 0, "(aa|bb)(\\1*?)(dd|)cc(\\3+?)", "aaaaaccdd" },
535 { CMUA, 0, "(?:(aa|bb)(\\1?\?)cc){2}(\\1?\?)", "aAaABBbbAAaAcCaAcCaA" },
536 { MUA, 0, "(?:(aa|bb)(\\1{3,5}?)){2}(dd|)(\\3{3,5}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
537 { CMA, 0, "(?:(aa|bb)(\\1{3,}?)){2}(dd|)(\\3{3,}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
538 { MUA, 0, "(?:(aa|bb)(\\1{0,3}?)){2}(dd|)(\\3{0,3}?)b(\\1{0,3}?)(\\1{0,3})", "aaaaaaaaaaaaaaabaaaaa" },
539 { MUA, 0, "(a(?:\\1|)a){3}b", "aaaaaaaaaaab" },
540 { MA, 0, "(a?)b(\\1\\1*\\1+\\1?\\1*?\\1+?\\1??\\1*+\\1++\\1?+\\1{4}\\1{3,5}\\1{4,}\\1{0,5}\\1{3,5}?\\1{4,}?\\1{0,5}?\\1{3,5}+\\1{4,}+\\1{0,5}+#){2}d", "bb#b##d" },
541 { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{2,}", ".www." },
542 { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{0,2}", "wwwww." },
543 { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{1,2}ww", "wwww" },
544 { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{1,2}ww", "wwwww" },
545 { PCRE_UCP, 0 | F_PROPERTY, "(\\P{N})\\1{2,}", ".www." },
546 { CMUAP, 0, "(\xf0\x90\x90\x80)\\1", "\xf0\x90\x90\xa8\xf0\x90\x90\xa8" },
547 { MUA | PCRE_DUPNAMES, 0 | F_NOMATCH, "\\k<A>{1,3}(?<A>aa)(?<A>bb)", "aabb" },
548 { MUA | PCRE_DUPNAMES | PCRE_JAVASCRIPT_COMPAT, 0, "\\k<A>{1,3}(?<A>aa)(?<A>bb)", "aabb" },
549 { MUA | PCRE_DUPNAMES | PCRE_JAVASCRIPT_COMPAT, 0, "\\k<A>*(?<A>aa)(?<A>bb)", "aabb" },
550 { MUA | PCRE_DUPNAMES, 0, "(?<A>aa)(?<A>bb)\\k<A>{0,3}aaaaaa", "aabbaaaaaa" },
551 { MUA | PCRE_DUPNAMES, 0, "(?<A>aa)(?<A>bb)\\k<A>{2,5}bb", "aabbaaaabb" },
552 { MUA | PCRE_DUPNAMES, 0, "(?:(?<A>aa)|(?<A>bb))\\k<A>{0,3}m", "aaaaaaaabbbbaabbbbm" },
553 { MUA | PCRE_DUPNAMES, 0 | F_NOMATCH, "\\k<A>{1,3}?(?<A>aa)(?<A>bb)", "aabb" },
554 { MUA | PCRE_DUPNAMES | PCRE_JAVASCRIPT_COMPAT, 0, "\\k<A>{1,3}?(?<A>aa)(?<A>bb)", "aabb" },
555 { MUA | PCRE_DUPNAMES, 0, "\\k<A>*?(?<A>aa)(?<A>bb)", "aabb" },
556 { MUA | PCRE_DUPNAMES, 0, "(?:(?<A>aa)|(?<A>bb))\\k<A>{0,3}?m", "aaaaaabbbbbbaabbbbbbbbbbm" },
557 { MUA | PCRE_DUPNAMES, 0, "(?:(?<A>aa)|(?<A>bb))\\k<A>*?m", "aaaaaabbbbbbaabbbbbbbbbbm" },
558 { MUA | PCRE_DUPNAMES, 0, "(?:(?<A>aa)|(?<A>bb))\\k<A>{2,3}?", "aaaabbbbaaaabbbbbbbbbb" },
559 { CMUA | PCRE_DUPNAMES, 0, "(?:(?<A>AA)|(?<A>BB))\\k<A>{0,3}M", "aaaaaaaabbbbaabbbbm" },
560 { CMUA | PCRE_DUPNAMES, 0, "(?:(?<A>AA)|(?<A>BB))\\k<A>{1,3}M", "aaaaaaaabbbbaabbbbm" },
561 { CMUA | PCRE_DUPNAMES, 0, "(?:(?<A>AA)|(?<A>BB))\\k<A>{0,3}?M", "aaaaaabbbbbbaabbbbbbbbbbm" },
562 { CMUA | PCRE_DUPNAMES, 0, "(?:(?<A>AA)|(?<A>BB))\\k<A>{2,3}?", "aaaabbbbaaaabbbbbbbbbb" },
563
564 /* Assertions. */
565 { MUA, 0, "(?=xx|yy|zz)\\w{4}", "abczzdefg" },
566 { MUA, 0, "(?=((\\w+)b){3}|ab)", "dbbbb ab" },
567 { MUA, 0, "(?!ab|bc|cd)[a-z]{2}", "Xabcdef" },
568 { MUA, 0, "(?<=aaa|aa|a)a", "aaa" },
569 { MUA, 2, "(?<=aaa|aa|a)a", "aaa" },
570 { MA, 0, "(?<=aaa|aa|a)a", "aaa" },
571 { MA, 2, "(?<=aaa|aa|a)a", "aaa" },
572 { MUA, 0, "(\\d{2})(?!\\w+c|(((\\w?)m){2}n)+|\\1)", "x5656" },
573 { MUA, 0, "((?=((\\d{2,6}\\w){2,}))\\w{5,20}K){2,}", "567v09708K12l00M00 567v09708K12l00M00K45K" },
574 { MUA, 0, "(?=(?:(?=\\S+a)\\w*(b)){3})\\w+\\d", "bba bbab nbbkba nbbkba0kl" },
575 { MUA, 0, "(?>a(?>(b+))a(?=(..)))*?k", "acabbcabbaabacabaabbakk" },
576 { MUA, 0, "((?(?=(a))a)+k)", "bbak" },
577 { MUA, 0, "((?(?=a)a)+k)", "bbak" },
578 { MUA, 0 | F_NOMATCH, "(?=(?>(a))m)amk", "a k" },
579 { MUA, 0 | F_NOMATCH, "(?!(?>(a))m)amk", "a k" },
580 { MUA, 0 | F_NOMATCH, "(?>(?=(a))am)amk", "a k" },
581 { MUA, 0, "(?=(?>a|(?=(?>(b+))a|c)[a-c]+)*?m)[a-cm]+k", "aaam bbam baaambaam abbabba baaambaamk" },
582 { MUA, 0, "(?> ?\?\\b(?(?=\\w{1,4}(a))m)\\w{0,8}bc){2,}?", "bca ssbc mabd ssbc mabc" },
583 { MUA, 0, "(?:(?=ab)?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
584 { MUA, 0, "(?:(?=a(b))?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
585 { MUA, 0, "(?:(?=.(.))??\\1.)+m", "aabbbcbacccanaabbbcbacccam" },
586 { MUA, 0, "(?:(?=.)??[a-c])+m", "abacdcbacacdcaccam" },
587 { MUA, 0, "((?!a)?(?!([^a]))?)+$", "acbab" },
588 { MUA, 0, "((?!a)?\?(?!([^a]))?\?)+$", "acbab" },
589 { MUA, 0, "a(?=(?C)\\B)b", "ab" },
590 { MUA, 0, "a(?!(?C)\\B)bb|ab", "abb" },
591 { MUA, 0, "a(?=\\b|(?C)\\B)b", "ab" },
592 { MUA, 0, "a(?!\\b|(?C)\\B)bb|ab", "abb" },
593 { MUA, 0, "c(?(?=(?C)\\B)ab|a)", "cab" },
594 { MUA, 0, "c(?(?!(?C)\\B)ab|a)", "cab" },
595 { MUA, 0, "c(?(?=\\b|(?C)\\B)ab|a)", "cab" },
596 { MUA, 0, "c(?(?!\\b|(?C)\\B)ab|a)", "cab" },
597 { MUA, 0, "a(?=)b", "ab" },
598 { MUA, 0 | F_NOMATCH, "a(?!)b", "ab" },
599
600 /* Not empty, ACCEPT, FAIL */
601 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "a*", "bcx" },
602 { MUA | PCRE_NOTEMPTY, 0, "a*", "bcaad" },
603 { MUA | PCRE_NOTEMPTY, 0, "a*?", "bcaad" },
604 { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a*", "bcaad" },
605 { MUA, 0, "a(*ACCEPT)b", "ab" },
606 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "a*(*ACCEPT)b", "bcx" },
607 { MUA | PCRE_NOTEMPTY, 0, "a*(*ACCEPT)b", "bcaad" },
608 { MUA | PCRE_NOTEMPTY, 0, "a*?(*ACCEPT)b", "bcaad" },
609 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "(?:z|a*(*ACCEPT)b)", "bcx" },
610 { MUA | PCRE_NOTEMPTY, 0, "(?:z|a*(*ACCEPT)b)", "bcaad" },
611 { MUA | PCRE_NOTEMPTY, 0, "(?:z|a*?(*ACCEPT)b)", "bcaad" },
612 { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a*(*ACCEPT)b", "bcx" },
613 { MUA | PCRE_NOTEMPTY_ATSTART, 0 | F_NOMATCH, "a*(*ACCEPT)b", "" },
614 { MUA, 0, "((a(*ACCEPT)b))", "ab" },
615 { MUA, 0, "(a(*FAIL)a|a)", "aaa" },
616 { MUA, 0, "(?=ab(*ACCEPT)b)a", "ab" },
617 { MUA, 0, "(?=(?:x|ab(*ACCEPT)b))", "ab" },
618 { MUA, 0, "(?=(a(b(*ACCEPT)b)))a", "ab" },
619 { MUA | PCRE_NOTEMPTY, 0, "(?=a*(*ACCEPT))c", "c" },
620
621 /* Conditional blocks. */
622 { MUA, 0, "(?(?=(a))a|b)+k", "ababbalbbadabak" },
623 { MUA, 0, "(?(?!(b))a|b)+k", "ababbalbbadabak" },
624 { MUA, 0, "(?(?=a)a|b)+k", "ababbalbbadabak" },
625 { MUA, 0, "(?(?!b)a|b)+k", "ababbalbbadabak" },
626 { MUA, 0, "(?(?=(a))a*|b*)+k", "ababbalbbadabak" },
627 { MUA, 0, "(?(?!(b))a*|b*)+k", "ababbalbbadabak" },
628 { MUA, 0, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
629 { MUA, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
630 { MUA, 0 | F_DIFF, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
631 { MUA, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
632 { MUA, 0, "(?(?=a)a*|b*)+k", "ababbalbbadabak" },
633 { MUA, 0, "(?(?!b)a*|b*)+k", "ababbalbbadabak" },
634 { MUA, 0, "(?(?=a)ab)", "a" },
635 { MUA, 0, "(?(?<!b)c)", "b" },
636 { MUA, 0, "(?(DEFINE)a(b))", "a" },
637 { MUA, 0, "a(?(DEFINE)(?:b|(?:c?)+)*)", "a" },
638 { MUA, 0, "(?(?=.[a-c])[k-l]|[A-D])", "kdB" },
639 { MUA, 0, "(?(?!.{0,4}[cd])(aa|bb)|(cc|dd))+", "aabbccddaa" },
640 { MUA, 0, "(?(?=[^#@]*@)(aaab|aa|aba)|(aba|aab)){3,}", "aaabaaaba#aaabaaaba#aaabaaaba@" },
641 { MUA, 0, "((?=\\w{5})\\w(?(?=\\w*k)\\d|[a-f_])*\\w\\s)+", "mol m10kk m088k _f_a_ mbkkl" },
642 { MUA, 0, "(c)?\?(?(1)a|b)", "cdcaa" },
643 { MUA, 0, "(c)?\?(?(1)a|b)", "cbb" },
644 { MUA, 0 | F_DIFF, "(?(?=(a))(aaaa|a?))+aak", "aaaaab aaaaak" },
645 { MUA, 0, "(?(?=a)(aaaa|a?))+aak", "aaaaab aaaaak" },
646 { MUA, 0, "(?(?!(b))(aaaa|a?))+aak", "aaaaab aaaaak" },
647 { MUA, 0, "(?(?!b)(aaaa|a?))+aak", "aaaaab aaaaak" },
648 { MUA, 0 | F_DIFF, "(?(?=(a))a*)+aak", "aaaaab aaaaak" },
649 { MUA, 0, "(?(?=a)a*)+aak", "aaaaab aaaaak" },
650 { MUA, 0, "(?(?!(b))a*)+aak", "aaaaab aaaaak" },
651 { MUA, 0, "(?(?!b)a*)+aak", "aaaaab aaaaak" },
652 { MUA, 0, "(?(?=(?=(?!(x))a)aa)aaa|(?(?=(?!y)bb)bbb))*k", "abaabbaaabbbaaabbb abaabbaaabbbaaabbbk" },
653 { MUA, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)*l", "bc ddd abccabccl" },
654 { MUA, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)+?dd", "bcabcacdb bdddd" },
655 { MUA, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)+l", "ababccddabdbccd abcccl" },
656 { MUA, 0, "((?:a|aa)(?(1)aaa))x", "aax" },
657 { MUA, 0, "(?(?!)a|b)", "ab" },
658 { MUA, 0, "(?(?!)a)", "ab" },
659 { MUA, 0 | F_NOMATCH, "(?(?!)a|b)", "ac" },
660
661 /* Set start of match. */
662 { MUA, 0, "(?:\\Ka)*aaaab", "aaaaaaaa aaaaaaabb" },
663 { MUA, 0, "(?>\\Ka\\Ka)*aaaab", "aaaaaaaa aaaaaaaaaabb" },
664 { MUA, 0, "a+\\K(?<=\\Gaa)a", "aaaaaa" },
665 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "a\\K(*ACCEPT)b", "aa" },
666 { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a\\K(*ACCEPT)b", "aa" },
667
668 /* First line. */
669 { MUA | PCRE_FIRSTLINE, 0 | F_PROPERTY, "\\p{Any}a", "bb\naaa" },
670 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH | F_PROPERTY, "\\p{Any}a", "bb\r\naaa" },
671 { MUA | PCRE_FIRSTLINE, 0, "(?<=a)", "a" },
672 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "[^a][^b]", "ab" },
673 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "a", "\na" },
674 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "[abc]", "\na" },
675 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "^a", "\na" },
676 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "^(?<=\n)", "\na" },
677 { MUA | PCRE_FIRSTLINE, 0, "\xf0\x90\x90\x80", "\xf0\x90\x90\x80" },
678 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0 | F_NOMATCH, "#", "\xc2\x85#" },
679 { PCRE_MULTILINE | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0 | F_NOMATCH, "#", "\x85#" },
680 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0 | F_NOMATCH, "^#", "\xe2\x80\xa8#" },
681 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0 | F_PROPERTY, "\\p{Any}", "\r\na" },
682 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, ".", "\r" },
683 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, "a", "\ra" },
684 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0 | F_NOMATCH, "ba", "bbb\r\nba" },
685 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0 | F_NOMATCH | F_PROPERTY, "\\p{Any}{4}|a", "\r\na" },
686 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 1, ".", "\r\n" },
687 { PCRE_FIRSTLINE | PCRE_NEWLINE_LF | PCRE_DOTALL, 0 | F_NOMATCH, "ab.", "ab" },
688 { MUA | PCRE_FIRSTLINE, 1 | F_NOMATCH, "^[a-d0-9]", "\nxx\nd" },
689 { PCRE_NEWLINE_ANY | PCRE_FIRSTLINE | PCRE_DOTALL, 0, "....a", "012\n0a" },
690 { MUA | PCRE_FIRSTLINE, 0, "[aC]", "a" },
691
692 /* Recurse. */
693 { MUA, 0, "(a)(?1)", "aa" },
694 { MUA, 0, "((a))(?1)", "aa" },
695 { MUA, 0, "(b|a)(?1)", "aa" },
696 { MUA, 0, "(b|(a))(?1)", "aa" },
697 { MUA, 0 | F_NOMATCH, "((a)(b)(?:a*))(?1)", "aba" },
698 { MUA, 0, "((a)(b)(?:a*))(?1)", "abab" },
699 { MUA, 0, "((a+)c(?2))b(?1)", "aacaabaca" },
700 { MUA, 0, "((?2)b|(a)){2}(?1)", "aabab" },
701 { MUA, 0, "(?1)(a)*+(?2)(b(?1))", "aababa" },
702 { MUA, 0, "(?1)(((a(*ACCEPT)))b)", "axaa" },
703 { MUA, 0, "(?1)(?(DEFINE) (((ac(*ACCEPT)))b) )", "akaac" },
704 { MUA, 0, "(a+)b(?1)b\\1", "abaaabaaaaa" },
705 { MUA, 0 | F_NOMATCH, "(?(DEFINE)(aa|a))(?1)ab", "aab" },
706 { MUA, 0, "(?(DEFINE)(a\\Kb))(?1)+ababc", "abababxabababc" },
707 { MUA, 0, "(a\\Kb)(?1)+ababc", "abababxababababc" },
708 { MUA, 0 | F_NOMATCH, "(a\\Kb)(?1)+ababc", "abababxababababxc" },
709 { MUA, 0, "b|<(?R)*>", "<<b>" },
710 { MUA, 0, "(a\\K){0}(?:(?1)b|ac)", "ac" },
711 { MUA, 0, "(?(DEFINE)(a(?2)|b)(b(?1)|(a)))(?:(?1)|(?2))m", "ababababnababababaam" },
712 { MUA, 0, "(a)((?(R)a|b))(?2)", "aabbabaa" },
713 { MUA, 0, "(a)((?(R2)a|b))(?2)", "aabbabaa" },
714 { MUA, 0, "(a)((?(R1)a|b))(?2)", "ababba" },
715 { MUA, 0, "(?(R0)aa|bb(?R))", "abba aabb bbaa" },
716 { MUA, 0, "((?(R)(?:aaaa|a)|(?:(aaaa)|(a)))+)(?1)$", "aaaaaaaaaa aaaa" },
717 { MUA, 0, "(?P<Name>a(?(R&Name)a|b))(?1)", "aab abb abaa" },
718 { MUA, 0, "((?(R)a|(?1)){3})", "XaaaaaaaaaX" },
719 { MUA, 0, "((?:(?(R)a|(?1))){3})", "XaaaaaaaaaX" },
720 { MUA, 0, "((?(R)a|(?1)){1,3})aaaaaa", "aaaaaaaaXaaaaaaaaa" },
721 { MUA, 0, "((?(R)a|(?1)){1,3}?)M", "aaaM" },
722
723 /* 16 bit specific tests. */
724 { CMA, 0 | F_FORCECONV, "\xc3\xa1", "\xc3\x81\xc3\xa1" },
725 { CMA, 0 | F_FORCECONV, "\xe1\xbd\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" },
726 { CMA, 0 | F_FORCECONV, "[\xc3\xa1]", "\xc3\x81\xc3\xa1" },
727 { CMA, 0 | F_FORCECONV, "[\xe1\xbd\xb8]", "\xe1\xbf\xb8\xe1\xbd\xb8" },
728 { CMA, 0 | F_FORCECONV, "[a-\xed\xb0\x80]", "A" },
729 { CMA, 0 | F_NO8 | F_FORCECONV, "[a-\\x{dc00}]", "B" },
730 { CMA, 0 | F_NO8 | F_NOMATCH | F_FORCECONV, "[b-\\x{dc00}]", "a" },
731 { CMA, 0 | F_NO8 | F_FORCECONV, "\xed\xa0\x80\\x{d800}\xed\xb0\x80\\x{dc00}", "\xed\xa0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80" },
732 { CMA, 0 | F_NO8 | F_FORCECONV, "[\xed\xa0\x80\\x{d800}]{1,2}?[\xed\xb0\x80\\x{dc00}]{1,2}?#", "\xed\xa0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80#" },
733 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80\xed\xb0\x80#]{0,3}(?<=\xed\xb0\x80.)", "\xed\xa0\x80#\xed\xa0\x80##\xed\xb0\x80\xed\xa0\x80" },
734 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xed\xb3\xbf]", "\xed\x9f\xbf\xed\xa0\x83" },
735 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xed\xb3\xbf]", "\xed\xb4\x80\xed\xb3\xb0" },
736 { CMA, 0 | F_NO8 | F_FORCECONV, "[\\x{d800}-\\x{dcff}]", "\xed\x9f\xbf\xed\xa0\x83" },
737 { CMA, 0 | F_NO8 | F_FORCECONV, "[\\x{d800}-\\x{dcff}]", "\xed\xb4\x80\xed\xb3\xb0" },
738 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xef\xbf\xbf]+[\x1-\xed\xb0\x80]+#", "\xed\xa0\x85\xc3\x81\xed\xa0\x85\xef\xbf\xb0\xc2\x85\xed\xa9\x89#" },
739 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80][\xed\xb0\x80]{2,}", "\xed\xa0\x80\xed\xb0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80\xed\xb0\x80" },
740 { MA, 0 | F_FORCECONV, "[^\xed\xb0\x80]{3,}?", "##\xed\xb0\x80#\xed\xb0\x80#\xc3\x89#\xed\xb0\x80" },
741 { MA, 0 | F_NO8 | F_FORCECONV, "[^\\x{dc00}]{3,}?", "##\xed\xb0\x80#\xed\xb0\x80#\xc3\x89#\xed\xb0\x80" },
742 { CMA, 0 | F_FORCECONV, ".\\B.", "\xed\xa0\x80\xed\xb0\x80" },
743 { CMA, 0 | F_FORCECONV, "\\D+(?:\\d+|.)\\S+(?:\\s+|.)\\W+(?:\\w+|.)\xed\xa0\x80\xed\xa0\x80", "\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80" },
744 { CMA, 0 | F_FORCECONV, "\\d*\\s*\\w*\xed\xa0\x80\xed\xa0\x80", "\xed\xa0\x80\xed\xa0\x80" },
745 { CMA, 0 | F_FORCECONV | F_NOMATCH, "\\d*?\\D*?\\s*?\\S*?\\w*?\\W*?##", "\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80#" },
746 { CMA | PCRE_EXTENDED, 0 | F_FORCECONV, "\xed\xa0\x80 \xed\xb0\x80 !", "\xed\xa0\x80\xed\xb0\x80!" },
747 { CMA, 0 | F_FORCECONV, "\xed\xa0\x80+#[^#]+\xed\xa0\x80", "\xed\xa0\x80#a\xed\xa0\x80" },
748 { CMA, 0 | F_FORCECONV, "(\xed\xa0\x80+)#\\1", "\xed\xa0\x80\xed\xa0\x80#\xed\xa0\x80\xed\xa0\x80" },
749 { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0 | F_NO8 | F_FORCECONV, "^-", "a--\xe2\x80\xa8--" },
750 { PCRE_BSR_UNICODE, 0 | F_NO8 | F_FORCECONV, "\\R", "ab\xe2\x80\xa8" },
751 { 0, 0 | F_NO8 | F_FORCECONV, "\\v", "ab\xe2\x80\xa9" },
752 { 0, 0 | F_NO8 | F_FORCECONV, "\\h", "ab\xe1\xa0\x8e" },
753 { 0, 0 | F_NO8 | F_FORCECONV, "\\v+?\\V+?#", "\xe2\x80\xa9\xe2\x80\xa9\xef\xbf\xbf\xef\xbf\xbf#" },
754 { 0, 0 | F_NO8 | F_FORCECONV, "\\h+?\\H+?#", "\xe1\xa0\x8e\xe1\xa0\x8e\xef\xbf\xbf\xef\xbf\xbf#" },
755
756 /* Partial matching. */
757 { MUA | PCRE_PARTIAL_SOFT, 0, "ab", "a" },
758 { MUA | PCRE_PARTIAL_SOFT, 0, "ab|a", "a" },
759 { MUA | PCRE_PARTIAL_HARD, 0, "ab|a", "a" },
760 { MUA | PCRE_PARTIAL_SOFT, 0, "\\b#", "a" },
761 { MUA | PCRE_PARTIAL_SOFT, 0, "(?<=a)b", "a" },
762 { MUA | PCRE_PARTIAL_SOFT, 0, "abc|(?<=xxa)bc", "xxab" },
763 { MUA | PCRE_PARTIAL_SOFT, 0, "a\\B", "a" },
764 { MUA | PCRE_PARTIAL_HARD, 0, "a\\b", "a" },
765
766 /* (*MARK) verb. */
767 { MUA, 0, "a(*MARK:aa)a", "ababaa" },
768 { MUA, 0 | F_NOMATCH, "a(*:aa)a", "abab" },
769 { MUA, 0, "a(*:aa)(b(*:bb)b|bc)", "abc" },
770 { MUA, 0 | F_NOMATCH, "a(*:1)x|b(*:2)y", "abc" },
771 { MUA, 0, "(?>a(*:aa))b|ac", "ac" },
772 { MUA, 0, "(?(DEFINE)(a(*:aa)))(?1)", "a" },
773 { MUA, 0 | F_NOMATCH, "(?(DEFINE)((a)(*:aa)))(?1)b", "aa" },
774 { MUA, 0, "(?(DEFINE)(a(*:aa)))a(?1)b|aac", "aac" },
775 { MUA, 0, "(a(*:aa)){0}(?:b(?1)b|c)+c", "babbab cc" },
776 { MUA, 0, "(a(*:aa)){0}(?:b(?1)b)+", "babba" },
777 { MUA, 0 | F_NOMATCH | F_STUDY, "(a(*:aa)){0}(?:b(?1)b)+", "ba" },
778 { MUA, 0, "(a\\K(*:aa)){0}(?:b(?1)b|c)+c", "babbab cc" },
779 { MUA, 0, "(a\\K(*:aa)){0}(?:b(?1)b)+", "babba" },
780 { MUA, 0 | F_NOMATCH | F_STUDY, "(a\\K(*:aa)){0}(?:b(?1)b)+", "ba" },
781 { MUA, 0 | F_NOMATCH | F_STUDY, "(*:mark)m", "a" },
782
783 /* (*COMMIT) verb. */
784 { MUA, 0 | F_NOMATCH, "a(*COMMIT)b", "ac" },
785 { MUA, 0, "aa(*COMMIT)b", "xaxaab" },
786 { MUA, 0 | F_NOMATCH, "a(*COMMIT)(*:msg)b|ac", "ac" },
787 { MUA, 0 | F_NOMATCH, "(a(*COMMIT)b)++", "abac" },
788 { MUA, 0 | F_NOMATCH, "((a)(*COMMIT)b)++", "abac" },
789 { MUA, 0 | F_NOMATCH, "(?=a(*COMMIT)b)ab|ad", "ad" },
790
791 /* (*PRUNE) verb. */
792 { MUA, 0, "aa\\K(*PRUNE)b", "aaab" },
793 { MUA, 0, "aa(*PRUNE:bb)b|a", "aa" },
794 { MUA, 0, "(a)(a)(*PRUNE)b|(a)", "aa" },
795 { MUA, 0, "(a)(a)(a)(a)(a)(a)(a)(a)(*PRUNE)b|(a)", "aaaaaaaa" },
796 { MUA | PCRE_PARTIAL_SOFT, 0, "a(*PRUNE)a|", "a" },
797 { MUA | PCRE_PARTIAL_SOFT, 0, "a(*PRUNE)a|m", "a" },
798 { MUA, 0 | F_NOMATCH, "(?=a(*PRUNE)b)ab|ad", "ad" },
799 { MUA, 0, "a(*COMMIT)(*PRUNE)d|bc", "abc" },
800 { MUA, 0, "(?=a(*COMMIT)b)a(*PRUNE)c|bc", "abc" },
801 { MUA, 0 | F_NOMATCH, "(*COMMIT)(?=a(*COMMIT)b)a(*PRUNE)c|bc", "abc" },
802 { MUA, 0, "(?=(a)(*COMMIT)b)a(*PRUNE)c|bc", "abc" },
803 { MUA, 0 | F_NOMATCH, "(*COMMIT)(?=(a)(*COMMIT)b)a(*PRUNE)c|bc", "abc" },
804 { MUA, 0, "(a(*COMMIT)b){0}a(?1)(*PRUNE)c|bc", "abc" },
805 { MUA, 0 | F_NOMATCH, "(a(*COMMIT)b){0}a(*COMMIT)(?1)(*PRUNE)c|bc", "abc" },
806 { MUA, 0, "(a(*COMMIT)b)++(*PRUNE)d|c", "ababc" },
807 { MUA, 0 | F_NOMATCH, "(*COMMIT)(a(*COMMIT)b)++(*PRUNE)d|c", "ababc" },
808 { MUA, 0, "((a)(*COMMIT)b)++(*PRUNE)d|c", "ababc" },
809 { MUA, 0 | F_NOMATCH, "(*COMMIT)((a)(*COMMIT)b)++(*PRUNE)d|c", "ababc" },
810 { MUA, 0, "(?>a(*COMMIT)b)*abab(*PRUNE)d|ba", "ababab" },
811 { MUA, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)*abab(*PRUNE)d|ba", "ababab" },
812 { MUA, 0, "(?>a(*COMMIT)b)+abab(*PRUNE)d|ba", "ababab" },
813 { MUA, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)+abab(*PRUNE)d|ba", "ababab" },
814 { MUA, 0, "(?>a(*COMMIT)b)?ab(*PRUNE)d|ba", "aba" },
815 { MUA, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)?ab(*PRUNE)d|ba", "aba" },
816 { MUA, 0, "(?>a(*COMMIT)b)*?n(*PRUNE)d|ba", "abababn" },
817 { MUA, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)*?n(*PRUNE)d|ba", "abababn" },
818 { MUA, 0, "(?>a(*COMMIT)b)+?n(*PRUNE)d|ba", "abababn" },
819 { MUA, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)+?n(*PRUNE)d|ba", "abababn" },
820 { MUA, 0, "(?>a(*COMMIT)b)??n(*PRUNE)d|bn", "abn" },
821 { MUA, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)??n(*PRUNE)d|bn", "abn" },
822
823 /* (*SKIP) verb. */
824 { MUA, 0 | F_NOMATCH, "(?=a(*SKIP)b)ab|ad", "ad" },
825 { MUA, 0, "(\\w+(*SKIP)#)", "abcd,xyz#," },
826 { MUA, 0, "\\w+(*SKIP)#|mm", "abcd,xyz#," },
827 { MUA, 0 | F_NOMATCH, "b+(?<=(*SKIP)#c)|b+", "#bbb" },
828
829 /* (*THEN) verb. */
830 { MUA, 0, "((?:a(*THEN)|aab)(*THEN)c|a+)+m", "aabcaabcaabcaabcnacm" },
831 { MUA, 0 | F_NOMATCH, "((?:a(*THEN)|aab)(*THEN)c|a+)+m", "aabcm" },
832 { MUA, 0, "((?:a(*THEN)|aab)c|a+)+m", "aabcaabcnmaabcaabcm" },
833 { MUA, 0, "((?:a|aab)(*THEN)c|a+)+m", "aam" },
834 { MUA, 0, "((?:a(*COMMIT)|aab)(*THEN)c|a+)+m", "aam" },
835 { MUA, 0, "(?(?=a(*THEN)b)ab|ad)", "ad" },
836 { MUA, 0, "(?(?!a(*THEN)b)ad|add)", "add" },
837 { MUA, 0 | F_NOMATCH, "(?(?=a)a(*THEN)b|ad)", "ad" },
838 { MUA, 0, "(?!(?(?=a)ab|b(*THEN)d))bn|bnn", "bnn" },
839
840 /* Deep recursion. */
841 { MUA, 0, "((((?:(?:(?:\\w)+)?)*|(?>\\w)+?)+|(?>\\w)?\?)*)?\\s", "aaaaa+ " },
842 { MUA, 0, "(?:((?:(?:(?:\\w*?)+)??|(?>\\w)?|\\w*+)*)+)+?\\s", "aa+ " },
843 { MUA, 0, "((a?)+)+b", "aaaaaaaaaaaa b" },
844
845 /* Deep recursion: Stack limit reached. */
846 { MA, 0 | F_NOMATCH, "a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?aaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaa" },
847 { MA, 0 | F_NOMATCH, "(?:a+)+b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
848 { MA, 0 | F_NOMATCH, "(?:a+?)+?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
849 { MA, 0 | F_NOMATCH, "(?:a*)*b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
850 { MA, 0 | F_NOMATCH, "(?:a*?)*?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
851
852 { 0, 0, NULL, NULL }
853 };
854
/* Hands out a private heap copy of PCRE's default character tables so that
   valgrind is able to report invalid reads and writes on them.

   mode == 0: return the cached copy, creating it on first use (NULL when the
              default tables cannot be obtained or the allocation fails).
   mode != 0: release the cached copy and return NULL. */
static const unsigned char *tables(int mode)
{
	/* The table size is not available through pcre_fullinfo, so it is
	   hard-coded here. Acceptable since this is only a test program. */
	enum { TABLES_COPY_SIZE = 1088 };

	static unsigned char *tables_copy;
	unsigned char *default_tables = NULL;
	const char *errorptr;
	int erroroffset;
#if defined SUPPORT_PCRE8
	pcre *regex;
	char null_str[1] = { 0 };
#elif defined SUPPORT_PCRE16
	pcre16 *regex;
	PCRE_UCHAR16 null_str[1] = { 0 };
#elif defined SUPPORT_PCRE32
	pcre32 *regex;
	PCRE_UCHAR32 null_str[1] = { 0 };
#endif

	if (mode != 0) {
		free(tables_copy);
		tables_copy = NULL;
		return NULL;
	}

	if (tables_copy != NULL)
		return tables_copy;

	/* Compile an empty pattern with whichever library is available, just to
	   query PCRE_INFO_DEFAULT_TABLES from it. */
#if defined SUPPORT_PCRE8
	regex = pcre_compile(null_str, 0, &errorptr, &erroroffset, NULL);
	if (regex != NULL) {
		pcre_fullinfo(regex, NULL, PCRE_INFO_DEFAULT_TABLES, &default_tables);
		pcre_free(regex);
	}
#elif defined SUPPORT_PCRE16
	regex = pcre16_compile(null_str, 0, &errorptr, &erroroffset, NULL);
	if (regex != NULL) {
		pcre16_fullinfo(regex, NULL, PCRE_INFO_DEFAULT_TABLES, &default_tables);
		pcre16_free(regex);
	}
#elif defined SUPPORT_PCRE32
	regex = pcre32_compile(null_str, 0, &errorptr, &erroroffset, NULL);
	if (regex != NULL) {
		pcre32_fullinfo(regex, NULL, PCRE_INFO_DEFAULT_TABLES, &default_tables);
		pcre32_free(regex);
	}
#endif

	/* Shouldn't ever happen. */
	if (default_tables == NULL)
		return NULL;

	tables_copy = (unsigned char *)malloc(TABLES_COPY_SIZE);
	if (tables_copy != NULL)
		memcpy(tables_copy, default_tables, TABLES_COPY_SIZE);

	return tables_copy;
}
917
918 #ifdef SUPPORT_PCRE8
callback8(void * arg)919 static pcre_jit_stack* callback8(void *arg)
920 {
921 return (pcre_jit_stack *)arg;
922 }
923 #endif
924
925 #ifdef SUPPORT_PCRE16
callback16(void * arg)926 static pcre16_jit_stack* callback16(void *arg)
927 {
928 return (pcre16_jit_stack *)arg;
929 }
930 #endif
931
932 #ifdef SUPPORT_PCRE32
callback32(void * arg)933 static pcre32_jit_stack* callback32(void *arg)
934 {
935 return (pcre32_jit_stack *)arg;
936 }
937 #endif
938
939 #ifdef SUPPORT_PCRE8
940 static pcre_jit_stack *stack8;
941
getstack8(void)942 static pcre_jit_stack *getstack8(void)
943 {
944 if (!stack8)
945 stack8 = pcre_jit_stack_alloc(1, 1024 * 1024);
946 return stack8;
947 }
948
/* With a non-NULL extra, attaches the shared 8 bit JIT stack to it (through
   callback8, allocating the stack on demand). With NULL, releases the shared
   stack, if there is one, so that the next request allocates a fresh one. */
static void setstack8(pcre_extra *extra)
{
	if (extra != NULL) {
		pcre_assign_jit_stack(extra, callback8, getstack8());
		return;
	}

	if (stack8 != NULL) {
		pcre_jit_stack_free(stack8);
		stack8 = NULL;
	}
}
960 #endif /* SUPPORT_PCRE8 */
961
962 #ifdef SUPPORT_PCRE16
963 static pcre16_jit_stack *stack16;
964
getstack16(void)965 static pcre16_jit_stack *getstack16(void)
966 {
967 if (!stack16)
968 stack16 = pcre16_jit_stack_alloc(1, 1024 * 1024);
969 return stack16;
970 }
971
/* With a non-NULL extra, attaches the shared 16 bit JIT stack to it (through
   callback16, allocating the stack on demand). With NULL, releases the shared
   stack, if there is one, so that the next request allocates a fresh one. */
static void setstack16(pcre16_extra *extra)
{
	if (extra != NULL) {
		pcre16_assign_jit_stack(extra, callback16, getstack16());
		return;
	}

	if (stack16 != NULL) {
		pcre16_jit_stack_free(stack16);
		stack16 = NULL;
	}
}
983 #endif /* SUPPORT_PCRE16 */
984
985 #ifdef SUPPORT_PCRE32
986 static pcre32_jit_stack *stack32;
987
getstack32(void)988 static pcre32_jit_stack *getstack32(void)
989 {
990 if (!stack32)
991 stack32 = pcre32_jit_stack_alloc(1, 1024 * 1024);
992 return stack32;
993 }
994
/* With a non-NULL extra, attaches the shared 32 bit JIT stack to it (through
   callback32, allocating the stack on demand). With NULL, releases the shared
   stack, if there is one, so that the next request allocates a fresh one. */
static void setstack32(pcre32_extra *extra)
{
	if (extra != NULL) {
		pcre32_assign_jit_stack(extra, callback32, getstack32());
		return;
	}

	if (stack32 != NULL) {
		pcre32_jit_stack_free(stack32);
		stack32 = NULL;
	}
}
1006 #endif /* SUPPORT_PCRE32 */
1007
1008 #ifdef SUPPORT_PCRE16
1009
/* Converts a NUL terminated UTF-8 byte string into UTF-16 code units.

   input      - NUL terminated source bytes. Decoding is lenient: continuation
                bytes are not validated, and any byte that does not start a
                complete 2, 3 or 4 byte sequence is copied as a single code
                unit.
   output     - destination; at most max_length code units are stored,
                including the terminating zero.
   offsetmap  - optional (may be NULL). For every decoded character, the entry
                at the index of its first output code unit receives the byte
                offset of that character in input. The entry belonging to the
                second half of a surrogate pair is skipped and left untouched.
                The entry at index <return value> receives the byte offset at
                which the conversion stopped.
   max_length - capacity of output in code units; nothing is written when it
                is not positive.

   Returns the number of code units stored, not counting the terminator. */
static int convert_utf8_to_utf16(const char *input, PCRE_UCHAR16 *output, int *offsetmap, int max_length)
{
	const unsigned char *start = (const unsigned char *)input;
	const unsigned char *iptr = start;
	PCRE_UCHAR16 *optr = output;
	unsigned int c;

	if (max_length <= 0)
		return 0;

	/* One slot is always kept free for the terminating zero. */
	while (*iptr && max_length > 1) {
		if (offsetmap)
			*offsetmap++ = (int)(iptr - start);

		/* Each multi-byte branch first makes sure that none of the bytes it
		   is about to consume is the terminator, so iptr can never step
		   over the end of the string. */
		if ((*iptr & 0xe0) == 0xc0 && iptr[1]) {
			c = ((iptr[0] & 0x1f) << 6) | (iptr[1] & 0x3f);
			iptr += 2;
		} else if ((*iptr & 0xf0) == 0xe0 && iptr[1] && iptr[2]) {
			c = ((iptr[0] & 0x0f) << 12) | ((iptr[1] & 0x3f) << 6) | (iptr[2] & 0x3f);
			iptr += 3;
		} else if ((*iptr & 0xf8) == 0xf0 && iptr[1] && iptr[2] && iptr[3]) {
			c = ((iptr[0] & 0x07) << 18) | ((iptr[1] & 0x3f) << 12) | ((iptr[2] & 0x3f) << 6) | (iptr[3] & 0x3f);
			iptr += 4;
		} else {
			/* ASCII, a stray continuation byte, an invalid lead byte
			   (0xf8 and above) or a truncated sequence: take the byte as
			   is, which guarantees that the input always advances. */
			c = *iptr++;
		}

		if (c < 65536) {
			*optr++ = (PCRE_UCHAR16)c;
			max_length--;
		} else if (max_length <= 2) {
			/* No room left for a surrogate pair plus the terminator. The
			   offsetmap entry written above already marks where the
			   conversion stopped. */
			*optr = '\0';
			return (int)(optr - output);
		} else {
			c -= 0x10000;
			*optr++ = (PCRE_UCHAR16)(0xd800 | ((c >> 10) & 0x3ff));
			*optr++ = (PCRE_UCHAR16)(0xdc00 | (c & 0x3ff));
			max_length -= 2;
			/* Skip the map entry of the low surrogate. */
			if (offsetmap)
				offsetmap++;
		}
	}

	if (offsetmap)
		*offsetmap = (int)(iptr - start);
	*optr = '\0';
	return (int)(optr - output);
}
1057
/* Widens the NUL terminated byte string input into 16 bit code units, one
   unit per byte and without any UTF-8 decoding. At most max_length units are
   stored in output, including the terminating zero; nothing is written when
   max_length is 0. Returns the number of units copied, terminator excluded. */
static int copy_char8_to_char16(const char *input, PCRE_UCHAR16 *output, int max_length)
{
	const unsigned char *bytes = (const unsigned char *)input;
	int count = 0;

	if (max_length == 0)
		return 0;

	/* Always leave one slot for the terminator. */
	while (bytes[count] != 0 && count + 1 < max_length) {
		output[count] = bytes[count];
		count++;
	}

	output[count] = '\0';
	return count;
}
1073
1074 #define REGTEST_MAX_LENGTH16 4096
1075 static PCRE_UCHAR16 regtest_buf16[REGTEST_MAX_LENGTH16];
1076 static int regtest_offsetmap16[REGTEST_MAX_LENGTH16];
1077
1078 #endif /* SUPPORT_PCRE16 */
1079
1080 #ifdef SUPPORT_PCRE32
1081
/* Converts a NUL terminated UTF-8 byte string into UTF-32 code units.

   input      - NUL terminated source bytes. Decoding is lenient: continuation
                bytes are not validated, and any byte that does not start a
                complete 2, 3 or 4 byte sequence is copied as a single code
                unit.
   output     - destination; at most max_length code units are stored,
                including the terminating zero.
   offsetmap  - optional (may be NULL). The entry at the index of every output
                code unit receives the byte offset of the corresponding
                character in input; the entry at index <return value> receives
                the byte offset at which the conversion stopped.
   max_length - capacity of output in code units; nothing is written when it
                is not positive.

   Returns the number of code units stored, not counting the terminator. */
static int convert_utf8_to_utf32(const char *input, PCRE_UCHAR32 *output, int *offsetmap, int max_length)
{
	const unsigned char *start = (const unsigned char *)input;
	const unsigned char *iptr = start;
	PCRE_UCHAR32 *optr = output;
	unsigned int c;

	if (max_length <= 0)
		return 0;

	/* One slot is always kept free for the terminating zero. */
	while (*iptr && max_length > 1) {
		if (offsetmap)
			*offsetmap++ = (int)(iptr - start);

		/* Each multi-byte branch first makes sure that none of the bytes it
		   is about to consume is the terminator, so iptr can never step
		   over the end of the string. */
		if ((*iptr & 0xe0) == 0xc0 && iptr[1]) {
			c = ((iptr[0] & 0x1f) << 6) | (iptr[1] & 0x3f);
			iptr += 2;
		} else if ((*iptr & 0xf0) == 0xe0 && iptr[1] && iptr[2]) {
			c = ((iptr[0] & 0x0f) << 12) | ((iptr[1] & 0x3f) << 6) | (iptr[2] & 0x3f);
			iptr += 3;
		} else if ((*iptr & 0xf8) == 0xf0 && iptr[1] && iptr[2] && iptr[3]) {
			c = ((iptr[0] & 0x07) << 18) | ((iptr[1] & 0x3f) << 12) | ((iptr[2] & 0x3f) << 6) | (iptr[3] & 0x3f);
			iptr += 4;
		} else {
			/* ASCII, a stray continuation byte, an invalid lead byte
			   (0xf8 and above) or a truncated sequence: take the byte as
			   is, which guarantees that the input always advances. */
			c = *iptr++;
		}

		*optr++ = c;
		max_length--;
	}

	if (offsetmap)
		*offsetmap = (int)(iptr - start);
	*optr = 0;
	return (int)(optr - output);
}
1117
/* Widens the NUL terminated byte string input into 32 bit code units, one
   unit per byte and without any UTF-8 decoding. At most max_length units are
   stored in output, including the terminating zero; nothing is written when
   max_length is 0. Returns the number of units copied, terminator excluded. */
static int copy_char8_to_char32(const char *input, PCRE_UCHAR32 *output, int max_length)
{
	const unsigned char *bytes = (const unsigned char *)input;
	int count = 0;

	if (max_length == 0)
		return 0;

	/* Always leave one slot for the terminator. */
	while (bytes[count] != 0 && count + 1 < max_length) {
		output[count] = bytes[count];
		count++;
	}

	output[count] = '\0';
	return count;
}
1133
1134 #define REGTEST_MAX_LENGTH32 4096
1135 static PCRE_UCHAR32 regtest_buf32[REGTEST_MAX_LENGTH32];
1136 static int regtest_offsetmap32[REGTEST_MAX_LENGTH32];
1137
1138 #endif /* SUPPORT_PCRE32 */
1139
/* Returns 1 when every byte of the NUL terminated string input is below 128
   (an empty string qualifies), and 0 as soon as a byte with the top bit set
   is encountered. */
static int check_ascii(const char *input)
{
	const unsigned char *cursor;

	for (cursor = (const unsigned char *)input; *cursor != 0; cursor++) {
		if (*cursor & 0x80)
			return 0;
	}
	return 1;
}
1150
regression_tests(void)1151 static int regression_tests(void)
1152 {
1153 struct regression_test_case *current = regression_test_cases;
1154 const char *error;
1155 char *cpu_info;
1156 int i, err_offs;
1157 int is_successful, is_ascii;
1158 int total = 0;
1159 int successful = 0;
1160 int successful_row = 0;
1161 int counter = 0;
1162 int study_mode;
1163 int utf = 0, ucp = 0;
1164 int disabled_flags = 0;
1165 #ifdef SUPPORT_PCRE8
1166 pcre *re8;
1167 pcre_extra *extra8;
1168 pcre_extra dummy_extra8;
1169 int ovector8_1[32];
1170 int ovector8_2[32];
1171 int return_value8[2];
1172 unsigned char *mark8_1, *mark8_2;
1173 #endif
1174 #ifdef SUPPORT_PCRE16
1175 pcre16 *re16;
1176 pcre16_extra *extra16;
1177 pcre16_extra dummy_extra16;
1178 int ovector16_1[32];
1179 int ovector16_2[32];
1180 int return_value16[2];
1181 PCRE_UCHAR16 *mark16_1, *mark16_2;
1182 int length16;
1183 #endif
1184 #ifdef SUPPORT_PCRE32
1185 pcre32 *re32;
1186 pcre32_extra *extra32;
1187 pcre32_extra dummy_extra32;
1188 int ovector32_1[32];
1189 int ovector32_2[32];
1190 int return_value32[2];
1191 PCRE_UCHAR32 *mark32_1, *mark32_2;
1192 int length32;
1193 #endif
1194
1195 /* This test compares the behaviour of interpreter and JIT. Although disabling
1196 utf or ucp may make tests fail, if the pcre_exec result is the SAME, it is
1197 still considered successful from pcre_jit_test point of view. */
1198
1199 #if defined SUPPORT_PCRE8
1200 pcre_config(PCRE_CONFIG_JITTARGET, &cpu_info);
1201 #elif defined SUPPORT_PCRE16
1202 pcre16_config(PCRE_CONFIG_JITTARGET, &cpu_info);
1203 #elif defined SUPPORT_PCRE32
1204 pcre32_config(PCRE_CONFIG_JITTARGET, &cpu_info);
1205 #endif
1206
1207 printf("Running JIT regression tests\n");
1208 printf(" target CPU of SLJIT compiler: %s\n", cpu_info);
1209
1210 #if defined SUPPORT_PCRE8
1211 pcre_config(PCRE_CONFIG_UTF8, &utf);
1212 pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp);
1213 #elif defined SUPPORT_PCRE16
1214 pcre16_config(PCRE_CONFIG_UTF16, &utf);
1215 pcre16_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp);
1216 #elif defined SUPPORT_PCRE32
1217 pcre32_config(PCRE_CONFIG_UTF32, &utf);
1218 pcre32_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp);
1219 #endif
1220
1221 if (!utf)
1222 disabled_flags |= PCRE_UTF8 | PCRE_UTF16 | PCRE_UTF32;
1223 if (!ucp)
1224 disabled_flags |= PCRE_UCP;
1225 #ifdef SUPPORT_PCRE8
1226 printf(" in 8 bit mode with UTF-8 %s and ucp %s:\n", utf ? "enabled" : "disabled", ucp ? "enabled" : "disabled");
1227 #endif
1228 #ifdef SUPPORT_PCRE16
1229 printf(" in 16 bit mode with UTF-16 %s and ucp %s:\n", utf ? "enabled" : "disabled", ucp ? "enabled" : "disabled");
1230 #endif
1231 #ifdef SUPPORT_PCRE32
1232 printf(" in 32 bit mode with UTF-32 %s and ucp %s:\n", utf ? "enabled" : "disabled", ucp ? "enabled" : "disabled");
1233 #endif
1234
1235 while (current->pattern) {
1236 /* printf("\nPattern: %s :\n", current->pattern); */
1237 total++;
1238 is_ascii = 0;
1239 if (!(current->start_offset & F_PROPERTY))
1240 is_ascii = check_ascii(current->pattern) && check_ascii(current->input);
1241
1242 if (current->flags & PCRE_PARTIAL_SOFT)
1243 study_mode = PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE;
1244 else if (current->flags & PCRE_PARTIAL_HARD)
1245 study_mode = PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE;
1246 else
1247 study_mode = PCRE_STUDY_JIT_COMPILE;
1248 error = NULL;
1249 #ifdef SUPPORT_PCRE8
1250 re8 = NULL;
1251 if (!(current->start_offset & F_NO8))
1252 re8 = pcre_compile(current->pattern,
1253 current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | disabled_flags),
1254 &error, &err_offs, tables(0));
1255
1256 extra8 = NULL;
1257 if (re8) {
1258 error = NULL;
1259 extra8 = pcre_study(re8, study_mode, &error);
1260 if (!extra8) {
1261 printf("\n8 bit: Cannot study pattern: %s\n", current->pattern);
1262 pcre_free(re8);
1263 re8 = NULL;
1264 }
1265 else if (!(extra8->flags & PCRE_EXTRA_EXECUTABLE_JIT)) {
1266 printf("\n8 bit: JIT compiler does not support: %s\n", current->pattern);
1267 pcre_free_study(extra8);
1268 pcre_free(re8);
1269 re8 = NULL;
1270 }
1271 extra8->flags |= PCRE_EXTRA_MARK;
1272 } else if (((utf && ucp) || is_ascii) && !(current->start_offset & F_NO8))
1273 printf("\n8 bit: Cannot compile pattern \"%s\": %s\n", current->pattern, error);
1274 #endif
1275 #ifdef SUPPORT_PCRE16
1276 if ((current->flags & PCRE_UTF16) || (current->start_offset & F_FORCECONV))
1277 convert_utf8_to_utf16(current->pattern, regtest_buf16, NULL, REGTEST_MAX_LENGTH16);
1278 else
1279 copy_char8_to_char16(current->pattern, regtest_buf16, REGTEST_MAX_LENGTH16);
1280
1281 re16 = NULL;
1282 if (!(current->start_offset & F_NO16))
1283 re16 = pcre16_compile(regtest_buf16,
1284 current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | disabled_flags),
1285 &error, &err_offs, tables(0));
1286
1287 extra16 = NULL;
1288 if (re16) {
1289 error = NULL;
1290 extra16 = pcre16_study(re16, study_mode, &error);
1291 if (!extra16) {
1292 printf("\n16 bit: Cannot study pattern: %s\n", current->pattern);
1293 pcre16_free(re16);
1294 re16 = NULL;
1295 }
1296 else if (!(extra16->flags & PCRE_EXTRA_EXECUTABLE_JIT)) {
1297 printf("\n16 bit: JIT compiler does not support: %s\n", current->pattern);
1298 pcre16_free_study(extra16);
1299 pcre16_free(re16);
1300 re16 = NULL;
1301 }
1302 extra16->flags |= PCRE_EXTRA_MARK;
1303 } else if (((utf && ucp) || is_ascii) && !(current->start_offset & F_NO16))
1304 printf("\n16 bit: Cannot compile pattern \"%s\": %s\n", current->pattern, error);
1305 #endif
1306 #ifdef SUPPORT_PCRE32
1307 if ((current->flags & PCRE_UTF32) || (current->start_offset & F_FORCECONV))
1308 convert_utf8_to_utf32(current->pattern, regtest_buf32, NULL, REGTEST_MAX_LENGTH32);
1309 else
1310 copy_char8_to_char32(current->pattern, regtest_buf32, REGTEST_MAX_LENGTH32);
1311
1312 re32 = NULL;
1313 if (!(current->start_offset & F_NO32))
1314 re32 = pcre32_compile(regtest_buf32,
1315 current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | disabled_flags),
1316 &error, &err_offs, tables(0));
1317
1318 extra32 = NULL;
1319 if (re32) {
1320 error = NULL;
1321 extra32 = pcre32_study(re32, study_mode, &error);
1322 if (!extra32) {
1323 printf("\n32 bit: Cannot study pattern: %s\n", current->pattern);
1324 pcre32_free(re32);
1325 re32 = NULL;
1326 }
1327 if (!(extra32->flags & PCRE_EXTRA_EXECUTABLE_JIT)) {
1328 printf("\n32 bit: JIT compiler does not support: %s\n", current->pattern);
1329 pcre32_free_study(extra32);
1330 pcre32_free(re32);
1331 re32 = NULL;
1332 }
1333 extra32->flags |= PCRE_EXTRA_MARK;
1334 } else if (((utf && ucp) || is_ascii) && !(current->start_offset & F_NO32))
1335 printf("\n32 bit: Cannot compile pattern \"%s\": %s\n", current->pattern, error);
1336 #endif
1337
1338 counter++;
1339 if ((counter & 0x3) != 0) {
1340 #ifdef SUPPORT_PCRE8
1341 setstack8(NULL);
1342 #endif
1343 #ifdef SUPPORT_PCRE16
1344 setstack16(NULL);
1345 #endif
1346 #ifdef SUPPORT_PCRE32
1347 setstack32(NULL);
1348 #endif
1349 }
1350
1351 #ifdef SUPPORT_PCRE8
1352 return_value8[0] = -1000;
1353 return_value8[1] = -1000;
1354 for (i = 0; i < 32; ++i)
1355 ovector8_1[i] = -2;
1356 for (i = 0; i < 32; ++i)
1357 ovector8_2[i] = -2;
1358 if (re8) {
1359 mark8_1 = NULL;
1360 mark8_2 = NULL;
1361 extra8->mark = &mark8_1;
1362
1363 if ((counter & 0x1) != 0) {
1364 setstack8(extra8);
1365 return_value8[0] = pcre_exec(re8, extra8, current->input, strlen(current->input), current->start_offset & OFFSET_MASK,
1366 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | PCRE_NO_UTF8_CHECK), ovector8_1, 32);
1367 } else
1368 return_value8[0] = pcre_jit_exec(re8, extra8, current->input, strlen(current->input), current->start_offset & OFFSET_MASK,
1369 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | PCRE_NO_UTF8_CHECK), ovector8_1, 32, getstack8());
1370 memset(&dummy_extra8, 0, sizeof(pcre_extra));
1371 dummy_extra8.flags = PCRE_EXTRA_MARK;
1372 if (current->start_offset & F_STUDY) {
1373 dummy_extra8.flags |= PCRE_EXTRA_STUDY_DATA;
1374 dummy_extra8.study_data = extra8->study_data;
1375 }
1376 dummy_extra8.mark = &mark8_2;
1377 return_value8[1] = pcre_exec(re8, &dummy_extra8, current->input, strlen(current->input), current->start_offset & OFFSET_MASK,
1378 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | PCRE_NO_UTF8_CHECK), ovector8_2, 32);
1379 }
1380 #endif
1381
1382 #ifdef SUPPORT_PCRE16
1383 return_value16[0] = -1000;
1384 return_value16[1] = -1000;
1385 for (i = 0; i < 32; ++i)
1386 ovector16_1[i] = -2;
1387 for (i = 0; i < 32; ++i)
1388 ovector16_2[i] = -2;
1389 if (re16) {
1390 mark16_1 = NULL;
1391 mark16_2 = NULL;
1392 if ((current->flags & PCRE_UTF16) || (current->start_offset & F_FORCECONV))
1393 length16 = convert_utf8_to_utf16(current->input, regtest_buf16, regtest_offsetmap16, REGTEST_MAX_LENGTH16);
1394 else
1395 length16 = copy_char8_to_char16(current->input, regtest_buf16, REGTEST_MAX_LENGTH16);
1396 extra16->mark = &mark16_1;
1397 if ((counter & 0x1) != 0) {
1398 setstack16(extra16);
1399 return_value16[0] = pcre16_exec(re16, extra16, regtest_buf16, length16, current->start_offset & OFFSET_MASK,
1400 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | PCRE_NO_UTF8_CHECK), ovector16_1, 32);
1401 } else
1402 return_value16[0] = pcre16_jit_exec(re16, extra16, regtest_buf16, length16, current->start_offset & OFFSET_MASK,
1403 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | PCRE_NO_UTF8_CHECK), ovector16_1, 32, getstack16());
1404 memset(&dummy_extra16, 0, sizeof(pcre16_extra));
1405 dummy_extra16.flags = PCRE_EXTRA_MARK;
1406 if (current->start_offset & F_STUDY) {
1407 dummy_extra16.flags |= PCRE_EXTRA_STUDY_DATA;
1408 dummy_extra16.study_data = extra16->study_data;
1409 }
1410 dummy_extra16.mark = &mark16_2;
1411 return_value16[1] = pcre16_exec(re16, &dummy_extra16, regtest_buf16, length16, current->start_offset & OFFSET_MASK,
1412 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | PCRE_NO_UTF8_CHECK), ovector16_2, 32);
1413 }
1414 #endif
1415
1416 #ifdef SUPPORT_PCRE32
1417 return_value32[0] = -1000;
1418 return_value32[1] = -1000;
1419 for (i = 0; i < 32; ++i)
1420 ovector32_1[i] = -2;
1421 for (i = 0; i < 32; ++i)
1422 ovector32_2[i] = -2;
1423 if (re32) {
1424 mark32_1 = NULL;
1425 mark32_2 = NULL;
1426 if ((current->flags & PCRE_UTF32) || (current->start_offset & F_FORCECONV))
1427 length32 = convert_utf8_to_utf32(current->input, regtest_buf32, regtest_offsetmap32, REGTEST_MAX_LENGTH32);
1428 else
1429 length32 = copy_char8_to_char32(current->input, regtest_buf32, REGTEST_MAX_LENGTH32);
1430 extra32->mark = &mark32_1;
1431 if ((counter & 0x1) != 0) {
1432 setstack32(extra32);
1433 return_value32[0] = pcre32_exec(re32, extra32, regtest_buf32, length32, current->start_offset & OFFSET_MASK,
1434 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | PCRE_NO_UTF8_CHECK), ovector32_1, 32);
1435 } else
1436 return_value32[0] = pcre32_jit_exec(re32, extra32, regtest_buf32, length32, current->start_offset & OFFSET_MASK,
1437 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | PCRE_NO_UTF8_CHECK), ovector32_1, 32, getstack32());
1438 memset(&dummy_extra32, 0, sizeof(pcre32_extra));
1439 dummy_extra32.flags = PCRE_EXTRA_MARK;
1440 if (current->start_offset & F_STUDY) {
1441 dummy_extra32.flags |= PCRE_EXTRA_STUDY_DATA;
1442 dummy_extra32.study_data = extra32->study_data;
1443 }
1444 dummy_extra32.mark = &mark32_2;
1445 return_value32[1] = pcre32_exec(re32, &dummy_extra32, regtest_buf32, length32, current->start_offset & OFFSET_MASK,
1446 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | PCRE_NO_UTF8_CHECK), ovector32_2, 32);
1447 }
1448 #endif
1449
1450 /* printf("[%d-%d-%d|%d-%d|%d-%d|%d-%d]%s",
1451 return_value8[0], return_value16[0], return_value32[0],
1452 ovector8_1[0], ovector8_1[1],
1453 ovector16_1[0], ovector16_1[1],
1454 ovector32_1[0], ovector32_1[1],
1455 (current->flags & PCRE_CASELESS) ? "C" : ""); */
1456
1457 /* If F_DIFF is set, just run the test, but do not compare the results.
1458 Segfaults can still be captured. */
1459
1460 is_successful = 1;
1461 if (!(current->start_offset & F_DIFF)) {
1462 #if defined SUPPORT_UTF && ((defined(SUPPORT_PCRE8) + defined(SUPPORT_PCRE16) + defined(SUPPORT_PCRE32)) >= 2)
1463 if (!(current->start_offset & F_FORCECONV)) {
1464 int return_value;
1465
1466 /* All results must be the same. */
1467 #ifdef SUPPORT_PCRE8
1468 if ((return_value = return_value8[0]) != return_value8[1]) {
1469 printf("\n8 bit: Return value differs(J8:%d,I8:%d): [%d] '%s' @ '%s'\n",
1470 return_value8[0], return_value8[1], total, current->pattern, current->input);
1471 is_successful = 0;
1472 } else
1473 #endif
1474 #ifdef SUPPORT_PCRE16
1475 if ((return_value = return_value16[0]) != return_value16[1]) {
1476 printf("\n16 bit: Return value differs(J16:%d,I16:%d): [%d] '%s' @ '%s'\n",
1477 return_value16[0], return_value16[1], total, current->pattern, current->input);
1478 is_successful = 0;
1479 } else
1480 #endif
1481 #ifdef SUPPORT_PCRE32
1482 if ((return_value = return_value32[0]) != return_value32[1]) {
1483 printf("\n32 bit: Return value differs(J32:%d,I32:%d): [%d] '%s' @ '%s'\n",
1484 return_value32[0], return_value32[1], total, current->pattern, current->input);
1485 is_successful = 0;
1486 } else
1487 #endif
1488 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
1489 if (return_value8[0] != return_value16[0]) {
1490 printf("\n8 and 16 bit: Return value differs(J8:%d,J16:%d): [%d] '%s' @ '%s'\n",
1491 return_value8[0], return_value16[0],
1492 total, current->pattern, current->input);
1493 is_successful = 0;
1494 } else
1495 #endif
1496 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE32
1497 if (return_value8[0] != return_value32[0]) {
1498 printf("\n8 and 32 bit: Return value differs(J8:%d,J32:%d): [%d] '%s' @ '%s'\n",
1499 return_value8[0], return_value32[0],
1500 total, current->pattern, current->input);
1501 is_successful = 0;
1502 } else
1503 #endif
1504 #if defined SUPPORT_PCRE16 && defined SUPPORT_PCRE32
1505 if (return_value16[0] != return_value32[0]) {
1506 printf("\n16 and 32 bit: Return value differs(J16:%d,J32:%d): [%d] '%s' @ '%s'\n",
1507 return_value16[0], return_value32[0],
1508 total, current->pattern, current->input);
1509 is_successful = 0;
1510 } else
1511 #endif
1512 if (return_value >= 0 || return_value == PCRE_ERROR_PARTIAL) {
1513 if (return_value == PCRE_ERROR_PARTIAL) {
1514 return_value = 2;
1515 } else {
1516 return_value *= 2;
1517 }
1518 #ifdef SUPPORT_PCRE8
1519 return_value8[0] = return_value;
1520 #endif
1521 #ifdef SUPPORT_PCRE16
1522 return_value16[0] = return_value;
1523 #endif
1524 #ifdef SUPPORT_PCRE32
1525 return_value32[0] = return_value;
1526 #endif
1527 /* Transform back the results. */
1528 if (current->flags & PCRE_UTF8) {
1529 #ifdef SUPPORT_PCRE16
1530 for (i = 0; i < return_value; ++i) {
1531 if (ovector16_1[i] >= 0)
1532 ovector16_1[i] = regtest_offsetmap16[ovector16_1[i]];
1533 if (ovector16_2[i] >= 0)
1534 ovector16_2[i] = regtest_offsetmap16[ovector16_2[i]];
1535 }
1536 #endif
1537 #ifdef SUPPORT_PCRE32
1538 for (i = 0; i < return_value; ++i) {
1539 if (ovector32_1[i] >= 0)
1540 ovector32_1[i] = regtest_offsetmap32[ovector32_1[i]];
1541 if (ovector32_2[i] >= 0)
1542 ovector32_2[i] = regtest_offsetmap32[ovector32_2[i]];
1543 }
1544 #endif
1545 }
1546
1547 for (i = 0; i < return_value; ++i) {
1548 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
1549 if (ovector8_1[i] != ovector8_2[i] || ovector8_1[i] != ovector16_1[i] || ovector8_1[i] != ovector16_2[i]) {
1550 printf("\n8 and 16 bit: Ovector[%d] value differs(J8:%d,I8:%d,J16:%d,I16:%d): [%d] '%s' @ '%s' \n",
1551 i, ovector8_1[i], ovector8_2[i], ovector16_1[i], ovector16_2[i],
1552 total, current->pattern, current->input);
1553 is_successful = 0;
1554 }
1555 #endif
1556 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE32
1557 if (ovector8_1[i] != ovector8_2[i] || ovector8_1[i] != ovector32_1[i] || ovector8_1[i] != ovector32_2[i]) {
1558 printf("\n8 and 32 bit: Ovector[%d] value differs(J8:%d,I8:%d,J32:%d,I32:%d): [%d] '%s' @ '%s' \n",
1559 i, ovector8_1[i], ovector8_2[i], ovector32_1[i], ovector32_2[i],
1560 total, current->pattern, current->input);
1561 is_successful = 0;
1562 }
1563 #endif
1564 #if defined SUPPORT_PCRE16 && defined SUPPORT_PCRE32
1565 if (ovector16_1[i] != ovector16_2[i] || ovector16_1[i] != ovector32_1[i] || ovector16_1[i] != ovector32_2[i]) {
1566 printf("\n16 and 32 bit: Ovector[%d] value differs(J16:%d,I16:%d,J32:%d,I32:%d): [%d] '%s' @ '%s' \n",
1567 i, ovector16_1[i], ovector16_2[i], ovector32_1[i], ovector32_2[i],
1568 total, current->pattern, current->input);
1569 is_successful = 0;
1570 }
1571 #endif
1572 }
1573 }
1574 } else
1575 #endif /* more than one of SUPPORT_PCRE8, SUPPORT_PCRE16 and SUPPORT_PCRE32 */
1576 {
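1577 /* Results are only compared within each character width (JIT vs. interpreter, for 8, 16 and 32 bit separately); results of different widths are not compared with each other. */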
1578 #ifdef SUPPORT_PCRE8
1579 if (return_value8[0] != return_value8[1]) {
1580 printf("\n8 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
1581 return_value8[0], return_value8[1], total, current->pattern, current->input);
1582 is_successful = 0;
1583 } else if (return_value8[0] >= 0 || return_value8[0] == PCRE_ERROR_PARTIAL) {
1584 if (return_value8[0] == PCRE_ERROR_PARTIAL)
1585 return_value8[0] = 2;
1586 else
1587 return_value8[0] *= 2;
1588
1589 for (i = 0; i < return_value8[0]; ++i)
1590 if (ovector8_1[i] != ovector8_2[i]) {
1591 printf("\n8 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1592 i, ovector8_1[i], ovector8_2[i], total, current->pattern, current->input);
1593 is_successful = 0;
1594 }
1595 }
1596 #endif
1597
1598 #ifdef SUPPORT_PCRE16
1599 if (return_value16[0] != return_value16[1]) {
1600 printf("\n16 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
1601 return_value16[0], return_value16[1], total, current->pattern, current->input);
1602 is_successful = 0;
1603 } else if (return_value16[0] >= 0 || return_value16[0] == PCRE_ERROR_PARTIAL) {
1604 if (return_value16[0] == PCRE_ERROR_PARTIAL)
1605 return_value16[0] = 2;
1606 else
1607 return_value16[0] *= 2;
1608
1609 for (i = 0; i < return_value16[0]; ++i)
1610 if (ovector16_1[i] != ovector16_2[i]) {
1611 printf("\n16 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1612 i, ovector16_1[i], ovector16_2[i], total, current->pattern, current->input);
1613 is_successful = 0;
1614 }
1615 }
1616 #endif
1617
1618 #ifdef SUPPORT_PCRE32
1619 if (return_value32[0] != return_value32[1]) {
1620 printf("\n32 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
1621 return_value32[0], return_value32[1], total, current->pattern, current->input);
1622 is_successful = 0;
1623 } else if (return_value32[0] >= 0 || return_value32[0] == PCRE_ERROR_PARTIAL) {
1624 if (return_value32[0] == PCRE_ERROR_PARTIAL)
1625 return_value32[0] = 2;
1626 else
1627 return_value32[0] *= 2;
1628
1629 for (i = 0; i < return_value32[0]; ++i)
1630 if (ovector32_1[i] != ovector32_2[i]) {
1631 printf("\n32 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1632 i, ovector32_1[i], ovector32_2[i], total, current->pattern, current->input);
1633 is_successful = 0;
1634 }
1635 }
1636 #endif
1637 }
1638 }
1639
1640 if (is_successful) {
1641 #ifdef SUPPORT_PCRE8
1642 if (!(current->start_offset & F_NO8) && ((utf && ucp) || is_ascii)) {
1643 if (return_value8[0] < 0 && !(current->start_offset & F_NOMATCH)) {
1644 printf("8 bit: Test should match: [%d] '%s' @ '%s'\n",
1645 total, current->pattern, current->input);
1646 is_successful = 0;
1647 }
1648
1649 if (return_value8[0] >= 0 && (current->start_offset & F_NOMATCH)) {
1650 printf("8 bit: Test should not match: [%d] '%s' @ '%s'\n",
1651 total, current->pattern, current->input);
1652 is_successful = 0;
1653 }
1654 }
1655 #endif
1656 #ifdef SUPPORT_PCRE16
1657 if (!(current->start_offset & F_NO16) && ((utf && ucp) || is_ascii)) {
1658 if (return_value16[0] < 0 && !(current->start_offset & F_NOMATCH)) {
1659 printf("16 bit: Test should match: [%d] '%s' @ '%s'\n",
1660 total, current->pattern, current->input);
1661 is_successful = 0;
1662 }
1663
1664 if (return_value16[0] >= 0 && (current->start_offset & F_NOMATCH)) {
1665 printf("16 bit: Test should not match: [%d] '%s' @ '%s'\n",
1666 total, current->pattern, current->input);
1667 is_successful = 0;
1668 }
1669 }
1670 #endif
1671 #ifdef SUPPORT_PCRE32
1672 if (!(current->start_offset & F_NO32) && ((utf && ucp) || is_ascii)) {
1673 if (return_value32[0] < 0 && !(current->start_offset & F_NOMATCH)) {
1674 printf("32 bit: Test should match: [%d] '%s' @ '%s'\n",
1675 total, current->pattern, current->input);
1676 is_successful = 0;
1677 }
1678
1679 if (return_value32[0] >= 0 && (current->start_offset & F_NOMATCH)) {
1680 printf("32 bit: Test should not match: [%d] '%s' @ '%s'\n",
1681 total, current->pattern, current->input);
1682 is_successful = 0;
1683 }
1684 }
1685 #endif
1686 }
1687
1688 if (is_successful) {
1689 #ifdef SUPPORT_PCRE8
1690 if (mark8_1 != mark8_2) {
1691 printf("8 bit: Mark value mismatch: [%d] '%s' @ '%s'\n",
1692 total, current->pattern, current->input);
1693 is_successful = 0;
1694 }
1695 #endif
1696 #ifdef SUPPORT_PCRE16
1697 if (mark16_1 != mark16_2) {
1698 printf("16 bit: Mark value mismatch: [%d] '%s' @ '%s'\n",
1699 total, current->pattern, current->input);
1700 is_successful = 0;
1701 }
1702 #endif
1703 #ifdef SUPPORT_PCRE32
1704 if (mark32_1 != mark32_2) {
1705 printf("32 bit: Mark value mismatch: [%d] '%s' @ '%s'\n",
1706 total, current->pattern, current->input);
1707 is_successful = 0;
1708 }
1709 #endif
1710 }
1711
1712 #ifdef SUPPORT_PCRE8
1713 if (re8) {
1714 pcre_free_study(extra8);
1715 pcre_free(re8);
1716 }
1717 #endif
1718 #ifdef SUPPORT_PCRE16
1719 if (re16) {
1720 pcre16_free_study(extra16);
1721 pcre16_free(re16);
1722 }
1723 #endif
1724 #ifdef SUPPORT_PCRE32
1725 if (re32) {
1726 pcre32_free_study(extra32);
1727 pcre32_free(re32);
1728 }
1729 #endif
1730
1731 if (is_successful) {
1732 successful++;
1733 successful_row++;
1734 printf(".");
1735 if (successful_row >= 60) {
1736 successful_row = 0;
1737 printf("\n");
1738 }
1739 } else
1740 successful_row = 0;
1741
1742 fflush(stdout);
1743 current++;
1744 }
1745 tables(1);
1746 #ifdef SUPPORT_PCRE8
1747 setstack8(NULL);
1748 #endif
1749 #ifdef SUPPORT_PCRE16
1750 setstack16(NULL);
1751 #endif
1752 #ifdef SUPPORT_PCRE32
1753 setstack32(NULL);
1754 #endif
1755
1756 if (total == successful) {
1757 printf("\nAll JIT regression tests are successfully passed.\n");
1758 return 0;
1759 } else {
1760 printf("\nSuccessful test ratio: %d%% (%d failed)\n", successful * 100 / total, total - successful);
1761 return 1;
1762 }
1763 }
1764
1765 /* End of pcre_jit_test.c */
1766