1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3
4from __future__ import print_function, unicode_literals
5import ctypes
6import onigmo
7import sys
8import io
9import locale
10
# Result counters.  xx() updates them through ``global`` statements:
#   nerror -- outcomes reported as "ERROR"
#   nsucc  -- outcomes reported as "OK", "OK(N)" or "OK(E)"
#   nfail  -- outcomes reported as "FAIL", "FAIL(N)" or "FAIL(E)"
nerror = 0
nsucc = 0
nfail = 0

# default encoding
# (replaced by set_encoding(); read by xx(), decode_errmsg() and
# set_default_warning_function())
onig_encoding = onigmo.ONIG_ENCODING_EUC_JP

# special syntactic settings
# syntax_default is a private copy of ONIG_SYNTAX_DEFAULT whose option set has
# the ONIG_OPTION_ASCII_RANGE bit cleared.  It is the default ``syn`` argument
# of xx().
syntax_default = ctypes.byref(onigmo.OnigSyntaxType())
onigmo.onig_copy_syntax(syntax_default, onigmo.ONIG_SYNTAX_DEFAULT)
onigmo.onig_set_syntax_options(syntax_default,
        onigmo.onig_get_syntax_options(syntax_default)
            & ~onigmo.ONIG_OPTION_ASCII_RANGE)
24
25
def get_encoding_name(onigenc):
    """Look up the codec name that belongs to an Onigmo encoding.

    arguments:
      onigenc -- an instance of onigmo.OnigEncoding

    The name stored in the encoding structure is returned as text, except
    that "Windows-31J" is reported as "CP932" and "ASCII-8BIT" as "ASCII".
    """
    aliases = {"Windows-31J": "CP932", "ASCII-8BIT": "ASCII"}
    raw_name = onigenc[0].name.decode()
    return aliases.get(raw_name, raw_name)
38
def is_unicode_encoding(enc):
    """Tell whether `enc` is one of the Unicode encodings.

    arguments:
      enc -- encoding name or an instance of onigmo.OnigEncoding

    Returns True for the UTF-8 / UTF-16 / UTF-32 encoding objects and for
    their names as spelled in the table below, False otherwise.
    """
    unicode_encodings = (
        onigmo.ONIG_ENCODING_UTF32_LE,
        onigmo.ONIG_ENCODING_UTF32_BE,
        onigmo.ONIG_ENCODING_UTF16_LE,
        onigmo.ONIG_ENCODING_UTF16_BE,
        onigmo.ONIG_ENCODING_UTF8,
        'UTF-16LE', 'UTF-16BE', 'UTF-32LE', 'UTF-32BE', 'UTF-8',
    )
    return enc in unicode_encodings
51
def is_ascii_incompatible_encoding(enc):
    """Tell whether `enc` is one of the ASCII-incompatible encodings.

    arguments:
      enc -- encoding name or an instance of onigmo.OnigEncoding

    Returns True for the UTF-16 / UTF-32 encoding objects and for their
    names as spelled in the table below, False otherwise.
    """
    wide_encodings = (
        onigmo.ONIG_ENCODING_UTF32_LE,
        onigmo.ONIG_ENCODING_UTF32_BE,
        onigmo.ONIG_ENCODING_UTF16_LE,
        onigmo.ONIG_ENCODING_UTF16_BE,
        'UTF-16LE', 'UTF-16BE', 'UTF-32LE', 'UTF-32BE',
    )
    return enc in wide_encodings
63
64
class strptr:
    """a helper class to get a pointer to a string

    The constructor takes a bytes object; getptr() returns integer addresses
    inside (or one past the end of) that object's data.
    """
    def __init__(self, s):
        """Wrap the bytes object `s`.

        Raises TypeError when `s` is not bytes.
        """
        if not isinstance(s, bytes):
            raise TypeError("strptr requires bytes, got %s"
                            % type(s).__name__)
        # NOTE(review): presumably kept so the data stays referenced for as
        # long as addresses derived from it are in use -- confirm.
        self._str = s
        try:
            # CPython 2.x/3.x
            self._ptr = ctypes.cast(self._str, ctypes.c_void_p)
        except TypeError:
            # PyPy 1.x
            self._ptr = ctypes.c_void_p(self._str)

    def getptr(self, offset=0):
        """Return the address of the byte at `offset`.

        arguments:
          offset -- byte offset from the start of the string;
                    -1 means the end of the string

        Raises IndexError when `offset` is beyond the end of the string or
        is a negative value other than -1 (which would otherwise yield an
        address in front of the buffer).
        """
        length = len(self._str)
        if offset == -1:    # -1 means the end of the string
            offset = length
        elif offset < 0 or offset > length:
            raise IndexError("offset %d is outside of the %d-byte string"
                             % (offset, length))
        return self._ptr.value + offset
84
def cc_to_cb(s, enc, cc):
    """Translate a character count into a byte count.

    arguments:
      s -- unicode string
      enc -- encoding name
      cc -- char count

    Returns the number of bytes the first `cc` characters of `s` occupy when
    encoded with `enc`.  A count of -1 is passed through unchanged.  Raises
    IndexError when `cc` is larger than the number of characters in `s`.
    """
    if cc == -1:
        return -1
    # UTF-32 uses exactly four bytes per character, so the prefix can be cut
    # out by byte slicing.
    utf32 = s.encode('UTF-32LE')
    prefix_size = 4 * cc
    if prefix_size > len(utf32):
        raise IndexError
    prefix = utf32[:prefix_size].decode('UTF-32LE')
    return len(prefix.encode(enc))
100
def print_result(result, pattern, file=None):
    """Print one result line of the form "<result>: <pattern>".

    arguments:
      result  -- result label, e.g. "OK" or "FAIL"
      pattern -- text describing the tested pattern / target
      file    -- stream to write to; sys.stdout when omitted

    If `pattern` cannot be encoded for `file`, the text of the
    UnicodeEncodeError is written in parentheses instead, to the same
    stream, so that the whole line ends up in one place.
    """
    if not file:
        file = sys.stdout
    print(result + ": ", end='', file=file)
    try:
        print(pattern, file=file)
    except UnicodeEncodeError as e:
        # Finish the line on the stream it was started on.
        print('(' + str(e) + ')', file=file)
109
def decode_errmsg(msg):
    """Turn an error message buffer into text.

    arguments:
      msg -- object whose ``value`` attribute holds the message bytes

    The bytes are decoded with the codec of the current test encoding, or
    with ASCII when that encoding is ASCII-incompatible.  Undecodable bytes
    are replaced rather than raising.
    """
    name = get_encoding_name(onig_encoding)
    codec = 'ASCII' if is_ascii_incompatible_encoding(name) else name
    raw = msg.value
    return raw.decode(codec, 'replace')
115
116
class SearchType:
    """Selects which library entry point xx() uses.

    FORWARD  -- search with (startpos, endpos) as start and range
    BACKWARD -- search with (endpos, startpos) as start and range
    MATCH    -- onig_match() at startpos
    """
    FORWARD, BACKWARD, MATCH = range(3)
121
def xx(pattern, target, s_from, s_to, mem, not_match,
        searchtype=SearchType.FORWARD,
        gpos=-1, startpos=0, endpos=-1,
        syn=syntax_default, opt=onigmo.ONIG_OPTION_DEFAULT,
        err=onigmo.ONIG_NORMAL, execerr=onigmo.ONIG_NORMAL):
    """Compile `pattern`, run it against `target` and record the outcome.

    One result line is printed with print_result() and exactly one of the
    module counters is incremented: nsucc for "OK", "OK(N)" and "OK(E)",
    nfail for "FAIL", "FAIL(N)" and "FAIL(E)", nerror for "ERROR".

    arguments:
      pattern    -- pattern; text is encoded with the current test encoding,
                    bytes are used as they are
      target     -- string to search; text or bytes, handled like `pattern`
      s_from     -- expected start of group `mem`
      s_to       -- expected end of group `mem`
      mem        -- number of the group whose range is checked
      not_match  -- True when the pattern is expected not to match
      searchtype -- one of the SearchType constants
      gpos       -- when >= 0, onig_search_gpos() is used with this position;
                    otherwise onig_search() is used
      startpos   -- start position of the search / match
      endpos     -- other end of the search range; -1 means end of `target`
      syn        -- syntax handed to onig_new()
      opt        -- options handed to onig_new()
      err        -- return code expected from onig_new()
      execerr    -- error code expected from the search / match call

    When `target` is text, all positions are character counts and are
    converted to byte counts with cc_to_cb(); when it is bytes they are used
    unchanged.

    The compiled regex and the region are released in a ``finally`` clause,
    so they are freed on every exit path, including when an exception (for
    example IndexError from an out-of-range position) interrupts the test.
    """
    global nerror
    global nsucc
    global nfail

    encoding = get_encoding_name(onig_encoding)

    reg = onigmo.OnigRegex()
    einfo = onigmo.OnigErrorInfo()
    msg = ctypes.create_string_buffer(onigmo.ONIG_MAX_ERROR_MESSAGE_LEN)

    pattern2 = pattern
    if not isinstance(pattern, bytes):
        pattern2 = pattern.encode(encoding)
    patternp = strptr(pattern2)

    target2 = target
    if not isinstance(target, bytes):
        # Positions were given in characters; convert them to bytes.
        s_from = cc_to_cb(target, encoding, s_from)
        s_to = cc_to_cb(target, encoding, s_to)
        gpos = cc_to_cb(target, encoding, gpos)
        startpos = cc_to_cb(target, encoding, startpos)
        endpos = cc_to_cb(target, encoding, endpos)
        target2 = target.encode(encoding)
    targetp = strptr(target2)

    # cut very long outputs (used for showing message)
    limit = 100
    pattern = pattern2.decode(encoding, 'replace')
    target = target2.decode(encoding, 'replace')
    if len(pattern) > limit:
        pattern = pattern[:limit] + "..."
    if len(target) > limit:
        target = target[:limit] + "..."
    desc = "/%s/ '%s'" % (pattern, target)

    # Compile
    r = onigmo.onig_new(ctypes.byref(reg),
            patternp.getptr(), patternp.getptr(-1),
            opt, onig_encoding, syn, ctypes.byref(einfo))
    if r != 0:
        # Compilation failed: there is nothing to free on this path.
        onigmo.onig_error_code_to_str(msg, r, ctypes.byref(einfo))
        if r == err:
            nsucc += 1
            print_result("OK(E)", "%s (%s)" % (decode_errmsg(msg), desc))
        else:
            nerror += 1
            print_result("ERROR", "%s (%s)" % (decode_errmsg(msg), desc),
                    file=sys.stderr)
        return

    region = None
    try:
        if err != onigmo.ONIG_NORMAL:
            # A compile error was expected but compilation succeeded.
            nfail += 1
            print_result("FAIL(E)", desc)
            return

        # Execute
        region = onigmo.onig_region_new()
        str_beg = targetp.getptr()
        str_end = targetp.getptr(-1)
        if searchtype == SearchType.MATCH:
            r = onigmo.onig_match(reg, str_beg, str_end,
                    targetp.getptr(startpos),
                    region, onigmo.ONIG_OPTION_NONE)
        elif searchtype in (SearchType.FORWARD, SearchType.BACKWARD):
            # A backward search is the same call with start and range
            # exchanged.
            if searchtype == SearchType.FORWARD:
                search_start, search_range = startpos, endpos
            else:
                search_start, search_range = endpos, startpos
            if gpos >= 0:
                r = onigmo.onig_search_gpos(reg, str_beg, str_end,
                        targetp.getptr(gpos),
                        targetp.getptr(search_start),
                        targetp.getptr(search_range),
                        region, onigmo.ONIG_OPTION_NONE)
            else:
                r = onigmo.onig_search(reg, str_beg, str_end,
                        targetp.getptr(search_start),
                        targetp.getptr(search_range),
                        region, onigmo.ONIG_OPTION_NONE)
        else:
            nerror += 1
            print_result("ERROR", "wrong searchtype", file=sys.stderr)
            return

        if r < onigmo.ONIG_MISMATCH:
            # Error
            onigmo.onig_error_code_to_str(msg, r)
            if r == execerr:
                nsucc += 1
                print_result("OK(E)", "%s (%s)" % (decode_errmsg(msg), desc))
            else:
                nerror += 1
                print_result("ERROR", "%s (%s)" % (decode_errmsg(msg), desc),
                        file=sys.stderr)
            return

        if r == onigmo.ONIG_MISMATCH:
            # Not matched
            if not_match:
                nsucc += 1
                print_result("OK(N)", desc)
            else:
                nfail += 1
                print_result("FAIL", desc)
        elif not_match:
            # Matched although no match was expected
            nfail += 1
            print_result("FAIL(N)", desc)
        else:
            # Matched: compare the range of group `mem` with the expectation
            start = region[0].beg[mem]
            end = region[0].end[mem]
            if (start == s_from) and (end == s_to):
                nsucc += 1
                print_result("OK", desc)
            else:
                nfail += 1
                print_result("FAIL", "%s %d-%d : %d-%d" % (desc,
                        s_from, s_to, start, end))
    finally:
        onigmo.onig_free(reg)
        if region is not None:
            onigmo.onig_region_free(region, 1)
261
def x2(pattern, target, s_from, s_to, **kwargs):
    """Expect `pattern` to match `target` with group 0 at s_from..s_to.

    Extra keyword arguments are forwarded to xx().
    """
    xx(pattern, target, s_from, s_to, mem=0, not_match=False, **kwargs)
264
def x3(pattern, target, s_from, s_to, mem, **kwargs):
    """Expect `pattern` to match `target` with group `mem` at s_from..s_to.

    Extra keyword arguments are forwarded to xx().
    """
    xx(pattern, target, s_from, s_to, mem=mem, not_match=False, **kwargs)
267
def n(pattern, target, **kwargs):
    """Expect `pattern` not to match `target`.

    Extra keyword arguments are forwarded to xx().
    """
    xx(pattern, target, s_from=0, s_to=0, mem=0, not_match=True, **kwargs)
270
271
def set_encoding(enc):
    """Set the encoding used for testing.

    arguments:
      enc -- encoding name or an instance of onigmo.OnigEncoding.
             None leaves the current encoding unchanged.

    Encoding names are matched case-insensitively against the table below.
    Raises KeyError when the name is not in the table.
    """
    global onig_encoding

    if enc is None:
        return
    if isinstance(enc, onigmo.OnigEncoding):
        onig_encoding = enc
        return

    encs = {"EUC-JP": onigmo.ONIG_ENCODING_EUC_JP,
            "SJIS": onigmo.ONIG_ENCODING_SJIS,
            "CP932": onigmo.ONIG_ENCODING_CP932,
            "UTF-8": onigmo.ONIG_ENCODING_UTF8,
            "UTF-16LE": onigmo.ONIG_ENCODING_UTF16_LE,
            "UTF-16BE": onigmo.ONIG_ENCODING_UTF16_BE,
            "UTF-32LE": onigmo.ONIG_ENCODING_UTF32_LE,
            "UTF-32BE": onigmo.ONIG_ENCODING_UTF32_BE}
    onig_encoding = encs[enc.upper()]
294
def get_encoding():
    """Get the encoding used for testing.

    Returns the current value of the module-level ``onig_encoding``.
    """
    return onig_encoding
298
def set_output_encoding(enc=None):
    """Set the encoding used for showing the results.

    sys.stdout and sys.stderr are replaced by text writers that are opened
    on the same file descriptors with the given encoding.

    arguments:
      enc -- Codec name (it is passed to io.open() as ``encoding``).
             If omitted, locale.getpreferredencoding() is used.
    """
    if enc is None:
        enc = locale.getpreferredencoding()

    def get_text_writer(fo, **kwargs):
        """Open a new text writer on the file descriptor of `fo`."""
        kw = dict(kwargs)
        kw.setdefault('errors', 'backslashreplace') # use \uXXXX style
        kw.setdefault('closefd', False)

        # Push out anything the old stream still buffers, so that it cannot
        # show up after text written through the new writer.
        fo.flush()

        if sys.version_info[0] < 3:
            # Work around for Python 2.x
            # New line conversion isn't needed here. Done in somewhere else.
            writer = io.open(fo.fileno(), mode='w', newline='', **kw)
            write = writer.write    # save the original write() function
            native_enc = locale.getpreferredencoding()
            def convwrite(s):
                if isinstance(s, bytes):
                    write(s.decode(native_enc))    # convert to unistr
                else:
                    write(s)
                try:
                    writer.flush()  # needed on Windows
                except IOError:
                    pass
            writer.write = convwrite
        else:
            writer = io.open(fo.fileno(), mode='w', **kw)
        return writer

    sys.stdout = get_text_writer(sys.stdout, encoding=enc)
    sys.stderr = get_text_writer(sys.stderr, encoding=enc)
336
337
def set_default_warning_function():
    """Install a callback that prints library warnings.

    The callback prints "warning: " followed by the message, decoded with
    the codec of the current test encoding (or ASCII when that encoding is
    ASCII-incompatible).  It is registered both as the warning function and
    as the verbose warning function.
    """
    global _warn_func_ptr

    enc_name = get_encoding_name(onig_encoding)
    warning_enc = 'ascii' if is_ascii_incompatible_encoding(enc_name) else enc_name

    def warn_func(message):
        text = message.decode(warning_enc, 'replace')
        print("warning: " + text)

    callback = onigmo.OnigWarnFunc(warn_func)
    # NOTE(review): stored in a module global, presumably so the callback
    # object stays referenced while the library can still call it -- confirm.
    _warn_func_ptr = callback
    onigmo.onig_set_warn_func(callback)
    onigmo.onig_set_verb_warn_func(callback)
350
351
def init(enc, outenc=None):
    """Setup test target encoding, output encoding and warning function.

    arguments:
      enc    -- Encoding used for testing.
      outenc -- Encoding used for showing messages.

    A KeyError raised by set_encoding() for an unknown encoding name is not
    caught here.
    """
    set_encoding(enc)
    set_output_encoding(outenc)
    # Must run after set_encoding(): the warning function picks its codec
    # from the test encoding that is current at this point.
    set_default_warning_function()
362
363
364def main():
365    # encoding of the test target
366    enc = None
367    if len(sys.argv) > 1:
368        enc = sys.argv[1]
369
370    # encoding of stdout/stderr
371    outenc = None
372    if len(sys.argv) > 2:
373        outenc = sys.argv[2]
374
375    # Initialization
376    try:
377        init(enc, outenc)
378    except KeyError:
379        print("test target encoding error")
380        print("Usage: python testpy.py [test target encoding] [output encoding]")
381        sys.exit()
382
383    print(onigmo.onig_copyright())
384
385    # Copied from onig-5.9.2/testc.c
386    #   '?\?' which is used to avoid trigraph is replaced by '??'.
387    #   Match positions are specified by unit of character instead of byte.
388
389    x2("", "", 0, 0);
390    x2("^", "", 0, 0);
391    x2("$", "", 0, 0);
392    x2("\\G", "", 0, 0);
393    x2("\\A", "", 0, 0);
394    x2("\\Z", "", 0, 0);
395    x2("\\z", "", 0, 0);
396    x2("^$", "", 0, 0);
397    x2("\\ca", "\001", 0, 1);
398    x2("\\C-b", "\002", 0, 1);
399    x2("\\c\\\\", "\034", 0, 1);
400    x2("q[\\c\\\\]", "q\034", 0, 2);
401    x2("", "a", 0, 0);
402    x2("a", "a", 0, 1);
403    if onig_encoding == onigmo.ONIG_ENCODING_UTF16_LE:
404        x2("\\x61\\x00", "a", 0, 1);
405    elif onig_encoding == onigmo.ONIG_ENCODING_UTF16_BE:
406        x2("\\x00\\x61", "a", 0, 1);
407    elif onig_encoding == onigmo.ONIG_ENCODING_UTF32_LE:
408        x2("\\x61\\x00\\x00\\x00", "a", 0, 1);
409    elif onig_encoding == onigmo.ONIG_ENCODING_UTF32_BE:
410        x2("\\x00\\x00\\x00\\x61", "a", 0, 1);
411    else:
412        x2("\\x61", "a", 0, 1);
413    x2("aa", "aa", 0, 2);
414    x2("aaa", "aaa", 0, 3);
415    x2("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", 0, 35);
416    x2("ab", "ab", 0, 2);
417    x2("b", "ab", 1, 2);
418    x2("bc", "abc", 1, 3);
419    x2("(?i:#RET#)", "#INS##RET#", 5, 10);
420    if onig_encoding == onigmo.ONIG_ENCODING_UTF16_LE:
421        x2("\\17\\00", "\017", 0, 1);
422        x2("\\x1f\\x00", "\x1f", 0, 1);
423    elif onig_encoding == onigmo.ONIG_ENCODING_UTF16_BE:
424        x2("\\00\\17", "\017", 0, 1);
425        x2("\\x00\\x1f", "\x1f", 0, 1);
426    elif onig_encoding == onigmo.ONIG_ENCODING_UTF32_LE:
427        x2("\\17\\00\\00\\00", "\017", 0, 1);
428        x2("\\x1f\\x00\\x00\\x00", "\x1f", 0, 1);
429    elif onig_encoding == onigmo.ONIG_ENCODING_UTF32_BE:
430        x2("\\00\\00\\00\\17", "\017", 0, 1);
431        x2("\\x00\\x00\\x00\\x1f", "\x1f", 0, 1);
432    else:
433        x2("\\17", "\017", 0, 1);
434        x2("\\x1f", "\x1f", 0, 1);
435    x2("a(?#....\\\\JJJJ)b", "ab", 0, 2);
436    x2("(?x)  G (o O(?-x)oO) g L", "GoOoOgLe", 0, 7);
437    x2(".", "a", 0, 1);
438    n(".", "");
439    x2("..", "ab", 0, 2);
440    x2("\\w", "e", 0, 1);
441    n("\\W", "e");
442    x2("\\s", " ", 0, 1);
443    x2("\\S", "b", 0, 1);
444    x2("\\d", "4", 0, 1);
445    n("\\D", "4");
446    x2("\\b", "z ", 0, 0);
447    x2("\\b", " z", 1, 1);
448    x2("\\B", "zz ", 1, 1);
449    x2("\\B", "z ", 2, 2);
450    x2("\\B", " z", 0, 0);
451    x2("[ab]", "b", 0, 1);
452    n("[ab]", "c");
453    x2("[a-z]", "t", 0, 1);
454    n("[^a]", "a");
455    x2("[^a]", "\n", 0, 1);
456    x2("[]]", "]", 0, 1);
457    n("[^]]", "]");
458    x2("[\\^]+", "0^^1", 1, 3);
459    x2("[b-]", "b", 0, 1);
460    x2("[b-]", "-", 0, 1);
461    x2("[\\w]", "z", 0, 1);
462    n("[\\w]", " ");
463    x2("[\\W]", "b$", 1, 2);
464    x2("[\\d]", "5", 0, 1);
465    n("[\\d]", "e");
466    x2("[\\D]", "t", 0, 1);
467    n("[\\D]", "3");
468    x2("[\\s]", " ", 0, 1);
469    n("[\\s]", "a");
470    x2("[\\S]", "b", 0, 1);
471    n("[\\S]", " ");
472    x2("[\\w\\d]", "2", 0, 1);
473    n("[\\w\\d]", " ");
474    x2("[[:upper:]]", "B", 0, 1);
475    x2("[*[:xdigit:]+]", "+", 0, 1);
476    x2("[*[:xdigit:]+]", "GHIKK-9+*", 6, 7);
477    x2("[*[:xdigit:]+]", "-@^+", 3, 4);
478    n("[[:upper]]", "A");
479    x2("[[:upper]]", ":", 0, 1);
480    if onig_encoding == onigmo.ONIG_ENCODING_UTF16_LE:
481        x2("[\\044\\000-\\047\\000]", "\046", 0, 1);
482        x2("[\\x5a\\x00-\\x5c\\x00]", "\x5b", 0, 1);
483        x2("[\\x6A\\x00-\\x6D\\x00]", "\x6c", 0, 1);
484        n("[\\x6A\\x00-\\x6D\\x00]", "\x6E");
485    elif onig_encoding == onigmo.ONIG_ENCODING_UTF16_BE:
486        x2("[\\000\\044-\\000\\047]", "\046", 0, 1);
487        x2("[\\x00\\x5a-\\x00\\x5c]", "\x5b", 0, 1);
488        x2("[\\x00\\x6A-\\x00\\x6D]", "\x6c", 0, 1);
489        n("[\\x00\\x6A-\\x00\\x6D]", "\x6E");
490    elif onig_encoding == onigmo.ONIG_ENCODING_UTF32_LE:
491        x2("[\\044\\000\\000\\000-\\047\\000\\000\\000]", "\046", 0, 1);
492        x2("[\\x5a\\x00\\x00\\x00-\\x5c\\x00\\x00\\x00]", "\x5b", 0, 1);
493        x2("[\\x6A\\x00\\x00\\x00-\\x6D\\x00\\x00\\x00]", "\x6c", 0, 1);
494        n("[\\x6A\\x00\\x00\\x00-\\x6D\\x00\\x00\\x00]", "\x6E");
495    elif onig_encoding == onigmo.ONIG_ENCODING_UTF32_BE:
496        x2("[\\000\\000\\000\\044-\\000\\000\\000\\047]", "\046", 0, 1);
497        x2("[\\x00\\x00\\x00\\x5a-\\x00\\x00\\x00\\x5c]", "\x5b", 0, 1);
498        x2("[\\x00\\x00\\x00\\x6A-\\x00\\x00\\x00\\x6D]", "\x6c", 0, 1);
499        n("[\\x00\\x00\\x00\\x6A-\\x00\\x00\\x00\\x6D]", "\x6E");
500    else:
501        x2("[\\044-\\047]", "\046", 0, 1);
502        x2("[\\x5a-\\x5c]", "\x5b", 0, 1);
503        x2("[\\x6A-\\x6D]", "\x6c", 0, 1);
504        n("[\\x6A-\\x6D]", "\x6E");
505    n("^[0-9A-F]+ 0+ UNDEF ", "75F 00000000 SECT14A notype ()    External    | _rb_apply");
506    x2("[\\[]", "[", 0, 1);
507    x2("[\\]]", "]", 0, 1);
508    x2("[&]", "&", 0, 1);
509    x2("[[ab]]", "b", 0, 1);
510    x2("[[ab]c]", "c", 0, 1);
511    n("[[^a]]", "a");
512    n("[^[a]]", "a");
513    x2("[[ab]&&bc]", "b", 0, 1);
514    n("[[ab]&&bc]", "a");
515    n("[[ab]&&bc]", "c");
516    x2("[a-z&&b-y&&c-x]", "w", 0, 1);
517    n("[^a-z&&b-y&&c-x]", "w");
518    x2("[[^a&&a]&&a-z]", "b", 0, 1);
519    n("[[^a&&a]&&a-z]", "a");
520    x2("[[^a-z&&bcdef]&&[^c-g]]", "h", 0, 1);
521    n("[[^a-z&&bcdef]&&[^c-g]]", "c");
522    x2("[^[^abc]&&[^cde]]", "c", 0, 1);
523    x2("[^[^abc]&&[^cde]]", "e", 0, 1);
524    n("[^[^abc]&&[^cde]]", "f");
525    x2("[a-&&-a]", "-", 0, 1);
526    n("[a\\-&&\\-a]", "&");
527    n("\\wabc", " abc");
528    x2("a\\Wbc", "a bc", 0, 4);
529    x2("a.b.c", "aabbc", 0, 5);
530    x2(".\\wb\\W..c", "abb bcc", 0, 7);
531    x2("\\s\\wzzz", " zzzz", 0, 5);
532    x2("aa.b", "aabb", 0, 4);
533    n(".a", "ab");
534    x2(".a", "aa", 0, 2);
535    x2("^a", "a", 0, 1);
536    x2("^a$", "a", 0, 1);
537    x2("^\\w$", "a", 0, 1);
538    n("^\\w$", " ");
539    x2("^\\wab$", "zab", 0, 3);
540    x2("^\\wabcdef$", "zabcdef", 0, 7);
541    x2("^\\w...def$", "zabcdef", 0, 7);
542    x2("\\w\\w\\s\\Waaa\\d", "aa  aaa4", 0, 8);
543    x2("\\A\\Z", "", 0, 0);
544    x2("\\Axyz", "xyz", 0, 3);
545    x2("xyz\\Z", "xyz", 0, 3);
546    x2("xyz\\z", "xyz", 0, 3);
547    x2("a\\Z", "a", 0, 1);
548    x2("\\Gaz", "az", 0, 2);
549    n("\\Gz", "bza");
550    n("az\\G", "az");
551    n("az\\A", "az");
552    n("a\\Az", "az");
553    x2("\\^\\$", "^$", 0, 2);
554    x2("^x?y", "xy", 0, 2);
555    x2("^(x?y)", "xy", 0, 2);
556    x2("\\w", "_", 0, 1);
557    n("\\W", "_");
558    x2("(?=z)z", "z", 0, 1);
559    n("(?=z).", "a");
560    x2("(?!z)a", "a", 0, 1);
561    n("(?!z)a", "z");
562    x2("(?i:a)", "a", 0, 1);
563    x2("(?i:a)", "A", 0, 1);
564    x2("(?i:A)", "a", 0, 1);
565    n("(?i:A)", "b");
566    x2("(?i:[A-Z])", "a", 0, 1);
567    x2("(?i:[f-m])", "H", 0, 1);
568    x2("(?i:[f-m])", "h", 0, 1);
569    n("(?i:[f-m])", "e");
570    x2("(?i:[A-c])", "D", 0, 1);
571    n("(?i:[^a-z])", "A");
572    n("(?i:[^a-z])", "a");
573    x2("(?i:[!-k])", "Z", 0, 1);
574    x2("(?i:[!-k])", "7", 0, 1);
575    x2("(?i:[T-}])", "b", 0, 1);
576    x2("(?i:[T-}])", "{", 0, 1);
577    x2("(?i:\\?a)", "?A", 0, 2);
578    x2("(?i:\\*A)", "*a", 0, 2);
579    n(".", "\n");
580    x2("(?m:.)", "\n", 0, 1);
581    x2("(?m:a.)", "a\n", 0, 2);
582    x2("(?m:.b)", "a\nb", 1, 3);
583    x2(".*abc", "dddabdd\nddabc", 8, 13);
584    x2("(?m:.*abc)", "dddabddabc", 0, 10);
585    n("(?i)(?-i)a", "A");
586    n("(?i)(?-i:a)", "A");
587    x2("a?", "", 0, 0);
588    x2("a?", "b", 0, 0);
589    x2("a?", "a", 0, 1);
590    x2("a*", "", 0, 0);
591    x2("a*", "a", 0, 1);
592    x2("a*", "aaa", 0, 3);
593    x2("a*", "baaaa", 0, 0);
594    n("a+", "");
595    x2("a+", "a", 0, 1);
596    x2("a+", "aaaa", 0, 4);
597    x2("a+", "aabbb", 0, 2);
598    x2("a+", "baaaa", 1, 5);
599    x2(".?", "", 0, 0);
600    x2(".?", "f", 0, 1);
601    x2(".?", "\n", 0, 0);
602    x2(".*", "", 0, 0);
603    x2(".*", "abcde", 0, 5);
604    x2(".+", "z", 0, 1);
605    x2(".+", "zdswer\n", 0, 6);
606    x2("(.*)a\\1f", "babfbac", 0, 4);
607    x2("(.*)a\\1f", "bacbabf", 3, 7);
608    x2("((.*)a\\2f)", "bacbabf", 3, 7);
609    x2("(.*)a\\1f", "baczzzzzz\nbazz\nzzzzbabf", 19, 23);
610    x2("a|b", "a", 0, 1);
611    x2("a|b", "b", 0, 1);
612    x2("|a", "a", 0, 0);
613    x2("(|a)", "a", 0, 0);
614    x2("ab|bc", "ab", 0, 2);
615    x2("ab|bc", "bc", 0, 2);
616    x2("z(?:ab|bc)", "zbc", 0, 3);
617    x2("a(?:ab|bc)c", "aabc", 0, 4);
618    x2("ab|(?:ac|az)", "az", 0, 2);
619    x2("a|b|c", "dc", 1, 2);
620    x2("a|b|cd|efg|h|ijk|lmn|o|pq|rstuvwx|yz", "pqr", 0, 2);
621    n("a|b|cd|efg|h|ijk|lmn|o|pq|rstuvwx|yz", "mn");
622    x2("a|^z", "ba", 1, 2);
623    x2("a|^z", "za", 0, 1);
624    x2("a|\\Gz", "bza", 2, 3);
625    x2("a|\\Gz", "za", 0, 1);
626    x2("a|\\Az", "bza", 2, 3);
627    x2("a|\\Az", "za", 0, 1);
628    x2("a|b\\Z", "ba", 1, 2);
629    x2("a|b\\Z", "b", 0, 1);
630    x2("a|b\\z", "ba", 1, 2);
631    x2("a|b\\z", "b", 0, 1);
632    x2("\\w|\\s", " ", 0, 1);
633    n("\\w|\\w", " ");
634    x2("\\w|%", "%", 0, 1);
635    x2("\\w|[&$]", "&", 0, 1);
636    x2("[b-d]|[^e-z]", "a", 0, 1);
637    x2("(?:a|[c-f])|bz", "dz", 0, 1);
638    x2("(?:a|[c-f])|bz", "bz", 0, 2);
639    x2("abc|(?=zz)..f", "zzf", 0, 3);
640    x2("abc|(?!zz)..f", "abf", 0, 3);
641    x2("(?=za)..a|(?=zz)..a", "zza", 0, 3);
642    n("(?>a|abd)c", "abdc");
643    x2("(?>abd|a)c", "abdc", 0, 4);
644    x2("a?|b", "a", 0, 1);
645    x2("a?|b", "b", 0, 0);
646    x2("a?|b", "", 0, 0);
647    x2("a*|b", "aa", 0, 2);
648    x2("a*|b*", "ba", 0, 0);
649    x2("a*|b*", "ab", 0, 1);
650    x2("a+|b*", "", 0, 0);
651    x2("a+|b*", "bbb", 0, 3);
652    x2("a+|b*", "abbb", 0, 1);
653    n("a+|b+", "");
654    x2("(a|b)?", "b", 0, 1);
655    x2("(a|b)*", "ba", 0, 2);
656    x2("(a|b)+", "bab", 0, 3);
657    x2("(ab|ca)+", "caabbc", 0, 4);
658    x2("(ab|ca)+", "aabca", 1, 5);
659    x2("(ab|ca)+", "abzca", 0, 2);
660    x2("(a|bab)+", "ababa", 0, 5);
661    x2("(a|bab)+", "ba", 1, 2);
662    x2("(a|bab)+", "baaaba", 1, 4);
663    x2("(?:a|b)(?:a|b)", "ab", 0, 2);
664    x2("(?:a*|b*)(?:a*|b*)", "aaabbb", 0, 3);
665    x2("(?:a*|b*)(?:a+|b+)", "aaabbb", 0, 6);
666    x2("(?:a+|b+){2}", "aaabbb", 0, 6);
667    x2("h{0,}", "hhhh", 0, 4);
668    x2("(?:a+|b+){1,2}", "aaabbb", 0, 6);
669    n("ax{2}*a", "0axxxa1");
670    n("a.{0,2}a", "0aXXXa0");
671    n("a.{0,2}?a", "0aXXXa0");
672    n("a.{0,2}?a", "0aXXXXa0");
673    x2("^a{2,}?a$", "aaa", 0, 3);
674    x2("^[a-z]{2,}?$", "aaa", 0, 3);
675    x2("(?:a+|\\Ab*)cc", "cc", 0, 2);
676    n("(?:a+|\\Ab*)cc", "abcc");
677    x2("(?:^a+|b+)*c", "aabbbabc", 6, 8);
678    x2("(?:^a+|b+)*c", "aabbbbc", 0, 7);
679    x2("a|(?i)c", "C", 0, 1);
680    x2("(?i)c|a", "C", 0, 1);
681    x2("(?i)c|a", "A", 0, 1);
682    x2("(?i:c)|a", "C", 0, 1);
683    n("(?i:c)|a", "A");
684    x2("[abc]?", "abc", 0, 1);
685    x2("[abc]*", "abc", 0, 3);
686    x2("[^abc]*", "abc", 0, 0);
687    n("[^abc]+", "abc");
688    x2("a??", "aaa", 0, 0);
689    x2("ba??b", "bab", 0, 3);
690    x2("a*?", "aaa", 0, 0);
691    x2("ba*?", "baa", 0, 1);
692    x2("ba*?b", "baab", 0, 4);
693    x2("a+?", "aaa", 0, 1);
694    x2("ba+?", "baa", 0, 2);
695    x2("ba+?b", "baab", 0, 4);
696    x2("(?:a?)??", "a", 0, 0);
697    x2("(?:a??)?", "a", 0, 0);
698    x2("(?:a?)+?", "aaa", 0, 1);
699    x2("(?:a+)??", "aaa", 0, 0);
700    x2("(?:a+)??b", "aaab", 0, 4);
701    x2("(?:ab)?{2}", "", 0, 0);
702    x2("(?:ab)?{2}", "ababa", 0, 4);
703    x2("(?:ab)*{0}", "ababa", 0, 0);
704    x2("(?:ab){3,}", "abababab", 0, 8);
705    n("(?:ab){3,}", "abab");
706    x2("(?:ab){2,4}", "ababab", 0, 6);
707    x2("(?:ab){2,4}", "ababababab", 0, 8);
708    x2("(?:ab){2,4}?", "ababababab", 0, 4);
709    x2("(?:ab){,}", "ab{,}", 0, 5);
710    x2("(?:abc)+?{2}", "abcabcabc", 0, 6);
711    x2("(?:X*)(?i:xa)", "XXXa", 0, 4);
712    x2("(d+)([^abc]z)", "dddz", 0, 4);
713    x2("([^abc]*)([^abc]z)", "dddz", 0, 4);
714    x2("(\\w+)(\\wz)", "dddz", 0, 4);
715    x3("(a)", "a", 0, 1, 1);
716    x3("(ab)", "ab", 0, 2, 1);
717    x2("((ab))", "ab", 0, 2);
718    x3("((ab))", "ab", 0, 2, 1);
719    x3("((ab))", "ab", 0, 2, 2);
720    x3("((((((((((((((((((((ab))))))))))))))))))))", "ab", 0, 2, 20);
721    x3("(ab)(cd)", "abcd", 0, 2, 1);
722    x3("(ab)(cd)", "abcd", 2, 4, 2);
723    x3("()(a)bc(def)ghijk", "abcdefghijk", 3, 6, 3);
724    x3("(()(a)bc(def)ghijk)", "abcdefghijk", 3, 6, 4);
725    x2("(^a)", "a", 0, 1);
726    x3("(a)|(a)", "ba", 1, 2, 1);
727    x3("(^a)|(a)", "ba", 1, 2, 2);
728    x3("(a?)", "aaa", 0, 1, 1);
729    x3("(a*)", "aaa", 0, 3, 1);
730    x3("(a*)", "", 0, 0, 1);
731    x3("(a+)", "aaaaaaa", 0, 7, 1);
732    x3("(a+|b*)", "bbbaa", 0, 3, 1);
733    x3("(a+|b?)", "bbbaa", 0, 1, 1);
734    x3("(abc)?", "abc", 0, 3, 1);
735    x3("(abc)*", "abc", 0, 3, 1);
736    x3("(abc)+", "abc", 0, 3, 1);
737    x3("(xyz|abc)+", "abc", 0, 3, 1);
738    x3("([xyz][abc]|abc)+", "abc", 0, 3, 1);
739    x3("((?i:abc))", "AbC", 0, 3, 1);
740    x2("(abc)(?i:\\1)", "abcABC", 0, 6);
741    x3("((?m:a.c))", "a\nc", 0, 3, 1);
742    x3("((?=az)a)", "azb", 0, 1, 1);
743    x3("abc|(.abd)", "zabd", 0, 4, 1);
744    x2("(?:abc)|(ABC)", "abc", 0, 3);
745    x3("(?i:(abc))|(zzz)", "ABC", 0, 3, 1);
746    x3("a*(.)", "aaaaz", 4, 5, 1);
747    x3("a*?(.)", "aaaaz", 0, 1, 1);
748    x3("a*?(c)", "aaaac", 4, 5, 1);
749    x3("[bcd]a*(.)", "caaaaz", 5, 6, 1);
750    x3("(\\Abb)cc", "bbcc", 0, 2, 1);
751    n("(\\Abb)cc", "zbbcc");
752    x3("(^bb)cc", "bbcc", 0, 2, 1);
753    n("(^bb)cc", "zbbcc");
754    x3("cc(bb$)", "ccbb", 2, 4, 1);
755    n("cc(bb$)", "ccbbb");
756    n("(\\1)", "");
757    n("\\1(a)", "aa");
758    n("(a(b)\\1)\\2+", "ababb");
759    n("(?:(?:\\1|z)(a))+$", "zaa");
760    x2("(?:(?:\\1|z)(a))+$", "zaaa", 0, 4);
761    x2("(a)(?=\\1)", "aa", 0, 1);
762    n("(a)$|\\1", "az");
763    x2("(a)\\1", "aa", 0, 2);
764    n("(a)\\1", "ab");
765    x2("(a?)\\1", "aa", 0, 2);
766    x2("(a??)\\1", "aa", 0, 0);
767    x2("(a*)\\1", "aaaaa", 0, 4);
768    x3("(a*)\\1", "aaaaa", 0, 2, 1);
769    x2("a(b*)\\1", "abbbb", 0, 5);
770    x2("a(b*)\\1", "ab", 0, 1);
771    x2("(a*)(b*)\\1\\2", "aaabbaaabb", 0, 10);
772    x2("(a*)(b*)\\2", "aaabbbb", 0, 7);
773    x2("(((((((a*)b))))))c\\7", "aaabcaaa", 0, 8);
774    x3("(((((((a*)b))))))c\\7", "aaabcaaa", 0, 3, 7);
775    x2("(a)(b)(c)\\2\\1\\3", "abcbac", 0, 6);
776    x2("([a-d])\\1", "cc", 0, 2);
777    x2("(\\w\\d\\s)\\1", "f5 f5 ", 0, 6);
778    n("(\\w\\d\\s)\\1", "f5 f5");
779    x2("(who|[a-c]{3})\\1", "whowho", 0, 6);
780    x2("...(who|[a-c]{3})\\1", "abcwhowho", 0, 9);
781    x2("(who|[a-c]{3})\\1", "cbccbc", 0, 6);
782    x2("(^a)\\1", "aa", 0, 2);
783    n("(^a)\\1", "baa");
784    n("(a$)\\1", "aa");
785    n("(ab\\Z)\\1", "ab");
786    x2("(a*\\Z)\\1", "a", 1, 1);
787    x2(".(a*\\Z)\\1", "ba", 1, 2);
788    x3("(.(abc)\\2)", "zabcabc", 0, 7, 1);
789    x3("(.(..\\d.)\\2)", "z12341234", 0, 9, 1);
790    x2("((?i:az))\\1", "AzAz", 0, 4);
791    n("((?i:az))\\1", "Azaz");
792    x2("(?<=a)b", "ab", 1, 2);
793    n("(?<=a)b", "bb");
794    x2("(?<=a|b)b", "bb", 1, 2);
795    x2("(?<=a|bc)b", "bcb", 2, 3);
796    x2("(?<=a|bc)b", "ab", 1, 2);
797    x2("(?<=a|bc||defghij|klmnopq|r)z", "rz", 1, 2);
798    x2("(a)\\g<1>", "aa", 0, 2);
799    x2("(?<!a)b", "cb", 1, 2);
800    n("(?<!a)b", "ab");
801    x2("(?<!a|bc)b", "bbb", 0, 1);
802    n("(?<!a|bc)z", "bcz");
803    x2("(?<name1>a)", "a", 0, 1);
804    x2("(?<name_2>ab)\\g<name_2>", "abab", 0, 4);
805    x2("(?<name_3>.zv.)\\k<name_3>", "azvbazvb", 0, 8);
806    x2("(?<=\\g<ab>)|-\\zEND (?<ab>XyZ)", "XyZ", 3, 3);
807    x2("(?<n>|a\\g<n>)+", "", 0, 0);
808    x2("(?<n>|\\(\\g<n>\\))+$", "()(())", 0, 6);
809    x3("\\g<n>(?<n>.){0}", "X", 0, 1, 1);
810    x2("\\g<n>(abc|df(?<n>.YZ){2,8}){0}", "XYZ", 0, 3);
811    x2("\\A(?<n>(a\\g<n>)|)\\z", "aaaa", 0, 4);
812    x2("(?<n>|\\g<m>\\g<n>)\\z|\\zEND (?<m>a|(b)\\g<m>)", "bbbbabba", 0, 8);
813    x2("(?<name1240>\\w+\\sx)a+\\k<name1240>", "  fg xaaaaaaaafg x", 2, 18);
814    x3("(z)()()(?<_9>a)\\g<_9>", "zaa", 2, 3, 1);
815    x2("(.)(((?<_>a)))\\k<_>", "zaa", 0, 3);
816    x2("((?<name1>\\d)|(?<name2>\\w))(\\k<name1>|\\k<name2>)", "ff", 0, 2);
817    x2("(?:(?<x>)|(?<x>efg))\\k<x>", "", 0, 0);
818    x2("(?:(?<x>abc)|(?<x>efg))\\k<x>", "abcefgefg", 3, 9);
819    n("(?:(?<x>abc)|(?<x>efg))\\k<x>", "abcefg");
820    x2("(?:(?<n1>.)|(?<n1>..)|(?<n1>...)|(?<n1>....)|(?<n1>.....)|(?<n1>......)|(?<n1>.......)|(?<n1>........)|(?<n1>.........)|(?<n1>..........)|(?<n1>...........)|(?<n1>............)|(?<n1>.............)|(?<n1>..............))\\k<n1>$", "a-pyumpyum", 2, 10);
821    x3("(?:(?<n1>.)|(?<n1>..)|(?<n1>...)|(?<n1>....)|(?<n1>.....)|(?<n1>......)|(?<n1>.......)|(?<n1>........)|(?<n1>.........)|(?<n1>..........)|(?<n1>...........)|(?<n1>............)|(?<n1>.............)|(?<n1>..............))\\k<n1>$", "xxxxabcdefghijklmnabcdefghijklmn", 4, 18, 14);
822    x3("(?<name1>)(?<name2>)(?<name3>)(?<name4>)(?<name5>)(?<name6>)(?<name7>)(?<name8>)(?<name9>)(?<name10>)(?<name11>)(?<name12>)(?<name13>)(?<name14>)(?<name15>)(?<name16>aaa)(?<name17>)$", "aaa", 0, 3, 16);
823    x2("(?<foo>a|\\(\\g<foo>\\))", "a", 0, 1);
824    x2("(?<foo>a|\\(\\g<foo>\\))", "((((((a))))))", 0, 13);
825    x3("(?<foo>a|\\(\\g<foo>\\))", "((((((((a))))))))", 0, 17, 1);
826    x2("\\g<bar>|\\zEND(?<bar>.*abc$)", "abcxxxabc", 0, 9);
827    x2("\\g<1>|\\zEND(.a.)", "bac", 0, 3);
828    x3("\\g<_A>\\g<_A>|\\zEND(.a.)(?<_A>.b.)", "xbxyby", 3, 6, 1);
829    x2("\\A(?:\\g<pon>|\\g<pan>|\\zEND  (?<pan>a|c\\g<pon>c)(?<pon>b|d\\g<pan>d))$", "cdcbcdc", 0, 7);
830    x2("\\A(?<n>|a\\g<m>)\\z|\\zEND (?<m>\\g<n>)", "aaaa", 0, 4);
831    x2("(?<n>(a|b\\g<n>c){3,5})", "baaaaca", 1, 5);
832    x2("(?<n>(a|b\\g<n>c){3,5})", "baaaacaaaaa", 0, 10);
833    x2("(?<pare>\\(([^\\(\\)]++|\\g<pare>)*+\\))", "((a))", 0, 5);
834    x2("()*\\1", "", 0, 0);
835    x2("(?:()|())*\\1\\2", "", 0, 0);
836    x3("(?:\\1a|())*", "a", 0, 0, 1);
837    x2("x((.)*)*x", "0x1x2x3", 1, 6);
838    x2("x((.)*)*x(?i:\\1)\\Z", "0x1x2x1X2", 1, 9);
839    x2("(?:()|()|()|()|()|())*\\2\\5", "", 0, 0);
840    x2("(?:()|()|()|(x)|()|())*\\2b\\5", "b", 0, 1);
841    if onig_encoding == onigmo.ONIG_ENCODING_UTF16_LE:
842        x2("\\xFA\\x8F", "\u8ffa", 0, 1);
843    elif onig_encoding == onigmo.ONIG_ENCODING_UTF16_BE:
844        x2("\\x8F\\xFA", "\u8ffa", 0, 1);
845    elif onig_encoding == onigmo.ONIG_ENCODING_UTF32_LE:
846        x2("\\xFA\\x8F\\x00\\x00", "\u8ffa", 0, 1);
847    elif onig_encoding == onigmo.ONIG_ENCODING_UTF32_BE:
848        x2("\\x00\\x00\\x8F\\xFA", "\u8ffa", 0, 1);
849    elif onig_encoding == onigmo.ONIG_ENCODING_UTF8:
850        x2("\\xE8\\xBF\\xBA", "\u8ffa", 0, 1);
851    elif onig_encoding == onigmo.ONIG_ENCODING_SJIS or \
852            onig_encoding == onigmo.ONIG_ENCODING_CP932:
853        x2("\\xE7\\x92", "\u8ffa", 0, 1);
854    elif onig_encoding == onigmo.ONIG_ENCODING_EUC_JP:
855        x2("\\xED\\xF2", "\u8ffa", 0, 1); # "迺"
856    x2("", "あ", 0, 0);
857    x2("あ", "あ", 0, 1);
858    n("い", "あ");
859    x2("うう", "うう", 0, 2);
860    x2("あいう", "あいう", 0, 3);
861    x2("こここここここここここここここここここここここここここここここここここ", "こここここここここここここここここここここここここここここここここここ", 0, 35);
862    x2("あ", "いあ", 1, 2);
863    x2("いう", "あいう", 1, 3);
864#    x2(b"\\xca\\xb8", b"\xca\xb8", 0, 2);   # "文"
865    x2(".", "あ", 0, 1);
866    x2("..", "かき", 0, 2);
867    x2("\\w", "お", 0, 1);
868    n("\\W", "あ");
869    x2("[\\W]", "う$", 1, 2);
870    x2("\\S", "そ", 0, 1);
871    x2("\\S", "漢", 0, 1);
872    x2("\\b", "気 ", 0, 0);
873    x2("\\b", " ほ", 1, 1);
874    x2("\\B", "せそ ", 1, 1);
875    x2("\\B", "う ", 2, 2);
876    x2("\\B", " い", 0, 0);
877    x2("[たち]", "ち", 0, 1);
878    n("[なに]", "ぬ");
879    x2("[う-お]", "え", 0, 1);
880    n("[^け]", "け");
881    x2("[\\w]", "ね", 0, 1);
882    n("[\\d]", "ふ");
883    x2("[\\D]", "は", 0, 1);
884    n("[\\s]", "く");
885    x2("[\\S]", "へ", 0, 1);
886    x2("[\\w\\d]", "よ", 0, 1);
887    x2("[\\w\\d]", "   よ", 3, 4);
888    n("\\w鬼車", " 鬼車");
889    x2("鬼\\W車", "鬼 車", 0, 3);
890    x2("あ.い.う", "ああいいう", 0, 5);
891    x2(".\\wう\\W..ぞ", "えうう うぞぞ", 0, 7);
892    x2("\\s\\wこここ", " ここここ", 0, 5);
893    x2("ああ.け", "ああけけ", 0, 4);
894    n(".い", "いえ");
895    x2(".お", "おお", 0, 2);
896    x2("^あ", "あ", 0, 1);
897    x2("^む$", "む", 0, 1);
898    x2("^\\w$", "に", 0, 1);
899    x2("^\\wかきくけこ$", "zかきくけこ", 0, 6);
900    x2("^\\w...うえお$", "zあいううえお", 0, 7);
901    x2("\\w\\w\\s\\Wおおお\\d", "aお  おおお4", 0, 8);
902    x2("\\Aたちつ", "たちつ", 0, 3);
903    x2("むめも\\Z", "むめも", 0, 3);
904    x2("かきく\\z", "かきく", 0, 3);
905    x2("かきく\\Z", "かきく\n", 0, 3);
906    x2("\\Gぽぴ", "ぽぴ", 0, 2);
907    n("\\Gえ", "うえお");
908    n("とて\\G", "とて");
909    n("まみ\\A", "まみ");
910    n("ま\\Aみ", "まみ");
911    x2("(?=せ)せ", "せ", 0, 1);
912    n("(?=う).", "い");
913    x2("(?!う)か", "か", 0, 1);
914    n("(?!と)あ", "と");
915    x2("(?i:あ)", "あ", 0, 1);
916    x2("(?i:ぶべ)", "ぶべ", 0, 2);
917    n("(?i:い)", "う");
918    x2("(?m:よ.)", "よ\n", 0, 2);
919    x2("(?m:.め)", "ま\nめ", 1, 3);
920    x2("あ?", "", 0, 0);
921    x2("変?", "化", 0, 0);
922    x2("変?", "変", 0, 1);
923    x2("量*", "", 0, 0);
924    x2("量*", "量", 0, 1);
925    x2("子*", "子子子", 0, 3);
926    x2("馬*", "鹿馬馬馬馬", 0, 0);
927    n("山+", "");
928    x2("河+", "河", 0, 1);
929    x2("時+", "時時時時", 0, 4);
930    x2("え+", "ええううう", 0, 2);
931    x2("う+", "おうううう", 1, 5);
932    x2(".?", "た", 0, 1);
933    x2(".*", "ぱぴぷぺ", 0, 4);
934    x2(".+", "ろ", 0, 1);
935    x2(".+", "いうえか\n", 0, 4);
936    x2("あ|い", "あ", 0, 1);
937    x2("あ|い", "い", 0, 1);
938    x2("あい|いう", "あい", 0, 2);
939    x2("あい|いう", "いう", 0, 2);
940    x2("を(?:かき|きく)", "をかき", 0, 3);
941    x2("を(?:かき|きく)け", "をきくけ", 0, 4);
942    x2("あい|(?:あう|あを)", "あを", 0, 2);
943    x2("あ|い|う", "えう", 1, 2);
944    x2("あ|い|うえ|おかき|く|けこさ|しすせ|そ|たち|つてとなに|ぬね", "しすせ", 0, 3);
945    n("あ|い|うえ|おかき|く|けこさ|しすせ|そ|たち|つてとなに|ぬね", "すせ");
946    x2("あ|^わ", "ぶあ", 1, 2);
947    x2("あ|^を", "をあ", 0, 1);
948    x2("鬼|\\G車", "け車鬼", 2, 3);
949    x2("鬼|\\G車", "車鬼", 0, 1);
950    x2("鬼|\\A車", "b車鬼", 2, 3);
951    x2("鬼|\\A車", "車", 0, 1);
952    x2("鬼|車\\Z", "車鬼", 1, 2);
953    x2("鬼|車\\Z", "車", 0, 1);
954    x2("鬼|車\\Z", "車\n", 0, 1);
955    x2("鬼|車\\z", "車鬼", 1, 2);
956    x2("鬼|車\\z", "車", 0, 1);
957    x2("\\w|\\s", "お", 0, 1);
958    x2("\\w|%", "%お", 0, 1);
959    x2("\\w|[&$]", "う&", 0, 1);
960    x2("[い-け]", "う", 0, 1);
961    x2("[い-け]|[^か-こ]", "あ", 0, 1);
962    x2("[い-け]|[^か-こ]", "か", 0, 1);
963    x2("[^あ]", "\n", 0, 1);
964    x2("(?:あ|[う-き])|いを", "うを", 0, 1);
965    x2("(?:あ|[う-き])|いを", "いを", 0, 2);
966    x2("あいう|(?=けけ)..ほ", "けけほ", 0, 3);
967    x2("あいう|(?!けけ)..ほ", "あいほ", 0, 3);
968    x2("(?=をあ)..あ|(?=をを)..あ", "ををあ", 0, 3);
969    x2("(?<=あ|いう)い", "いうい", 2, 3);
970    n("(?>あ|あいえ)う", "あいえう");
971    x2("(?>あいえ|あ)う", "あいえう", 0, 4);
972    x2("あ?|い", "あ", 0, 1);
973    x2("あ?|い", "い", 0, 0);
974    x2("あ?|い", "", 0, 0);
975    x2("あ*|い", "ああ", 0, 2);
976    x2("あ*|い*", "いあ", 0, 0);
977    x2("あ*|い*", "あい", 0, 1);
978    x2("[aあ]*|い*", "aあいいい", 0, 2);
979    x2("あ+|い*", "", 0, 0);
980    x2("あ+|い*", "いいい", 0, 3);
981    x2("あ+|い*", "あいいい", 0, 1);
982    x2("あ+|い*", "aあいいい", 0, 0);
983    n("あ+|い+", "");
984    x2("(あ|い)?", "い", 0, 1);
985    x2("(あ|い)*", "いあ", 0, 2);
986    x2("(あ|い)+", "いあい", 0, 3);
987    x2("(あい|うあ)+", "うああいうえ", 0, 4);
988    x2("(あい|うえ)+", "うああいうえ", 2, 6);
989    x2("(あい|うあ)+", "ああいうあ", 1, 5);
990    x2("(あい|うあ)+", "あいをうあ", 0, 2);
991    x2("(あい|うあ)+", "$$zzzzあいをうあ", 6, 8);
992    x2("(あ|いあい)+", "あいあいあ", 0, 5);
993    x2("(あ|いあい)+", "いあ", 1, 2);
994    x2("(あ|いあい)+", "いあああいあ", 1, 4);
995    x2("(?:あ|い)(?:あ|い)", "あい", 0, 2);
996    x2("(?:あ*|い*)(?:あ*|い*)", "あああいいい", 0, 3);
997    x2("(?:あ*|い*)(?:あ+|い+)", "あああいいい", 0, 6);
998    x2("(?:あ+|い+){2}", "あああいいい", 0, 6);
999    x2("(?:あ+|い+){1,2}", "あああいいい", 0, 6);
1000    x2("(?:あ+|\\Aい*)うう", "うう", 0, 2);
1001    n("(?:あ+|\\Aい*)うう", "あいうう");
1002    x2("(?:^あ+|い+)*う", "ああいいいあいう", 6, 8);
1003    x2("(?:^あ+|い+)*う", "ああいいいいう", 0, 7);
1004    x2("う{0,}", "うううう", 0, 4);
1005    x2("あ|(?i)c", "C", 0, 1);
1006    x2("(?i)c|あ", "C", 0, 1);
1007    x2("(?i:あ)|a", "a", 0, 1);
1008    n("(?i:あ)|a", "A");
1009    x2("[あいう]?", "あいう", 0, 1);
1010    x2("[あいう]*", "あいう", 0, 3);
1011    x2("[^あいう]*", "あいう", 0, 0);
1012    n("[^あいう]+", "あいう");
1013    x2("あ??", "あああ", 0, 0);
1014    x2("いあ??い", "いあい", 0, 3);
1015    x2("あ*?", "あああ", 0, 0);
1016    x2("いあ*?", "いああ", 0, 1);
1017    x2("いあ*?い", "いああい", 0, 4);
1018    x2("あ+?", "あああ", 0, 1);
1019    x2("いあ+?", "いああ", 0, 2);
1020    x2("いあ+?い", "いああい", 0, 4);
1021    x2("(?:天?)??", "天", 0, 0);
1022    x2("(?:天??)?", "天", 0, 0);
1023    x2("(?:夢?)+?", "夢夢夢", 0, 1);
1024    x2("(?:風+)??", "風風風", 0, 0);
1025    x2("(?:雪+)??霜", "雪雪雪霜", 0, 4);
1026    x2("(?:あい)?{2}", "", 0, 0);
1027    x2("(?:鬼車)?{2}", "鬼車鬼車鬼", 0, 4);
1028    x2("(?:鬼車)*{0}", "鬼車鬼車鬼", 0, 0);
1029    x2("(?:鬼車){3,}", "鬼車鬼車鬼車鬼車", 0, 8);
1030    n("(?:鬼車){3,}", "鬼車鬼車");
1031    x2("(?:鬼車){2,4}", "鬼車鬼車鬼車", 0, 6);
1032    x2("(?:鬼車){2,4}", "鬼車鬼車鬼車鬼車鬼車", 0, 8);
1033    x2("(?:鬼車){2,4}?", "鬼車鬼車鬼車鬼車鬼車", 0, 4);
1034    x2("(?:鬼車){,}", "鬼車{,}", 0, 5);
1035    x2("(?:かきく)+?{2}", "かきくかきくかきく", 0, 6);
1036    x3("(火)", "火", 0, 1, 1);
1037    x3("(火水)", "火水", 0, 2, 1);
1038    x2("((時間))", "時間", 0, 2);
1039    x3("((風水))", "風水", 0, 2, 1);
1040    x3("((昨日))", "昨日", 0, 2, 2);
1041    x3("((((((((((((((((((((量子))))))))))))))))))))", "量子", 0, 2, 20);
1042    x3("(あい)(うえ)", "あいうえ", 0, 2, 1);
1043    x3("(あい)(うえ)", "あいうえ", 2, 4, 2);
1044    x3("()(あ)いう(えおか)きくけこ", "あいうえおかきくけこ", 3, 6, 3);
1045    x3("(()(あ)いう(えおか)きくけこ)", "あいうえおかきくけこ", 3, 6, 4);
1046    x3(".*(フォ)ン・マ(ン()シュタ)イン", "フォン・マンシュタイン", 5, 9, 2);
1047    x2("(^あ)", "あ", 0, 1);
1048    x3("(あ)|(あ)", "いあ", 1, 2, 1);
1049    x3("(^あ)|(あ)", "いあ", 1, 2, 2);
1050    x3("(あ?)", "あああ", 0, 1, 1);
1051    x3("(ま*)", "ままま", 0, 3, 1);
1052    x3("(と*)", "", 0, 0, 1);
1053    x3("(る+)", "るるるるるるる", 0, 7, 1);
1054    x3("(ふ+|へ*)", "ふふふへへ", 0, 3, 1);
1055    x3("(あ+|い?)", "いいいああ", 0, 1, 1);
1056    x3("(あいう)?", "あいう", 0, 3, 1);
1057    x3("(あいう)*", "あいう", 0, 3, 1);
1058    x3("(あいう)+", "あいう", 0, 3, 1);
1059    x3("(さしす|あいう)+", "あいう", 0, 3, 1);
1060    x3("([なにぬ][かきく]|かきく)+", "かきく", 0, 3, 1);
1061    x3("((?i:あいう))", "あいう", 0, 3, 1);
1062    x3("((?m:あ.う))", "あ\nう", 0, 3, 1);
1063    x3("((?=あん)あ)", "あんい", 0, 1, 1);
1064    x3("あいう|(.あいえ)", "んあいえ", 0, 4, 1);
1065    x3("あ*(.)", "ああああん", 4, 5, 1);
1066    x3("あ*?(.)", "ああああん", 0, 1, 1);
1067    x3("あ*?(ん)", "ああああん", 4, 5, 1);
1068    x3("[いうえ]あ*(.)", "えああああん", 5, 6, 1);
1069    x3("(\\Aいい)うう", "いいうう", 0, 2, 1);
1070    n("(\\Aいい)うう", "んいいうう");
1071    x3("(^いい)うう", "いいうう", 0, 2, 1);
1072    n("(^いい)うう", "んいいうう");
1073    x3("ろろ(るる$)", "ろろるる", 2, 4, 1);
1074    n("ろろ(るる$)", "ろろるるる");
1075    x2("(無)\\1", "無無", 0, 2);
1076    n("(無)\\1", "無武");
1077    x2("(空?)\\1", "空空", 0, 2);
1078    x2("(空??)\\1", "空空", 0, 0);
1079    x2("(空*)\\1", "空空空空空", 0, 4);
1080    x3("(空*)\\1", "空空空空空", 0, 2, 1);
1081    x2("あ(い*)\\1", "あいいいい", 0, 5);
1082    x2("あ(い*)\\1", "あい", 0, 1);
1083    x2("(あ*)(い*)\\1\\2", "あああいいあああいい", 0, 10);
1084    x2("(あ*)(い*)\\2", "あああいいいい", 0, 7);
1085    x3("(あ*)(い*)\\2", "あああいいいい", 3, 5, 2);
1086    x2("(((((((ぽ*)ぺ))))))ぴ\\7", "ぽぽぽぺぴぽぽぽ", 0, 8);
1087    x3("(((((((ぽ*)ぺ))))))ぴ\\7", "ぽぽぽぺぴぽぽぽ", 0, 3, 7);
1088    x2("(は)(ひ)(ふ)\\2\\1\\3", "はひふひはふ", 0, 6);
1089    x2("([き-け])\\1", "くく", 0, 2);
1090    x2("(\\w\\d\\s)\\1", "あ5 あ5 ", 0, 6);
1091    n("(\\w\\d\\s)\\1", "あ5 あ5");
1092    x2("(誰?|[あ-う]{3})\\1", "誰?誰?", 0, 4);
1093    x2("...(誰?|[あ-う]{3})\\1", "あaあ誰?誰?", 0, 7);
1094    x2("(誰?|[あ-う]{3})\\1", "ういうういう", 0, 6);
1095    x2("(^こ)\\1", "ここ", 0, 2);
1096    n("(^む)\\1", "めむむ");
1097    n("(あ$)\\1", "ああ");
1098    n("(あい\\Z)\\1", "あい");
1099    x2("(あ*\\Z)\\1", "あ", 1, 1);
1100    x2(".(あ*\\Z)\\1", "いあ", 1, 2);
1101    x3("(.(やいゆ)\\2)", "zやいゆやいゆ", 0, 7, 1);
1102    x3("(.(..\\d.)\\2)", "あ12341234", 0, 9, 1);
1103    x2("((?i:あvず))\\1", "あvずあvず", 0, 6);
1104    x2("(?<愚か>変|\\(\\g<愚か>\\))", "((((((変))))))", 0, 13);
1105    x2("\\A(?:\\g<阿_1>|\\g<云_2>|\\z終了  (?<阿_1>観|自\\g<云_2>自)(?<云_2>在|菩薩\\g<阿_1>菩薩))$", "菩薩自菩薩自在自菩薩自菩薩", 0, 13);
1106    x2("[[ひふ]]", "ふ", 0, 1);
1107    x2("[[いおう]か]", "か", 0, 1);
1108    n("[[^あ]]", "あ");
1109    n("[^[あ]]", "あ");
1110    x2("[^[^あ]]", "あ", 0, 1);
1111    x2("[[かきく]&&きく]", "く", 0, 1);
1112    n("[[かきく]&&きく]", "か");
1113    n("[[かきく]&&きく]", "け");
1114    x2("[あ-ん&&い-を&&う-ゑ]", "ゑ", 0, 1);
1115    n("[^あ-ん&&い-を&&う-ゑ]", "ゑ");
1116    x2("[[^あ&&あ]&&あ-ん]", "い", 0, 1);
1117    n("[[^あ&&あ]&&あ-ん]", "あ");
1118    x2("[[^あ-ん&&いうえお]&&[^う-か]]", "き", 0, 1);
1119    n("[[^あ-ん&&いうえお]&&[^う-か]]", "い");
1120    x2("[^[^あいう]&&[^うえお]]", "う", 0, 1);
1121    x2("[^[^あいう]&&[^うえお]]", "え", 0, 1);
1122    n("[^[^あいう]&&[^うえお]]", "か");
1123    x2("[あ-&&-あ]", "-", 0, 1);
1124    x2("[^[^a-zあいう]&&[^bcdefgうえお]q-w]", "え", 0, 1);
1125    x2("[^[^a-zあいう]&&[^bcdefgうえお]g-w]", "f", 0, 1);
1126    x2("[^[^a-zあいう]&&[^bcdefgうえお]g-w]", "g", 0, 1);
1127    n("[^[^a-zあいう]&&[^bcdefgうえお]g-w]", "2");
1128    x2("a<b>バージョンのダウンロード<\\/b>", "a<b>バージョンのダウンロード</b>", 0, 20);
1129    x2(".<b>バージョンのダウンロード<\\/b>", "a<b>バージョンのダウンロード</b>", 0, 20);
1130
1131
1132    # additional test patterns
1133    if is_unicode_encoding(onig_encoding):
1134        x2("\\x{3042}\\x{3044}", "あい", 0, 2)
1135    elif onig_encoding == onigmo.ONIG_ENCODING_SJIS or \
1136            onig_encoding == onigmo.ONIG_ENCODING_CP932:
1137        x2("\\x{82a0}\\x{82A2}", "あい", 0, 2)
1138        x2("\\M-\\C-b\x50", "1", 0, 1)     # \x8250
1139    elif onig_encoding == onigmo.ONIG_ENCODING_EUC_JP:
1140        x2("\\x{a4a2}\\x{A4A4}", "あい", 0, 2)
1141    x2("\\p{Hiragana}\\p{Katakana}", "あイ", 0, 2)
1142    x2("(?m)^A.B$", "X\nA\nB\nZ", 2, 5)
1143    n("(?<!(?<=a)b|c)d", "abd")
1144    n("(?<!(?<=a)b|c)d", "cd")
1145    x2("(?<!(?<=a)b|c)d", "bd", 1, 2)
1146    x2("(a){2}z", "aaz", 0, 3)
1147    x2("(?<=a).*b", "aab", 1, 3)
1148    x2("(?!a).*b", "ab", 1, 2)
1149    x2("(?<=(?<!A)B)C", "BBC", 2, 3)
1150    n("(?<=(?<!A)B)C", "ABC")
1151    n("(?i)(?<!aa|b)c", "Aac")
1152    n("(?i)(?<!b|aa)c", "Aac")
1153    x2("(?<=\\babc)d", " abcd", 4, 5)
1154    x2("(?<=\\Babc)d", "aabcd", 4, 5)
1155    x2("a\\b?a", "aa", 0, 2)
1156    x2("[^x]*x", "aaax", 0, 4)
1157    x2("(?i)[\\x{0}-B]+", "\x00\x01\x02\x1f\x20@AaBbC", 0, 10)
1158    x2("(?i)a{2}", "AA", 0, 2)
1159    if is_unicode_encoding(onig_encoding):
        # The longest property name
1161        x2("\\p{Other_Default_Ignorable_Code_Point}+", "\u034F\uFFF8\U000E0FFF", 0, 3)
1162        # The longest block name
1163        x2("\\p{In_Unified_Canadian_Aboriginal_Syllabics_Extended}+", "\u18B0\u18FF", 0, 2)
1164        # Unicode case fold
1165        x2("(?i)\u1ffc", "\u2126\u1fbe", 0, 2)
1166        x2("(?i)\u1ffc", "\u1ff3", 0, 1)
1167        x2("(?i)\u0390", "\u03b9\u0308\u0301", 0, 3)
1168        x2("(?i)\u03b9\u0308\u0301", "\u0390", 0, 1)
1169        x2("(?i)ff", "\ufb00", 0, 1)
1170        x2("(?i)\ufb01", "fi", 0, 2)
1171        x2("(?i)\u0149\u0149", "\u0149\u0149", 0, 2)
1172        x2("(?i)(?<=\u0149)a", "\u02bcna", 2, 3)    # with look-behind
1173        # Other Unicode tests
1174        x2("\\x{25771}", "\U00025771", 0, 1)
1175    x2("[0-9-a]+", " 0123456789-a ", 1, 13)     # same as [0-9\-a]
1176    x2("[0-9-\\s]+", " 0123456789-a ", 0, 12)   # same as [0-9\-\s]
1177    n("[0-9-a]", "", syn=onigmo.ONIG_SYNTAX_GREP, err=onigmo.ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS)
1178    x2("[0-9-あ\\\\/\u0001]+", " 0123456789-あ\\/\u0001 ", 1, 16)
1179    x2("[a-b-]+", "ab-", 0, 3)
1180    x2("[a-b-&&-]+", "ab-", 2, 3)
1181    x2("(?i)[a[b-あ]]+", "abあ", 0, 3)
1182    if is_unicode_encoding(onig_encoding):
1183        x2("(?i)[\\d[:^graph:]]+", "0あ", 0, 1)
1184    x2("(?ia)[\\d[:^print:]]+", "0あ", 0, 2)
1185    x2("(?i:a) B", "a B", 0, 3);
1186    x2("(?i:a )B", "a B", 0, 3);
1187    x2("B (?i:a)", "B a", 0, 3);
1188    x2("B(?i: a)", "B a", 0, 3);
1189    if is_unicode_encoding(onig_encoding):
1190        x2("(?a)[\\p{Space}\\d]", "\u00a0", 0, 1)
1191        x2("(?a)[\\d\\p{Space}]", "\u00a0", 0, 1)
1192        n("(?a)[^\\p{Space}\\d]", "\u00a0")
1193        n("(?a)[^\\d\\p{Space}]", "\u00a0")
1194        x2("(?d)[[:space:]\\d]", "\u00a0", 0, 1)
1195        n("(?d)[^\\d[:space:]]", "\u00a0")
1196    n("x.*?\\Z$", "x\ny")
1197    n("x.*?\\Z$", "x\r\ny")
1198    x2("x.*?\\Z$", "x\n", 0, 1)
1199    x2("x.*?\\Z$", "x\r\n", 0, 2)   # \Z will match between \r and \n, if
1200                                    # ONIG_OPTION_NEWLINE_CRLF isn't specified.
1201    x2("(?<=fo).*", "foo", 2, 3)        # Issue #15
1202    x2("(?m)(?<=fo).*", "foo", 2, 3)    # Issue #15
1203    x2("(?m)(?<=fo).+", "foo", 2, 3)    # Issue #15
1204    x2("\\n?\\z", "hello", 5, 5)
1205    x2("\\z", "hello", 5, 5)
1206    x2("\\n?\\z", "こんにちは", 5, 5)
1207    x2("\\z", "こんにちは", 5, 5)
1208    x2("()" * 32767, "", 0, 0)      # Issue #24
1209    n("()" * 32768, "", err=onigmo.ONIGERR_TOO_MANY_CAPTURE_GROUPS)
1210    x2("\\h+ \\H+", " 0123456789aBcDeF gh", 1, 20)
1211    x2("[\\h]+ [\\H]+", " 0123456789aBcDeF gh", 1, 20)
1212    x2("\\A(|.|(?:(.)\\g<1>\\k<2+0>))\\z", "reer", 0, 4)
1213    x2("\\A(?<a>|.|(?:(?<b>.)\\g<a>\\k<b+0>))\\z", "reer", 0, 4)
1214    x2("(?i)\\A(|.|(?:(.)\\g<1>\\k<2+0>))\\z", "reER", 0, 4)
1215    x2("(?i)\\A(?<a>|.|(?:(?<b>.)\\g<a>\\k<b+0>))\\z", "REer", 0, 4)
1216    x2(''' # Extended pattern
1217      (?<element> \g<stag> \g<content>* \g<etag> ){0}
1218      (?<stag> < \g<name> \s* > ){0}
1219      (?<name> [a-zA-Z_:]+ ){0}
1220      (?<content> [^<&]+ (\g<element> | [^<&]+)* ){0}
1221      (?<etag> </ \k<name+1> >){0}
1222      \g<element>''',
1223      "<foo>f<bar>bbb</bar>f</foo>", 0, 27, opt=onigmo.ONIG_OPTION_EXTEND)
1224    x2("(.)(?<a>a)(?<a>b)\\k<a>", "xaba", 0, 4)
1225    x2("\\p{Print}+", "\n a", 1, 3)
1226    x2("\\p{Graph}+", "\n a", 2, 3)
1227    n("a(?!b)", "ab");
1228    x2("(?:(.)\\1)*", "a" * 300, 0, 300)
1229    x2("\\cA\\C-B\\a[\\b]\\t\\n\\v\\f\\r\\e\\c?", "\x01\x02\x07\x08\x09\x0a\x0b\x0c\x0d\x1b\x7f", 0, 11)
1230    x2("(?<=(?:[a-z]|\\w){3})x", "ab1x", 3, 4)  # repeat inside look-behind
1231    x2("(?<n>(a|b\\g<n>c){3,5}?)", "baaaaca", 1, 4)
1232    x2("\\p{WoRd}", "a", 0, 1)  # property name is not case sensitive
1233    n("[[:WoRd:]]", "a", err=onigmo.ONIGERR_INVALID_POSIX_BRACKET_TYPE)   # POSIX bracket name is case sensitive
1234    n("(\\2)(\\1)", "")     # Issue #65
1235    n("(0?0|(?(1)||)|(?(1)||))?", "", err=onigmo.ONIGERR_INVALID_CONDITION_PATTERN) # Ruby Bug#12418
1236    n("[\\40000000000", "", err=onigmo.ONIGERR_TOO_BIG_NUMBER)  # Ruby Bug#12420
1237    n("[\\600000000000\n", "", err=onigmo.ONIGERR_TOO_BIG_NUMBER)   # Ruby Bug#12423
1238    n("[]", "", err=onigmo.ONIGERR_EMPTY_CHAR_CLASS)
1239    n("[c-a]", "", err=onigmo.ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS)
1240    x2("[[:ab:\\x{30}]]+", ":ab0x", 0, 4)
1241    x2("[[:x\\]:]+", "[x:]", 0, 4)
1242    x2("[!--x]+", "!-x", 0, 3)
1243    x2(" ]", " ]", 0, 2)    # warning: ']' without escape
1244    n("\\x{FFFFFFFF}", "", err=onigmo.ONIGERR_TOO_BIG_WIDE_CHAR_VALUE);
1245    n("\\x{100000000}", "", err=onigmo.ONIGERR_TOO_LONG_WIDE_CHAR_VALUE);
1246    x2("\\u0026", "\u0026", 0, 1)
1247    x2("[\\u0024-\\u0027]", "\u0026", 0, 1)
1248    n("\\u026x", "", err=onigmo.ONIGERR_TOO_SHORT_DIGITS)
1249    n("()(?\\!(?'a')\\1)", "", err=onigmo.ONIGERR_UNDEFINED_GROUP_OPTION)
1250    x2("\\i", "i", 0, 1)    # unknown escape warning
1251    n("\\((", "", err=onigmo.ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS)
1252    n("(|", "", err=onigmo.ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS)
1253    x2("%{(.*?)}", "%{HOSTNAME}", 0, 11)
1254    if not is_ascii_incompatible_encoding(onig_encoding):
1255        n(b"'/g\\\xff\xff\xff\xff&))", "", err=onigmo.ONIGERR_UNMATCHED_CLOSE_PARENTHESIS)
1256        n(b"\\\xff0", "")
1257    if onig_encoding == onigmo.ONIG_ENCODING_UTF8:
1258        n(b"[0-0-\xe2  ", "", err=onigmo.ONIGERR_PREMATURE_END_OF_CHAR_CLASS)
1259    n("\\p{foobarbaz}", "", err=onigmo.ONIGERR_INVALID_CHAR_PROPERTY_NAME)
1260    n("\\p{あ}", "", err=onigmo.ONIGERR_INVALID_CHAR_PROPERTY_NAME)
1261    if is_unicode_encoding(onig_encoding):
1262        n("\\p{\U00025771}", "", err=onigmo.ONIGERR_INVALID_CHAR_PROPERTY_NAME)
1263    if onig_encoding == onigmo.ONIG_ENCODING_UTF8:
1264        x2("[\\xce\\xb1\\xce\\xb2]", "β", 0, 1)
1265    elif onig_encoding == onigmo.ONIG_ENCODING_SJIS or \
1266            onig_encoding == onigmo.ONIG_ENCODING_CP932:
1267        n("[\\x84A]", "", err=onigmo.ONIGERR_TOO_SHORT_MULTI_BYTE_STRING)
1268    elif onig_encoding == onigmo.ONIG_ENCODING_EUC_JP:
1269        n("[\\xAAA]", "", err=onigmo.ONIGERR_TOO_SHORT_MULTI_BYTE_STRING)
1270    elif is_ascii_incompatible_encoding(onig_encoding):
1271        n("[\\x420]", "", err=onigmo.ONIGERR_TOO_SHORT_MULTI_BYTE_STRING)
1272    x2("(?:a?)*", "aa", 0, 2)   # tests for reducing nested quantifiers
1273    x2("(?:a?)*?", "aa", 0, 0)
1274    x2("(?:a*)??", "aa", 0, 0)
1275    x2("(?:a+?)*", "aa", 0, 1)
1276    x2("(?:a*){2,3}", "aaa", 0, 3)
1277    n("(?:a+){2,3}", "a")
1278    x2("a{", "a{", 0, 2)        # invalid interval is allowed
1279    n("a{100001}", "", err=onigmo.ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE)
1280    n("a{0,100001}", "", err=onigmo.ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE)
1281    n("a{5,1}", "", err=onigmo.ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE)
1282    x2("abc{1}", "abcc", 0, 3)
1283    x3("\\(((?:[^(]|\\g<0>)*)\\)", "(abc)(abc)", 1, 4, 1)   # Issue #48
1284    x3("\\(((?:[^(]|\\g<0>)*)\\)", "((abc)(abc))", 1, 11, 1)
1285    x3("\\(((?:[^(]|(\\g<0>))*)\\)", "((abc)(abc))", 6, 11, 2)
1286    n("[\\6000", "a", err=onigmo.ONIGERR_TOO_BIG_NUMBER)   # CVE-2017-9226
1287    n("[\\H- ]", "", err=onigmo.ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS)  # CVE-2017-9228
1288    x2("c.*\\b", "abc", 2, 3)           # Issue #96
1289    x2("abc.*\\b", "abc", 0, 3)         # Issue #96
1290    x2("\\b.*abc.*\\b", "abc", 0, 3)    # Issue #96
1291
1292    # ONIG_OPTION_FIND_LONGEST option
1293    x2("foo|foobar", "foobar", 0, 3)
1294    x2("foo|foobar", "foobar", 0, 6, opt=onigmo.ONIG_OPTION_FIND_LONGEST)
1295    x2("a*", "aa aaa aaaa aaaaa ", 12, 17, opt=onigmo.ONIG_OPTION_FIND_LONGEST)
1296
1297    # ONIG_OPTION_FIND_NOT_EMPTY option
1298    x2("\w*", " a", 0, 0)
1299    x2("\w*", " a", 1, 2, opt=onigmo.ONIG_OPTION_FIND_NOT_EMPTY)
1300
1301    # ONIG_OPTION_DONT_CAPTURE_GROUP option
1302    x2("(ab|cd)*", "cdab", 0, 4, opt=onigmo.ONIG_OPTION_DONT_CAPTURE_GROUP)
1303    n("(ab|cd)*\\1", "", opt=onigmo.ONIG_OPTION_DONT_CAPTURE_GROUP, err=onigmo.ONIGERR_INVALID_BACKREF)
1304
1305    # character classes (tests for character class optimization)
1306    x2("[@][a]", "@a", 0, 2);
1307    x2(".*[a][b][c][d][e]", "abcde", 0, 5);
1308    x2("(?i)[A\\x{41}]", "a", 0, 1);
1309    x2("[abA]", "a", 0, 1);
1310    x2("[[ab]&&[ac]]+", "aaa", 0, 3);
1311    x2("[[ab]&&[^b]]+", "aaa", 0, 3);
1312    x2("[[^b]&&[ab]]+", "aaa", 0, 3);
1313    x2("[[あい]&&[あう]]+", "あああ", 0, 3);
1314    x2("[[あい]&&[^い]]+", "あああ", 0, 3);
1315    x2("[[^い]&&[あい]]+", "あああ", 0, 3);
1316
1317    # possessive quantifiers
1318    n("a?+a", "a")
1319    n("a*+a", "aaaa")
1320    n("a++a", "aaaa")
1321    x2("a{2,3}+a", "aaa", 0, 3) # Not a possessive quantifier in Ruby,
1322                                # same as "(?:a{2,3})+a"
1323    n("a{2,3}+a", "aaa", syn=onigmo.ONIG_SYNTAX_PERL)
1324
1325    # automatic possessification
1326    x2("\\w+\\W", "abc#", 0, 4)
1327    x2("[a-c]+\\W", "abc#", 0, 4)
1328    x2("[a-c#]+\\W", "abc#", 0, 4)
1329    x2("[^a-c]+\\W", "def#", 0, 4)
1330    x2("(?a)[^a-c]+\\W", "def#", 0, 4)
1331    x2("a+\\w", "aaaa", 0, 4)
1332    x2("#+\\w", "###a", 0, 4)
1333    x2("(?a)a+\\w", "aaaa", 0, 4)
1334    x2("(?a)あ+\\w", "あああa", 0, 4)
1335    x2("[a-c]+[d-f]", "abcd", 0, 4)
1336    x2("[^d-f]+[d-f]", "abcd", 0, 4)
1337    x2("[a-cあ]+[d-f]", "abcd", 0, 4)
1338
1339    # linebreak
1340    x2("\\R", "\n", 0, 1)
1341    x2("\\R", "\r", 0, 1)
1342    x2("\\R{3}", "\r\r\n\n", 0, 4)
1343
1344    if (is_unicode_encoding(onig_encoding)):
1345        x2("\\R", "\u0085", 0, 1)
1346        x2("\\R", "\u2028", 0, 1)
1347        x2("\\R", "\u2029", 0, 1)
1348
1349    # extended grapheme cluster
1350    x2("\\X{5}", "あいab\n", 0, 5)
1351    x2("\\X", "\n", 0, 1)
1352    x2("\\X", "\r", 0, 1)
1353    x2("\\X{3}", "\r\r\n\n", 0, 4)
1354    if is_unicode_encoding(onig_encoding):
1355        x2("\\X", "\u306F\u309A\n", 0, 2)
1356        x2("\\A\\X\\z", "\u0020\u200d", 0, 2)
1357        x2("\\A\\X\\z", "\u0600\u0600", 0, 2)
1358        x2("\\A\\X\\z", "\u0600\u0020", 0, 2)
1359        x2("\\A\\X\\z", "\u261d\U0001F3FB", 0, 2)
1360        x2("\\A\\X\\z", "\U0001f600", 0, 1)
1361        x2("\\A\\X\\z", "\u0020\u0308", 0, 2)
1362        x2("\\A\\X\\X\\z", "\u000a\u0308", 0, 2)
1363        x2("\\A\\X\\X\\z", "\u000d\u0308", 0, 2)
1364        x2("\\A\\X\\z", "\U0001F477\U0001F3FF\u200D\u2640\uFE0F", 0, 5)
1365        x2("\\A\\X\\z", "\U0001F468\u200D\U0001F393", 0, 3)
1366        x2("\\A\\X\\z", "\U0001F46F\u200D\u2642\uFE0F", 0, 4)
1367        x2("\\A\\X\\z", "\U0001F469\u200d\u2764\ufe0f\u200d\U0001F469", 0, 6)
1368
1369    # keep
1370    x2("ab\\Kcd", "abcd", 2, 4)
1371    x2("ab\\Kc(\\Kd|z)", "abcd", 3, 4)
1372    x2("ab\\Kc(\\Kz|d)", "abcd", 2, 4)
1373    x2("(a\\K)*", "aaab", 3, 3)
1374    x3("(a\\K)*", "aaab", 2, 3, 1)
1375#    x2("a\\K?a", "aa", 0, 2)        # error: differ from perl
1376    x2("ab(?=c\Kd)", "abcd", 2, 2)          # This behaviour is currently not well defined. (see: perlre)
1377    x2("(?<=a\\Kb|aa)cd", "abcd", 1, 4)     # This behaviour is currently not well defined. (see: perlre)
1378    x2("(?<=ab|a\\Ka)cd", "abcd", 2, 4)     # This behaviour is currently not well defined. (see: perlre)
1379
1380    # named group and subroutine call
1381    x2("(?<name_2>ab)(?&name_2)", "abab", 0, 4, syn=onigmo.ONIG_SYNTAX_PERL);
1382    x2("(?<name_2>ab)(?1)", "abab", 0, 4, syn=onigmo.ONIG_SYNTAX_PERL);
1383    x2("(?'n'|\\((?&n)\\))+$", "()(())", 0, 6, syn=onigmo.ONIG_SYNTAX_PERL);
1384    x2("(a|x(?-1)x)", "xax", 0, 3, syn=onigmo.ONIG_SYNTAX_PERL);
1385    x2("(a|(x(?-2)x))", "xax", 0, 3, syn=onigmo.ONIG_SYNTAX_PERL);
1386    x2("a|x(?0)x", "xax", 0, 3, syn=onigmo.ONIG_SYNTAX_PERL);
1387    x2("a|x(?R)x", "xax", 0, 3, syn=onigmo.ONIG_SYNTAX_PERL);
1388    x2("(a|x\g<0>x)", "xax", 0, 3);
1389    x2("(a|x\g'0'x)", "xax", 0, 3);
1390    x2("(?-i:(?+1))(?i:(a)){0}", "A", 0, 1, syn=onigmo.ONIG_SYNTAX_PERL);
1391    x2("(?-i:\g<+1>)(?i:(a)){0}", "A", 0, 1);
1392    x2("(?-i:\g'+1')(?i:(a)){0}", "A", 0, 1);
1393    n("(.(?=\\g<1>))", "", err=onigmo.ONIGERR_NEVER_ENDING_RECURSION)
1394    n("(a)(?<n>b)\\g<1>\\g<n>", "abab", err=onigmo.ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED)
1395    x2("(a)(?<n>b)(?1)(?&n)", "abab", 0, 4, syn=onigmo.ONIG_SYNTAX_PERL)
1396    x2("((?<v>)a)|b\\g<0>b", "bbabb", 0, 5)
1397    x2("((?<v>)a)|b(?0)b", "bbabb", 0, 5, syn=onigmo.ONIG_SYNTAX_PERL)
1398    x2("((?<v>)a|b(?1)b)", "bbabb", 0, 5, syn=onigmo.ONIG_SYNTAX_PERL)
1399    x2("((?<v>a|b(?&v)b))", "bbabb", 0, 5, syn=onigmo.ONIG_SYNTAX_PERL)
1400    n("(?<", "", err=onigmo.ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS)
1401    n("(?<>)", "", err=onigmo.ONIGERR_EMPTY_GROUP_NAME)
1402    n("(?<.>)", "", err=onigmo.ONIGERR_INVALID_CHAR_IN_GROUP_NAME)
1403    n("\\g<1->", "", err=onigmo.ONIGERR_INVALID_CHAR_IN_GROUP_NAME)
1404    n("\\k<1/>", "", err=onigmo.ONIGERR_INVALID_GROUP_NAME)
1405    n("\\k<1-1/>", "", err=onigmo.ONIGERR_INVALID_GROUP_NAME)
1406    n("\\k<a/>", "", err=onigmo.ONIGERR_INVALID_CHAR_IN_GROUP_NAME)
1407
1408    # character set modifiers
1409    x2("(?u)\\w+", "あa#", 0, 2);
1410    x2("(?a)\\w+", "あa#", 1, 2);
1411    x2("(?u)\\W+", "あa#", 2, 3);
1412    x2("(?a)\\W+", "あa#", 0, 1);
1413
1414    x2("(?a)\\b", "あa", 1, 1);
1415    x2("(?a)\\w\\b", "aあ", 0, 1);
1416    x2("(?a)\\B", "a ああ ", 2, 2);
1417
1418    x2("(?u)\\B", "あ ", 2, 2);
1419    x2("(?a)\\B", "あ ", 0, 0);
1420    x2("(?a)\\B", "aあ ", 2, 2);
1421
1422    x2("(?a)a\\b", " a", 1, 2)
1423    x2("(?u)a\\b", " a", 1, 2)
1424    n("(?a)a\\B", " a")
1425    n("(?a)あ\\b", " あ")
1426    x2("(?u)あ\\b", " あ", 1, 2)
1427    x2("(?a)あ\\B", " あ", 1, 2)
1428    n("(?u)あ\\B", " あ")
1429
1430    x2("(?a)\\p{Alpha}\\P{Alpha}", "a。", 0, 2);
1431    x2("(?u)\\p{Alpha}\\P{Alpha}", "a。", 0, 2);
1432    x2("(?a)[[:word:]]+", "aあ", 0, 1);
1433    x2("(?a)[[:^word:]]+", "aあ", 1, 2);
1434    x2("(?u)[[:word:]]+", "aあ", 0, 2);
1435    n("(?u)[[:^word:]]+", "aあ");
1436
1437    x2("(?iu)\\p{lower}\\p{upper}", "Ab", 0, 2);
1438    x2("(?ia)\\p{lower}\\p{upper}", "Ab", 0, 2);
1439    x2("(?iu)[[:lower:]][[:upper:]]", "Ab", 0, 2);
1440    x2("(?ia)[[:lower:]][[:upper:]]", "Ab", 0, 2);
1441
1442    if is_unicode_encoding(onig_encoding):
1443        n("(?ia)\\w+", "\u212a\u017f");      # KELVIN SIGN, LATIN SMALL LETTER LONG S
1444        n("(?ia)[\\w]+", "\u212a\u017f");
1445        n("(?ia)[^\\W]+", "\u212a\u017f");
1446        x2("(?ia)[^\\W]+", "ks", 0, 2);
1447        n("(?iu)\\p{ASCII}", "\u212a");
1448        n("(?iu)\\P{ASCII}", "s");
1449        n("(?iu)[\\p{ASCII}]", "\u212a");
1450        n("(?iu)[\\P{ASCII}]", "s");
1451        n("(?ia)\\p{ASCII}", "\u212a");
1452        n("(?ia)\\P{ASCII}", "s");
1453        n("(?ia)[\\p{ASCII}]", "\u212a");
1454        n("(?ia)[\\P{ASCII}]", "s");
1455        x2("(?iu)[s]+", "Ss\u017f ", 0, 3);
1456        x2("(?ia)[s]+", "Ss\u017f ", 0, 3);
1457        x2("(?iu)[^s]+", "Ss\u017f ", 3, 4);
1458        x2("(?ia)[^s]+", "Ss\u017f ", 3, 4);
1459        x2("(?iu)[[:lower:]]", "\u017f", 0, 1);
1460        n("(?ia)[[:lower:]]", "\u017f");
1461        x2("(?u)[[:upper:]]", "\u212a", 0, 1);
1462        n("(?a)[[:upper:]]", "\u212a");
1463
1464    # Grep syntax
1465    # \+, \?, \|, \{n,m\}
1466    x2("a\\+", "aa", 0, 2, syn=onigmo.ONIG_SYNTAX_GREP)
1467    n("a\\+", "b", syn=onigmo.ONIG_SYNTAX_GREP)
1468    x2("a\\?", "", 0, 0, syn=onigmo.ONIG_SYNTAX_GREP)
1469    x2("a\\?", "a", 0, 1, syn=onigmo.ONIG_SYNTAX_GREP)
1470    x2("ab\\|cd", "cd", 0, 2, syn=onigmo.ONIG_SYNTAX_GREP)
1471    x2("a\\{1,2\\}", "aaa", 0, 2, syn=onigmo.ONIG_SYNTAX_GREP)
1472    x2("a\\{2\\}", "aaa", 0, 2, syn=onigmo.ONIG_SYNTAX_GREP)
1473    n("a\\{|", "", syn=onigmo.ONIG_SYNTAX_GREP, err=onigmo.ONIGERR_END_PATTERN_AT_LEFT_BRACE)
1474    # \< and \>
1475    x2("\\<abc\\>", " abc ", 1, 4, syn=onigmo.ONIG_SYNTAX_GREP)
1476    n("\\<abc\\>", "zabc ", syn=onigmo.ONIG_SYNTAX_GREP)
1477    n("\\<abc\\>", " abcd", syn=onigmo.ONIG_SYNTAX_GREP)
1478    n("\\<abc\\>", "あabcい", syn=onigmo.ONIG_SYNTAX_GREP)
1479    x2("\\<abc\\>", "あabcい", 1, 4, syn=onigmo.ONIG_SYNTAX_GREP, opt=onigmo.ONIG_OPTION_ASCII_RANGE)
1480    n("\\<abc\\>", "zabcい", syn=onigmo.ONIG_SYNTAX_GREP, opt=onigmo.ONIG_OPTION_ASCII_RANGE)
1481    n("\\<abc\\>", "あabcd", syn=onigmo.ONIG_SYNTAX_GREP, opt=onigmo.ONIG_OPTION_ASCII_RANGE)
1482    # others
1483    n("[^a]", "\n", syn=onigmo.ONIG_SYNTAX_GREP)
1484    x2("*", "*", 0, 1, syn=onigmo.ONIG_SYNTAX_GREP)
    #x2("\\{1\\}", "{1}", 0, 3, syn=onigmo.ONIG_SYNTAX_GREP)    # fails
1486    n("*", "", err=onigmo.ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED)
1487    n("{1}", "", err=onigmo.ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED)
1488
1489    # \g{} backref
1490    x2("((?<name1>\\d)|(?<name2>\\w))(\\g{name1}|\\g{name2})", "ff", 0, 2, syn=onigmo.ONIG_SYNTAX_PERL);
1491    x2("(?:(?<x>)|(?<x>efg))\\g{x}", "", 0, 0, syn=onigmo.ONIG_SYNTAX_PERL);
1492    x2("(?:(?<x>abc)|(?<x>efg))\\g{x}", "efgabcabc", 3, 9, syn=onigmo.ONIG_SYNTAX_PERL);
1493    n("(?:(?<x>abc)|(?<x>efg))\\g{x}", "abcefg", syn=onigmo.ONIG_SYNTAX_PERL);
1494    x2("((.*)a\\g{2}f)", "bacbabf", 3, 7, syn=onigmo.ONIG_SYNTAX_PERL);
1495    x2("(.*)a\\g{1}f", "baczzzzzz\nbazz\nzzzzbabf", 19, 23, syn=onigmo.ONIG_SYNTAX_PERL);
1496    x2("((.*)a\\g{-1}f)", "bacbabf", 3, 7, syn=onigmo.ONIG_SYNTAX_PERL);
1497    x2("(.*)a\\g{-1}f", "baczzzzzz\nbazz\nzzzzbabf", 19, 23, syn=onigmo.ONIG_SYNTAX_PERL);
1498    x2("(あ*)(い*)\\g{-2}\\g{-1}", "あああいいあああいい", 0, 10, syn=onigmo.ONIG_SYNTAX_PERL);
1499
1500    # Python/PCRE compatible named group
1501    x2("(?P<name_2>ab)(?P>name_2)", "abab", 0, 4, syn=onigmo.ONIG_SYNTAX_PERL);
1502    x2("(?P<n>|\\((?P>n)\\))+$", "()(())", 0, 6, syn=onigmo.ONIG_SYNTAX_PERL);
1503    x2("((?P<name1>\\d)|(?P<name2>\\w))((?P=name1)|(?P=name2))", "ff", 0, 2, syn=onigmo.ONIG_SYNTAX_PERL);
1504    n("(?P", "", syn=onigmo.ONIG_SYNTAX_PERL, err=onigmo.ONIGERR_UNDEFINED_GROUP_OPTION)
1505    n("(?PX", "", syn=onigmo.ONIG_SYNTAX_PERL, err=onigmo.ONIGERR_UNDEFINED_GROUP_OPTION)
1506
1507    # Fullwidth Alphabet
1508    n("abcdefghijklmnopqrstuvwxyz", "ABCDEFGHIJKLMNOPQRSTUVWXYZ");
1509    x2("(?i)abcdefghijklmnopqrstuvwxyz", "abcdefghijklmnopqrstuvwxyz", 0, 26);
1510    x2("(?i)abcdefghijklmnopqrstuvwxyz", "ABCDEFGHIJKLMNOPQRSTUVWXYZ", 0, 26);
1511    x2("(?i)ABCDEFGHIJKLMNOPQRSTUVWXYZ", "abcdefghijklmnopqrstuvwxyz", 0, 26);
1512    x2("(?i)ABCDEFGHIJKLMNOPQRSTUVWXYZ", "ABCDEFGHIJKLMNOPQRSTUVWXYZ", 0, 26);
1513
1514    # Greek
1515    n("αβγδεζηθικλμνξοπρστυφχψω", "ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ");
1516    x2("(?i)αβγδεζηθικλμνξοπρστυφχψω", "αβγδεζηθικλμνξοπρστυφχψω", 0, 24);
1517    x2("(?i)αβγδεζηθικλμνξοπρστυφχψω", "ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ", 0, 24);
1518    x2("(?i)ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ", "αβγδεζηθικλμνξοπρστυφχψω", 0, 24);
1519    x2("(?i)ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ", "ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ", 0, 24);
1520
1521    # Cyrillic
1522    n("абвгдеёжзийклмнопрстуфхцчшщъыьэюя", "АБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ");
1523    x2("(?i)абвгдеёжзийклмнопрстуфхцчшщъыьэюя", "абвгдеёжзийклмнопрстуфхцчшщъыьэюя", 0, 33);
1524    x2("(?i)абвгдеёжзийклмнопрстуфхцчшщъыьэюя", "АБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ", 0, 33);
1525    x2("(?i)АБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ", "абвгдеёжзийклмнопрстуфхцчшщъыьэюя", 0, 33);
1526    x2("(?i)АБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ", "АБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ", 0, 33);
1527
1528    # multiple name definition
1529    x2("(?<a>a)(?<a>b)\\k<a>", "aba", 0, 3)
1530    x2("(?<a>a)(?<a>b)\\k<a>", "abb", 0, 3)
1531    x2("(?<a>a)(?<a>b)\\g{a}", "aba", 0, 3, syn=onigmo.ONIG_SYNTAX_PERL)
1532    n("(?<a>a)(?<a>b)\\g{a}", "abb", syn=onigmo.ONIG_SYNTAX_PERL)
1533    n("(?<a>a)(?<a>b)\\g<a>", "aba", err=onigmo.ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL)
1534    x2("(?<a>[ac])(?<a>b)(?&a)", "abc", 0, 3, syn=onigmo.ONIG_SYNTAX_PERL)
1535    n("(?<a>[ac])(?<a>b)(?&a)", "abb", syn=onigmo.ONIG_SYNTAX_PERL)
1536    x2("(?:(?<x>abc)|(?<x>efg))(?i:\\k<x>)", "abcefgEFG", 3, 9)
1537    x2("(?<x>a)(?<x>b)(?i:\\k<x>)+", "abAB", 0, 4)
1538
1539    # branch reset
1540#    x3("(?|(c)|(?:(b)|(a)))", "a", 0, 1, 2)
1541#    x3("(?|(c)|(?|(b)|(a)))", "a", 0, 1, 1)
1542
1543    # conditional expression
1544    x2("(?:(a)|(b))(?(1)cd)e", "acde", 0, 4)
1545    n("(?:(a)|(b))(?(1)cd)e", "ae")
1546    x2("(?:(a)|(b))(?(2)cd)e", "ae", 0, 2)
1547    n("(?:(a)|(b))(?(2)cd)e", "acde")
1548    x2("(?:(a)|(b))(?(1)c|d)", "ac", 0, 2)
1549    x2("(?:(a)|(b))(?(1)c|d)", "bd", 0, 2)
1550    n("(?:(a)|(b))(?(1)c|d)", "ad")
1551    n("(?:(a)|(b))(?(1)c|d)", "bc")
1552    x2("(?:(a)|(b))(?:(?(1)cd)e|fg)", "acde", 0, 4)
1553    x2("(?:(a)|(b))(?:(?(1)cd|x)e|fg)", "bxe", 0, 3)
1554    n("(?:(a)|(b))(?:(?(2)cd|x)e|fg)", "bxe")
1555    x2("(?:(?<x>a)|(?<y>b))(?:(?(<x>)cd|x)e|fg)", "bxe", 0, 3)
1556    n("(?:(?<x>a)|(?<y>b))(?:(?(<y>)cd|x)e|fg)", "bxe")
1557    x2("((?<=a))?(?(1)b|c)", "abc", 1, 2)
1558    x2("((?<=a))?(?(1)b|c)", "bc", 1, 2)
1559    x2("((?<x>x)|(?<y>y))(?(<x>)y|x)", "xy", 0, 2)
1560    x2("((?<x>x)|(?<y>y))(?(<x>)y|x)", "yx", 0, 2)
1561    n("((?<x>x)|(?<y>y))(?(<x>)y|x)", "xx")
1562    n("((?<x>x)|(?<y>y))(?(<x>)y|x)", "yy")
1563    n("(a)?(?<n>b)?(?(1)a)(?(<n>)b)", "aa", err=onigmo.ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED)
1564    x2("(a)?(?<n>b)?(?(1)a)(?(<n>)b)", "aa", 0, 2, syn=onigmo.ONIG_SYNTAX_PERL)
1565    n("()(?(2))", "", err=onigmo.ONIGERR_INVALID_BACKREF)       # Issue #65
1566    n("(?(700000))", "", err=onigmo.ONIGERR_INVALID_BACKREF)
1567
1568    # Implicit-anchor optimization
1569    x2("(?m:.*abc)", "dddabdd\nddabc", 0, 13)   # optimized /(?m:.*abc)/ ==> /\A(?m:.*abc)/
1570    x2("(?m:.+abc)", "dddabdd\nddabc", 0, 13)   # optimized
1571    x2("(?-m:.*abc)", "dddabdd\nddabc", 8, 13)  # optimized /(?-m:.*abc)/ ==> /(?:^|\A)(?m:.*abc)/
1572    n("(?-m:.*ab[x-z])", "dddabdd\nddabc")      # optimized
1573    x2("(?-m:.*(?:abc|\\Gabc))", "dddabdd\nddabc", 8, 13)  # optimized
1574    x2("(?-m:.+abc)", "dddabdd\nddabc", 8, 13)  # optimized
1575    x2("(?-m:.*abc)", "dddabdd\nabc", 8, 11)    # optimized
1576    n("(?-m:.+abc)", "dddabdd\nabc")            # optimized
1577    x2("(?m:.*\\Z)", "dddabdd\nddabc", 0, 13)   # optimized /(?m:.*\Z)/ ==> /\A(?m:.*\Z)/
1578    x2("(?-m:.*\\Z)", "dddabdd\nddabc", 8, 13)  # optimized /(?-m:.*\Z)/ ==> /(?:^|\A)(?m:.*\Z)/
1579    x2("(.*)X\\1", "1234X2345", 1, 8)           # not optimized
1580
1581    # Allow options in look-behind
1582    x2("(?<=(?i)ab)cd", "ABcd", 2, 4)
1583    x2("(?<=(?i:ab))cd", "ABcd", 2, 4)
1584    n("(?<=(?i)ab)cd", "ABCD")
1585    n("(?<=(?i:ab))cd", "ABCD")
1586    x2("(?<!(?i)ab)cd", "aacd", 2, 4)
1587    x2("(?<!(?i:ab))cd", "aacd", 2, 4)
1588    n("(?<!(?i)ab)cd", "ABcd")
1589    n("(?<!(?i:ab))cd", "ABcd")
1590
1591    # Absent operator
1592    x2("<-(?~->)->", "<- ->->", 0, 5)
1593    x2("<-(?~->)->\n", "<-1->2<-3->\n", 6, 12)
1594    x2("<-(?~->)->.*<-(?~->)->", "<-1->2<-3->4<-5->", 0, 17)
1595    x2("<-(?~->)->.*?<-(?~->)->", "<-1->2<-3->4<-5->", 0, 11)
1596    x2("(?~abc)c", "abc", 0, 3)
1597    x2("(?~abc)bc", "abc", 0, 3)
1598    x2("(?~abc)abc", "abc", 0, 3)
1599    n("(?~)", " ")
1600    n("(?~)", "")
1601    n(" (?~)", "  ")
1602    n(" (?~)", " ")
1603    x2("(?~(?~))", "abc", 0, 3)
1604    x2("(?~a)", "", 0, 0)
1605    x2("(?~a)a", "a", 0, 1)
1606    x2("(?~a)", "x", 0, 1)
1607    x2("(?~a)a", "xa", 0, 2)
1608    x2("(?~.)", "", 0, 0)
1609    x2("(?~.)a", "a", 0, 1)
1610    x2("(?~.)", "x", 0, 0)
1611    x2("(?~.)a", "xa", 1, 2)
1612    x2("(?~abc)", "abc", 0, 2)
1613    x2("(?~b)", "abc", 0, 1)
1614    x2("(?~abc|b)", "abc", 0, 1)
1615    n("(?~|abc)", "abc")            # ???
1616    x2("(?~abc|)", "abc", 0, 1)     # ???
1617    x2("(?~abc|def)x", "abcx", 1, 4)
1618    x2("(?~abc|def)x", "defx", 1, 4)
1619    x2("^(?~\\S+)TEST", "TEST", 0, 4)
1620
1621    # Perl syntax
1622    x2("\\Q()\\[a]\\E[b]", "()\\[a]b", 0, 7, syn=onigmo.ONIG_SYNTAX_PERL)
1623    x2("\\Q()\\[a]", "()\\[a]", 0, 6, syn=onigmo.ONIG_SYNTAX_PERL)  # no \E
1624    x2("(?a)(?d)\\w+", "あ", 0, 1, syn=onigmo.ONIG_SYNTAX_PERL) # For now (?d) == (?u)
1625    x2("(?a)(?l)\\w+", "あ", 0, 1, syn=onigmo.ONIG_SYNTAX_PERL) # For now (?l) == (?u)
1626    x2("(?a)(?^)\\w+", "あ", 0, 1, syn=onigmo.ONIG_SYNTAX_PERL)
1627    n("(?i)(?^)a", "A", syn=onigmo.ONIG_SYNTAX_PERL)
1628    n("(?m)(?^)a$", "a\nb", syn=onigmo.ONIG_SYNTAX_PERL)
1629    x2("(?s)(?^).*", "a\nb", 0, 1, syn=onigmo.ONIG_SYNTAX_PERL)
1630    x2("\\o{046}", "\046", 0, 1, syn=onigmo.ONIG_SYNTAX_PERL);
1631    x2("[\\o{044}-\\o{047}]", "\046", 0, 1, syn=onigmo.ONIG_SYNTAX_PERL);
1632    n("\\o{40000000000}", "", syn=onigmo.ONIG_SYNTAX_PERL, err=onigmo.ONIGERR_TOO_BIG_WIDE_CHAR_VALUE);
1633    n("\\o{100000000000}", "", syn=onigmo.ONIG_SYNTAX_PERL, err=onigmo.ONIGERR_TOO_LONG_WIDE_CHAR_VALUE);
1634    n("[\\o{40000000000}]", "", syn=onigmo.ONIG_SYNTAX_PERL, err=onigmo.ONIGERR_TOO_BIG_WIDE_CHAR_VALUE);
1635    n("[\\o{100000000000}]", "", syn=onigmo.ONIG_SYNTAX_PERL, err=onigmo.ONIGERR_TOO_LONG_WIDE_CHAR_VALUE);
1636
1637    # Backward search
1638    x2("abc", "abcabc", 3, 6, searchtype=SearchType.BACKWARD)
1639    x2("あいう", "あいうあいう", 3, 6, searchtype=SearchType.BACKWARD)
1640    x2("(?i)abc", "ABCABC", 3, 6, searchtype=SearchType.BACKWARD)
1641    x2("(?i)abc", "ABCABC", 3, 6, searchtype=SearchType.BACKWARD)
1642    x2("[a-z]{3}$", "abcabc", 3, 6, searchtype=SearchType.BACKWARD)
1643    x2("[あ-ん]{3}$", "あいうあいう", 3, 6, searchtype=SearchType.BACKWARD)
1644    x2(".*[a-z]bc", "abcabc", 3, 6, searchtype=SearchType.BACKWARD) # Issue #69
1645    x2(".+[a-z]bc", "abcabc", 2, 6, searchtype=SearchType.BACKWARD) # Issue #69
1646    x2(".{1,3}[a-z]bc", "abcabc", 2, 6, searchtype=SearchType.BACKWARD)
1647
1648    # onig_match()
1649    x2("abc", "abcabc", 0, 3, searchtype=SearchType.MATCH)
1650    n("abc", " abcabc", searchtype=SearchType.MATCH)
1651
1652    # onig_search_gpos()
1653    n("\\Gabc", "123abcdef", gpos=2)
1654    x2("\\Gabc", "123abcdef", 3, 6, gpos=3)
1655    x2("\\Gabc", "123abcdef", 3, 6, startpos=3)
1656    n("\\Gabc", "123abcdef", gpos=0, startpos=3)
1657    x2("abc\\G", "abc", 0, 3, searchtype=SearchType.BACKWARD)
1658    n("abc\\G", "abc ", searchtype=SearchType.BACKWARD)
1659    x2("abc\\G", "abc ", 0, 3, searchtype=SearchType.BACKWARD, endpos=3)
1660    x2("abc\\G", "abc ", 0, 3, searchtype=SearchType.BACKWARD, gpos=3)
1661
1662    # stack size
1663    stack_size = onigmo.onig_get_match_stack_limit_size()
1664    print("Default stack size:", stack_size)
1665    onigmo.onig_set_match_stack_limit_size(1000)
1666    print("New stack size:", onigmo.onig_get_match_stack_limit_size())
1667    # These patterns need deep stack.
1668    n("^a*$", "a" * 200 + "b")
1669    n("^a*$", "a" * 2000 + "b", execerr=onigmo.ONIGERR_MATCH_STACK_LIMIT_OVER)
1670    onigmo.onig_set_match_stack_limit_size(0)
1671
1672    # parse depth
1673    parse_depth = onigmo.onig_get_parse_depth_limit()
1674    print("Default parse depth:", parse_depth)
1675    onigmo.onig_set_parse_depth_limit(1000)
1676    print("New parse depth:", onigmo.onig_get_parse_depth_limit())
1677    # These patterns need deep parse stack.
1678    x2("(" * 200 + "a" + ")" * 200, "a", 0, 1)
1679    n("(" * 2000 + "a" + ")" * 2000, "a", err=onigmo.ONIGERR_PARSE_DEPTH_LIMIT_OVER)
1680    onigmo.onig_set_match_stack_limit_size(0)
1681
1682    # syntax functions
1683    onigmo.onig_set_syntax_op(syntax_default,
1684        onigmo.onig_get_syntax_op(onigmo.ONIG_SYNTAX_DEFAULT))
1685    onigmo.onig_set_syntax_op2(syntax_default,
1686        onigmo.onig_get_syntax_op2(onigmo.ONIG_SYNTAX_DEFAULT))
1687    onigmo.onig_set_syntax_behavior(syntax_default,
1688        onigmo.onig_get_syntax_behavior(onigmo.ONIG_SYNTAX_DEFAULT))
1689    onigmo.onig_set_default_syntax(None)
1690
1691
1692    print("\nEncoding:", get_encoding_name(onig_encoding))
1693    print("RESULT   SUCC: %d,  FAIL: %d,  ERROR: %d      (by Onigmo %s)" % (
1694          nsucc, nfail, nerror, onigmo.onig_version()))
1695
1696    onigmo.onig_end()
1697
1698    if (nfail == 0 and nerror == 0):
1699        exit(0)
1700    else:
1701        exit(-1)
1702
# Script entry point: run the regex test suite only when this file is
# executed directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
1705
1706