#!/usr/bin/env python
# -*- coding: utf-8 -*-

from __future__ import print_function, unicode_literals
import ctypes
import onigmo
import sys
import io
import locale

# Result counters updated by xx().
nerror = 0
nsucc = 0
nfail = 0

# default encoding
onig_encoding = onigmo.ONIG_ENCODING_EUC_JP

# special syntactic settings
# (a copy of ONIG_SYNTAX_DEFAULT with ONIG_OPTION_ASCII_RANGE cleared)
syntax_default = ctypes.byref(onigmo.OnigSyntaxType())
onigmo.onig_copy_syntax(syntax_default, onigmo.ONIG_SYNTAX_DEFAULT)
onigmo.onig_set_syntax_options(
    syntax_default,
    onigmo.onig_get_syntax_options(syntax_default)
    & ~onigmo.ONIG_OPTION_ASCII_RANGE)


def get_encoding_name(onigenc):
    """Return the name of specified onigmo.OnigEncoding.

    The names "Windows-31J" and "ASCII-8BIT" are returned as "CP932" and
    "ASCII" respectively; any other name is returned as is.

    arguments:
      onigenc -- an instance of onigmo.OnigEncoding
    """
    name = onigenc[0].name.decode()
    encnamemap = {"Windows-31J": "CP932",
                  "ASCII-8BIT": "ASCII"}
    return encnamemap.get(name, name)


def is_unicode_encoding(enc):
    """Check if the encoding is Unicode encoding.

    arguments:
      enc -- encoding name or an instance of onigmo.OnigEncoding
    """
    return enc in (onigmo.ONIG_ENCODING_UTF32_LE,
                   onigmo.ONIG_ENCODING_UTF32_BE,
                   onigmo.ONIG_ENCODING_UTF16_LE,
                   onigmo.ONIG_ENCODING_UTF16_BE,
                   onigmo.ONIG_ENCODING_UTF8,
                   'UTF-16LE', 'UTF-16BE', 'UTF-32LE', 'UTF-32BE', 'UTF-8')


def is_ascii_incompatible_encoding(enc):
    """Check if the encoding is ASCII-incompatible encoding.

    arguments:
      enc -- encoding name or an instance of onigmo.OnigEncoding
    """
    return enc in (onigmo.ONIG_ENCODING_UTF32_LE,
                   onigmo.ONIG_ENCODING_UTF32_BE,
                   onigmo.ONIG_ENCODING_UTF16_LE,
                   onigmo.ONIG_ENCODING_UTF16_BE,
                   'UTF-16LE', 'UTF-16BE', 'UTF-32LE', 'UTF-32BE')


class strptr:
    """a helper class to get a pointer to a string

    The instance keeps a reference to the byte string, so the string stays
    alive at least as long as the instance does.
    """
    def __init__(self, s):
        """Wrap the byte string s; raise TypeError for any other type."""
        if not isinstance(s, bytes):
            raise TypeError
        self._str = s
        try:
            # CPython 2.x/3.x
            self._ptr = ctypes.cast(self._str, ctypes.c_void_p)
        except TypeError:
            # PyPy 1.x
            self._ptr = ctypes.c_void_p(self._str)

    def getptr(self, offset=0):
        """Return the address of the byte at offset as an integer.

        arguments:
          offset -- byte offset from the start of the string.
                    -1 means the end of the string.

        Raises IndexError if the offset lies outside the string.
        """
        if offset == -1:    # -1 means the end of the string
            offset = len(self._str)
        elif offset < 0 or offset > len(self._str):
            # Any other negative offset would point before the buffer.
            raise IndexError
        return self._ptr.value + offset


def cc_to_cb(s, enc, cc):
    """convert char count to byte count

    The count is taken in units of 4 bytes of the UTF-32LE form of s, and
    the result is the encoded length of that prefix in enc.
    -1 is passed through unchanged.  IndexError is raised if cc is larger
    than the number of characters in s.

    arguments:
      s -- unicode string
      enc -- encoding name
      cc -- char count
    """
    if cc == -1:
        return -1
    s = s.encode('UTF-32LE')
    clen = cc * 4
    if clen > len(s):
        raise IndexError
    return len(s[:clen].decode('UTF-32LE').encode(enc))


def print_result(result, pattern, file=None):
    """Print "<result>: <pattern>" to file.

    arguments:
      result -- result label (e.g. "OK", "FAIL")
      pattern -- text shown after the label
      file -- output stream.  If omitted, sys.stdout is used.
    """
    if not file:
        file = sys.stdout
    print(result + ": ", end='', file=file)
    try:
        print(pattern, file=file)
    except UnicodeEncodeError as e:
        # Report on the same stream as the label that was already written.
        print('(' + str(e) + ')', file=file)


def decode_errmsg(msg):
    """Decode an error message buffer into a unicode string.

    The message is decoded with the current test encoding, or with ASCII
    when that encoding is ASCII-incompatible.  Undecodable bytes are
    replaced.

    arguments:
      msg -- buffer whose .value holds the message bytes
    """
    encoding = get_encoding_name(onig_encoding)
    if is_ascii_incompatible_encoding(encoding):
        encoding = 'ASCII'
    return msg.value.decode(encoding, 'replace')


class SearchType:
    """Kinds of matching which xx() can perform."""
    FORWARD = 0
    BACKWARD = 1
    MATCH = 2


def xx(pattern, target, s_from, s_to, mem, not_match,
        searchtype=SearchType.FORWARD,
        gpos=-1, startpos=0, endpos=-1,
        syn=syntax_default, opt=onigmo.ONIG_OPTION_DEFAULT,
        err=onigmo.ONIG_NORMAL, execerr=onigmo.ONIG_NORMAL):
    """Compile pattern, run it against target and report the result.

    One of nsucc, nfail or nerror is incremented and one result line is
    printed for every call.

    arguments:
      pattern -- regex pattern (unicode string or bytes)
      target -- string to be searched (unicode string or bytes)
      s_from, s_to -- expected start/end position of the match.
                      Char counts if target is a unicode string; used as
                      given (compared with byte offsets) if it is bytes.
      mem -- index of the group whose range is compared
      not_match -- True if the pattern is expected not to match
      searchtype -- one of SearchType.FORWARD, BACKWARD or MATCH
      gpos -- position passed to onig_search_gpos().  If negative,
              onig_search() is used instead.
      startpos, endpos -- start and range of the search
                          (-1 means the end of the string)
      syn -- syntax passed to onig_new()
      opt -- compile options passed to onig_new()
      err -- expected return code of onig_new()
      execerr -- expected error code of the search/match
    """
    global nerror
    global nsucc
    global nfail

    encoding = get_encoding_name(onig_encoding)

    reg = onigmo.OnigRegex()
    einfo = onigmo.OnigErrorInfo()
    msg = ctypes.create_string_buffer(onigmo.ONIG_MAX_ERROR_MESSAGE_LEN)

    pattern2 = pattern
    if not isinstance(pattern, bytes):
        pattern2 = pattern.encode(encoding)
    patternp = strptr(pattern2)

    target2 = target
    if not isinstance(target, bytes):
        # Positions are given in characters; the library wants bytes.
        s_from = cc_to_cb(target, encoding, s_from)
        s_to = cc_to_cb(target, encoding, s_to)
        gpos = cc_to_cb(target, encoding, gpos)
        startpos = cc_to_cb(target, encoding, startpos)
        endpos = cc_to_cb(target, encoding, endpos)
        target2 = target.encode(encoding)
    targetp = strptr(target2)

    # cut very long outputs (used for showing message)
    pattern = pattern2.decode(encoding, 'replace')
    target = target2.decode(encoding, 'replace')
    limit = 100
    if len(pattern) > limit:
        pattern = pattern[:limit] + "..."
    if len(target) > limit:
        target = target[:limit] + "..."

    # Compile
    r = onigmo.onig_new(ctypes.byref(reg),
            patternp.getptr(), patternp.getptr(-1),
            opt, onig_encoding, syn, ctypes.byref(einfo))
    if r != 0:
        # Error
        onigmo.onig_error_code_to_str(msg, r, ctypes.byref(einfo))
        if r == err:
            nsucc += 1
            print_result("OK(E)", "%s (/%s/ '%s')" %
                    (decode_errmsg(msg), pattern, target))
        else:
            nerror += 1
            print_result("ERROR", "%s (/%s/ '%s')" %
                    (decode_errmsg(msg), pattern, target), file=sys.stderr)
        return

    # From here on the regex (and later the region) must be released on
    # every exit path, including exceptions such as IndexError raised by
    # getptr().
    try:
        if err != onigmo.ONIG_NORMAL:
            # A compile error was expected, but the compile succeeded.
            nfail += 1
            print_result("FAIL(E)", "/%s/ '%s'" % (pattern, target))
            return

        # Execute
        region = onigmo.onig_region_new()
        try:
            if searchtype in (SearchType.FORWARD, SearchType.BACKWARD):
                # A backward search swaps the start and the range.
                if searchtype == SearchType.FORWARD:
                    search_from, search_to = startpos, endpos
                else:
                    search_from, search_to = endpos, startpos
                if gpos >= 0:
                    r = onigmo.onig_search_gpos(reg,
                            targetp.getptr(), targetp.getptr(-1),
                            targetp.getptr(gpos),
                            targetp.getptr(search_from),
                            targetp.getptr(search_to),
                            region, onigmo.ONIG_OPTION_NONE)
                else:
                    r = onigmo.onig_search(reg,
                            targetp.getptr(), targetp.getptr(-1),
                            targetp.getptr(search_from),
                            targetp.getptr(search_to),
                            region, onigmo.ONIG_OPTION_NONE)
            elif searchtype == SearchType.MATCH:
                r = onigmo.onig_match(reg,
                        targetp.getptr(), targetp.getptr(-1),
                        targetp.getptr(startpos),
                        region, onigmo.ONIG_OPTION_NONE)
            else:
                nerror += 1
                print_result("ERROR", "wrong searchtype", file=sys.stderr)
                return

            if r < onigmo.ONIG_MISMATCH:
                # Error
                onigmo.onig_error_code_to_str(msg, r)
                if r == execerr:
                    nsucc += 1
                    print_result("OK(E)", "%s (/%s/ '%s')" %
                            (decode_errmsg(msg), pattern, target))
                else:
                    nerror += 1
                    print_result("ERROR", "%s (/%s/ '%s')" %
                            (decode_errmsg(msg), pattern, target),
                            file=sys.stderr)
                return

            if r == onigmo.ONIG_MISMATCH:
                # Not matched
                if not_match:
                    nsucc += 1
                    print_result("OK(N)", "/%s/ '%s'" % (pattern, target))
                else:
                    nfail += 1
                    print_result("FAIL", "/%s/ '%s'" % (pattern, target))
            elif not_match:
                # Matched, though a mismatch was expected
                nfail += 1
                print_result("FAIL(N)", "/%s/ '%s'" % (pattern, target))
            else:
                # Matched; check the range of the requested group
                start = region[0].beg[mem]
                end = region[0].end[mem]
                if (start == s_from) and (end == s_to):
                    nsucc += 1
                    print_result("OK", "/%s/ '%s'" % (pattern, target))
                else:
                    nfail += 1
                    print_result("FAIL", "/%s/ '%s' %d-%d : %d-%d" %
                            (pattern, target, s_from, s_to, start, end))
        finally:
            onigmo.onig_region_free(region, 1)
    finally:
        onigmo.onig_free(reg)


def x2(pattern, target, s_from, s_to, **kwargs):
    """Expect pattern to match target at s_from..s_to (whole match)."""
    xx(pattern, target, s_from, s_to, 0, False, **kwargs)


def x3(pattern, target, s_from, s_to, mem, **kwargs):
    """Expect group mem of pattern to match target at s_from..s_to."""
    xx(pattern, target, s_from, s_to, mem, False, **kwargs)


def n(pattern, target, **kwargs):
    """Expect pattern not to match target."""
    xx(pattern, target, 0, 0, 0, True, **kwargs)


def set_encoding(enc):
    """Set the encoding used for testing.

    If enc is None, the current encoding is left unchanged.
    KeyError is raised if enc is an unknown encoding name.

    arguments:
      enc -- encoding name or an instance of onigmo.OnigEncoding
    """
    global onig_encoding

    if enc is None:
        return
    if isinstance(enc, onigmo.OnigEncoding):
        onig_encoding = enc
    else:
        encs = {"EUC-JP": onigmo.ONIG_ENCODING_EUC_JP,
                "SJIS": onigmo.ONIG_ENCODING_SJIS,
                "CP932": onigmo.ONIG_ENCODING_CP932,
                "UTF-8": onigmo.ONIG_ENCODING_UTF8,
                "UTF-16LE": onigmo.ONIG_ENCODING_UTF16_LE,
                "UTF-16BE": onigmo.ONIG_ENCODING_UTF16_BE,
                "UTF-32LE": onigmo.ONIG_ENCODING_UTF32_LE,
                "UTF-32BE": onigmo.ONIG_ENCODING_UTF32_BE}
        # Names are matched case-insensitively.
        onig_encoding = encs[enc.upper()]


def get_encoding():
    """Get the encoding used for testing."""
    return onig_encoding


def set_output_encoding(enc=None):
    """Set the encoding used for showing the results.

    sys.stdout and sys.stderr are replaced with text writers opened on the
    same file descriptors.

    arguments:
      enc -- Encoding name or an instance of onigmo.OnigEncoding.
             If omitted, locale.getpreferredencoding() is used.
    """
    if enc is None:
        enc = locale.getpreferredencoding()

    def get_text_writer(fo, **kwargs):
        """Return a text writer on the file descriptor of fo."""
        kw = dict(kwargs)
        kw.setdefault('errors', 'backslashreplace') # use \uXXXX style
        kw.setdefault('closefd', False)

        if sys.version_info[0] < 3:
            # Work around for Python 2.x
            # New line conversion isn't needed here. Done in somewhere else.
            writer = io.open(fo.fileno(), mode='w', newline='', **kw)
            write = writer.write    # save the original write() function
            # Byte strings are assumed to be in the locale's encoding.
            bytes_enc = locale.getpreferredencoding()
            def convwrite(s):
                if isinstance(s, bytes):
                    write(s.decode(bytes_enc))  # convert to unistr
                else:
                    write(s)
                try:
                    writer.flush()  # needed on Windows
                except IOError:
                    pass
            writer.write = convwrite
        else:
            writer = io.open(fo.fileno(), mode='w', **kw)
        return writer

    sys.stdout = get_text_writer(sys.stdout, encoding=enc)
    sys.stderr = get_text_writer(sys.stderr, encoding=enc)


def set_default_warning_function():
    """Register a function which prints warnings from the library.

    Messages are decoded with the current test encoding, or with ASCII
    when that encoding is ASCII-incompatible.
    """
    global _warn_func_ptr

    warning_enc = get_encoding_name(onig_encoding)
    if is_ascii_incompatible_encoding(warning_enc):
        warning_enc = 'ascii'
    def warn_func(message):
        print("warning: " + message.decode(warning_enc, 'replace'))

    # The callback object is stored in a module-level name so that a
    # reference to it remains after this function returns.
    _warn_func_ptr = onigmo.OnigWarnFunc(warn_func)
    onigmo.onig_set_warn_func(_warn_func_ptr)
    onigmo.onig_set_verb_warn_func(_warn_func_ptr)


def init(enc, outenc=None):
    """Setup test target encoding, output encoding and warning function.

    arguments:
      enc -- Encoding used for testing.
      outenc -- Encoding used for showing messages.
    """
    set_encoding(enc)
    set_output_encoding(outenc)
    set_default_warning_function()


def main():
    # encoding of the test target
    enc = None
    if len(sys.argv) > 1:
        enc = sys.argv[1]

    # encoding of stdout/stderr
    outenc = None
    if len(sys.argv) > 2:
        outenc = sys.argv[2]

    # Initialization
    try:
        init(enc, outenc)
    except KeyError:
        print("test target encoding error")
        print("Usage: python testpy.py [test target encoding] [output encoding]")
        sys.exit()

    print(onigmo.onig_copyright())

    # Copied from onig-5.9.2/testc.c
    # '?\?' which is used to avoid trigraph is replaced by '??'.
387 # Match positions are specified by unit of character instead of byte. 388 389 x2("", "", 0, 0); 390 x2("^", "", 0, 0); 391 x2("$", "", 0, 0); 392 x2("\\G", "", 0, 0); 393 x2("\\A", "", 0, 0); 394 x2("\\Z", "", 0, 0); 395 x2("\\z", "", 0, 0); 396 x2("^$", "", 0, 0); 397 x2("\\ca", "\001", 0, 1); 398 x2("\\C-b", "\002", 0, 1); 399 x2("\\c\\\\", "\034", 0, 1); 400 x2("q[\\c\\\\]", "q\034", 0, 2); 401 x2("", "a", 0, 0); 402 x2("a", "a", 0, 1); 403 if onig_encoding == onigmo.ONIG_ENCODING_UTF16_LE: 404 x2("\\x61\\x00", "a", 0, 1); 405 elif onig_encoding == onigmo.ONIG_ENCODING_UTF16_BE: 406 x2("\\x00\\x61", "a", 0, 1); 407 elif onig_encoding == onigmo.ONIG_ENCODING_UTF32_LE: 408 x2("\\x61\\x00\\x00\\x00", "a", 0, 1); 409 elif onig_encoding == onigmo.ONIG_ENCODING_UTF32_BE: 410 x2("\\x00\\x00\\x00\\x61", "a", 0, 1); 411 else: 412 x2("\\x61", "a", 0, 1); 413 x2("aa", "aa", 0, 2); 414 x2("aaa", "aaa", 0, 3); 415 x2("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", 0, 35); 416 x2("ab", "ab", 0, 2); 417 x2("b", "ab", 1, 2); 418 x2("bc", "abc", 1, 3); 419 x2("(?i:#RET#)", "#INS##RET#", 5, 10); 420 if onig_encoding == onigmo.ONIG_ENCODING_UTF16_LE: 421 x2("\\17\\00", "\017", 0, 1); 422 x2("\\x1f\\x00", "\x1f", 0, 1); 423 elif onig_encoding == onigmo.ONIG_ENCODING_UTF16_BE: 424 x2("\\00\\17", "\017", 0, 1); 425 x2("\\x00\\x1f", "\x1f", 0, 1); 426 elif onig_encoding == onigmo.ONIG_ENCODING_UTF32_LE: 427 x2("\\17\\00\\00\\00", "\017", 0, 1); 428 x2("\\x1f\\x00\\x00\\x00", "\x1f", 0, 1); 429 elif onig_encoding == onigmo.ONIG_ENCODING_UTF32_BE: 430 x2("\\00\\00\\00\\17", "\017", 0, 1); 431 x2("\\x00\\x00\\x00\\x1f", "\x1f", 0, 1); 432 else: 433 x2("\\17", "\017", 0, 1); 434 x2("\\x1f", "\x1f", 0, 1); 435 x2("a(?#....\\\\JJJJ)b", "ab", 0, 2); 436 x2("(?x) G (o O(?-x)oO) g L", "GoOoOgLe", 0, 7); 437 x2(".", "a", 0, 1); 438 n(".", ""); 439 x2("..", "ab", 0, 2); 440 x2("\\w", "e", 0, 1); 441 n("\\W", "e"); 442 x2("\\s", " ", 0, 1); 443 x2("\\S", "b", 0, 
1); 444 x2("\\d", "4", 0, 1); 445 n("\\D", "4"); 446 x2("\\b", "z ", 0, 0); 447 x2("\\b", " z", 1, 1); 448 x2("\\B", "zz ", 1, 1); 449 x2("\\B", "z ", 2, 2); 450 x2("\\B", " z", 0, 0); 451 x2("[ab]", "b", 0, 1); 452 n("[ab]", "c"); 453 x2("[a-z]", "t", 0, 1); 454 n("[^a]", "a"); 455 x2("[^a]", "\n", 0, 1); 456 x2("[]]", "]", 0, 1); 457 n("[^]]", "]"); 458 x2("[\\^]+", "0^^1", 1, 3); 459 x2("[b-]", "b", 0, 1); 460 x2("[b-]", "-", 0, 1); 461 x2("[\\w]", "z", 0, 1); 462 n("[\\w]", " "); 463 x2("[\\W]", "b$", 1, 2); 464 x2("[\\d]", "5", 0, 1); 465 n("[\\d]", "e"); 466 x2("[\\D]", "t", 0, 1); 467 n("[\\D]", "3"); 468 x2("[\\s]", " ", 0, 1); 469 n("[\\s]", "a"); 470 x2("[\\S]", "b", 0, 1); 471 n("[\\S]", " "); 472 x2("[\\w\\d]", "2", 0, 1); 473 n("[\\w\\d]", " "); 474 x2("[[:upper:]]", "B", 0, 1); 475 x2("[*[:xdigit:]+]", "+", 0, 1); 476 x2("[*[:xdigit:]+]", "GHIKK-9+*", 6, 7); 477 x2("[*[:xdigit:]+]", "-@^+", 3, 4); 478 n("[[:upper]]", "A"); 479 x2("[[:upper]]", ":", 0, 1); 480 if onig_encoding == onigmo.ONIG_ENCODING_UTF16_LE: 481 x2("[\\044\\000-\\047\\000]", "\046", 0, 1); 482 x2("[\\x5a\\x00-\\x5c\\x00]", "\x5b", 0, 1); 483 x2("[\\x6A\\x00-\\x6D\\x00]", "\x6c", 0, 1); 484 n("[\\x6A\\x00-\\x6D\\x00]", "\x6E"); 485 elif onig_encoding == onigmo.ONIG_ENCODING_UTF16_BE: 486 x2("[\\000\\044-\\000\\047]", "\046", 0, 1); 487 x2("[\\x00\\x5a-\\x00\\x5c]", "\x5b", 0, 1); 488 x2("[\\x00\\x6A-\\x00\\x6D]", "\x6c", 0, 1); 489 n("[\\x00\\x6A-\\x00\\x6D]", "\x6E"); 490 elif onig_encoding == onigmo.ONIG_ENCODING_UTF32_LE: 491 x2("[\\044\\000\\000\\000-\\047\\000\\000\\000]", "\046", 0, 1); 492 x2("[\\x5a\\x00\\x00\\x00-\\x5c\\x00\\x00\\x00]", "\x5b", 0, 1); 493 x2("[\\x6A\\x00\\x00\\x00-\\x6D\\x00\\x00\\x00]", "\x6c", 0, 1); 494 n("[\\x6A\\x00\\x00\\x00-\\x6D\\x00\\x00\\x00]", "\x6E"); 495 elif onig_encoding == onigmo.ONIG_ENCODING_UTF32_BE: 496 x2("[\\000\\000\\000\\044-\\000\\000\\000\\047]", "\046", 0, 1); 497 x2("[\\x00\\x00\\x00\\x5a-\\x00\\x00\\x00\\x5c]", "\x5b", 0, 1); 498 
x2("[\\x00\\x00\\x00\\x6A-\\x00\\x00\\x00\\x6D]", "\x6c", 0, 1); 499 n("[\\x00\\x00\\x00\\x6A-\\x00\\x00\\x00\\x6D]", "\x6E"); 500 else: 501 x2("[\\044-\\047]", "\046", 0, 1); 502 x2("[\\x5a-\\x5c]", "\x5b", 0, 1); 503 x2("[\\x6A-\\x6D]", "\x6c", 0, 1); 504 n("[\\x6A-\\x6D]", "\x6E"); 505 n("^[0-9A-F]+ 0+ UNDEF ", "75F 00000000 SECT14A notype () External | _rb_apply"); 506 x2("[\\[]", "[", 0, 1); 507 x2("[\\]]", "]", 0, 1); 508 x2("[&]", "&", 0, 1); 509 x2("[[ab]]", "b", 0, 1); 510 x2("[[ab]c]", "c", 0, 1); 511 n("[[^a]]", "a"); 512 n("[^[a]]", "a"); 513 x2("[[ab]&&bc]", "b", 0, 1); 514 n("[[ab]&&bc]", "a"); 515 n("[[ab]&&bc]", "c"); 516 x2("[a-z&&b-y&&c-x]", "w", 0, 1); 517 n("[^a-z&&b-y&&c-x]", "w"); 518 x2("[[^a&&a]&&a-z]", "b", 0, 1); 519 n("[[^a&&a]&&a-z]", "a"); 520 x2("[[^a-z&&bcdef]&&[^c-g]]", "h", 0, 1); 521 n("[[^a-z&&bcdef]&&[^c-g]]", "c"); 522 x2("[^[^abc]&&[^cde]]", "c", 0, 1); 523 x2("[^[^abc]&&[^cde]]", "e", 0, 1); 524 n("[^[^abc]&&[^cde]]", "f"); 525 x2("[a-&&-a]", "-", 0, 1); 526 n("[a\\-&&\\-a]", "&"); 527 n("\\wabc", " abc"); 528 x2("a\\Wbc", "a bc", 0, 4); 529 x2("a.b.c", "aabbc", 0, 5); 530 x2(".\\wb\\W..c", "abb bcc", 0, 7); 531 x2("\\s\\wzzz", " zzzz", 0, 5); 532 x2("aa.b", "aabb", 0, 4); 533 n(".a", "ab"); 534 x2(".a", "aa", 0, 2); 535 x2("^a", "a", 0, 1); 536 x2("^a$", "a", 0, 1); 537 x2("^\\w$", "a", 0, 1); 538 n("^\\w$", " "); 539 x2("^\\wab$", "zab", 0, 3); 540 x2("^\\wabcdef$", "zabcdef", 0, 7); 541 x2("^\\w...def$", "zabcdef", 0, 7); 542 x2("\\w\\w\\s\\Waaa\\d", "aa aaa4", 0, 8); 543 x2("\\A\\Z", "", 0, 0); 544 x2("\\Axyz", "xyz", 0, 3); 545 x2("xyz\\Z", "xyz", 0, 3); 546 x2("xyz\\z", "xyz", 0, 3); 547 x2("a\\Z", "a", 0, 1); 548 x2("\\Gaz", "az", 0, 2); 549 n("\\Gz", "bza"); 550 n("az\\G", "az"); 551 n("az\\A", "az"); 552 n("a\\Az", "az"); 553 x2("\\^\\$", "^$", 0, 2); 554 x2("^x?y", "xy", 0, 2); 555 x2("^(x?y)", "xy", 0, 2); 556 x2("\\w", "_", 0, 1); 557 n("\\W", "_"); 558 x2("(?=z)z", "z", 0, 1); 559 n("(?=z).", "a"); 560 
x2("(?!z)a", "a", 0, 1); 561 n("(?!z)a", "z"); 562 x2("(?i:a)", "a", 0, 1); 563 x2("(?i:a)", "A", 0, 1); 564 x2("(?i:A)", "a", 0, 1); 565 n("(?i:A)", "b"); 566 x2("(?i:[A-Z])", "a", 0, 1); 567 x2("(?i:[f-m])", "H", 0, 1); 568 x2("(?i:[f-m])", "h", 0, 1); 569 n("(?i:[f-m])", "e"); 570 x2("(?i:[A-c])", "D", 0, 1); 571 n("(?i:[^a-z])", "A"); 572 n("(?i:[^a-z])", "a"); 573 x2("(?i:[!-k])", "Z", 0, 1); 574 x2("(?i:[!-k])", "7", 0, 1); 575 x2("(?i:[T-}])", "b", 0, 1); 576 x2("(?i:[T-}])", "{", 0, 1); 577 x2("(?i:\\?a)", "?A", 0, 2); 578 x2("(?i:\\*A)", "*a", 0, 2); 579 n(".", "\n"); 580 x2("(?m:.)", "\n", 0, 1); 581 x2("(?m:a.)", "a\n", 0, 2); 582 x2("(?m:.b)", "a\nb", 1, 3); 583 x2(".*abc", "dddabdd\nddabc", 8, 13); 584 x2("(?m:.*abc)", "dddabddabc", 0, 10); 585 n("(?i)(?-i)a", "A"); 586 n("(?i)(?-i:a)", "A"); 587 x2("a?", "", 0, 0); 588 x2("a?", "b", 0, 0); 589 x2("a?", "a", 0, 1); 590 x2("a*", "", 0, 0); 591 x2("a*", "a", 0, 1); 592 x2("a*", "aaa", 0, 3); 593 x2("a*", "baaaa", 0, 0); 594 n("a+", ""); 595 x2("a+", "a", 0, 1); 596 x2("a+", "aaaa", 0, 4); 597 x2("a+", "aabbb", 0, 2); 598 x2("a+", "baaaa", 1, 5); 599 x2(".?", "", 0, 0); 600 x2(".?", "f", 0, 1); 601 x2(".?", "\n", 0, 0); 602 x2(".*", "", 0, 0); 603 x2(".*", "abcde", 0, 5); 604 x2(".+", "z", 0, 1); 605 x2(".+", "zdswer\n", 0, 6); 606 x2("(.*)a\\1f", "babfbac", 0, 4); 607 x2("(.*)a\\1f", "bacbabf", 3, 7); 608 x2("((.*)a\\2f)", "bacbabf", 3, 7); 609 x2("(.*)a\\1f", "baczzzzzz\nbazz\nzzzzbabf", 19, 23); 610 x2("a|b", "a", 0, 1); 611 x2("a|b", "b", 0, 1); 612 x2("|a", "a", 0, 0); 613 x2("(|a)", "a", 0, 0); 614 x2("ab|bc", "ab", 0, 2); 615 x2("ab|bc", "bc", 0, 2); 616 x2("z(?:ab|bc)", "zbc", 0, 3); 617 x2("a(?:ab|bc)c", "aabc", 0, 4); 618 x2("ab|(?:ac|az)", "az", 0, 2); 619 x2("a|b|c", "dc", 1, 2); 620 x2("a|b|cd|efg|h|ijk|lmn|o|pq|rstuvwx|yz", "pqr", 0, 2); 621 n("a|b|cd|efg|h|ijk|lmn|o|pq|rstuvwx|yz", "mn"); 622 x2("a|^z", "ba", 1, 2); 623 x2("a|^z", "za", 0, 1); 624 x2("a|\\Gz", "bza", 2, 3); 625 x2("a|\\Gz", 
"za", 0, 1); 626 x2("a|\\Az", "bza", 2, 3); 627 x2("a|\\Az", "za", 0, 1); 628 x2("a|b\\Z", "ba", 1, 2); 629 x2("a|b\\Z", "b", 0, 1); 630 x2("a|b\\z", "ba", 1, 2); 631 x2("a|b\\z", "b", 0, 1); 632 x2("\\w|\\s", " ", 0, 1); 633 n("\\w|\\w", " "); 634 x2("\\w|%", "%", 0, 1); 635 x2("\\w|[&$]", "&", 0, 1); 636 x2("[b-d]|[^e-z]", "a", 0, 1); 637 x2("(?:a|[c-f])|bz", "dz", 0, 1); 638 x2("(?:a|[c-f])|bz", "bz", 0, 2); 639 x2("abc|(?=zz)..f", "zzf", 0, 3); 640 x2("abc|(?!zz)..f", "abf", 0, 3); 641 x2("(?=za)..a|(?=zz)..a", "zza", 0, 3); 642 n("(?>a|abd)c", "abdc"); 643 x2("(?>abd|a)c", "abdc", 0, 4); 644 x2("a?|b", "a", 0, 1); 645 x2("a?|b", "b", 0, 0); 646 x2("a?|b", "", 0, 0); 647 x2("a*|b", "aa", 0, 2); 648 x2("a*|b*", "ba", 0, 0); 649 x2("a*|b*", "ab", 0, 1); 650 x2("a+|b*", "", 0, 0); 651 x2("a+|b*", "bbb", 0, 3); 652 x2("a+|b*", "abbb", 0, 1); 653 n("a+|b+", ""); 654 x2("(a|b)?", "b", 0, 1); 655 x2("(a|b)*", "ba", 0, 2); 656 x2("(a|b)+", "bab", 0, 3); 657 x2("(ab|ca)+", "caabbc", 0, 4); 658 x2("(ab|ca)+", "aabca", 1, 5); 659 x2("(ab|ca)+", "abzca", 0, 2); 660 x2("(a|bab)+", "ababa", 0, 5); 661 x2("(a|bab)+", "ba", 1, 2); 662 x2("(a|bab)+", "baaaba", 1, 4); 663 x2("(?:a|b)(?:a|b)", "ab", 0, 2); 664 x2("(?:a*|b*)(?:a*|b*)", "aaabbb", 0, 3); 665 x2("(?:a*|b*)(?:a+|b+)", "aaabbb", 0, 6); 666 x2("(?:a+|b+){2}", "aaabbb", 0, 6); 667 x2("h{0,}", "hhhh", 0, 4); 668 x2("(?:a+|b+){1,2}", "aaabbb", 0, 6); 669 n("ax{2}*a", "0axxxa1"); 670 n("a.{0,2}a", "0aXXXa0"); 671 n("a.{0,2}?a", "0aXXXa0"); 672 n("a.{0,2}?a", "0aXXXXa0"); 673 x2("^a{2,}?a$", "aaa", 0, 3); 674 x2("^[a-z]{2,}?$", "aaa", 0, 3); 675 x2("(?:a+|\\Ab*)cc", "cc", 0, 2); 676 n("(?:a+|\\Ab*)cc", "abcc"); 677 x2("(?:^a+|b+)*c", "aabbbabc", 6, 8); 678 x2("(?:^a+|b+)*c", "aabbbbc", 0, 7); 679 x2("a|(?i)c", "C", 0, 1); 680 x2("(?i)c|a", "C", 0, 1); 681 x2("(?i)c|a", "A", 0, 1); 682 x2("(?i:c)|a", "C", 0, 1); 683 n("(?i:c)|a", "A"); 684 x2("[abc]?", "abc", 0, 1); 685 x2("[abc]*", "abc", 0, 3); 686 x2("[^abc]*", "abc", 0, 
0); 687 n("[^abc]+", "abc"); 688 x2("a??", "aaa", 0, 0); 689 x2("ba??b", "bab", 0, 3); 690 x2("a*?", "aaa", 0, 0); 691 x2("ba*?", "baa", 0, 1); 692 x2("ba*?b", "baab", 0, 4); 693 x2("a+?", "aaa", 0, 1); 694 x2("ba+?", "baa", 0, 2); 695 x2("ba+?b", "baab", 0, 4); 696 x2("(?:a?)??", "a", 0, 0); 697 x2("(?:a??)?", "a", 0, 0); 698 x2("(?:a?)+?", "aaa", 0, 1); 699 x2("(?:a+)??", "aaa", 0, 0); 700 x2("(?:a+)??b", "aaab", 0, 4); 701 x2("(?:ab)?{2}", "", 0, 0); 702 x2("(?:ab)?{2}", "ababa", 0, 4); 703 x2("(?:ab)*{0}", "ababa", 0, 0); 704 x2("(?:ab){3,}", "abababab", 0, 8); 705 n("(?:ab){3,}", "abab"); 706 x2("(?:ab){2,4}", "ababab", 0, 6); 707 x2("(?:ab){2,4}", "ababababab", 0, 8); 708 x2("(?:ab){2,4}?", "ababababab", 0, 4); 709 x2("(?:ab){,}", "ab{,}", 0, 5); 710 x2("(?:abc)+?{2}", "abcabcabc", 0, 6); 711 x2("(?:X*)(?i:xa)", "XXXa", 0, 4); 712 x2("(d+)([^abc]z)", "dddz", 0, 4); 713 x2("([^abc]*)([^abc]z)", "dddz", 0, 4); 714 x2("(\\w+)(\\wz)", "dddz", 0, 4); 715 x3("(a)", "a", 0, 1, 1); 716 x3("(ab)", "ab", 0, 2, 1); 717 x2("((ab))", "ab", 0, 2); 718 x3("((ab))", "ab", 0, 2, 1); 719 x3("((ab))", "ab", 0, 2, 2); 720 x3("((((((((((((((((((((ab))))))))))))))))))))", "ab", 0, 2, 20); 721 x3("(ab)(cd)", "abcd", 0, 2, 1); 722 x3("(ab)(cd)", "abcd", 2, 4, 2); 723 x3("()(a)bc(def)ghijk", "abcdefghijk", 3, 6, 3); 724 x3("(()(a)bc(def)ghijk)", "abcdefghijk", 3, 6, 4); 725 x2("(^a)", "a", 0, 1); 726 x3("(a)|(a)", "ba", 1, 2, 1); 727 x3("(^a)|(a)", "ba", 1, 2, 2); 728 x3("(a?)", "aaa", 0, 1, 1); 729 x3("(a*)", "aaa", 0, 3, 1); 730 x3("(a*)", "", 0, 0, 1); 731 x3("(a+)", "aaaaaaa", 0, 7, 1); 732 x3("(a+|b*)", "bbbaa", 0, 3, 1); 733 x3("(a+|b?)", "bbbaa", 0, 1, 1); 734 x3("(abc)?", "abc", 0, 3, 1); 735 x3("(abc)*", "abc", 0, 3, 1); 736 x3("(abc)+", "abc", 0, 3, 1); 737 x3("(xyz|abc)+", "abc", 0, 3, 1); 738 x3("([xyz][abc]|abc)+", "abc", 0, 3, 1); 739 x3("((?i:abc))", "AbC", 0, 3, 1); 740 x2("(abc)(?i:\\1)", "abcABC", 0, 6); 741 x3("((?m:a.c))", "a\nc", 0, 3, 1); 742 x3("((?=az)a)", 
"azb", 0, 1, 1); 743 x3("abc|(.abd)", "zabd", 0, 4, 1); 744 x2("(?:abc)|(ABC)", "abc", 0, 3); 745 x3("(?i:(abc))|(zzz)", "ABC", 0, 3, 1); 746 x3("a*(.)", "aaaaz", 4, 5, 1); 747 x3("a*?(.)", "aaaaz", 0, 1, 1); 748 x3("a*?(c)", "aaaac", 4, 5, 1); 749 x3("[bcd]a*(.)", "caaaaz", 5, 6, 1); 750 x3("(\\Abb)cc", "bbcc", 0, 2, 1); 751 n("(\\Abb)cc", "zbbcc"); 752 x3("(^bb)cc", "bbcc", 0, 2, 1); 753 n("(^bb)cc", "zbbcc"); 754 x3("cc(bb$)", "ccbb", 2, 4, 1); 755 n("cc(bb$)", "ccbbb"); 756 n("(\\1)", ""); 757 n("\\1(a)", "aa"); 758 n("(a(b)\\1)\\2+", "ababb"); 759 n("(?:(?:\\1|z)(a))+$", "zaa"); 760 x2("(?:(?:\\1|z)(a))+$", "zaaa", 0, 4); 761 x2("(a)(?=\\1)", "aa", 0, 1); 762 n("(a)$|\\1", "az"); 763 x2("(a)\\1", "aa", 0, 2); 764 n("(a)\\1", "ab"); 765 x2("(a?)\\1", "aa", 0, 2); 766 x2("(a??)\\1", "aa", 0, 0); 767 x2("(a*)\\1", "aaaaa", 0, 4); 768 x3("(a*)\\1", "aaaaa", 0, 2, 1); 769 x2("a(b*)\\1", "abbbb", 0, 5); 770 x2("a(b*)\\1", "ab", 0, 1); 771 x2("(a*)(b*)\\1\\2", "aaabbaaabb", 0, 10); 772 x2("(a*)(b*)\\2", "aaabbbb", 0, 7); 773 x2("(((((((a*)b))))))c\\7", "aaabcaaa", 0, 8); 774 x3("(((((((a*)b))))))c\\7", "aaabcaaa", 0, 3, 7); 775 x2("(a)(b)(c)\\2\\1\\3", "abcbac", 0, 6); 776 x2("([a-d])\\1", "cc", 0, 2); 777 x2("(\\w\\d\\s)\\1", "f5 f5 ", 0, 6); 778 n("(\\w\\d\\s)\\1", "f5 f5"); 779 x2("(who|[a-c]{3})\\1", "whowho", 0, 6); 780 x2("...(who|[a-c]{3})\\1", "abcwhowho", 0, 9); 781 x2("(who|[a-c]{3})\\1", "cbccbc", 0, 6); 782 x2("(^a)\\1", "aa", 0, 2); 783 n("(^a)\\1", "baa"); 784 n("(a$)\\1", "aa"); 785 n("(ab\\Z)\\1", "ab"); 786 x2("(a*\\Z)\\1", "a", 1, 1); 787 x2(".(a*\\Z)\\1", "ba", 1, 2); 788 x3("(.(abc)\\2)", "zabcabc", 0, 7, 1); 789 x3("(.(..\\d.)\\2)", "z12341234", 0, 9, 1); 790 x2("((?i:az))\\1", "AzAz", 0, 4); 791 n("((?i:az))\\1", "Azaz"); 792 x2("(?<=a)b", "ab", 1, 2); 793 n("(?<=a)b", "bb"); 794 x2("(?<=a|b)b", "bb", 1, 2); 795 x2("(?<=a|bc)b", "bcb", 2, 3); 796 x2("(?<=a|bc)b", "ab", 1, 2); 797 x2("(?<=a|bc||defghij|klmnopq|r)z", "rz", 1, 2); 798 
x2("(a)\\g<1>", "aa", 0, 2); 799 x2("(?<!a)b", "cb", 1, 2); 800 n("(?<!a)b", "ab"); 801 x2("(?<!a|bc)b", "bbb", 0, 1); 802 n("(?<!a|bc)z", "bcz"); 803 x2("(?<name1>a)", "a", 0, 1); 804 x2("(?<name_2>ab)\\g<name_2>", "abab", 0, 4); 805 x2("(?<name_3>.zv.)\\k<name_3>", "azvbazvb", 0, 8); 806 x2("(?<=\\g<ab>)|-\\zEND (?<ab>XyZ)", "XyZ", 3, 3); 807 x2("(?<n>|a\\g<n>)+", "", 0, 0); 808 x2("(?<n>|\\(\\g<n>\\))+$", "()(())", 0, 6); 809 x3("\\g<n>(?<n>.){0}", "X", 0, 1, 1); 810 x2("\\g<n>(abc|df(?<n>.YZ){2,8}){0}", "XYZ", 0, 3); 811 x2("\\A(?<n>(a\\g<n>)|)\\z", "aaaa", 0, 4); 812 x2("(?<n>|\\g<m>\\g<n>)\\z|\\zEND (?<m>a|(b)\\g<m>)", "bbbbabba", 0, 8); 813 x2("(?<name1240>\\w+\\sx)a+\\k<name1240>", " fg xaaaaaaaafg x", 2, 18); 814 x3("(z)()()(?<_9>a)\\g<_9>", "zaa", 2, 3, 1); 815 x2("(.)(((?<_>a)))\\k<_>", "zaa", 0, 3); 816 x2("((?<name1>\\d)|(?<name2>\\w))(\\k<name1>|\\k<name2>)", "ff", 0, 2); 817 x2("(?:(?<x>)|(?<x>efg))\\k<x>", "", 0, 0); 818 x2("(?:(?<x>abc)|(?<x>efg))\\k<x>", "abcefgefg", 3, 9); 819 n("(?:(?<x>abc)|(?<x>efg))\\k<x>", "abcefg"); 820 x2("(?:(?<n1>.)|(?<n1>..)|(?<n1>...)|(?<n1>....)|(?<n1>.....)|(?<n1>......)|(?<n1>.......)|(?<n1>........)|(?<n1>.........)|(?<n1>..........)|(?<n1>...........)|(?<n1>............)|(?<n1>.............)|(?<n1>..............))\\k<n1>$", "a-pyumpyum", 2, 10); 821 x3("(?:(?<n1>.)|(?<n1>..)|(?<n1>...)|(?<n1>....)|(?<n1>.....)|(?<n1>......)|(?<n1>.......)|(?<n1>........)|(?<n1>.........)|(?<n1>..........)|(?<n1>...........)|(?<n1>............)|(?<n1>.............)|(?<n1>..............))\\k<n1>$", "xxxxabcdefghijklmnabcdefghijklmn", 4, 18, 14); 822 x3("(?<name1>)(?<name2>)(?<name3>)(?<name4>)(?<name5>)(?<name6>)(?<name7>)(?<name8>)(?<name9>)(?<name10>)(?<name11>)(?<name12>)(?<name13>)(?<name14>)(?<name15>)(?<name16>aaa)(?<name17>)$", "aaa", 0, 3, 16); 823 x2("(?<foo>a|\\(\\g<foo>\\))", "a", 0, 1); 824 x2("(?<foo>a|\\(\\g<foo>\\))", "((((((a))))))", 0, 13); 825 x3("(?<foo>a|\\(\\g<foo>\\))", "((((((((a))))))))", 0, 17, 1); 826 
x2("\\g<bar>|\\zEND(?<bar>.*abc$)", "abcxxxabc", 0, 9); 827 x2("\\g<1>|\\zEND(.a.)", "bac", 0, 3); 828 x3("\\g<_A>\\g<_A>|\\zEND(.a.)(?<_A>.b.)", "xbxyby", 3, 6, 1); 829 x2("\\A(?:\\g<pon>|\\g<pan>|\\zEND (?<pan>a|c\\g<pon>c)(?<pon>b|d\\g<pan>d))$", "cdcbcdc", 0, 7); 830 x2("\\A(?<n>|a\\g<m>)\\z|\\zEND (?<m>\\g<n>)", "aaaa", 0, 4); 831 x2("(?<n>(a|b\\g<n>c){3,5})", "baaaaca", 1, 5); 832 x2("(?<n>(a|b\\g<n>c){3,5})", "baaaacaaaaa", 0, 10); 833 x2("(?<pare>\\(([^\\(\\)]++|\\g<pare>)*+\\))", "((a))", 0, 5); 834 x2("()*\\1", "", 0, 0); 835 x2("(?:()|())*\\1\\2", "", 0, 0); 836 x3("(?:\\1a|())*", "a", 0, 0, 1); 837 x2("x((.)*)*x", "0x1x2x3", 1, 6); 838 x2("x((.)*)*x(?i:\\1)\\Z", "0x1x2x1X2", 1, 9); 839 x2("(?:()|()|()|()|()|())*\\2\\5", "", 0, 0); 840 x2("(?:()|()|()|(x)|()|())*\\2b\\5", "b", 0, 1); 841 if onig_encoding == onigmo.ONIG_ENCODING_UTF16_LE: 842 x2("\\xFA\\x8F", "\u8ffa", 0, 1); 843 elif onig_encoding == onigmo.ONIG_ENCODING_UTF16_BE: 844 x2("\\x8F\\xFA", "\u8ffa", 0, 1); 845 elif onig_encoding == onigmo.ONIG_ENCODING_UTF32_LE: 846 x2("\\xFA\\x8F\\x00\\x00", "\u8ffa", 0, 1); 847 elif onig_encoding == onigmo.ONIG_ENCODING_UTF32_BE: 848 x2("\\x00\\x00\\x8F\\xFA", "\u8ffa", 0, 1); 849 elif onig_encoding == onigmo.ONIG_ENCODING_UTF8: 850 x2("\\xE8\\xBF\\xBA", "\u8ffa", 0, 1); 851 elif onig_encoding == onigmo.ONIG_ENCODING_SJIS or \ 852 onig_encoding == onigmo.ONIG_ENCODING_CP932: 853 x2("\\xE7\\x92", "\u8ffa", 0, 1); 854 elif onig_encoding == onigmo.ONIG_ENCODING_EUC_JP: 855 x2("\\xED\\xF2", "\u8ffa", 0, 1); # "迺" 856 x2("", "あ", 0, 0); 857 x2("あ", "あ", 0, 1); 858 n("い", "あ"); 859 x2("うう", "うう", 0, 2); 860 x2("あいう", "あいう", 0, 3); 861 x2("こここここここここここここここここここここここここここここここここここ", "こここここここここここここここここここここここここここここここここここ", 0, 35); 862 x2("あ", "いあ", 1, 2); 863 x2("いう", "あいう", 1, 3); 864# x2(b"\\xca\\xb8", b"\xca\xb8", 0, 2); # "文" 865 x2(".", "あ", 0, 1); 866 x2("..", "かき", 0, 2); 867 x2("\\w", "お", 0, 1); 868 n("\\W", "あ"); 869 x2("[\\W]", "う$", 1, 2); 870 x2("\\S", "そ", 
0, 1); 871 x2("\\S", "漢", 0, 1); 872 x2("\\b", "気 ", 0, 0); 873 x2("\\b", " ほ", 1, 1); 874 x2("\\B", "せそ ", 1, 1); 875 x2("\\B", "う ", 2, 2); 876 x2("\\B", " い", 0, 0); 877 x2("[たち]", "ち", 0, 1); 878 n("[なに]", "ぬ"); 879 x2("[う-お]", "え", 0, 1); 880 n("[^け]", "け"); 881 x2("[\\w]", "ね", 0, 1); 882 n("[\\d]", "ふ"); 883 x2("[\\D]", "は", 0, 1); 884 n("[\\s]", "く"); 885 x2("[\\S]", "へ", 0, 1); 886 x2("[\\w\\d]", "よ", 0, 1); 887 x2("[\\w\\d]", " よ", 3, 4); 888 n("\\w鬼車", " 鬼車"); 889 x2("鬼\\W車", "鬼 車", 0, 3); 890 x2("あ.い.う", "ああいいう", 0, 5); 891 x2(".\\wう\\W..ぞ", "えうう うぞぞ", 0, 7); 892 x2("\\s\\wこここ", " ここここ", 0, 5); 893 x2("ああ.け", "ああけけ", 0, 4); 894 n(".い", "いえ"); 895 x2(".お", "おお", 0, 2); 896 x2("^あ", "あ", 0, 1); 897 x2("^む$", "む", 0, 1); 898 x2("^\\w$", "に", 0, 1); 899 x2("^\\wかきくけこ$", "zかきくけこ", 0, 6); 900 x2("^\\w...うえお$", "zあいううえお", 0, 7); 901 x2("\\w\\w\\s\\Wおおお\\d", "aお おおお4", 0, 8); 902 x2("\\Aたちつ", "たちつ", 0, 3); 903 x2("むめも\\Z", "むめも", 0, 3); 904 x2("かきく\\z", "かきく", 0, 3); 905 x2("かきく\\Z", "かきく\n", 0, 3); 906 x2("\\Gぽぴ", "ぽぴ", 0, 2); 907 n("\\Gえ", "うえお"); 908 n("とて\\G", "とて"); 909 n("まみ\\A", "まみ"); 910 n("ま\\Aみ", "まみ"); 911 x2("(?=せ)せ", "せ", 0, 1); 912 n("(?=う).", "い"); 913 x2("(?!う)か", "か", 0, 1); 914 n("(?!と)あ", "と"); 915 x2("(?i:あ)", "あ", 0, 1); 916 x2("(?i:ぶべ)", "ぶべ", 0, 2); 917 n("(?i:い)", "う"); 918 x2("(?m:よ.)", "よ\n", 0, 2); 919 x2("(?m:.め)", "ま\nめ", 1, 3); 920 x2("あ?", "", 0, 0); 921 x2("変?", "化", 0, 0); 922 x2("変?", "変", 0, 1); 923 x2("量*", "", 0, 0); 924 x2("量*", "量", 0, 1); 925 x2("子*", "子子子", 0, 3); 926 x2("馬*", "鹿馬馬馬馬", 0, 0); 927 n("山+", ""); 928 x2("河+", "河", 0, 1); 929 x2("時+", "時時時時", 0, 4); 930 x2("え+", "ええううう", 0, 2); 931 x2("う+", "おうううう", 1, 5); 932 x2(".?", "た", 0, 1); 933 x2(".*", "ぱぴぷぺ", 0, 4); 934 x2(".+", "ろ", 0, 1); 935 x2(".+", "いうえか\n", 0, 4); 936 x2("あ|い", "あ", 0, 1); 937 x2("あ|い", "い", 0, 1); 938 x2("あい|いう", "あい", 0, 2); 939 x2("あい|いう", "いう", 0, 2); 940 x2("を(?:かき|きく)", "をかき", 0, 3); 941 x2("を(?:かき|きく)け", "をきくけ", 0, 4); 942 
x2("あい|(?:あう|あを)", "あを", 0, 2); 943 x2("あ|い|う", "えう", 1, 2); 944 x2("あ|い|うえ|おかき|く|けこさ|しすせ|そ|たち|つてとなに|ぬね", "しすせ", 0, 3); 945 n("あ|い|うえ|おかき|く|けこさ|しすせ|そ|たち|つてとなに|ぬね", "すせ"); 946 x2("あ|^わ", "ぶあ", 1, 2); 947 x2("あ|^を", "をあ", 0, 1); 948 x2("鬼|\\G車", "け車鬼", 2, 3); 949 x2("鬼|\\G車", "車鬼", 0, 1); 950 x2("鬼|\\A車", "b車鬼", 2, 3); 951 x2("鬼|\\A車", "車", 0, 1); 952 x2("鬼|車\\Z", "車鬼", 1, 2); 953 x2("鬼|車\\Z", "車", 0, 1); 954 x2("鬼|車\\Z", "車\n", 0, 1); 955 x2("鬼|車\\z", "車鬼", 1, 2); 956 x2("鬼|車\\z", "車", 0, 1); 957 x2("\\w|\\s", "お", 0, 1); 958 x2("\\w|%", "%お", 0, 1); 959 x2("\\w|[&$]", "う&", 0, 1); 960 x2("[い-け]", "う", 0, 1); 961 x2("[い-け]|[^か-こ]", "あ", 0, 1); 962 x2("[い-け]|[^か-こ]", "か", 0, 1); 963 x2("[^あ]", "\n", 0, 1); 964 x2("(?:あ|[う-き])|いを", "うを", 0, 1); 965 x2("(?:あ|[う-き])|いを", "いを", 0, 2); 966 x2("あいう|(?=けけ)..ほ", "けけほ", 0, 3); 967 x2("あいう|(?!けけ)..ほ", "あいほ", 0, 3); 968 x2("(?=をあ)..あ|(?=をを)..あ", "ををあ", 0, 3); 969 x2("(?<=あ|いう)い", "いうい", 2, 3); 970 n("(?>あ|あいえ)う", "あいえう"); 971 x2("(?>あいえ|あ)う", "あいえう", 0, 4); 972 x2("あ?|い", "あ", 0, 1); 973 x2("あ?|い", "い", 0, 0); 974 x2("あ?|い", "", 0, 0); 975 x2("あ*|い", "ああ", 0, 2); 976 x2("あ*|い*", "いあ", 0, 0); 977 x2("あ*|い*", "あい", 0, 1); 978 x2("[aあ]*|い*", "aあいいい", 0, 2); 979 x2("あ+|い*", "", 0, 0); 980 x2("あ+|い*", "いいい", 0, 3); 981 x2("あ+|い*", "あいいい", 0, 1); 982 x2("あ+|い*", "aあいいい", 0, 0); 983 n("あ+|い+", ""); 984 x2("(あ|い)?", "い", 0, 1); 985 x2("(あ|い)*", "いあ", 0, 2); 986 x2("(あ|い)+", "いあい", 0, 3); 987 x2("(あい|うあ)+", "うああいうえ", 0, 4); 988 x2("(あい|うえ)+", "うああいうえ", 2, 6); 989 x2("(あい|うあ)+", "ああいうあ", 1, 5); 990 x2("(あい|うあ)+", "あいをうあ", 0, 2); 991 x2("(あい|うあ)+", "$$zzzzあいをうあ", 6, 8); 992 x2("(あ|いあい)+", "あいあいあ", 0, 5); 993 x2("(あ|いあい)+", "いあ", 1, 2); 994 x2("(あ|いあい)+", "いあああいあ", 1, 4); 995 x2("(?:あ|い)(?:あ|い)", "あい", 0, 2); 996 x2("(?:あ*|い*)(?:あ*|い*)", "あああいいい", 0, 3); 997 x2("(?:あ*|い*)(?:あ+|い+)", "あああいいい", 0, 6); 998 x2("(?:あ+|い+){2}", "あああいいい", 0, 6); 999 x2("(?:あ+|い+){1,2}", "あああいいい", 0, 6); 1000 x2("(?:あ+|\\Aい*)うう", "うう", 0, 2); 1001 
n("(?:あ+|\\Aい*)うう", "あいうう"); 1002 x2("(?:^あ+|い+)*う", "ああいいいあいう", 6, 8); 1003 x2("(?:^あ+|い+)*う", "ああいいいいう", 0, 7); 1004 x2("う{0,}", "うううう", 0, 4); 1005 x2("あ|(?i)c", "C", 0, 1); 1006 x2("(?i)c|あ", "C", 0, 1); 1007 x2("(?i:あ)|a", "a", 0, 1); 1008 n("(?i:あ)|a", "A"); 1009 x2("[あいう]?", "あいう", 0, 1); 1010 x2("[あいう]*", "あいう", 0, 3); 1011 x2("[^あいう]*", "あいう", 0, 0); 1012 n("[^あいう]+", "あいう"); 1013 x2("あ??", "あああ", 0, 0); 1014 x2("いあ??い", "いあい", 0, 3); 1015 x2("あ*?", "あああ", 0, 0); 1016 x2("いあ*?", "いああ", 0, 1); 1017 x2("いあ*?い", "いああい", 0, 4); 1018 x2("あ+?", "あああ", 0, 1); 1019 x2("いあ+?", "いああ", 0, 2); 1020 x2("いあ+?い", "いああい", 0, 4); 1021 x2("(?:天?)??", "天", 0, 0); 1022 x2("(?:天??)?", "天", 0, 0); 1023 x2("(?:夢?)+?", "夢夢夢", 0, 1); 1024 x2("(?:風+)??", "風風風", 0, 0); 1025 x2("(?:雪+)??霜", "雪雪雪霜", 0, 4); 1026 x2("(?:あい)?{2}", "", 0, 0); 1027 x2("(?:鬼車)?{2}", "鬼車鬼車鬼", 0, 4); 1028 x2("(?:鬼車)*{0}", "鬼車鬼車鬼", 0, 0); 1029 x2("(?:鬼車){3,}", "鬼車鬼車鬼車鬼車", 0, 8); 1030 n("(?:鬼車){3,}", "鬼車鬼車"); 1031 x2("(?:鬼車){2,4}", "鬼車鬼車鬼車", 0, 6); 1032 x2("(?:鬼車){2,4}", "鬼車鬼車鬼車鬼車鬼車", 0, 8); 1033 x2("(?:鬼車){2,4}?", "鬼車鬼車鬼車鬼車鬼車", 0, 4); 1034 x2("(?:鬼車){,}", "鬼車{,}", 0, 5); 1035 x2("(?:かきく)+?{2}", "かきくかきくかきく", 0, 6); 1036 x3("(火)", "火", 0, 1, 1); 1037 x3("(火水)", "火水", 0, 2, 1); 1038 x2("((時間))", "時間", 0, 2); 1039 x3("((風水))", "風水", 0, 2, 1); 1040 x3("((昨日))", "昨日", 0, 2, 2); 1041 x3("((((((((((((((((((((量子))))))))))))))))))))", "量子", 0, 2, 20); 1042 x3("(あい)(うえ)", "あいうえ", 0, 2, 1); 1043 x3("(あい)(うえ)", "あいうえ", 2, 4, 2); 1044 x3("()(あ)いう(えおか)きくけこ", "あいうえおかきくけこ", 3, 6, 3); 1045 x3("(()(あ)いう(えおか)きくけこ)", "あいうえおかきくけこ", 3, 6, 4); 1046 x3(".*(フォ)ン・マ(ン()シュタ)イン", "フォン・マンシュタイン", 5, 9, 2); 1047 x2("(^あ)", "あ", 0, 1); 1048 x3("(あ)|(あ)", "いあ", 1, 2, 1); 1049 x3("(^あ)|(あ)", "いあ", 1, 2, 2); 1050 x3("(あ?)", "あああ", 0, 1, 1); 1051 x3("(ま*)", "ままま", 0, 3, 1); 1052 x3("(と*)", "", 0, 0, 1); 1053 x3("(る+)", "るるるるるるる", 0, 7, 1); 1054 x3("(ふ+|へ*)", "ふふふへへ", 0, 3, 1); 1055 x3("(あ+|い?)", "いいいああ", 0, 1, 1); 1056 x3("(あいう)?", "あいう", 0, 3, 
1); 1057 x3("(あいう)*", "あいう", 0, 3, 1); 1058 x3("(あいう)+", "あいう", 0, 3, 1); 1059 x3("(さしす|あいう)+", "あいう", 0, 3, 1); 1060 x3("([なにぬ][かきく]|かきく)+", "かきく", 0, 3, 1); 1061 x3("((?i:あいう))", "あいう", 0, 3, 1); 1062 x3("((?m:あ.う))", "あ\nう", 0, 3, 1); 1063 x3("((?=あん)あ)", "あんい", 0, 1, 1); 1064 x3("あいう|(.あいえ)", "んあいえ", 0, 4, 1); 1065 x3("あ*(.)", "ああああん", 4, 5, 1); 1066 x3("あ*?(.)", "ああああん", 0, 1, 1); 1067 x3("あ*?(ん)", "ああああん", 4, 5, 1); 1068 x3("[いうえ]あ*(.)", "えああああん", 5, 6, 1); 1069 x3("(\\Aいい)うう", "いいうう", 0, 2, 1); 1070 n("(\\Aいい)うう", "んいいうう"); 1071 x3("(^いい)うう", "いいうう", 0, 2, 1); 1072 n("(^いい)うう", "んいいうう"); 1073 x3("ろろ(るる$)", "ろろるる", 2, 4, 1); 1074 n("ろろ(るる$)", "ろろるるる"); 1075 x2("(無)\\1", "無無", 0, 2); 1076 n("(無)\\1", "無武"); 1077 x2("(空?)\\1", "空空", 0, 2); 1078 x2("(空??)\\1", "空空", 0, 0); 1079 x2("(空*)\\1", "空空空空空", 0, 4); 1080 x3("(空*)\\1", "空空空空空", 0, 2, 1); 1081 x2("あ(い*)\\1", "あいいいい", 0, 5); 1082 x2("あ(い*)\\1", "あい", 0, 1); 1083 x2("(あ*)(い*)\\1\\2", "あああいいあああいい", 0, 10); 1084 x2("(あ*)(い*)\\2", "あああいいいい", 0, 7); 1085 x3("(あ*)(い*)\\2", "あああいいいい", 3, 5, 2); 1086 x2("(((((((ぽ*)ぺ))))))ぴ\\7", "ぽぽぽぺぴぽぽぽ", 0, 8); 1087 x3("(((((((ぽ*)ぺ))))))ぴ\\7", "ぽぽぽぺぴぽぽぽ", 0, 3, 7); 1088 x2("(は)(ひ)(ふ)\\2\\1\\3", "はひふひはふ", 0, 6); 1089 x2("([き-け])\\1", "くく", 0, 2); 1090 x2("(\\w\\d\\s)\\1", "あ5 あ5 ", 0, 6); 1091 n("(\\w\\d\\s)\\1", "あ5 あ5"); 1092 x2("(誰?|[あ-う]{3})\\1", "誰?誰?", 0, 4); 1093 x2("...(誰?|[あ-う]{3})\\1", "あaあ誰?誰?", 0, 7); 1094 x2("(誰?|[あ-う]{3})\\1", "ういうういう", 0, 6); 1095 x2("(^こ)\\1", "ここ", 0, 2); 1096 n("(^む)\\1", "めむむ"); 1097 n("(あ$)\\1", "ああ"); 1098 n("(あい\\Z)\\1", "あい"); 1099 x2("(あ*\\Z)\\1", "あ", 1, 1); 1100 x2(".(あ*\\Z)\\1", "いあ", 1, 2); 1101 x3("(.(やいゆ)\\2)", "zやいゆやいゆ", 0, 7, 1); 1102 x3("(.(..\\d.)\\2)", "あ12341234", 0, 9, 1); 1103 x2("((?i:あvず))\\1", "あvずあvず", 0, 6); 1104 x2("(?<愚か>変|\\(\\g<愚か>\\))", "((((((変))))))", 0, 13); 1105 x2("\\A(?:\\g<阿_1>|\\g<云_2>|\\z終了 (?<阿_1>観|自\\g<云_2>自)(?<云_2>在|菩薩\\g<阿_1>菩薩))$", "菩薩自菩薩自在自菩薩自菩薩", 0, 13); 1106 x2("[[ひふ]]", "ふ", 0, 1); 1107 
x2("[[いおう]か]", "か", 0, 1); 1108 n("[[^あ]]", "あ"); 1109 n("[^[あ]]", "あ"); 1110 x2("[^[^あ]]", "あ", 0, 1); 1111 x2("[[かきく]&&きく]", "く", 0, 1); 1112 n("[[かきく]&&きく]", "か"); 1113 n("[[かきく]&&きく]", "け"); 1114 x2("[あ-ん&&い-を&&う-ゑ]", "ゑ", 0, 1); 1115 n("[^あ-ん&&い-を&&う-ゑ]", "ゑ"); 1116 x2("[[^あ&&あ]&&あ-ん]", "い", 0, 1); 1117 n("[[^あ&&あ]&&あ-ん]", "あ"); 1118 x2("[[^あ-ん&&いうえお]&&[^う-か]]", "き", 0, 1); 1119 n("[[^あ-ん&&いうえお]&&[^う-か]]", "い"); 1120 x2("[^[^あいう]&&[^うえお]]", "う", 0, 1); 1121 x2("[^[^あいう]&&[^うえお]]", "え", 0, 1); 1122 n("[^[^あいう]&&[^うえお]]", "か"); 1123 x2("[あ-&&-あ]", "-", 0, 1); 1124 x2("[^[^a-zあいう]&&[^bcdefgうえお]q-w]", "え", 0, 1); 1125 x2("[^[^a-zあいう]&&[^bcdefgうえお]g-w]", "f", 0, 1); 1126 x2("[^[^a-zあいう]&&[^bcdefgうえお]g-w]", "g", 0, 1); 1127 n("[^[^a-zあいう]&&[^bcdefgうえお]g-w]", "2"); 1128 x2("a<b>バージョンのダウンロード<\\/b>", "a<b>バージョンのダウンロード</b>", 0, 20); 1129 x2(".<b>バージョンのダウンロード<\\/b>", "a<b>バージョンのダウンロード</b>", 0, 20); 1130 1131 1132 # additional test patterns 1133 if is_unicode_encoding(onig_encoding): 1134 x2("\\x{3042}\\x{3044}", "あい", 0, 2) 1135 elif onig_encoding == onigmo.ONIG_ENCODING_SJIS or \ 1136 onig_encoding == onigmo.ONIG_ENCODING_CP932: 1137 x2("\\x{82a0}\\x{82A2}", "あい", 0, 2) 1138 x2("\\M-\\C-b\x50", "1", 0, 1) # \x8250 1139 elif onig_encoding == onigmo.ONIG_ENCODING_EUC_JP: 1140 x2("\\x{a4a2}\\x{A4A4}", "あい", 0, 2) 1141 x2("\\p{Hiragana}\\p{Katakana}", "あイ", 0, 2) 1142 x2("(?m)^A.B$", "X\nA\nB\nZ", 2, 5) 1143 n("(?<!(?<=a)b|c)d", "abd") 1144 n("(?<!(?<=a)b|c)d", "cd") 1145 x2("(?<!(?<=a)b|c)d", "bd", 1, 2) 1146 x2("(a){2}z", "aaz", 0, 3) 1147 x2("(?<=a).*b", "aab", 1, 3) 1148 x2("(?!a).*b", "ab", 1, 2) 1149 x2("(?<=(?<!A)B)C", "BBC", 2, 3) 1150 n("(?<=(?<!A)B)C", "ABC") 1151 n("(?i)(?<!aa|b)c", "Aac") 1152 n("(?i)(?<!b|aa)c", "Aac") 1153 x2("(?<=\\babc)d", " abcd", 4, 5) 1154 x2("(?<=\\Babc)d", "aabcd", 4, 5) 1155 x2("a\\b?a", "aa", 0, 2) 1156 x2("[^x]*x", "aaax", 0, 4) 1157 x2("(?i)[\\x{0}-B]+", "\x00\x01\x02\x1f\x20@AaBbC", 0, 10) 1158 x2("(?i)a{2}", "AA", 0, 2) 1159 if 
is_unicode_encoding(onig_encoding): 1160 # The longest script name 1161 x2("\\p{Other_Default_Ignorable_Code_Point}+", "\u034F\uFFF8\U000E0FFF", 0, 3) 1162 # The longest block name 1163 x2("\\p{In_Unified_Canadian_Aboriginal_Syllabics_Extended}+", "\u18B0\u18FF", 0, 2) 1164 # Unicode case fold 1165 x2("(?i)\u1ffc", "\u2126\u1fbe", 0, 2) 1166 x2("(?i)\u1ffc", "\u1ff3", 0, 1) 1167 x2("(?i)\u0390", "\u03b9\u0308\u0301", 0, 3) 1168 x2("(?i)\u03b9\u0308\u0301", "\u0390", 0, 1) 1169 x2("(?i)ff", "\ufb00", 0, 1) 1170 x2("(?i)\ufb01", "fi", 0, 2) 1171 x2("(?i)\u0149\u0149", "\u0149\u0149", 0, 2) 1172 x2("(?i)(?<=\u0149)a", "\u02bcna", 2, 3) # with look-behind 1173 # Other Unicode tests 1174 x2("\\x{25771}", "\U00025771", 0, 1) 1175 x2("[0-9-a]+", " 0123456789-a ", 1, 13) # same as [0-9\-a] 1176 x2("[0-9-\\s]+", " 0123456789-a ", 0, 12) # same as [0-9\-\s] 1177 n("[0-9-a]", "", syn=onigmo.ONIG_SYNTAX_GREP, err=onigmo.ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS) 1178 x2("[0-9-あ\\\\/\u0001]+", " 0123456789-あ\\/\u0001 ", 1, 16) 1179 x2("[a-b-]+", "ab-", 0, 3) 1180 x2("[a-b-&&-]+", "ab-", 2, 3) 1181 x2("(?i)[a[b-あ]]+", "abあ", 0, 3) 1182 if is_unicode_encoding(onig_encoding): 1183 x2("(?i)[\\d[:^graph:]]+", "0あ", 0, 1) 1184 x2("(?ia)[\\d[:^print:]]+", "0あ", 0, 2) 1185 x2("(?i:a) B", "a B", 0, 3); 1186 x2("(?i:a )B", "a B", 0, 3); 1187 x2("B (?i:a)", "B a", 0, 3); 1188 x2("B(?i: a)", "B a", 0, 3); 1189 if is_unicode_encoding(onig_encoding): 1190 x2("(?a)[\\p{Space}\\d]", "\u00a0", 0, 1) 1191 x2("(?a)[\\d\\p{Space}]", "\u00a0", 0, 1) 1192 n("(?a)[^\\p{Space}\\d]", "\u00a0") 1193 n("(?a)[^\\d\\p{Space}]", "\u00a0") 1194 x2("(?d)[[:space:]\\d]", "\u00a0", 0, 1) 1195 n("(?d)[^\\d[:space:]]", "\u00a0") 1196 n("x.*?\\Z$", "x\ny") 1197 n("x.*?\\Z$", "x\r\ny") 1198 x2("x.*?\\Z$", "x\n", 0, 1) 1199 x2("x.*?\\Z$", "x\r\n", 0, 2) # \Z will match between \r and \n, if 1200 # ONIG_OPTION_NEWLINE_CRLF isn't specified. 
1201 x2("(?<=fo).*", "foo", 2, 3) # Issue #15 1202 x2("(?m)(?<=fo).*", "foo", 2, 3) # Issue #15 1203 x2("(?m)(?<=fo).+", "foo", 2, 3) # Issue #15 1204 x2("\\n?\\z", "hello", 5, 5) 1205 x2("\\z", "hello", 5, 5) 1206 x2("\\n?\\z", "こんにちは", 5, 5) 1207 x2("\\z", "こんにちは", 5, 5) 1208 x2("()" * 32767, "", 0, 0) # Issue #24 1209 n("()" * 32768, "", err=onigmo.ONIGERR_TOO_MANY_CAPTURE_GROUPS) 1210 x2("\\h+ \\H+", " 0123456789aBcDeF gh", 1, 20) 1211 x2("[\\h]+ [\\H]+", " 0123456789aBcDeF gh", 1, 20) 1212 x2("\\A(|.|(?:(.)\\g<1>\\k<2+0>))\\z", "reer", 0, 4) 1213 x2("\\A(?<a>|.|(?:(?<b>.)\\g<a>\\k<b+0>))\\z", "reer", 0, 4) 1214 x2("(?i)\\A(|.|(?:(.)\\g<1>\\k<2+0>))\\z", "reER", 0, 4) 1215 x2("(?i)\\A(?<a>|.|(?:(?<b>.)\\g<a>\\k<b+0>))\\z", "REer", 0, 4) 1216 x2(''' # Extended pattern 1217 (?<element> \g<stag> \g<content>* \g<etag> ){0} 1218 (?<stag> < \g<name> \s* > ){0} 1219 (?<name> [a-zA-Z_:]+ ){0} 1220 (?<content> [^<&]+ (\g<element> | [^<&]+)* ){0} 1221 (?<etag> </ \k<name+1> >){0} 1222 \g<element>''', 1223 "<foo>f<bar>bbb</bar>f</foo>", 0, 27, opt=onigmo.ONIG_OPTION_EXTEND) 1224 x2("(.)(?<a>a)(?<a>b)\\k<a>", "xaba", 0, 4) 1225 x2("\\p{Print}+", "\n a", 1, 3) 1226 x2("\\p{Graph}+", "\n a", 2, 3) 1227 n("a(?!b)", "ab"); 1228 x2("(?:(.)\\1)*", "a" * 300, 0, 300) 1229 x2("\\cA\\C-B\\a[\\b]\\t\\n\\v\\f\\r\\e\\c?", "\x01\x02\x07\x08\x09\x0a\x0b\x0c\x0d\x1b\x7f", 0, 11) 1230 x2("(?<=(?:[a-z]|\\w){3})x", "ab1x", 3, 4) # repeat inside look-behind 1231 x2("(?<n>(a|b\\g<n>c){3,5}?)", "baaaaca", 1, 4) 1232 x2("\\p{WoRd}", "a", 0, 1) # property name is not case sensitive 1233 n("[[:WoRd:]]", "a", err=onigmo.ONIGERR_INVALID_POSIX_BRACKET_TYPE) # POSIX bracket name is case sensitive 1234 n("(\\2)(\\1)", "") # Issue #65 1235 n("(0?0|(?(1)||)|(?(1)||))?", "", err=onigmo.ONIGERR_INVALID_CONDITION_PATTERN) # Ruby Bug#12418 1236 n("[\\40000000000", "", err=onigmo.ONIGERR_TOO_BIG_NUMBER) # Ruby Bug#12420 1237 n("[\\600000000000\n", "", err=onigmo.ONIGERR_TOO_BIG_NUMBER) # Ruby Bug#12423 1238 
n("[]", "", err=onigmo.ONIGERR_EMPTY_CHAR_CLASS) 1239 n("[c-a]", "", err=onigmo.ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS) 1240 x2("[[:ab:\\x{30}]]+", ":ab0x", 0, 4) 1241 x2("[[:x\\]:]+", "[x:]", 0, 4) 1242 x2("[!--x]+", "!-x", 0, 3) 1243 x2(" ]", " ]", 0, 2) # warning: ']' without escape 1244 n("\\x{FFFFFFFF}", "", err=onigmo.ONIGERR_TOO_BIG_WIDE_CHAR_VALUE); 1245 n("\\x{100000000}", "", err=onigmo.ONIGERR_TOO_LONG_WIDE_CHAR_VALUE); 1246 x2("\\u0026", "\u0026", 0, 1) 1247 x2("[\\u0024-\\u0027]", "\u0026", 0, 1) 1248 n("\\u026x", "", err=onigmo.ONIGERR_TOO_SHORT_DIGITS) 1249 n("()(?\\!(?'a')\\1)", "", err=onigmo.ONIGERR_UNDEFINED_GROUP_OPTION) 1250 x2("\\i", "i", 0, 1) # unknown escape warning 1251 n("\\((", "", err=onigmo.ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS) 1252 n("(|", "", err=onigmo.ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS) 1253 x2("%{(.*?)}", "%{HOSTNAME}", 0, 11) 1254 if not is_ascii_incompatible_encoding(onig_encoding): 1255 n(b"'/g\\\xff\xff\xff\xff&))", "", err=onigmo.ONIGERR_UNMATCHED_CLOSE_PARENTHESIS) 1256 n(b"\\\xff0", "") 1257 if onig_encoding == onigmo.ONIG_ENCODING_UTF8: 1258 n(b"[0-0-\xe2 ", "", err=onigmo.ONIGERR_PREMATURE_END_OF_CHAR_CLASS) 1259 n("\\p{foobarbaz}", "", err=onigmo.ONIGERR_INVALID_CHAR_PROPERTY_NAME) 1260 n("\\p{あ}", "", err=onigmo.ONIGERR_INVALID_CHAR_PROPERTY_NAME) 1261 if is_unicode_encoding(onig_encoding): 1262 n("\\p{\U00025771}", "", err=onigmo.ONIGERR_INVALID_CHAR_PROPERTY_NAME) 1263 if onig_encoding == onigmo.ONIG_ENCODING_UTF8: 1264 x2("[\\xce\\xb1\\xce\\xb2]", "β", 0, 1) 1265 elif onig_encoding == onigmo.ONIG_ENCODING_SJIS or \ 1266 onig_encoding == onigmo.ONIG_ENCODING_CP932: 1267 n("[\\x84A]", "", err=onigmo.ONIGERR_TOO_SHORT_MULTI_BYTE_STRING) 1268 elif onig_encoding == onigmo.ONIG_ENCODING_EUC_JP: 1269 n("[\\xAAA]", "", err=onigmo.ONIGERR_TOO_SHORT_MULTI_BYTE_STRING) 1270 elif is_ascii_incompatible_encoding(onig_encoding): 1271 n("[\\x420]", "", err=onigmo.ONIGERR_TOO_SHORT_MULTI_BYTE_STRING) 1272 
x2("(?:a?)*", "aa", 0, 2) # tests for reducing nested quantifiers 1273 x2("(?:a?)*?", "aa", 0, 0) 1274 x2("(?:a*)??", "aa", 0, 0) 1275 x2("(?:a+?)*", "aa", 0, 1) 1276 x2("(?:a*){2,3}", "aaa", 0, 3) 1277 n("(?:a+){2,3}", "a") 1278 x2("a{", "a{", 0, 2) # invalid interval is allowed 1279 n("a{100001}", "", err=onigmo.ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE) 1280 n("a{0,100001}", "", err=onigmo.ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE) 1281 n("a{5,1}", "", err=onigmo.ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE) 1282 x2("abc{1}", "abcc", 0, 3) 1283 x3("\\(((?:[^(]|\\g<0>)*)\\)", "(abc)(abc)", 1, 4, 1) # Issue #48 1284 x3("\\(((?:[^(]|\\g<0>)*)\\)", "((abc)(abc))", 1, 11, 1) 1285 x3("\\(((?:[^(]|(\\g<0>))*)\\)", "((abc)(abc))", 6, 11, 2) 1286 n("[\\6000", "a", err=onigmo.ONIGERR_TOO_BIG_NUMBER) # CVE-2017-9226 1287 n("[\\H- ]", "", err=onigmo.ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS) # CVE-2017-9228 1288 x2("c.*\\b", "abc", 2, 3) # Issue #96 1289 x2("abc.*\\b", "abc", 0, 3) # Issue #96 1290 x2("\\b.*abc.*\\b", "abc", 0, 3) # Issue #96 1291 1292 # ONIG_OPTION_FIND_LONGEST option 1293 x2("foo|foobar", "foobar", 0, 3) 1294 x2("foo|foobar", "foobar", 0, 6, opt=onigmo.ONIG_OPTION_FIND_LONGEST) 1295 x2("a*", "aa aaa aaaa aaaaa ", 12, 17, opt=onigmo.ONIG_OPTION_FIND_LONGEST) 1296 1297 # ONIG_OPTION_FIND_NOT_EMPTY option 1298 x2("\w*", " a", 0, 0) 1299 x2("\w*", " a", 1, 2, opt=onigmo.ONIG_OPTION_FIND_NOT_EMPTY) 1300 1301 # ONIG_OPTION_DONT_CAPTURE_GROUP option 1302 x2("(ab|cd)*", "cdab", 0, 4, opt=onigmo.ONIG_OPTION_DONT_CAPTURE_GROUP) 1303 n("(ab|cd)*\\1", "", opt=onigmo.ONIG_OPTION_DONT_CAPTURE_GROUP, err=onigmo.ONIGERR_INVALID_BACKREF) 1304 1305 # character classes (tests for character class optimization) 1306 x2("[@][a]", "@a", 0, 2); 1307 x2(".*[a][b][c][d][e]", "abcde", 0, 5); 1308 x2("(?i)[A\\x{41}]", "a", 0, 1); 1309 x2("[abA]", "a", 0, 1); 1310 x2("[[ab]&&[ac]]+", "aaa", 0, 3); 1311 x2("[[ab]&&[^b]]+", "aaa", 0, 3); 1312 x2("[[^b]&&[ab]]+", "aaa", 0, 3); 
1313 x2("[[あい]&&[あう]]+", "あああ", 0, 3); 1314 x2("[[あい]&&[^い]]+", "あああ", 0, 3); 1315 x2("[[^い]&&[あい]]+", "あああ", 0, 3); 1316 1317 # possessive quantifiers 1318 n("a?+a", "a") 1319 n("a*+a", "aaaa") 1320 n("a++a", "aaaa") 1321 x2("a{2,3}+a", "aaa", 0, 3) # Not a possessive quantifier in Ruby, 1322 # same as "(?:a{2,3})+a" 1323 n("a{2,3}+a", "aaa", syn=onigmo.ONIG_SYNTAX_PERL) 1324 1325 # automatic possessification 1326 x2("\\w+\\W", "abc#", 0, 4) 1327 x2("[a-c]+\\W", "abc#", 0, 4) 1328 x2("[a-c#]+\\W", "abc#", 0, 4) 1329 x2("[^a-c]+\\W", "def#", 0, 4) 1330 x2("(?a)[^a-c]+\\W", "def#", 0, 4) 1331 x2("a+\\w", "aaaa", 0, 4) 1332 x2("#+\\w", "###a", 0, 4) 1333 x2("(?a)a+\\w", "aaaa", 0, 4) 1334 x2("(?a)あ+\\w", "あああa", 0, 4) 1335 x2("[a-c]+[d-f]", "abcd", 0, 4) 1336 x2("[^d-f]+[d-f]", "abcd", 0, 4) 1337 x2("[a-cあ]+[d-f]", "abcd", 0, 4) 1338 1339 # linebreak 1340 x2("\\R", "\n", 0, 1) 1341 x2("\\R", "\r", 0, 1) 1342 x2("\\R{3}", "\r\r\n\n", 0, 4) 1343 1344 if (is_unicode_encoding(onig_encoding)): 1345 x2("\\R", "\u0085", 0, 1) 1346 x2("\\R", "\u2028", 0, 1) 1347 x2("\\R", "\u2029", 0, 1) 1348 1349 # extended grapheme cluster 1350 x2("\\X{5}", "あいab\n", 0, 5) 1351 x2("\\X", "\n", 0, 1) 1352 x2("\\X", "\r", 0, 1) 1353 x2("\\X{3}", "\r\r\n\n", 0, 4) 1354 if is_unicode_encoding(onig_encoding): 1355 x2("\\X", "\u306F\u309A\n", 0, 2) 1356 x2("\\A\\X\\z", "\u0020\u200d", 0, 2) 1357 x2("\\A\\X\\z", "\u0600\u0600", 0, 2) 1358 x2("\\A\\X\\z", "\u0600\u0020", 0, 2) 1359 x2("\\A\\X\\z", "\u261d\U0001F3FB", 0, 2) 1360 x2("\\A\\X\\z", "\U0001f600", 0, 1) 1361 x2("\\A\\X\\z", "\u0020\u0308", 0, 2) 1362 x2("\\A\\X\\X\\z", "\u000a\u0308", 0, 2) 1363 x2("\\A\\X\\X\\z", "\u000d\u0308", 0, 2) 1364 x2("\\A\\X\\z", "\U0001F477\U0001F3FF\u200D\u2640\uFE0F", 0, 5) 1365 x2("\\A\\X\\z", "\U0001F468\u200D\U0001F393", 0, 3) 1366 x2("\\A\\X\\z", "\U0001F46F\u200D\u2642\uFE0F", 0, 4) 1367 x2("\\A\\X\\z", "\U0001F469\u200d\u2764\ufe0f\u200d\U0001F469", 0, 6) 1368 1369 # keep 1370 x2("ab\\Kcd", "abcd", 2, 
4) 1371 x2("ab\\Kc(\\Kd|z)", "abcd", 3, 4) 1372 x2("ab\\Kc(\\Kz|d)", "abcd", 2, 4) 1373 x2("(a\\K)*", "aaab", 3, 3) 1374 x3("(a\\K)*", "aaab", 2, 3, 1) 1375# x2("a\\K?a", "aa", 0, 2) # error: differ from perl 1376 x2("ab(?=c\Kd)", "abcd", 2, 2) # This behaviour is currently not well defined. (see: perlre) 1377 x2("(?<=a\\Kb|aa)cd", "abcd", 1, 4) # This behaviour is currently not well defined. (see: perlre) 1378 x2("(?<=ab|a\\Ka)cd", "abcd", 2, 4) # This behaviour is currently not well defined. (see: perlre) 1379 1380 # named group and subroutine call 1381 x2("(?<name_2>ab)(?&name_2)", "abab", 0, 4, syn=onigmo.ONIG_SYNTAX_PERL); 1382 x2("(?<name_2>ab)(?1)", "abab", 0, 4, syn=onigmo.ONIG_SYNTAX_PERL); 1383 x2("(?'n'|\\((?&n)\\))+$", "()(())", 0, 6, syn=onigmo.ONIG_SYNTAX_PERL); 1384 x2("(a|x(?-1)x)", "xax", 0, 3, syn=onigmo.ONIG_SYNTAX_PERL); 1385 x2("(a|(x(?-2)x))", "xax", 0, 3, syn=onigmo.ONIG_SYNTAX_PERL); 1386 x2("a|x(?0)x", "xax", 0, 3, syn=onigmo.ONIG_SYNTAX_PERL); 1387 x2("a|x(?R)x", "xax", 0, 3, syn=onigmo.ONIG_SYNTAX_PERL); 1388 x2("(a|x\g<0>x)", "xax", 0, 3); 1389 x2("(a|x\g'0'x)", "xax", 0, 3); 1390 x2("(?-i:(?+1))(?i:(a)){0}", "A", 0, 1, syn=onigmo.ONIG_SYNTAX_PERL); 1391 x2("(?-i:\g<+1>)(?i:(a)){0}", "A", 0, 1); 1392 x2("(?-i:\g'+1')(?i:(a)){0}", "A", 0, 1); 1393 n("(.(?=\\g<1>))", "", err=onigmo.ONIGERR_NEVER_ENDING_RECURSION) 1394 n("(a)(?<n>b)\\g<1>\\g<n>", "abab", err=onigmo.ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED) 1395 x2("(a)(?<n>b)(?1)(?&n)", "abab", 0, 4, syn=onigmo.ONIG_SYNTAX_PERL) 1396 x2("((?<v>)a)|b\\g<0>b", "bbabb", 0, 5) 1397 x2("((?<v>)a)|b(?0)b", "bbabb", 0, 5, syn=onigmo.ONIG_SYNTAX_PERL) 1398 x2("((?<v>)a|b(?1)b)", "bbabb", 0, 5, syn=onigmo.ONIG_SYNTAX_PERL) 1399 x2("((?<v>a|b(?&v)b))", "bbabb", 0, 5, syn=onigmo.ONIG_SYNTAX_PERL) 1400 n("(?<", "", err=onigmo.ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS) 1401 n("(?<>)", "", err=onigmo.ONIGERR_EMPTY_GROUP_NAME) 1402 n("(?<.>)", "", err=onigmo.ONIGERR_INVALID_CHAR_IN_GROUP_NAME) 
1403 n("\\g<1->", "", err=onigmo.ONIGERR_INVALID_CHAR_IN_GROUP_NAME) 1404 n("\\k<1/>", "", err=onigmo.ONIGERR_INVALID_GROUP_NAME) 1405 n("\\k<1-1/>", "", err=onigmo.ONIGERR_INVALID_GROUP_NAME) 1406 n("\\k<a/>", "", err=onigmo.ONIGERR_INVALID_CHAR_IN_GROUP_NAME) 1407 1408 # character set modifiers 1409 x2("(?u)\\w+", "あa#", 0, 2); 1410 x2("(?a)\\w+", "あa#", 1, 2); 1411 x2("(?u)\\W+", "あa#", 2, 3); 1412 x2("(?a)\\W+", "あa#", 0, 1); 1413 1414 x2("(?a)\\b", "あa", 1, 1); 1415 x2("(?a)\\w\\b", "aあ", 0, 1); 1416 x2("(?a)\\B", "a ああ ", 2, 2); 1417 1418 x2("(?u)\\B", "あ ", 2, 2); 1419 x2("(?a)\\B", "あ ", 0, 0); 1420 x2("(?a)\\B", "aあ ", 2, 2); 1421 1422 x2("(?a)a\\b", " a", 1, 2) 1423 x2("(?u)a\\b", " a", 1, 2) 1424 n("(?a)a\\B", " a") 1425 n("(?a)あ\\b", " あ") 1426 x2("(?u)あ\\b", " あ", 1, 2) 1427 x2("(?a)あ\\B", " あ", 1, 2) 1428 n("(?u)あ\\B", " あ") 1429 1430 x2("(?a)\\p{Alpha}\\P{Alpha}", "a。", 0, 2); 1431 x2("(?u)\\p{Alpha}\\P{Alpha}", "a。", 0, 2); 1432 x2("(?a)[[:word:]]+", "aあ", 0, 1); 1433 x2("(?a)[[:^word:]]+", "aあ", 1, 2); 1434 x2("(?u)[[:word:]]+", "aあ", 0, 2); 1435 n("(?u)[[:^word:]]+", "aあ"); 1436 1437 x2("(?iu)\\p{lower}\\p{upper}", "Ab", 0, 2); 1438 x2("(?ia)\\p{lower}\\p{upper}", "Ab", 0, 2); 1439 x2("(?iu)[[:lower:]][[:upper:]]", "Ab", 0, 2); 1440 x2("(?ia)[[:lower:]][[:upper:]]", "Ab", 0, 2); 1441 1442 if is_unicode_encoding(onig_encoding): 1443 n("(?ia)\\w+", "\u212a\u017f"); # KELVIN SIGN, LATIN SMALL LETTER LONG S 1444 n("(?ia)[\\w]+", "\u212a\u017f"); 1445 n("(?ia)[^\\W]+", "\u212a\u017f"); 1446 x2("(?ia)[^\\W]+", "ks", 0, 2); 1447 n("(?iu)\\p{ASCII}", "\u212a"); 1448 n("(?iu)\\P{ASCII}", "s"); 1449 n("(?iu)[\\p{ASCII}]", "\u212a"); 1450 n("(?iu)[\\P{ASCII}]", "s"); 1451 n("(?ia)\\p{ASCII}", "\u212a"); 1452 n("(?ia)\\P{ASCII}", "s"); 1453 n("(?ia)[\\p{ASCII}]", "\u212a"); 1454 n("(?ia)[\\P{ASCII}]", "s"); 1455 x2("(?iu)[s]+", "Ss\u017f ", 0, 3); 1456 x2("(?ia)[s]+", "Ss\u017f ", 0, 3); 1457 x2("(?iu)[^s]+", "Ss\u017f ", 3, 4); 1458 x2("(?ia)[^s]+", 
"Ss\u017f ", 3, 4); 1459 x2("(?iu)[[:lower:]]", "\u017f", 0, 1); 1460 n("(?ia)[[:lower:]]", "\u017f"); 1461 x2("(?u)[[:upper:]]", "\u212a", 0, 1); 1462 n("(?a)[[:upper:]]", "\u212a"); 1463 1464 # Grep syntax 1465 # \+, \?, \|, \{n,m\} 1466 x2("a\\+", "aa", 0, 2, syn=onigmo.ONIG_SYNTAX_GREP) 1467 n("a\\+", "b", syn=onigmo.ONIG_SYNTAX_GREP) 1468 x2("a\\?", "", 0, 0, syn=onigmo.ONIG_SYNTAX_GREP) 1469 x2("a\\?", "a", 0, 1, syn=onigmo.ONIG_SYNTAX_GREP) 1470 x2("ab\\|cd", "cd", 0, 2, syn=onigmo.ONIG_SYNTAX_GREP) 1471 x2("a\\{1,2\\}", "aaa", 0, 2, syn=onigmo.ONIG_SYNTAX_GREP) 1472 x2("a\\{2\\}", "aaa", 0, 2, syn=onigmo.ONIG_SYNTAX_GREP) 1473 n("a\\{|", "", syn=onigmo.ONIG_SYNTAX_GREP, err=onigmo.ONIGERR_END_PATTERN_AT_LEFT_BRACE) 1474 # \< and \> 1475 x2("\\<abc\\>", " abc ", 1, 4, syn=onigmo.ONIG_SYNTAX_GREP) 1476 n("\\<abc\\>", "zabc ", syn=onigmo.ONIG_SYNTAX_GREP) 1477 n("\\<abc\\>", " abcd", syn=onigmo.ONIG_SYNTAX_GREP) 1478 n("\\<abc\\>", "あabcい", syn=onigmo.ONIG_SYNTAX_GREP) 1479 x2("\\<abc\\>", "あabcい", 1, 4, syn=onigmo.ONIG_SYNTAX_GREP, opt=onigmo.ONIG_OPTION_ASCII_RANGE) 1480 n("\\<abc\\>", "zabcい", syn=onigmo.ONIG_SYNTAX_GREP, opt=onigmo.ONIG_OPTION_ASCII_RANGE) 1481 n("\\<abc\\>", "あabcd", syn=onigmo.ONIG_SYNTAX_GREP, opt=onigmo.ONIG_OPTION_ASCII_RANGE) 1482 # others 1483 n("[^a]", "\n", syn=onigmo.ONIG_SYNTAX_GREP) 1484 x2("*", "*", 0, 1, syn=onigmo.ONIG_SYNTAX_GREP) 1485 #x2("\\{1\\}", "{1}", 0, 3, syn.onigmo.ONIG_SYNTAX_GREP) # fails 1486 n("*", "", err=onigmo.ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED) 1487 n("{1}", "", err=onigmo.ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED) 1488 1489 # \g{} backref 1490 x2("((?<name1>\\d)|(?<name2>\\w))(\\g{name1}|\\g{name2})", "ff", 0, 2, syn=onigmo.ONIG_SYNTAX_PERL); 1491 x2("(?:(?<x>)|(?<x>efg))\\g{x}", "", 0, 0, syn=onigmo.ONIG_SYNTAX_PERL); 1492 x2("(?:(?<x>abc)|(?<x>efg))\\g{x}", "efgabcabc", 3, 9, syn=onigmo.ONIG_SYNTAX_PERL); 1493 n("(?:(?<x>abc)|(?<x>efg))\\g{x}", "abcefg", syn=onigmo.ONIG_SYNTAX_PERL); 
1494 x2("((.*)a\\g{2}f)", "bacbabf", 3, 7, syn=onigmo.ONIG_SYNTAX_PERL); 1495 x2("(.*)a\\g{1}f", "baczzzzzz\nbazz\nzzzzbabf", 19, 23, syn=onigmo.ONIG_SYNTAX_PERL); 1496 x2("((.*)a\\g{-1}f)", "bacbabf", 3, 7, syn=onigmo.ONIG_SYNTAX_PERL); 1497 x2("(.*)a\\g{-1}f", "baczzzzzz\nbazz\nzzzzbabf", 19, 23, syn=onigmo.ONIG_SYNTAX_PERL); 1498 x2("(あ*)(い*)\\g{-2}\\g{-1}", "あああいいあああいい", 0, 10, syn=onigmo.ONIG_SYNTAX_PERL); 1499 1500 # Python/PCRE compatible named group 1501 x2("(?P<name_2>ab)(?P>name_2)", "abab", 0, 4, syn=onigmo.ONIG_SYNTAX_PERL); 1502 x2("(?P<n>|\\((?P>n)\\))+$", "()(())", 0, 6, syn=onigmo.ONIG_SYNTAX_PERL); 1503 x2("((?P<name1>\\d)|(?P<name2>\\w))((?P=name1)|(?P=name2))", "ff", 0, 2, syn=onigmo.ONIG_SYNTAX_PERL); 1504 n("(?P", "", syn=onigmo.ONIG_SYNTAX_PERL, err=onigmo.ONIGERR_UNDEFINED_GROUP_OPTION) 1505 n("(?PX", "", syn=onigmo.ONIG_SYNTAX_PERL, err=onigmo.ONIGERR_UNDEFINED_GROUP_OPTION) 1506 1507 # Fullwidth Alphabet 1508 n("abcdefghijklmnopqrstuvwxyz", "ABCDEFGHIJKLMNOPQRSTUVWXYZ"); 1509 x2("(?i)abcdefghijklmnopqrstuvwxyz", "abcdefghijklmnopqrstuvwxyz", 0, 26); 1510 x2("(?i)abcdefghijklmnopqrstuvwxyz", "ABCDEFGHIJKLMNOPQRSTUVWXYZ", 0, 26); 1511 x2("(?i)ABCDEFGHIJKLMNOPQRSTUVWXYZ", "abcdefghijklmnopqrstuvwxyz", 0, 26); 1512 x2("(?i)ABCDEFGHIJKLMNOPQRSTUVWXYZ", "ABCDEFGHIJKLMNOPQRSTUVWXYZ", 0, 26); 1513 1514 # Greek 1515 n("αβγδεζηθικλμνξοπρστυφχψω", "ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ"); 1516 x2("(?i)αβγδεζηθικλμνξοπρστυφχψω", "αβγδεζηθικλμνξοπρστυφχψω", 0, 24); 1517 x2("(?i)αβγδεζηθικλμνξοπρστυφχψω", "ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ", 0, 24); 1518 x2("(?i)ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ", "αβγδεζηθικλμνξοπρστυφχψω", 0, 24); 1519 x2("(?i)ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ", "ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ", 0, 24); 1520 1521 # Cyrillic 1522 n("абвгдеёжзийклмнопрстуфхцчшщъыьэюя", "АБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ"); 1523 x2("(?i)абвгдеёжзийклмнопрстуфхцчшщъыьэюя", "абвгдеёжзийклмнопрстуфхцчшщъыьэюя", 0, 33); 1524 x2("(?i)абвгдеёжзийклмнопрстуфхцчшщъыьэюя", "АБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ", 
0, 33); 1525 x2("(?i)АБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ", "абвгдеёжзийклмнопрстуфхцчшщъыьэюя", 0, 33); 1526 x2("(?i)АБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ", "АБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ", 0, 33); 1527 1528 # multiple name definition 1529 x2("(?<a>a)(?<a>b)\\k<a>", "aba", 0, 3) 1530 x2("(?<a>a)(?<a>b)\\k<a>", "abb", 0, 3) 1531 x2("(?<a>a)(?<a>b)\\g{a}", "aba", 0, 3, syn=onigmo.ONIG_SYNTAX_PERL) 1532 n("(?<a>a)(?<a>b)\\g{a}", "abb", syn=onigmo.ONIG_SYNTAX_PERL) 1533 n("(?<a>a)(?<a>b)\\g<a>", "aba", err=onigmo.ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL) 1534 x2("(?<a>[ac])(?<a>b)(?&a)", "abc", 0, 3, syn=onigmo.ONIG_SYNTAX_PERL) 1535 n("(?<a>[ac])(?<a>b)(?&a)", "abb", syn=onigmo.ONIG_SYNTAX_PERL) 1536 x2("(?:(?<x>abc)|(?<x>efg))(?i:\\k<x>)", "abcefgEFG", 3, 9) 1537 x2("(?<x>a)(?<x>b)(?i:\\k<x>)+", "abAB", 0, 4) 1538 1539 # branch reset 1540# x3("(?|(c)|(?:(b)|(a)))", "a", 0, 1, 2) 1541# x3("(?|(c)|(?|(b)|(a)))", "a", 0, 1, 1) 1542 1543 # conditional expression 1544 x2("(?:(a)|(b))(?(1)cd)e", "acde", 0, 4) 1545 n("(?:(a)|(b))(?(1)cd)e", "ae") 1546 x2("(?:(a)|(b))(?(2)cd)e", "ae", 0, 2) 1547 n("(?:(a)|(b))(?(2)cd)e", "acde") 1548 x2("(?:(a)|(b))(?(1)c|d)", "ac", 0, 2) 1549 x2("(?:(a)|(b))(?(1)c|d)", "bd", 0, 2) 1550 n("(?:(a)|(b))(?(1)c|d)", "ad") 1551 n("(?:(a)|(b))(?(1)c|d)", "bc") 1552 x2("(?:(a)|(b))(?:(?(1)cd)e|fg)", "acde", 0, 4) 1553 x2("(?:(a)|(b))(?:(?(1)cd|x)e|fg)", "bxe", 0, 3) 1554 n("(?:(a)|(b))(?:(?(2)cd|x)e|fg)", "bxe") 1555 x2("(?:(?<x>a)|(?<y>b))(?:(?(<x>)cd|x)e|fg)", "bxe", 0, 3) 1556 n("(?:(?<x>a)|(?<y>b))(?:(?(<y>)cd|x)e|fg)", "bxe") 1557 x2("((?<=a))?(?(1)b|c)", "abc", 1, 2) 1558 x2("((?<=a))?(?(1)b|c)", "bc", 1, 2) 1559 x2("((?<x>x)|(?<y>y))(?(<x>)y|x)", "xy", 0, 2) 1560 x2("((?<x>x)|(?<y>y))(?(<x>)y|x)", "yx", 0, 2) 1561 n("((?<x>x)|(?<y>y))(?(<x>)y|x)", "xx") 1562 n("((?<x>x)|(?<y>y))(?(<x>)y|x)", "yy") 1563 n("(a)?(?<n>b)?(?(1)a)(?(<n>)b)", "aa", err=onigmo.ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED) 1564 x2("(a)?(?<n>b)?(?(1)a)(?(<n>)b)", "aa", 0, 
2, syn=onigmo.ONIG_SYNTAX_PERL) 1565 n("()(?(2))", "", err=onigmo.ONIGERR_INVALID_BACKREF) # Issue #65 1566 n("(?(700000))", "", err=onigmo.ONIGERR_INVALID_BACKREF) 1567 1568 # Implicit-anchor optimization 1569 x2("(?m:.*abc)", "dddabdd\nddabc", 0, 13) # optimized /(?m:.*abc)/ ==> /\A(?m:.*abc)/ 1570 x2("(?m:.+abc)", "dddabdd\nddabc", 0, 13) # optimized 1571 x2("(?-m:.*abc)", "dddabdd\nddabc", 8, 13) # optimized /(?-m:.*abc)/ ==> /(?:^|\A)(?m:.*abc)/ 1572 n("(?-m:.*ab[x-z])", "dddabdd\nddabc") # optimized 1573 x2("(?-m:.*(?:abc|\\Gabc))", "dddabdd\nddabc", 8, 13) # optimized 1574 x2("(?-m:.+abc)", "dddabdd\nddabc", 8, 13) # optimized 1575 x2("(?-m:.*abc)", "dddabdd\nabc", 8, 11) # optimized 1576 n("(?-m:.+abc)", "dddabdd\nabc") # optimized 1577 x2("(?m:.*\\Z)", "dddabdd\nddabc", 0, 13) # optimized /(?m:.*\Z)/ ==> /\A(?m:.*\Z)/ 1578 x2("(?-m:.*\\Z)", "dddabdd\nddabc", 8, 13) # optimized /(?-m:.*\Z)/ ==> /(?:^|\A)(?m:.*\Z)/ 1579 x2("(.*)X\\1", "1234X2345", 1, 8) # not optimized 1580 1581 # Allow options in look-behind 1582 x2("(?<=(?i)ab)cd", "ABcd", 2, 4) 1583 x2("(?<=(?i:ab))cd", "ABcd", 2, 4) 1584 n("(?<=(?i)ab)cd", "ABCD") 1585 n("(?<=(?i:ab))cd", "ABCD") 1586 x2("(?<!(?i)ab)cd", "aacd", 2, 4) 1587 x2("(?<!(?i:ab))cd", "aacd", 2, 4) 1588 n("(?<!(?i)ab)cd", "ABcd") 1589 n("(?<!(?i:ab))cd", "ABcd") 1590 1591 # Absent operator 1592 x2("<-(?~->)->", "<- ->->", 0, 5) 1593 x2("<-(?~->)->\n", "<-1->2<-3->\n", 6, 12) 1594 x2("<-(?~->)->.*<-(?~->)->", "<-1->2<-3->4<-5->", 0, 17) 1595 x2("<-(?~->)->.*?<-(?~->)->", "<-1->2<-3->4<-5->", 0, 11) 1596 x2("(?~abc)c", "abc", 0, 3) 1597 x2("(?~abc)bc", "abc", 0, 3) 1598 x2("(?~abc)abc", "abc", 0, 3) 1599 n("(?~)", " ") 1600 n("(?~)", "") 1601 n(" (?~)", " ") 1602 n(" (?~)", " ") 1603 x2("(?~(?~))", "abc", 0, 3) 1604 x2("(?~a)", "", 0, 0) 1605 x2("(?~a)a", "a", 0, 1) 1606 x2("(?~a)", "x", 0, 1) 1607 x2("(?~a)a", "xa", 0, 2) 1608 x2("(?~.)", "", 0, 0) 1609 x2("(?~.)a", "a", 0, 1) 1610 x2("(?~.)", "x", 0, 0) 1611 x2("(?~.)a", "xa", 
1, 2) 1612 x2("(?~abc)", "abc", 0, 2) 1613 x2("(?~b)", "abc", 0, 1) 1614 x2("(?~abc|b)", "abc", 0, 1) 1615 n("(?~|abc)", "abc") # ??? 1616 x2("(?~abc|)", "abc", 0, 1) # ??? 1617 x2("(?~abc|def)x", "abcx", 1, 4) 1618 x2("(?~abc|def)x", "defx", 1, 4) 1619 x2("^(?~\\S+)TEST", "TEST", 0, 4) 1620 1621 # Perl syntax 1622 x2("\\Q()\\[a]\\E[b]", "()\\[a]b", 0, 7, syn=onigmo.ONIG_SYNTAX_PERL) 1623 x2("\\Q()\\[a]", "()\\[a]", 0, 6, syn=onigmo.ONIG_SYNTAX_PERL) # no \E 1624 x2("(?a)(?d)\\w+", "あ", 0, 1, syn=onigmo.ONIG_SYNTAX_PERL) # For now (?d) == (?u) 1625 x2("(?a)(?l)\\w+", "あ", 0, 1, syn=onigmo.ONIG_SYNTAX_PERL) # For now (?l) == (?u) 1626 x2("(?a)(?^)\\w+", "あ", 0, 1, syn=onigmo.ONIG_SYNTAX_PERL) 1627 n("(?i)(?^)a", "A", syn=onigmo.ONIG_SYNTAX_PERL) 1628 n("(?m)(?^)a$", "a\nb", syn=onigmo.ONIG_SYNTAX_PERL) 1629 x2("(?s)(?^).*", "a\nb", 0, 1, syn=onigmo.ONIG_SYNTAX_PERL) 1630 x2("\\o{046}", "\046", 0, 1, syn=onigmo.ONIG_SYNTAX_PERL); 1631 x2("[\\o{044}-\\o{047}]", "\046", 0, 1, syn=onigmo.ONIG_SYNTAX_PERL); 1632 n("\\o{40000000000}", "", syn=onigmo.ONIG_SYNTAX_PERL, err=onigmo.ONIGERR_TOO_BIG_WIDE_CHAR_VALUE); 1633 n("\\o{100000000000}", "", syn=onigmo.ONIG_SYNTAX_PERL, err=onigmo.ONIGERR_TOO_LONG_WIDE_CHAR_VALUE); 1634 n("[\\o{40000000000}]", "", syn=onigmo.ONIG_SYNTAX_PERL, err=onigmo.ONIGERR_TOO_BIG_WIDE_CHAR_VALUE); 1635 n("[\\o{100000000000}]", "", syn=onigmo.ONIG_SYNTAX_PERL, err=onigmo.ONIGERR_TOO_LONG_WIDE_CHAR_VALUE); 1636 1637 # Backward search 1638 x2("abc", "abcabc", 3, 6, searchtype=SearchType.BACKWARD) 1639 x2("あいう", "あいうあいう", 3, 6, searchtype=SearchType.BACKWARD) 1640 x2("(?i)abc", "ABCABC", 3, 6, searchtype=SearchType.BACKWARD) 1641 x2("(?i)abc", "ABCABC", 3, 6, searchtype=SearchType.BACKWARD) 1642 x2("[a-z]{3}$", "abcabc", 3, 6, searchtype=SearchType.BACKWARD) 1643 x2("[あ-ん]{3}$", "あいうあいう", 3, 6, searchtype=SearchType.BACKWARD) 1644 x2(".*[a-z]bc", "abcabc", 3, 6, searchtype=SearchType.BACKWARD) # Issue #69 1645 x2(".+[a-z]bc", "abcabc", 2, 6, 
searchtype=SearchType.BACKWARD) # Issue #69 1646 x2(".{1,3}[a-z]bc", "abcabc", 2, 6, searchtype=SearchType.BACKWARD) 1647 1648 # onig_match() 1649 x2("abc", "abcabc", 0, 3, searchtype=SearchType.MATCH) 1650 n("abc", " abcabc", searchtype=SearchType.MATCH) 1651 1652 # onig_search_gpos() 1653 n("\\Gabc", "123abcdef", gpos=2) 1654 x2("\\Gabc", "123abcdef", 3, 6, gpos=3) 1655 x2("\\Gabc", "123abcdef", 3, 6, startpos=3) 1656 n("\\Gabc", "123abcdef", gpos=0, startpos=3) 1657 x2("abc\\G", "abc", 0, 3, searchtype=SearchType.BACKWARD) 1658 n("abc\\G", "abc ", searchtype=SearchType.BACKWARD) 1659 x2("abc\\G", "abc ", 0, 3, searchtype=SearchType.BACKWARD, endpos=3) 1660 x2("abc\\G", "abc ", 0, 3, searchtype=SearchType.BACKWARD, gpos=3) 1661 1662 # stack size 1663 stack_size = onigmo.onig_get_match_stack_limit_size() 1664 print("Default stack size:", stack_size) 1665 onigmo.onig_set_match_stack_limit_size(1000) 1666 print("New stack size:", onigmo.onig_get_match_stack_limit_size()) 1667 # These patterns need deep stack. 1668 n("^a*$", "a" * 200 + "b") 1669 n("^a*$", "a" * 2000 + "b", execerr=onigmo.ONIGERR_MATCH_STACK_LIMIT_OVER) 1670 onigmo.onig_set_match_stack_limit_size(0) 1671 1672 # parse depth 1673 parse_depth = onigmo.onig_get_parse_depth_limit() 1674 print("Default parse depth:", parse_depth) 1675 onigmo.onig_set_parse_depth_limit(1000) 1676 print("New parse depth:", onigmo.onig_get_parse_depth_limit()) 1677 # These patterns need deep parse stack. 
1678 x2("(" * 200 + "a" + ")" * 200, "a", 0, 1) 1679 n("(" * 2000 + "a" + ")" * 2000, "a", err=onigmo.ONIGERR_PARSE_DEPTH_LIMIT_OVER) 1680 onigmo.onig_set_match_stack_limit_size(0) 1681 1682 # syntax functions 1683 onigmo.onig_set_syntax_op(syntax_default, 1684 onigmo.onig_get_syntax_op(onigmo.ONIG_SYNTAX_DEFAULT)) 1685 onigmo.onig_set_syntax_op2(syntax_default, 1686 onigmo.onig_get_syntax_op2(onigmo.ONIG_SYNTAX_DEFAULT)) 1687 onigmo.onig_set_syntax_behavior(syntax_default, 1688 onigmo.onig_get_syntax_behavior(onigmo.ONIG_SYNTAX_DEFAULT)) 1689 onigmo.onig_set_default_syntax(None) 1690 1691 1692 print("\nEncoding:", get_encoding_name(onig_encoding)) 1693 print("RESULT SUCC: %d, FAIL: %d, ERROR: %d (by Onigmo %s)" % ( 1694 nsucc, nfail, nerror, onigmo.onig_version())) 1695 1696 onigmo.onig_end() 1697 1698 if (nfail == 0 and nerror == 0): 1699 exit(0) 1700 else: 1701 exit(-1) 1702 1703if __name__ == '__main__': 1704 main() 1705 1706