1: translated from the Regex++ tests.txt by cvtregex++.c 2001-05-16 2# manual edits were done to insert the standard error codes 3# 4# inserted {...} features tests may have changed line numbers and/or ordering 5# 6# comments beyond this point are verbatim from the original input 7# 8# this file contains a script of tests to run through regress.exe 9# 10# comments start with a semicolon and proceed to the end of the line 11# 12# changes to regular expression compile flags start with a "-" as the first 13# non-whitespace character and consist of a list of the printable names 14# of the flags, for example "match_default" 15# 16# Other lines contain a test to perform using the current flag status 17# the first token contains the expression to compile, the second the string 18# to match it against. If the second string is "!" then the expression should 19# not compile, that is the first string is an invalid regular expression. 20# This is then followed by a list of integers that specify what should match, 21# each pair represents the starting and ending positions of a subexpression 22# starting with the zeroth subexpression (the whole match). 23# A value of -1 indicates that the subexpression should not take part in the 24# match at all, if the first value is -1 then no part of the expression should 25# match the string. 26# 27 28# - match_default normal REG_EXTENDED 29 30# 31# try some really simple literals: 32E a a (0,1) 33E Z Z (0,1) 34E Z aaa NOMATCH 35E Z xxxxZZxxx (4,5) 36 37# and some simple brackets: 38E (a) zzzaazz (3,4)(3,4) 39Exz () zzz (0,0)(0,0) 40Exz () NULL (0,0)(0,0) 41E ( ! EPAREN 42E ) ! NOMATCH 43E (aa ! EPAREN 44E aa) ! NOMATCH 45E a b NOMATCH 46E \(\) () (0,2) 47E \(a\) (a) (0,3) 48E \() ! NOMATCH 49E (\) ! 
EPAREN 50E p(a)rameter ABCparameterXYZ (3,12)(4,5) 51E [pq](a)rameter ABCparameterXYZ (3,12)(4,5) 52 53# now try escaped brackets: 54# - match_default bk_parens REG_BASIC 55B \(a\) zzzaazz (3,4)(3,4) 56B \(\) zzz (0,0)(0,0) 57B \(\) NULL (0,0)(0,0) 58B \( ! EPAREN 59B \) ! EPAREN 60B \(aa ! EPAREN 61B aa\) ! EPAREN 62B () () (0,2) 63B (a) (a) (0,3) 64B (\) ! EPAREN 65B \() ! EPAREN 66 67# now move on to "." wildcards 68# - match_default normal REG_EXTENDED REG_STARTEND 69E . a (0,1) 70E$ . \n (0,1) 71E$ . \r (0,1) 72E . NULL NOMATCH 73# - match_default normal match_not_dot_newline REG_EXTENDED REG_STARTEND REG_NEWLINE 74En . a (0,1) 75En$ . \n NOMATCH 76En$ . \r (0,1) 77En . NULL NOMATCH 78# - match_default normal match_not_dot_null match_not_dot_newline REG_EXTENDED REG_STARTEND REG_NEWLINE 79En$ . \n NOMATCH 80En$ . \r (0,1) 81# this *WILL* produce an error from the POSIX API functions: 82# - match_default normal match_not_dot_null match_not_dot_newline REG_EXTENDED REG_STARTEND REG_NEWLINE REG_NO_POSIX_TEST 83Enz . NULL NOMATCH 84 85 86# 87# now move on to the repetition ops, 88# starting with operator * 89# - match_default normal REG_EXTENDED 90E a* b (0,0) 91E ab* a (0,1) 92E ab* ab (0,2) 93E ab* sssabbbbbbsss (3,10) 94E ab*c* a (0,1) 95E ab*c* abbb (0,4) 96E ab*c* accc (0,4) 97E ab*c* abbcc (0,5) 98E *a ! BADRPT 99E$ \n* \n\n (0,2) 100E \** ** (0,2) 101E \* * (0,1) 102 103# now try operator + 104E ab+ a NOMATCH 105E ab+ ab (0,2) 106E ab+ sssabbbbbbsss (3,10) 107E ab+c+ a NOMATCH 108E ab+c+ abbb NOMATCH 109E ab+c+ accc NOMATCH 110E ab+c+ abbcc (0,5) 111E +a ! BADRPT 112E$ \n+ \n\n (0,2) 113E \+ + (0,1) 114E \+ ++ (0,1) 115E \++ ++ (0,2) 116# - match_default normal bk_plus_qm REG_EXTENDED REG_NO_POSIX_TEST 117Exz + + BADRPT 118Exz \+ ! NOMATCH 119Exz a\+ aa NOMATCH 120 121# now try operator ? 122# - match_default normal REG_EXTENDED 123E a? b (0,0) 124E ab? a (0,1) 125E ab? ab (0,2) 126E ab? sssabbbbbbsss (3,5) 127E ab?c? a (0,1) 128E ab?c? abbb (0,2) 129E ab?c? 
accc (0,2) 130E ab?c? abcc (0,3) 131E ?a ! BADRPT 132E$ \n? \n\n (0,1) 133E \? ? (0,1) 134E \? ?? (0,1) 135E \?? ?? (0,1) 136# - match_default normal bk_plus_qm REG_EXTENDED REG_NO_POSIX_TEST 137Exz ? ? BADRPT 138Exz \? ! NOMATCH 139Exz a\? aa NOMATCH 140Exz a\? b NOMATCH 141 142# - match_default normal limited_ops 143B a? a? (0,2) 144B a+ a+ (0,2) 145B a\? a? (0,2) 146B a\+ a+ (0,2) 147 148# now try operator {} 149# - match_default normal REG_EXTENDED 150E a{2} a NOMATCH 151E a{2} aa (0,2) 152E a{2} aaa (0,2) 153E a{2,} a NOMATCH 154E a{2,} aa (0,2) 155E a{2,} aaaaa (0,5) 156E a{2,4} a NOMATCH 157E a{2,4} aa (0,2) 158E a{2,4} aaa (0,3) 159E a{2,4} aaaa (0,4) 160E a{2,4} aaaaa (0,4) 161# spaces are now allowed inside {} 162E a{ 2 , 4 } aaaaa BADBR 163E a{} ! BADBR 164E a{ } ! BADBR 165E a{2 ! EBRACE 166E a} ! NOMATCH 167E \{\} {} (0,2) 168 169# - match_default normal bk_braces 170B a\{2\} a NOMATCH 171B a\{2\} aa (0,2) 172B a\{2\} aaa (0,2) 173B a\{2,\} a NOMATCH 174B a\{2,\} aa (0,2) 175B a\{2,\} aaaaa (0,5) 176B a\{2,4\} a NOMATCH 177B a\{2,4\} aa (0,2) 178B a\{2,4\} aaa (0,3) 179B a\{2,4\} aaaa (0,4) 180B a\{2,4\} aaaaa (0,4) 181B a\{ 2 , 4 \} aaaaa BADBR 182B {} {} (0,2) 183 184# now test the alternation operator | 185# - match_default normal REG_EXTENDED 186E a|b a (0,1) 187E a|b b (0,1) 188E a(b|c) ab (0,2)(1,2) 189E a(b|c) ac (0,2)(1,2) 190E a(b|c) ad NOMATCH 191E |c ! ENULL 192E c| ! ENULL 193E (|) ! ENULL 194E (a|) ! ENULL 195E (|a) ! 
ENULL 196E a\| a| (0,2) 197# - match_default normal limited_ops 198B a| a| (0,2) 199B a\| a| (0,2) 200B | | (0,1) 201# - match_default normal bk_vbar REG_NO_POSIX_TEST 202Bxz a| a| (0,2) 203Bxz a\|b a (0,1) 204Bxz a\|b b (0,1) 205 206# now test the set operator [] 207# - match_default normal REG_EXTENDED 208# try some literals first 209E [abc] a (0,1) 210E [abc] b (0,1) 211E [abc] c (0,1) 212E [abc] d NOMATCH 213E [^bcd] a (0,1) 214E [^bcd] b NOMATCH 215E [^bcd] d NOMATCH 216E [^bcd] e (0,1) 217E a[b]c abc (0,3) 218E a[ab]c abc (0,3) 219E a[^ab]c adc (0,3) 220E a[]b]c a]c (0,3) 221E a[[b]c a[c (0,3) 222E a[-b]c a-c (0,3) 223E a[^]b]c adc (0,3) 224E a[^-b]c adc (0,3) 225E a[b-]c a-c (0,3) 226E a[b ! EBRACK 227E a[] ! EBRACK 228 229# then some ranges 230E [b-e] a NOMATCH 231E [b-e] b (0,1) 232E [b-e] e (0,1) 233E [b-e] f NOMATCH 234E [^b-e] a (0,1) 235E [^b-e] b NOMATCH 236E [^b-e] e NOMATCH 237E [^b-e] f (0,1) 238E a[1-3]c a2c (0,3) 239E a[3-1]c ! ERANGE 240E a[1-3-5]c ! ERANGE 241E a[1- ! EBRACK 242 243# and some classes 244E a[[:alpha:]]c abc (0,3) 245E a[[:unknown:]]c ! ECTYPE 246E a[[: ! ECTYPE 247E a[[:alpha ! ECTYPE 248E a[[:alpha:] ! EBRACK 249E a[[:alpha,:] ! ECTYPE 250E a[[:]:]]b ! ECTYPE 251E a[[:-:]]b ! ECTYPE 252E a[[:alph:]] ! ECTYPE 253E a[[:alphabet:]] ! ECTYPE 254E [[:alnum:]]+ -%@a0X- (3,6) 255E [[:alpha:]]+ -%@aX0- (3,5) 256E$ [[:blank:]]+ a \tb (1,4) 257E$ [[:cntrl:]]+ a\n\tb (1,3) 258E [[:digit:]]+ a019b (1,4) 259E [[:graph:]]+ a%b (0,3) 260E [[:lower:]]+ AabC (1,3) 261# This test fails with STLPort, disable for now as this is a corner case anyway... 
262#[[:print:]]+ "\na b\n" 1 4 263E$ [[:punct:]]+ %-&\t (0,3) 264E$ [[:space:]]+ a \n\t\rb (1,5) 265E [[:upper:]]+ aBCd (1,3) 266E [[:xdigit:]]+ p0f3Cx (1,5) 267 268# now test flag settings: 269# - escape_in_lists REG_NO_POSIX_TEST 270Exz$ [\n] \n (0,1) 271# - REG_NO_POSIX_TEST 272Bxz$ [\n] \n (0,1) 273Bxz$ [\n] \\ NOMATCH 274Bxz [[:class:] : ECTYPE 275Bxz [[:class:] [ ECTYPE 276Bxz [[:class:] c ECTYPE 277 278# line anchors 279# - match_default normal REG_EXTENDED 280En ^ab ab (0,2) 281En ^ab xxabxx NOMATCH 282En$ ^ab xx\nabzz (3,5) 283En ab$ ab (0,2) 284En ab$ abxx NOMATCH 285En$ ab$ ab\nzz (0,2) 286# - match_default match_not_bol match_not_eol normal REG_EXTENDED REG_NOTBOL REG_NOTEOL 287Eben ^ab ab NOMATCH 288Eben ^ab xxabxx NOMATCH 289Eben$ ^ab xx\nabzz (3,5) 290Eben ab$ ab NOMATCH 291Eben ab$ abxx NOMATCH 292Eben$ ab$ ab\nzz (0,2) 293 294# back references 295# - match_default normal REG_EXTENDED 296E a(b)\2c ! ESUBREG 297E a(b\1)c ! ESUBREG 298E a(b*)c\1d abbcbbd (0,7)(1,3) 299E a(b*)c\1d abbcbd NOMATCH 300E a(b*)c\1d abbcbbbd NOMATCH 301E ^(.)\1 abc NOMATCH 302E a([bc])\1d abcdabbd (4,8)(5,6) 303# strictly speaking this is at best ambiguous, at worst wrong, this is what most 304# re implementations will match though. 
305E a(([bc])\2)*d abbccd (0,6)(3,5)(3,4) 306 307E a(([bc])\2)*d abbcbd NOMATCH 308E a((b)*\2)*d abbbd (0,5)(1,4)(2,3) 309E (ab*)[ab]*\1 ababaaa (0,7)(0,1) 310E (a)\1bcd aabcd (0,5)(0,1) 311E (a)\1bc*d aabcd (0,5)(0,1) 312E (a)\1bc*d aabd (0,4)(0,1) 313E (a)\1bc*d aabcccd (0,7)(0,1) 314E (a)\1bc*[ce]d aabcccd (0,7)(0,1) 315E ^(a)\1b(c)*cd$ aabcccd (0,7)(0,1)(4,5) 316 317# 318# characters by code: 319# - match_default normal REG_EXTENDED REG_STARTEND 320{E \101 A (0,1) not an ascii implementation 321E \172 z (0,1) 322E \0172 z NOMATCH 323} 324E NULL NULL ENULL 325E NULL NULL ENULL 326 327# 328# word operators: 329{E \w a (0,1) perl \w not supported 330E \w z (0,1) 331E \w A (0,1) 332E \w Z (0,1) 333E \w _ (0,1) 334E \w } NOMATCH 335E \w ` NOMATCH 336E \w [ NOMATCH 337E \w @ NOMATCH 338} 339# non-word: 340{E \W W NOMATCH perl \W not supported 341E \W z NOMATCH 342E \W A NOMATCH 343E \W Z NOMATCH 344E \W _ NOMATCH 345E \W } (0,1) 346E \W ` (0,1) 347E \W [ (0,1) 348E \W @ (0,1) 349 350E \<\w+\W+ aa aa a (1,5) 351} 352# word boundaries 353{E \<a\> ,a, (1,2) word boundaries not supported 354E \<* ! BADRPT 355E \>* ! BADRPT 356E \<+ ! BADRPT 357E \>+ ! BADRPT 358E \<? ! BADRPT 359E \>? ! 
BADRPT 360# word start: 361E \<abcd abcd (2,6) 362E \<ab cab NOMATCH 363E$ \\<ab \nab (1,3) 364E \<tag ::tag (2,5) 365# word end: 366E abc\> abc (0,3) 367E abc\> abcd NOMATCH 368E$ abc\\> abc\n (0,3) 369E abc\> abc:: (0,3) 370 371E \<abc abcabc abc\n\nabc (0,3) 372E \< ab a aaa (2,2) 373} 374# word boundary: 375{E \babcd abcd (0,4) perl \b not supported 376E \babcd :abcd: (1,5) perl \b not supported 377E \bab cab NOMATCH 378E$ \\bab \nab (1,3) 379E \btag ::tag (2,5) 380E abc\b abc (0,3) 381E abc\b abcd NOMATCH 382E$ abc\\b abc\n (0,3) 383E abc\b abc:: (0,3) 384 385E \b abb a abbb (0,0) 386} 387# within word: 388{E \B ab (1,1) perl \B not supported 389E a\Bb ab (0,2) 390E a\B ab (0,1) 391E a\B a NOMATCH 392E a\B a NOMATCH 393} 394 395# 396# buffer operators: 397{E \`abc abc (0,3) regex++ \' not supported 398E$ \\`abc \nabc NOMATCH 399E \`abc abc NOMATCH 400E abc\' abc (0,3) 401E$ abc\\' abc\n NOMATCH 402E abc\' abc NOMATCH 403} 404 405# 406# extra escape sequences: 407E$ \a \a (0,1) 408E$ \f \f (0,1) 409E$ \n \n (0,1) 410E$ \r \r (0,1) 411E$ \t \t (0,1) 412E$ \v \v (0,1) 413 414E$ \\a \a (0,1) 415E$ \\f \f (0,1) 416E$ \\n \n (0,1) 417E$ \\r \r (0,1) 418E$ \\t \t (0,1) 419E$ \\v \v (0,1) 420 421E \\a \a (0,2) 422E \\f \f (0,2) 423E \\n \n (0,2) 424E \\r \r (0,2) 425E \\t \t (0,2) 426E \\v \v (0,2) 427 428# 429# now follows various complex expressions designed to try and bust the matcher: 430E a(((b)))c abc (0,3)(1,2)(1,2)(1,2) 431E a(b|(c))d abd (0,3)(1,2) 432E a(b|(c))d acd (0,3)(1,2)(1,2) 433E a(b*|c)d abbd (0,4)(1,3) 434# just gotta have one DFA-buster, of course 435E a[ab]{20} aaaaabaaaabaaaabaaaab (0,21) 436# and an inline expansion in case somebody gets tricky 437E a[ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab] aaaaabaaaabaaaabaaaab (0,21) 438# and in case somebody just slips in an NFA... 
439E a[ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab](wee|week)(knights|night) aaaaabaaaabaaaabaaaabweeknights (0,31)(21,24)(24,31) 440# one really big one 441E 1234567890123456789012345678901234567890123456789012345678901234567890 a1234567890123456789012345678901234567890123456789012345678901234567890b (1,71) 442# fish for problems as brackets go past 8 443E [ab][cd][ef][gh][ij][kl][mn] xacegikmoq (1,8) 444E [ab][cd][ef][gh][ij][kl][mn][op] xacegikmoq (1,9) 445E [ab][cd][ef][gh][ij][kl][mn][op][qr] xacegikmoqy (1,10) 446E [ab][cd][ef][gh][ij][kl][mn][op][q] xacegikmoqy (1,10) 447# and as parenthesis go past 9: 448E (a)(b)(c)(d)(e)(f)(g)(h) zabcdefghi (1,9)(1,2)(2,3)(3,4)(4,5)(5,6)(6,7)(7,8)(8,9) 449E (a)(b)(c)(d)(e)(f)(g)(h)(i) zabcdefghij (1,10)(1,2)(2,3)(3,4)(4,5)(5,6)(6,7)(7,8)(8,9)(9,10) 450E (a)(b)(c)(d)(e)(f)(g)(h)(i)(j) zabcdefghijk (1,11)(1,2)(2,3)(3,4)(4,5)(5,6)(6,7)(7,8)(8,9)(9,10)(10,11) 451E (a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k) zabcdefghijkl (1,12)(1,2)(2,3)(3,4)(4,5)(5,6)(6,7)(7,8)(8,9)(9,10)(10,11)(11,12) 452E (a)d|(b)c abc (1,3)(-1,-1)(1,2) 453E _+((www)|(ftp)|(mailto)):_* _wwwnocolon _mailto: (12,20)(13,19)(-1,-1)(-1,-1)(13,19) 454 455# subtleties of matching 456E a(b)?cd acd (0,3) 457E a(b)?c\1d acd NOMATCH 458E a(b?c)+d accd (0,4)(2,3) 459E (wee|week)(knights|night) weeknights (0,10)(0,3)(3,10) 460E .* abc (0,3) 461E a(b|(c))d abd (0,3)(1,2) 462E a(b|(c))d acd (0,3)(1,2)(1,2) 463E a(b*|c|e)d abbd (0,4)(1,3) 464E a(b*|c|e)d acd (0,3)(1,2) 465E a(b*|c|e)d ad (0,2)(1,1) 466E a(b?)c abc (0,3)(1,2) 467E a(b?)c ac (0,2)(1,1) 468E a(b+)c abc (0,3)(1,2) 469E a(b+)c abbbc (0,5)(1,4) 470E a(b*)c ac (0,2)(1,1) 471E (a|ab)(bc([de]+)f|cde) abcdef (0,6)(0,1)(1,6)(3,5) 472E a([bc]?)c abc (0,3)(1,2) 473E a([bc]?)c ac (0,2)(1,1) 474E a([bc]+)c abc (0,3)(1,2) 475E a([bc]+)c abcc (0,4)(1,3) 476E a([bc]+)bc abcbc (0,5)(1,3) 477E a(bb+|b)b abb (0,3)(1,2) 478E a(bbb+|bb+|b)b abb (0,3)(1,2) 479E a(bbb+|bb+|b)b abbb (0,4)(1,3) 480E 
a(bbb+|bb+|b)bb abbb (0,4)(1,2) 481E (.*).* abcdef (0,6)(0,6) 482E (a*)* bc (0,0)(0,0) 483 484# do we get the right subexpression when it is used more than once? 485E a(b|c)*d ad (0,2) 486E a(b|c)*d abcd (0,4)(2,3) 487E a(b|c)+d abd (0,3)(1,2) 488E a(b|c)+d abcd (0,4)(2,3) 489E a(b|c?)+d ad (0,2)(1,1) 490E a(b|c?)+d abcd (0,4)(2,3) 491E a(b|c){0,0}d ad (0,2) 492E a(b|c){0,1}d ad (0,2) 493E a(b|c){0,1}d abd (0,3)(1,2) 494E a(b|c){0,2}d ad (0,2) 495E a(b|c){0,2}d abcd (0,4)(2,3) 496E a(b|c){0,}d ad (0,2) 497E a(b|c){0,}d abcd (0,4)(2,3) 498E a(b|c){1,1}d abd (0,3)(1,2) 499E a(b|c){1,2}d abd (0,3)(1,2) 500E a(b|c){1,2}d abcd (0,4)(2,3) 501E a(b|c){1,}d abd (0,3)(1,2) 502E a(b|c){1,}d abcd (0,4)(2,3) 503E a(b|c){2,2}d acbd (0,4)(2,3) 504E a(b|c){2,2}d abcd (0,4)(2,3) 505E a(b|c){2,4}d abcd (0,4)(2,3) 506E a(b|c){2,4}d abcbd (0,5)(3,4) 507E a(b|c){2,4}d abcbcd (0,6)(4,5) 508E a(b|c){2,}d abcd (0,4)(2,3) 509E a(b|c){2,}d abcbd (0,5)(3,4) 510E a(b+|((c)*))+d abd (0,3)(1,2) 511E a(b+|((c)*))+d abcd (0,4)(2,3)(2,3)(2,3) 512 513# - match_default normal REG_EXTENDED REG_STARTEND REG_NOSPEC literal 514L \**?/{} \**?/{} (0,7) 515 516# - match_default normal REG_EXTENDED REG_NO_POSIX_TEST 517# try to match C++ syntax elements: 518# line comment: 519Exz$ //[^\n]* ++i //here is a line comment\n (4,28) 520# block comment: 521Exz /\*([^*]|\*+[^*/])*\*+/ /* here is a block comment */ (0,29)(26,27) 522Exz /\*([^*]|\*+[^*/])*\*+/ /**/ (0,4) 523Exz /\*([^*]|\*+[^*/])*\*+/ /***/ (0,5) 524Exz /\*([^*]|\*+[^*/])*\*+/ /****/ (0,6) 525Exz /\*([^*]|\*+[^*/])*\*+/ /*****/ (0,7) 526Exz /\*([^*]|\*+[^*/])*\*+/ /*****/*/ (0,7) 527# preprocessor directives: 528E$ ^[[:blank:]]*#([^\n]*\\[[:space:]]+)*[^\n]* #define some_symbol (0,19) 529E$ ^[[:blank:]]*#([^\n]*\\[[:space:]]+)*[^\n]* #define some_symbol(x) #x (0,25) 530E$ ^[[:blank:]]*#([^\n]*\\[[:space:]]+)*[^\n]* #define some_symbol(x) \\ \r\n foo();\\\r\n printf(#x); (0,27) 531# literals: 532E 
((0x[[:xdigit:]]+)|([[:digit:]]+))u?((int(8|16|32|64))|L)? 0xFF (0,4)(0,4)(0,4) 533E ((0x[[:xdigit:]]+)|([[:digit:]]+))u?((int(8|16|32|64))|L)? 35 (0,2)(0,2)(-1,-1)(0,2) 534E ((0x[[:xdigit:]]+)|([[:digit:]]+))u?((int(8|16|32|64))|L)? 0xFFu (0,5)(0,4)(0,4) 535E ((0x[[:xdigit:]]+)|([[:digit:]]+))u?((int(8|16|32|64))|L)? 0xFFL (0,5)(0,4)(0,4)(-1,-1)(4,5) 536E ((0x[[:xdigit:]]+)|([[:digit:]]+))u?((int(8|16|32|64))|L)? 0xFFFFFFFFFFFFFFFFuint64 (0,24)(0,18)(0,18)(-1,-1)(19,24)(19,24)(22,24) 537# strings: 538#E '([^\\']|\\.)*' '\\x3A' (0,6)(4,5) 539E '([^\\']|\\.)*' '\\'' (0,4)(1,3) 540E$ '([^']|\\.)*' '\n' (0,3)(1,2) 541 542# now try and test some unicode specific characters: 543# - match_default normal REG_EXTENDED REG_UNICODE_ONLY 544E [[:unicode:]]+ a\0300\0400z ECTYPE 545 546# finally try some case insensitive matches: 547# - match_default normal REG_EXTENDED REG_ICASE 548# upper and lower have no meaning here so they fail, however these 549# may compile with other libraries... 550Ei [[:lower:]]+ Ab (0,2) 551Ei [[:lower:]]+ aB (0,2) 552Ei [[:upper:]]+ Ab (0,2) 553Ei [[:upper:]]+ aB (0,2) 554Ei 0123456789@abcdefghijklmnopqrstuvwxyz_`ABCDEFGHIJKLMNOPQRSTUVWXYZ 0123456789@ABCDEFGHIJKLMNOPQRSTUVWXYZ_`abcdefghijklmnopqrstuvwxyz (0,65) 555Ei 0123456789@abcdefghijklmnopqrstuvwxyz\^_`ABCDEFGHIJKLMNOPQRSTUVWXYZ 0123456789@ABCDEFGHIJKLMNOPQRSTUVWXYZ^_`abcdefghijklmnopqrstuvwxyz (0,66) 556Ei 0123456789@abcdefghijklmnopqrstuvwxyz\[\\\]\^_`ABCDEFGHIJKLMNOPQRSTUVWXYZ\{\|\} 0123456789@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|} (0,72) 557 558# known and suspected bugs: 559# - match_default normal REG_EXTENDED 560E \( ( (0,1) 561E \) ) (0,1) 562E \$ $ (0,1) 563E \^ ^ (0,1) 564E \. . (0,1) 565E \* * (0,1) 566E \+ + (0,1) 567E \? ? 
(0,1) 568E \[ [ (0,1) 569E \] ] (0,1) 570E \| | (0,1) 571E \\ \\ (0,1) 572E # # (0,1) 573E \# # BADESC 574Ex \# # (0,1) 575E a- a- (0,2) 576E \- - BADESC 577Ex \- - (0,1) 578E \{ { (0,1) 579E \} } (0,1) 580E 0 0 (0,1) 581E 1 1 (0,1) 582E 9 9 (0,1) 583E b b (0,1) 584E B B (0,1) 585E < < (0,1) 586E > > (0,1) 587E w w (0,1) 588E W W (0,1) 589E ` ` (0,1) 590E ' ' (0,1) 591E$ \n \n (0,1) 592E , , (0,1) 593E a a (0,1) 594E f f (0,1) 595E n n (0,1) 596E r r (0,1) 597E t t (0,1) 598E v v (0,1) 599E c c (0,1) 600E x x (0,1) 601E : : (0,1) 602E (\.[[:alnum:]]+){2} w.a.b (1,5)(3,5) 603 604# - match_default normal REG_EXTENDED REG_ICASE 605Ei a A (0,1) 606Ei A a (0,1) 607Ei [abc]+ abcABC (0,6) 608Ei [ABC]+ abcABC (0,6) 609Ei [a-z]+ abcABC (0,6) 610Ei [A-Z]+ abzANZ (0,6) 611Ei [a-Z]+ abzABZ ERANGE 612Eix [a-Z]+ abzABZ NOMATCH 613Ei [A-z]+ abzABZ (0,6) 614Ei [[:lower:]]+ abyzABYZ (0,8) 615Ei [[:upper:]]+ abzABZ (0,6) 616Ei [[:word:]]+ abcZZZ (0,6) 617Ei [[:alpha:]]+ abyzABYZ (0,8) 618Ei [[:alnum:]]+ 09abyzABYZ (0,10) 619 620# updated tests for version 2: 621# - match_default normal REG_EXTENDED 622E$ \x41 A (0,1) 623E$ \xff \xFF (0,1) 624E$ \xFF \xff (0,1) 625# - match_default normal REG_EXTENDED REG_NO_POSIX_TEST 626{Exz$ \\c[ \e (0,1) perl \c not supported 627# - match_default normal REG_EXTENDED 628E$ \\cA \001 (0,1) 629E$ \\cz \032 (0,1) 630E$ \\c= ! NOMATCH 631E$ \\c? ! 
NOMATCH 632} 633E =: =: (0,2) 634 635# word start: 636E [[:<:]]abcd abcd (2,6) 637E [[:<:]]ab cab NOMATCH 638E$ [[:<:]]ab \nab (1,3) 639E [[:<:]]tag ::tag (2,5) 640#word end: 641E abc[[:>:]] abc (0,3) 642E abc[[:>:]] abcd NOMATCH 643E$ abc[[:>:]] abc\n (0,3) 644E abc[[:>:]] abc:: (0,3) 645 646# collating elements and rewritten set code: 647# - match_default normal REG_EXTENDED REG_STARTEND 648{E [[.zero.]] 0 (0,1) [[.element-name.]] not supported 649E [[.one.]] 1 (0,1) 650E [[.two.]] 2 (0,1) 651E [[.three.]] 3 (0,1) 652E [[.a.]] baa (1,2) 653#E [[.NUL.]] NULL (0,1) 654E [[.right-curly-bracket.]] } (0,1) 655E [[=right-curly-bracket=]] } (0,1) 656} 657E [[:<:]z] ! ECTYPE 658E [a[:>:]] ! ECTYPE 659E [[=a=]] a (0,1) 660# - match_default normal REG_EXTENDED REG_STARTEND REG_ICASE 661Ei [[.A.]] A (0,1) 662Ei [[.A.]] a (0,1) 663Ei [[.A.]-b]+ AaBb (0,4) 664Ei [A-[.b.]]+ AaBb (0,4) 665Ei [[.a.]-B]+ AaBb ERANGE 666Eix [[.a.]-B]+ AaBb NOMATCH 667Ei [a-[.B.]]+ AaBb ERANGE 668Eix [a-[.B.]]+ AaBb NOMATCH 669# - match_default normal REG_EXTENDED REG_NO_POSIX_TEST 670Exz$ [\x61] a (0,1) 671Exz$ [\x61-c]+ abcd (0,3) 672Exz$ [a-\x63]+ abcd (0,3) 673# - match_default normal REG_EXTENDED REG_STARTEND 674E [[.a.]-c]+ abcd (0,3) 675E [a-[.c.]]+ abcd (0,3) 676E [[:alpha:]-a] ! ERANGE 677E [a-[:alpha:]] ! 
NOMATCH 678 679# try multi-character ligatures: 680{E [[.ae.]] ae (0,2) [[.ligature.]] not supported 681E [[.ae.]] aE NOMATCH 682E [[.AE.]] AE (0,2) 683E [[.Ae.]] Ae (0,2) 684E [[.ae.]-b] a NOMATCH 685E [[.ae.]-b] b (0,1) 686E [[.ae.]-b] ae (0,2) 687E [a-[.ae.]] a (0,1) 688E [a-[.ae.]] b NOMATCH 689E [a-[.ae.]] ae (0,2) 690# - match_default normal REG_EXTENDED REG_STARTEND REG_ICASE 691Ei [[.ae.]] AE (0,2) 692Ei [[.ae.]] Ae (0,2) 693Ei [[.AE.]] Ae (0,2) 694Ei [[.Ae.]] aE (0,2) 695Ei [[.AE.]-B] a NOMATCH 696Ei [[.Ae.]-b] b (0,1) 697Ei [[.Ae.]-b] B (0,1) 698Ei [[.ae.]-b] AE (0,2) 699} 700 701# - match_default normal REG_EXTENDED REG_STARTEND 702#extended perl style escape sequences: 703{E$ \\e \033 (0,1) perl \e not supported 704} 705{E$ \\x1b \033 (0,1) perl \x not supported 706E$ \\x{1b} \033 (0,1) 707E \x{} ! NOMATCH 708E \x{ ! NOMATCH 709E \x} ! NOMATCH 710E \x ! NOMATCH 711E \x{yy ! NOMATCH 712E \x{1b ! NOMATCH 713} 714 715# - match_default normal REG_EXTENDED REG_STARTEND REG_NO_POSIX_TEST 716{Exz \l+ ABabcAB (2,5) regex++ \l not supported 717Exz [\l]+ ABabcAB (2,5) 718Exz [a-\l] ! NOMATCH 719E [a-\l] ! ERANGE 720E [\l-a] ! ERANGE 721Exz [\L] ! (0,1) 722Exz \L+ abABCab (2,5) 723Exz \L+ ab,.-ab (2,5) 724Exz \u+ abABCab (2,5) 725Exz [\u]+ abABCab (2,5) 726Exz [\U] ! (0,1) 727Exz \U+ ABabcAB (2,5) 728} 729{Exz \d+ ab012ab (2,5) perl \d not supported 730Exz [a-\d] ! NOMATCH 731E [a-\d] ! ERANGE 732E [\d-a] ! 
ERANGE 733Exz [\d]+ ab012ad (6,7) 734Evxz [\d]+ ab012ad (2,5) 735Exz [\D] !D (1,2) 736Evxz [\D] !D (0,1) 737Exz \D+ 01abc01 (2,5) 738Exz \s+ ab ab (2,5) 739Exz [\s]+ as as (1,2) 740Evxz [\s]+ as as (2,5) 741Exz [\S] !S (1,2) 742Evxz [\S] !S (0,1) 743Exz \S+ abc (2,5) 744} 745# - match_default normal REG_EXTENDED REG_STARTEND 746{E \Qabc abc (0,3) regex++ \Q not supported 747E \Qabc\E abcd (0,3) 748E \Qabc\Ed abcde (0,4) 749E \Q+*?\\E +*?\\ (0,4) 750} 751 752{E \C+ abcde (0,5) regex++ \C not supported 753} 754{E \X+ abcde (0,5) regex++ \X not supported 755 756# - match_default normal REG_EXTENDED REG_STARTEND REG_UNICODE_ONLY 757E \X+ a\768\769 (0,3) 758E \X+ \2309\2307 (0,2) 759E \X+ \2489\2494 (0,2) 760} 761 762# - match_default normal REG_EXTENDED REG_STARTEND 763{E \Aabc abc (0,3) regex++ \A not supported 764E \Aabc aabc NOMATCH 765E a\Aab abc NOMATCH 766E abc\z abc (0,3) 767E abc\z abcd NOMATCH 768E$ abc\\z abc\n\n NOMATCH 769E$ abc\\Z abc\n (0,3) 770E$ abc\\Z abc\n\n (0,3) 771E abc\Z abc (0,3) 772E \Aabc abc abc (0,3) 773} 774 775{E \Gabc abc (0,3) perl \G not supported 776E \G\w+\W+ abc abc a cbbb (0,5) 777E \Ga+b+ aaababb abb (0,4) 778E \Gabc dabcd NOMATCH 779E a\Gbc abc NOMATCH 780} 781 782# 783# now test grep, 784# basically check all our restart types - line, word, etc 785# checking each one for null and non-null matches. 
786# 787# - match_default normal REG_EXTENDED REG_STARTEND REG_GREP 788E a a a a aa (1,2) 789E a+b+ aabaabbb ab (0,3) 790E a(b*|c|e)d adabbdacd (0,2)(1,1) 791E$ a \na\na\na\naa (1,2) 792 793E$ ^ \n\n \n\n\n (0,0) 794E$ ^ab ab \nab ab\n (0,2) 795E$ ^[^\n]*\n \n \n\n \n (0,4) 796 797E abc abc (0,3) 798E abc abc abcabc (1,4) 799E$ \n\n \n\n\n \n \n\n\n\n (1,3) 800 801E$ $ \n\n \n\n\n (10,10) 802En$ $ \n\n \n\n\n (3,3) 803 804# - match_default normal REG_EXTENDED REG_STARTEND REG_GREP REG_ICASE 805Ei A a a a aa (1,2) 806Ei A+B+ aabaabbb ab (0,3) 807Ei A(B*|c|e)D adabbdacd (0,2)(1,1) 808Ei$ A \na\na\na\naa (1,2) 809 810Ei$ ^aB Ab \nab Ab\n (0,2) 811Ei$ \\<abc Abcabc aBc\n\nabc (0,3) 812 813Ei ABC abc (0,3) 814Ei abc ABC ABCABC (1,4) 815 816 817# 818# now test merge, 819# 820# - match_default normal REG_EXTENDED REG_STARTEND REG_MERGE format_no_copy 821# start by testing subs: 822# a+ ...aaa,,, ($`,...) 823# a+ ...aaa,,, ($',,,,) 824# a+ ...aaa,,, ($&,aaa) 825# a+ ...aaa,,, ($0,aaa) 826# a+ ...aaa,,, ($1,NULL) 827# a+ ...aaa,,, ($15,NULL) 828# (a+)b+ ...aaabbb,,, ($1,aaa) 829# [[:digit:]]* 123ab (<$0>,<123><><><>) 830# [[:digit:]]* 123ab1 (<$0>,<123><><><1>) 831 832# and now escapes: 833# a+ ...aaa,,, ($x,$x) 834# a+ ...aaa,,, (\a,\a) 835# a+ ...aaa,,, (\f,\f) 836# a+ ...aaa,,, (\n,\n) 837# a+ ...aaa,,, (\r,\r) 838# a+ ...aaa,,, (\t,\t) 839# a+ ...aaa,,, (\v,\v) 840 841# a+ ...aaa,,, (\x21,!) 842# a+ ...aaa,,, (\x{21},!) 
843# a+ ...aaa,,, (\c@,\0) 844# a+ ...aaa,,, (\e,\27) 845# a+ ...aaa,,, (\0101,A) 846# a+ ...aaa,,, ((\0101),A) 847 848# - match_default normal REG_EXTENDED REG_STARTEND REG_MERGE format_sed format_no_copy 849# (a+)(b+) ...aabb,, (\0,aabb) 850# (a+)(b+) ...aabb,, (\1,aa) 851# (a+)(b+) ...aabb,, (\2,bb) 852# (a+)(b+) ...aabb,, (&,aabb) 853# (a+)(b+) ...aabb,, ($,$) 854# (a+)(b+) ...aabb,, ($1,$1) 855# (a+)(b+) ...aabb,, (()?:,()?:) 856# (a+)(b+) ...aabb,, (\\,\\) 857# (a+)(b+) ...aabb,, (\&,&) 858 859 860# - match_default normal REG_EXTENDED REG_STARTEND REG_MERGE format_perl format_no_copy 861# (a+)(b+) ...aabb,, ($0,aabb) 862# (a+)(b+) ...aabb,, ($1,aa) 863# (a+)(b+) ...aabb,, ($2,bb) 864# (a+)(b+) ...aabb,, ($&,aabb) 865# (a+)(b+) ...aabb,, (&,&) 866# (a+)(b+) ...aabb,, (\0,\0) 867# (a+)(b+) ...aabb,, (()?:,()?:) 868 869# - match_default normal REG_EXTENDED REG_STARTEND REG_MERGE 870# move to copying unmatched data: 871# a+ ...aaa,,, (bbb,...bbb,,,) 872# a+(b+) ...aaabb,,, ($1,...bb,,,) 873# a+(b+) ...aaabb,,,ab*abbb? ($1,...bb,,,b*bbb?) 874 875# (a+)|(b+) ...aaabb,,,ab*abbb? ((?1A)(?2B),...AB,,,AB*AB?) 876# (a+)|(b+) ...aaabb,,,ab*abbb? (?1A:B,...AB,,,AB*AB?) 877# (a+)|(b+) ...aaabb,,,ab*abbb? ((?1A:B)C,...ACBC,,,ACBC*ACBC?) 878# (a+)|(b+) ...aaabb,,,ab*abbb? (?1:B,...B,,,B*B?) 879 880# 881# changes to newline handling with 2.11: 882# 883 884# - match_default normal REG_EXTENDED REG_STARTEND REG_GREP 885 886E$ ^. \n \r\n (0,1) 887E$ .$ \n \r\n (8,9) 888En$ .$ \n \r\n (1,2) 889 890# - match_default normal REG_EXTENDED REG_STARTEND REG_GREP REG_UNICODE_ONLY 891#E ^. \8232 \8233 (0,1) 892#E .$ \8232 \8233 (1,2) 893 894# 895# non-greedy repeats added 21/04/00 896# - match_default normal REG_EXTENDED 897E a{1,3}{1} ! BADRPT 898{E a*? aa (0,0) non-greedy repeats not supported 899E a** aaa (0,3) 900E a?? aa (0,0) 901E a++ ! BADRPT 902E a+? aa (0,1) 903E a{1,3}? aaa (0,1) 904E \w+?w ...ccccccwcccccw (3,10) 905E \W+\w+?w ...ccccccwcccccw (0,10) 906E abc|\w+? 
abd (0,1) 907E abc|\w+? abcd (0,3) 908E <\s*tag[^>]*>(.*?)<\s*/tag\s*> <tag>here is some text</tag> <tag></tag> (1,29)(6,23) 909E <\s*tag[^>]*>(.*?)<\s*/tag\s*> < tag attr=\"something\">here is some text< /tag > <tag></tag> (1,51)(26,43) 910} 911 912# 913# non-marking parenthesis added 25/04/00 914# - match_default normal REG_EXTENDED 915{E (?:abc)+ xxabcabcxx (2,8) non-marking parens not supported 916E (?:a+)(b+) xaaabbbx (1,7)(4,7) 917E (a+)(?:b+) xaaabbba (1,7)(1,4) 918E (?:(a+)b+) xaaabbba (1,7)(1,4) 919E (?:a+(b+)) xaaabbba (1,7)(4,7) 920E a+(?#b+)b+ xaaabbba (1,7) 921} 922 923# 924# try some partial matches: 925# - match_partial match_default normal REG_EXTENDED REG_NO_POSIX_TEST 926Exz (xyz)(.*)abc xyzaaab NOMATCH 927Exz (xyz)(.*)abc xyz NOMATCH 928Exz (xyz)(.*)abc xy NOMATCH 929