1.\" Copyright (c) 1992, 1993, 1994 Henry Spencer. 2.\" Copyright (c) 1992, 1993, 1994 3.\" The Regents of the University of California. All rights reserved. 4.\" 5.\" This code is derived from software contributed to Berkeley by 6.\" Henry Spencer. 7.\" 8.\" Redistribution and use in source and binary forms, with or without 9.\" modification, are permitted provided that the following conditions 10.\" are met: 11.\" 1. Redistributions of source code must retain the above copyright 12.\" notice, this list of conditions and the following disclaimer. 13.\" 2. Redistributions in binary form must reproduce the above copyright 14.\" notice, this list of conditions and the following disclaimer in the 15.\" documentation and/or other materials provided with the distribution. 16.\" 3. Neither the name of the University nor the names of its contributors 17.\" may be used to endorse or promote products derived from this software 18.\" without specific prior written permission. 19.\" 20.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30.\" SUCH DAMAGE. 
31.\" 32.\" @(#)regex.3 8.4 (Berkeley) 3/20/94 33.\" $FreeBSD: src/lib/libc/regex/regex.3,v 1.21 2007/01/09 00:28:04 imp Exp $ 34.\" 35.Dd May 5, 2019 36.Dt REGEX 3 37.Os 38.Sh NAME 39.Nm regcomp , 40.Nm regcomp_l , 41.Nm regerror , 42.Nm regexec , 43.Nm regfree , 44.Nm regncomp , 45.Nm regncomp_l , 46.Nm regnexec , 47.Nm regwcomp , 48.Nm regwcomp_l , 49.Nm regwexec , 50.Nm regwncomp , 51.Nm regwncomp_l , 52.Nm regwnexec 53.Nd regular-expression library 54.Sh LIBRARY 55.Lb libc 56.Sh SYNOPSIS 57.Sy (Standards-compliant APIs) 58.Pp 59.In regex.h 60.Ft int 61.Fo regcomp 62.Fa "regex_t *restrict preg" 63.Fa "const char *restrict pattern" 64.Fa "int cflags" 65.Fc 66.Ft size_t 67.Fo regerror 68.Fa "int errcode" 69.Fa "const regex_t *restrict preg" 70.Fa "char *restrict errbuf" 71.Fa "size_t errbuf_size" 72.Fc 73.Ft int 74.Fo regexec 75.Fa "const regex_t *restrict preg" 76.Fa "const char *restrict string" 77.Fa "size_t nmatch" 78.Fa "regmatch_t pmatch[restrict]" 79.Fa "int eflags" 80.Fc 81.Ft void 82.Fo regfree 83.Fa "regex_t *preg" 84.Fc 85.Pp 86.Sy (Non-portable extensions) 87.Ft int 88.Fo regncomp 89.Fa "regex_t *restrict preg" 90.Fa "const char *restrict pattern" 91.Fa "size_t len" 92.Fa "int cflags" 93.Fc 94.Ft int 95.Fo regnexec 96.Fa "const regex_t *restrict preg" 97.Fa "const char *restrict string" 98.Fa "size_t len" 99.Fa "size_t nmatch" 100.Fa "regmatch_t pmatch[restrict]" 101.Fa "int eflags" 102.Fc 103.Ft int 104.Fo regwcomp 105.Fa "regex_t *restrict preg" 106.Fa "const wchar_t *restrict widepat" 107.Fa "int cflags" 108.Fc 109.Ft int 110.Fo regwexec 111.Fa "const regex_t *restrict preg" 112.Fa "const wchar_t *restrict widestr" 113.Fa "size_t nmatch" 114.Fa "regmatch_t pmatch[restrict]" 115.Fa "int eflags" 116.Fc 117.Ft int 118.Fo regwncomp 119.Fa "regex_t *restrict preg" 120.Fa "const wchar_t *restrict widepat" 121.Fa "size_t len" 122.Fa "int cflags" 123.Fc 124.Ft int 125.Fo regwnexec 126.Fa "const regex_t *restrict preg" 127.Fa "const wchar_t *restrict 
widestr" 128.Fa "size_t len" 129.Fa "size_t nmatch" 130.Fa "regmatch_t pmatch[restrict]" 131.Fa "int eflags" 132.Fc 133.In regex.h 134.In xlocale.h 135.Ft int 136.Fo regcomp_l 137.Fa "regex_t *restrict preg" 138.Fa "const char *restrict pattern" 139.Fa "int cflags" 140.Fa "locale_t restrict" 141.Fc 142.Ft int 143.Fo regncomp_l 144.Fa "regex_t *restrict preg" 145.Fa "const char *restrict pattern" 146.Fa "size_t len" 147.Fa "int cflags" 148.Fa "locale_t restrict" 149.Fc 150.Ft int 151.Fo regwcomp_l 152.Fa "regex_t *restrict preg" 153.Fa "const wchar_t *restrict widepat" 154.Fa "int cflags" 155.Fa "locale_t restrict" 156.Fc 157.Ft int 158.Fo regwncomp_l 159.Fa "regex_t *restrict preg" 160.Fa "const wchar_t *restrict widepat" 161.Fa "size_t len" 162.Fa "int cflags" 163.Fa "locale_t restrict" 164.Fc 165.Sh DESCRIPTION 166These routines implement 167.St -p1003.2 168regular expressions 169.Pq Do RE Dc Ns s ; 170see 171.Xr re_format 7 . 172The 173.Fn regcomp 174function 175compiles an RE, written as a string, into an internal form. 176.Fn regexec 177matches that internal form against a string and reports results. 178.Fn regerror 179transforms error codes from either into human-readable messages. 180.Fn regfree 181frees any dynamically-allocated storage used by the internal form 182of an RE. 183.Pp 184The header 185.In regex.h 186declares two structure types, 187.Ft regex_t 188and 189.Ft regmatch_t , 190the former for compiled internal forms and the latter for match reporting. 191It also declares the four functions, 192a type 193.Ft regoff_t , 194and a number of constants with names starting with 195.Dq Dv REG_ . 196.Pp 197The 198.Fn regcomp 199function 200compiles the regular expression contained in the 201.Fa pattern 202string, 203subject to the flags in 204.Fa cflags , 205and places the results in the 206.Ft regex_t 207structure pointed to by 208.Fa preg . 
209The 210.Fa cflags 211argument 212is the bitwise OR of zero or more of the following flags: 213.Bl -tag -width REG_EXTENDED 214.It Dv REG_EXTENDED 215Compile modern 216.Pq Dq extended 217REs, 218rather than the obsolete 219.Pq Dq basic 220REs that 221are the default. 222.It Dv REG_BASIC 223This is a synonym for 0, 224provided as a counterpart to 225.Dv REG_EXTENDED 226to improve readability. 227.It Dv REG_NOSPEC 228Compile with recognition of all special characters turned off. 229All characters are thus considered ordinary, 230so the 231.Dq RE 232is a literal string. 233This is an extension, 234compatible with but not specified by 235.St -p1003.2 , 236and should be used with 237caution in software intended to be portable to other systems. 238.Dv REG_EXTENDED 239and 240.Dv REG_NOSPEC 241may not be used 242in the same call to 243.Fn regcomp . 244.It Dv REG_LITERAL 245An alias of 246.Dv REG_NOSPEC . 247.It Dv REG_ICASE 248Compile for matching that ignores upper/lower case distinctions. 249See 250.Xr re_format 7 . 251.It Dv REG_NOSUB 252Compile for matching that need only report success or failure, 253not what was matched. 254.It Dv REG_NEWLINE 255Compile for newline-sensitive matching. 256By default, newline is a completely ordinary character with no special 257meaning in either REs or strings. 258With this flag, 259.Ql [^ 260bracket expressions and 261.Ql .\& 262never match newline, 263a 264.Ql ^\& 265anchor matches the null string after any newline in the string 266in addition to its normal function, 267and the 268.Ql $\& 269anchor matches the null string before any newline in the 270string in addition to its normal function. 271.It Dv REG_PEND 272(Note that 273.Dv REG_PEND 274is not recognized by any of the wide character or 275.Dq Nm n 276variants. 277Besides, the 278.Dq Nm n 279variants can be used instead of 280.Dv REG_PEND ; 281see EXTENDED APIS below.) 
282The regular expression ends, 283not at the first NUL, 284but just before the character pointed to by the 285.Va re_endp 286member of the structure pointed to by 287.Fa preg . 288The 289.Va re_endp 290member is of type 291.Ft "const char *" . 292This flag permits inclusion of NULs in the RE; 293they are considered ordinary characters. 294This is an extension, 295compatible with but not specified by 296.St -p1003.2 , 297and should be used with 298caution in software intended to be portable to other systems. 299.It Dv REG_ENHANCED 300Recognize enhanced regular expression features; see 301.Xr re_format 7 302for details. 303This is an extension not specified by 304.St -p1003.2 , 305and should be used with 306caution in software intended to be portable to other systems. 307.It Dv REG_MINIMAL 308Use minimal (non-greedy) repetitions instead of the normal greedy ones; see 309.Xr re_format 7 310for details. 311(This only applies when both 312.Dv REG_ENHANCED 313and 314.Dv REG_EXTENDED 315are also set.) 316This is an extension not specified by 317.St -p1003.2 , 318and should be used with 319caution in software intended to be portable to other systems. 320.It Dv REG_UNGREEDY 321Alias of 322.Dv REG_MINIMAL . 323.El 324.Pp 325When successful, 326.Fn regcomp 327returns 0 and fills in the structure pointed to by 328.Fa preg . 329One member of that structure 330(other than 331.Va re_endp ) 332is publicized: 333.Va re_nsub , 334of type 335.Ft size_t , 336contains the number of parenthesized subexpressions within the RE 337(except that the value of this member is undefined if the 338.Dv REG_NOSUB 339flag was used). 340If 341.Fn regcomp 342fails, it returns a non-zero error code; 343see 344.Sx RETURN VALUES . 345.Pp 346The 347.Fn regexec 348function 349matches the compiled RE pointed to by 350.Fa preg 351against the 352.Fa string , 353subject to the flags in 354.Fa eflags , 355and reports results using 356.Fa nmatch , 357.Fa pmatch , 358and the returned value. 
359The RE must have been compiled by a previous invocation of 360.Fn regcomp . 361The compiled form is not altered during execution of 362.Fn regexec , 363so a single compiled RE can be used simultaneously by multiple threads. 364.Pp 365By default, 366the NUL-terminated string pointed to by 367.Fa string 368is considered to be the text of an entire line, minus any terminating 369newline. 370The 371.Fa eflags 372argument is the bitwise OR of zero or more of the following flags: 373.Bl -tag -width REG_STARTEND 374.It Dv REG_NOTBOL 375The first character of 376the string 377is not the beginning of a line, so the 378.Ql ^\& 379anchor should not match before it. 380This does not affect the behavior of newlines under 381.Dv REG_NEWLINE . 382.It Dv REG_NOTEOL 383The NUL terminating 384the string 385does not end a line, so the 386.Ql $\& 387anchor should not match before it. 388This does not affect the behavior of newlines under 389.Dv REG_NEWLINE . 390.It Dv REG_STARTEND 391The string is considered to start at 392.Fa string 393+ 394.Fa pmatch Ns [0]. Ns Va rm_so 395and to have a terminating NUL located at 396.Fa string 397+ 398.Fa pmatch Ns [0]. Ns Va rm_eo 399(there need not actually be a NUL at that location), 400regardless of the value of 401.Fa nmatch . 402See below for the definition of 403.Fa pmatch 404and 405.Fa nmatch . 406This is an extension, 407compatible with but not specified by 408.St -p1003.2 , 409and should be used with 410caution in software intended to be portable to other systems. 411Note that a non-zero 412.Va rm_so 413does not imply 414.Dv REG_NOTBOL ; 415.Dv REG_STARTEND 416affects only the location of the string, 417not how it is matched. 418.El 419.Pp 420See 421.Xr re_format 7 422for a discussion of what is matched in situations where an RE or a 423portion thereof could match any of several substrings of 424.Fa string . 425.Pp 426Normally, 427.Fn regexec 428returns 0 for success and the non-zero code 429.Dv REG_NOMATCH 430for failure. 
431Other non-zero error codes may be returned in exceptional situations; 432see 433.Sx RETURN VALUES . 434.Pp 435If 436.Dv REG_NOSUB 437was specified in the compilation of the RE, 438or if 439.Fa nmatch 440is 0, 441.Fn regexec 442ignores the 443.Fa pmatch 444argument (but see below for the case where 445.Dv REG_STARTEND 446is specified). 447Otherwise, 448.Fa pmatch 449points to an array of 450.Fa nmatch 451structures of type 452.Ft regmatch_t . 453Such a structure has at least the members 454.Va rm_so 455and 456.Va rm_eo , 457both of type 458.Ft regoff_t 459(a signed arithmetic type at least as large as an 460.Ft off_t 461and a 462.Ft ssize_t ) , 463containing respectively the offset of the first character of a substring 464and the offset of the first character after the end of the substring. 465Offsets are measured from the beginning of the 466.Fa string 467argument given to 468.Fn regexec . 469An empty substring is denoted by equal offsets, 470both indicating the character following the empty substring. 471.Pp 472The 0th member of the 473.Fa pmatch 474array is filled in to indicate what substring of 475.Fa string 476was matched by the entire RE. 477Remaining members report what substring was matched by parenthesized 478subexpressions within the RE; 479member 480.Va i 481reports subexpression 482.Va i , 483with subexpressions counted (starting at 1) by the order of their opening 484parentheses in the RE, left to right. 485Unused entries in the array (corresponding either to subexpressions that 486did not participate in the match at all, or to subexpressions that do not 487exist in the RE (that is, 488.Va i 489> 490.Fa preg Ns -> Ns Va re_nsub ) ) 491have both 492.Va rm_so 493and 494.Va rm_eo 495set to -1. 496If a subexpression participated in the match several times, 497the reported substring is the last one it matched. 
498(Note, as an example in particular, that when the RE 499.Ql "(b*)+" 500matches 501.Ql bbb , 502the parenthesized subexpression matches each of the three 503.So Li b Sc Ns s 504and then 505an infinite number of empty strings following the last 506.Ql b , 507so the reported substring is one of the empties.) 508.Pp 509If 510.Dv REG_STARTEND 511is specified, 512.Fa pmatch 513must point to at least one 514.Ft regmatch_t 515(even if 516.Fa nmatch 517is 0 or 518.Dv REG_NOSUB 519was specified), 520to hold the input offsets for 521.Dv REG_STARTEND . 522Use for output is still entirely controlled by 523.Fa nmatch ; 524if 525.Fa nmatch 526is 0 or 527.Dv REG_NOSUB 528was specified, 529the value of 530.Fa pmatch Ns [0] 531will not be changed by a successful 532.Fn regexec . 533.Pp 534The 535.Fn regerror 536function 537maps a non-zero 538.Fa errcode 539from either 540.Fn regcomp 541or 542.Fn regexec 543to a human-readable, printable message. 544If 545.Fa preg 546is 547.No non\- Ns Dv NULL , 548the error code should have arisen from use of 549the 550.Ft regex_t 551pointed to by 552.Fa preg , 553and if the error code came from 554.Fn regcomp , 555it should have been the result from the most recent 556.Fn regcomp 557using that 558.Ft regex_t . 559(The 560.Fn regerror 561function may be able to supply a more detailed message using information 562from the 563.Ft regex_t . ) 564The 565.Fn regerror 566function 567places the NUL-terminated message into the buffer pointed to by 568.Fa errbuf , 569limiting the length (including the NUL) to at most 570.Fa errbuf_size 571bytes. 572If the whole message will not fit, 573as much of it as will fit before the terminating NUL is supplied. 574In any case, 575the returned value is the size of buffer needed to hold the whole 576message (including terminating NUL). 577If 578.Fa errbuf_size 579is 0, 580.Fa errbuf 581is ignored but the return value is still correct. 
582.Pp 583If the 584.Fa errcode 585given to 586.Fn regerror 587is first ORed with 588.Dv REG_ITOA , 589the 590.Dq message 591that results is the printable name of the error code, 592e.g.\& 593.Dq Dv REG_NOMATCH , 594rather than an explanation thereof. 595If 596.Fa errcode 597is 598.Dv REG_ATOI , 599then 600.Fa preg 601shall be 602.No non\- Ns Dv NULL 603and the 604.Va re_endp 605member of the structure it points to 606must point to the printable name of an error code; 607in this case, the result in 608.Fa errbuf 609is the decimal digits of 610the numeric value of the error code 611(0 if the name is not recognized). 612.Dv REG_ITOA 613and 614.Dv REG_ATOI 615are intended primarily as debugging facilities; 616they are extensions, 617compatible with but not specified by 618.St -p1003.2 , 619and should be used with 620caution in software intended to be portable to other systems. 621Be warned also that they are considered experimental and changes are possible. 622.Pp 623The 624.Fn regfree 625function 626frees any dynamically-allocated storage associated with the compiled RE 627pointed to by 628.Fa preg . 629The remaining 630.Ft regex_t 631is no longer a valid compiled RE 632and the effect of supplying it to 633.Fn regexec 634or 635.Fn regerror 636is undefined. 637.Pp 638None of these functions references global variables except for tables 639of constants; 640all are safe for use from multiple threads if the arguments are safe. 641.Sh EXTENDED APIS 642These extended APIs are available in Mac OS X 10.8 and beyond, when the 643deployment target is 10.8 or later. 644It should also be noted that any of the 645.Fn regcomp 646variants may be used to initialize a 647.Ft regex_t 648structure, that can then be passed to any of the 649.Fn regexec 650variants. 651So it is quite legal to compile a wide character RE and use it to match a 652multibyte character string, or vice versa. 
653.Pp 654The 655.Fn regncomp 656routine compiles regular expressions like 657.Fn regcomp , 658but the length of the regular expression string is specified, allowing a string 659that is not NUL terminated and/or contains NUL characters. 660This is a modern replacement for using 661.Fn regcomp 662with the 663.Dv REG_PEND 664option. 665.Pp 666Similarly, the 667.Fn regnexec 668routine is like 669.Fn regexec , 670but the length of the string to match is specified, allowing a string 671that is not NUL terminated and/or contains NUL characters. 672.Pp 673The 674.Fn regwcomp 675and 676.Fn regwexec 677variants take a wide-character 678.Vt ( wchar_t ) 679string for the regular expression and string to match. 680And 681.Fn regwncomp 682and 683.Fn regwnexec 684are variants that allow specifying the wide character string length, and 685so allow wide character strings that are not NUL terminated and/or 686contain NUL characters. 687.Sh INTERACTION WITH THE LOCALE 688When 689.Fn regcomp 690or one of its variants is run, the regular expression is compiled into an 691internal form, which may include specific information about the locale currently 692in effect, such as equivalence classes or multi-character collation symbols. 693So a reference to the current locale is also stored with the internal form, 694so that when 695.Fn regexec 696is run, it can use the same locale (even if the locale is changed in-between 697the calls to 698.Fn regcomp 699and 700.Fn regexec ) . 701.Pp 702To provide more direct control over which locale is used, 703routines with 704.Dq Nm _l 705appended to their names are provided that work just like the variants 706without the 707.Dq Nm _l , 708except that a locale (via a 709.Vt locale_t 710variable type) is specified directly. 711Note that only variants of 712.Fn regcomp 713have 714.Dq Nm _l 715variants, since the 716.Fn regexec 717variants just use the reference to the locale stored in the internal form. 
718.Sh IMPLEMENTATION CHOICES 719The 720.Nm regex 721implementation in Mac OS X 10.8 and later is based on a heavily modified subset 722of TRE (http://laurikari.net/tre/). 723This provides improved performance, better conformance and additional features. 724However, both API and binary compatibility have been maintained with previous 725releases, so binaries 726built on previous releases should work on 10.8 and later, and binaries built on 72710.8 and later should be able to run on previous releases (as long as none of 728the new variants or new features are used). 729.Pp 730There are a number of decisions that 731.St -p1003.2 732leaves up to the implementor, 733either by explicitly saying 734.Dq undefined 735or by virtue of them being 736forbidden by the RE grammar. 737This implementation treats them as follows. 738.Pp 739See 740.Xr re_format 7 741for a discussion of the definition of case-independent matching. 742.Pp 743There is no particular limit on the length of REs, 744except insofar as memory is limited. 745Memory usage is approximately linear in RE size, and largely insensitive 746to RE complexity, except for bounded repetitions. 747See 748.Sx BUGS 749for one short RE using them 750that will run almost any system out of memory. 751.Pp 752A backslashed character other than one specifically given a magic meaning 753by 754.St -p1003.2 755(such magic meanings occur only in obsolete 756.Bq Dq basic 757REs) 758is taken as an ordinary character. 759.Pp 760Any unmatched 761.Ql [\& 762is a 763.Dv REG_EBRACK 764error. 765.Pp 766Equivalence classes cannot begin or end bracket-expression ranges. 767The endpoint of one range cannot begin another. 768.Pp 769.Dv RE_DUP_MAX , 770the limit on repetition counts in bounded repetitions, is 255. 
771.Pp 772A repetition operator 773.Ql ( ?\& , 774.Ql *\& , 775.Ql +\& , 776or bounds) 777cannot follow another 778repetition operator, except for the use of 779.Ql ?\& 780for minimal repetition (for enhanced extended REs; see 781.Xr re_format 7 782for details). 783A repetition operator cannot begin an expression or subexpression 784or follow 785.Ql ^\& 786or 787.Ql |\& . 788.Pp 789.Ql |\& 790cannot appear first or last in a (sub)expression or after another 791.Ql |\& , 792i.e., an operand of 793.Ql |\& 794cannot be an empty subexpression. 795An empty parenthesized subexpression, 796.Ql "()" , 797is legal and matches an 798empty (sub)string. 799An empty string is not a legal RE. 800.Pp 801A 802.Ql {\& 803followed by a digit is considered the beginning of bounds for a 804bounded repetition, which must then follow the syntax for bounds. 805A 806.Ql {\& 807.Em not 808followed by a digit is considered an ordinary character. 809.Pp 810.Ql ^\& 811and 812.Ql $\& 813beginning and ending subexpressions in obsolete 814.Pq Dq basic 815REs are anchors, not ordinary characters. 
816.Sh RETURN VALUES 817Non-zero error codes from 818.Fn regcomp 819and 820.Fn regexec 821include the following: 822.Pp 823.Bl -tag -width REG_ECOLLATE -compact 824.It Dv REG_NOMATCH 825The 826.Fn regexec 827function 828failed to match 829.It Dv REG_BADPAT 830invalid regular expression 831.It Dv REG_ECOLLATE 832invalid collating element 833.It Dv REG_ECTYPE 834invalid character class 835.It Dv REG_EESCAPE 836.Ql \e 837applied to unescapable character 838.It Dv REG_ESUBREG 839invalid backreference number 840.It Dv REG_EBRACK 841brackets 842.Ql "[ ]" 843not balanced 844.It Dv REG_EPAREN 845parentheses 846.Ql "( )" 847not balanced 848.It Dv REG_EBRACE 849braces 850.Ql "{ }" 851not balanced 852.It Dv REG_BADBR 853invalid repetition count(s) in 854.Ql "{ }" 855.It Dv REG_ERANGE 856invalid character range in 857.Ql "[ ]" 858.It Dv REG_ESPACE 859ran out of memory 860.It Dv REG_BADRPT 861.Ql ?\& , 862.Ql *\& , 863or 864.Ql +\& 865operand invalid 866.It Dv REG_EMPTY 867empty (sub)expression 868.It Dv REG_ASSERT 869cannot happen - you found a bug 870.It Dv REG_INVARG 871invalid argument, e.g.\& negative-length string 872.It Dv REG_ILLSEQ 873illegal byte sequence (bad multibyte character) 874.El 875.Sh SEE ALSO 876.Xr grep 1 , 877.Xr re_format 7 878.Pp 879.St -p1003.2 , 880sections 2.8 (Regular Expression Notation) 881and 882B.5 (C Binding for Regular Expression Matching). 883.Sh HISTORY 884The 885.Nm regex 886implementation is based on a heavily modified subset of TRE 887(http://laurikari.net/tre/), originally written by Ville Laurikari. 888Previous releases used an implementation originally written by 889.An Henry Spencer , 890and altered for inclusion in the 891.Bx 4.4 892distribution. 893.Sh BUGS 894The beginning-of-line and end-of-line anchors ( 895.Dq ^\& 896and 897.Dq $\& ) 898are currently implemented so that repetitions can not be applied to them. 899The standards are unclear about whether this is legal, but other 900.Nm regex 901packages do support this case. 
902It is best to avoid this non-portable (and not really very useful) case. 903.Pp 904The back-reference code is subtle and doubts linger about its correctness 905in complex cases. 906.Pp 907The 908.Fn regexec 909variants use one of two internal matching engines. 910The normal one is linear worst-case time in the length of the text being 911searched, and quadratic worst-case time in the length of the used regular 912expression. 913When back-references are used, a slower, backtracking engine is used. 914While all backtracking matching engines suffer from extreme slowness for certain 915pathological cases, the normal engine doesn't suffer from these cases. 916It is advised to avoid back-references whenever possible. 917.Pp 918The 919.Fn regcomp 920variants 921implement bounded repetitions by macro expansion, 922which is costly in time and space if counts are large 923or bounded repetitions are nested. 924An RE like, say, 925.Ql "((((a{1,100}){1,100}){1,100}){1,100}){1,100}" 926will (eventually) run almost any existing machine out of swap space. 927.Pp 928Due to a mistake in 929.St -p1003.2 , 930things like 931.Ql "a)b" 932are legal REs because 933.Ql )\& 934is 935a special character only in the presence of a previous unmatched 936.Ql (\& . 937This cannot be fixed until the spec is fixed. 938.Pp 939The standard's definition of back references is vague. 940For example, does 941.Ql "a\e(\e(b\e)*\e2\e)*d" 942match 943.Ql "abbbd" ? 944Until the standard is clarified, 945behavior in such cases should not be relied on. 