1*760c2415Smrg /++
2*760c2415Smrg   $(LINK2 https://en.wikipedia.org/wiki/Regular_expression, Regular expressions)
3*760c2415Smrg   are a commonly used method of pattern matching
4*760c2415Smrg   on strings, with $(I regex) being a catchy word for a pattern in this domain
5*760c2415Smrg   specific language. Typical problems usually solved by regular expressions
6*760c2415Smrg   include validation of user input and the ubiquitous find $(AMP) replace
7*760c2415Smrg   in text processing utilities.
8*760c2415Smrg 
9*760c2415Smrg $(SCRIPT inhibitQuickIndex = 1;)
10*760c2415Smrg $(BOOKTABLE,
11*760c2415Smrg $(TR $(TH Category) $(TH Functions))
12*760c2415Smrg $(TR $(TD Matching) $(TD
13*760c2415Smrg         $(LREF bmatch)
14*760c2415Smrg         $(LREF match)
15*760c2415Smrg         $(LREF matchAll)
16*760c2415Smrg         $(LREF matchFirst)
17*760c2415Smrg ))
18*760c2415Smrg $(TR $(TD Building) $(TD
19*760c2415Smrg         $(LREF ctRegex)
20*760c2415Smrg         $(LREF escaper)
21*760c2415Smrg         $(LREF _regex)
22*760c2415Smrg ))
23*760c2415Smrg $(TR $(TD Replace) $(TD
24*760c2415Smrg         $(LREF replace)
25*760c2415Smrg         $(LREF replaceAll)
26*760c2415Smrg         $(LREF replaceAllInto)
27*760c2415Smrg         $(LREF replaceFirst)
28*760c2415Smrg         $(LREF replaceFirstInto)
29*760c2415Smrg ))
30*760c2415Smrg $(TR $(TD Split) $(TD
31*760c2415Smrg         $(LREF split)
32*760c2415Smrg         $(LREF splitter)
33*760c2415Smrg ))
34*760c2415Smrg $(TR $(TD Objects) $(TD
35*760c2415Smrg         $(LREF Captures)
36*760c2415Smrg         $(LREF Regex)
37*760c2415Smrg         $(LREF RegexException)
38*760c2415Smrg         $(LREF RegexMatch)
39*760c2415Smrg         $(LREF Splitter)
40*760c2415Smrg         $(LREF StaticRegex)
41*760c2415Smrg ))
42*760c2415Smrg )
43*760c2415Smrg 
44*760c2415Smrg   $(SECTION Synopsis)
45*760c2415Smrg   ---
46*760c2415Smrg   import std.regex;
47*760c2415Smrg   import std.stdio;
48*760c2415Smrg   void main()
49*760c2415Smrg   {
50*760c2415Smrg       // Print out all possible dd/mm/yy(yy) dates found in user input.
51*760c2415Smrg       auto r = regex(r"\b[0-9][0-9]?/[0-9][0-9]?/[0-9][0-9](?:[0-9][0-9])?\b");
52*760c2415Smrg       foreach (line; stdin.byLine)
53*760c2415Smrg       {
54*760c2415Smrg         // matchAll() returns a range that can be iterated
55*760c2415Smrg         // to get all subsequent matches.
56*760c2415Smrg         foreach (c; matchAll(line, r))
57*760c2415Smrg             writeln(c.hit);
58*760c2415Smrg       }
59*760c2415Smrg   }
60*760c2415Smrg   ...
61*760c2415Smrg 
62*760c2415Smrg   // Create a static regex at compile-time, which contains fast native code.
63*760c2415Smrg   auto ctr = ctRegex!(`^.*/([^/]+)/?$`);
64*760c2415Smrg 
65*760c2415Smrg   // It works just like a normal regex:
66*760c2415Smrg   auto c2 = matchFirst("foo/bar", ctr);   // First match found here, if any
67*760c2415Smrg   assert(!c2.empty);   // Be sure to check if there is a match before examining contents!
68*760c2415Smrg   assert(c2[1] == "bar");   // Captures is a range of submatches: 0 = full match.
69*760c2415Smrg 
70*760c2415Smrg   ...
71*760c2415Smrg   // multi-pattern regex
  auto multi = regex([`\d+,\d+`,`([a-z]+):(\d+)`]);
  auto m = "abc:43 12,34".matchAll(multi);
  assert(m.front.whichPattern == 2);
  assert(m.front[1] == "abc");
  assert(m.front[2] == "43");
  m.popFront();
  assert(m.front.whichPattern == 1);
  assert(m.front[0] == "12,34");
80*760c2415Smrg   ...
81*760c2415Smrg 
82*760c2415Smrg   // The result of the `matchAll/matchFirst` is directly testable with if/assert/while.
83*760c2415Smrg   // e.g. test if a string consists of letters:
84*760c2415Smrg   assert(matchFirst("Letter", `^\p{L}+$`));
85*760c2415Smrg   ---
86*760c2415Smrg 
87*760c2415Smrg   $(SECTION Syntax and general information)
88*760c2415Smrg   The general usage guideline is to keep regex complexity on the side of simplicity,
89*760c2415Smrg   as its capabilities reside in purely character-level manipulation.
90*760c2415Smrg   As such it's ill-suited for tasks involving higher level invariants
91*760c2415Smrg   like matching an integer number $(U bounded) in an [a,b] interval.
  Checks of this sort are better addressed by additional post-processing.
93*760c2415Smrg 
94*760c2415Smrg   The basic syntax shouldn't surprise experienced users of regular expressions.
95*760c2415Smrg   For an introduction to $(D std.regex) see a
96*760c2415Smrg   $(HTTP dlang.org/regular-expression.html, short tour) of the module API
97*760c2415Smrg   and its abilities.
98*760c2415Smrg 
99*760c2415Smrg   There are other web resources on regular expressions to help newcomers,
100*760c2415Smrg   and a good $(HTTP www.regular-expressions.info, reference with tutorial)
101*760c2415Smrg   can easily be found.
102*760c2415Smrg 
103*760c2415Smrg   This library uses a remarkably common ECMAScript syntax flavor
104*760c2415Smrg   with the following extensions:
105*760c2415Smrg   $(UL
106*760c2415Smrg     $(LI Named subexpressions, with Python syntax. )
    $(LI Unicode properties such as Scripts, Blocks and common binary properties e.g. Alphabetic, White_Space, Hex_Digit etc.)
    $(LI Arbitrary length and complexity lookbehind, including lookahead in lookbehind and vice versa.)
109*760c2415Smrg   )
110*760c2415Smrg 
111*760c2415Smrg   $(REG_START Pattern syntax )
112*760c2415Smrg   $(I std.regex operates on codepoint level,
113*760c2415Smrg     'character' in this table denotes a single Unicode codepoint.)
114*760c2415Smrg   $(REG_TABLE
115*760c2415Smrg     $(REG_TITLE Pattern element, Semantics )
116*760c2415Smrg     $(REG_TITLE Atoms, Match single characters )
117*760c2415Smrg     $(REG_ROW any character except [{|*+?()^$, Matches the character itself. )
118*760c2415Smrg     $(REG_ROW ., In single line mode matches any character.
119*760c2415Smrg       Otherwise it matches any character except '\n' and '\r'. )
120*760c2415Smrg     $(REG_ROW [class], Matches a single character
121*760c2415Smrg       that belongs to this character class. )
122*760c2415Smrg     $(REG_ROW [^class], Matches a single character that
123*760c2415Smrg       does $(U not) belong to this character class.)
124*760c2415Smrg     $(REG_ROW \cC, Matches the control character corresponding to letter C)
125*760c2415Smrg     $(REG_ROW \xXX, Matches a character with hexadecimal value of XX. )
126*760c2415Smrg     $(REG_ROW \uXXXX, Matches a character  with hexadecimal value of XXXX. )
127*760c2415Smrg     $(REG_ROW \U00YYYYYY, Matches a character with hexadecimal value of YYYYYY. )
128*760c2415Smrg     $(REG_ROW \f, Matches a formfeed character. )
129*760c2415Smrg     $(REG_ROW \n, Matches a linefeed character. )
130*760c2415Smrg     $(REG_ROW \r, Matches a carriage return character. )
131*760c2415Smrg     $(REG_ROW \t, Matches a tab character. )
132*760c2415Smrg     $(REG_ROW \v, Matches a vertical tab character. )
133*760c2415Smrg     $(REG_ROW \d, Matches any Unicode digit. )
134*760c2415Smrg     $(REG_ROW \D, Matches any character except Unicode digits. )
135*760c2415Smrg     $(REG_ROW \w, Matches any word character (note: this includes numbers).)
136*760c2415Smrg     $(REG_ROW \W, Matches any non-word character.)
137*760c2415Smrg     $(REG_ROW \s, Matches whitespace, same as \p{White_Space}.)
138*760c2415Smrg     $(REG_ROW \S, Matches any character except those recognized as $(I \s ). )
139*760c2415Smrg     $(REG_ROW \\, Matches \ character. )
140*760c2415Smrg     $(REG_ROW \c where c is one of [|*+?(), Matches the character c itself. )
141*760c2415Smrg     $(REG_ROW \p{PropertyName}, Matches a character that belongs
142*760c2415Smrg         to the Unicode PropertyName set.
143*760c2415Smrg       Single letter abbreviations can be used without surrounding {,}. )
144*760c2415Smrg     $(REG_ROW  \P{PropertyName}, Matches a character that does not belong
145*760c2415Smrg         to the Unicode PropertyName set.
146*760c2415Smrg       Single letter abbreviations can be used without surrounding {,}. )
147*760c2415Smrg     $(REG_ROW \p{InBasicLatin}, Matches any character that is part of
148*760c2415Smrg           the BasicLatin Unicode $(U block).)
149*760c2415Smrg     $(REG_ROW \P{InBasicLatin}, Matches any character except ones in
150*760c2415Smrg           the BasicLatin Unicode $(U block).)
151*760c2415Smrg     $(REG_ROW \p{Cyrillic}, Matches any character that is part of
152*760c2415Smrg         Cyrillic $(U script).)
153*760c2415Smrg     $(REG_ROW \P{Cyrillic}, Matches any character except ones in
154*760c2415Smrg         Cyrillic $(U script).)
155*760c2415Smrg     $(REG_TITLE Quantifiers, Specify repetition of other elements)
156*760c2415Smrg     $(REG_ROW *, Matches previous character/subexpression 0 or more times.
157*760c2415Smrg       Greedy version - tries as many times as possible.)
158*760c2415Smrg     $(REG_ROW *?, Matches previous character/subexpression 0 or more times.
159*760c2415Smrg       Lazy version  - stops as early as possible.)
160*760c2415Smrg     $(REG_ROW +, Matches previous character/subexpression 1 or more times.
161*760c2415Smrg       Greedy version - tries as many times as possible.)
162*760c2415Smrg     $(REG_ROW +?, Matches previous character/subexpression 1 or more times.
163*760c2415Smrg       Lazy version  - stops as early as possible.)
164*760c2415Smrg     $(REG_ROW {n}, Matches previous character/subexpression exactly n times. )
165*760c2415Smrg     $(REG_ROW {n$(COMMA)}, Matches previous character/subexpression n times or more.
166*760c2415Smrg       Greedy version - tries as many times as possible. )
167*760c2415Smrg     $(REG_ROW {n$(COMMA)}?, Matches previous character/subexpression n times or more.
168*760c2415Smrg       Lazy version - stops as early as possible.)
169*760c2415Smrg     $(REG_ROW {n$(COMMA)m}, Matches previous character/subexpression n to m times.
170*760c2415Smrg       Greedy version - tries as many times as possible, but no more than m times. )
171*760c2415Smrg     $(REG_ROW {n$(COMMA)m}?, Matches previous character/subexpression n to m times.
      Lazy version - stops as early as possible, but no less than n times.)
173*760c2415Smrg     $(REG_TITLE Other, Subexpressions $(AMP) alternations )
174*760c2415Smrg     $(REG_ROW (regex),  Matches subexpression regex,
175*760c2415Smrg       saving matched portion of text for later retrieval. )
176*760c2415Smrg     $(REG_ROW (?#comment), An inline comment that is ignored while matching.)
177*760c2415Smrg     $(REG_ROW (?:regex), Matches subexpression regex,
178*760c2415Smrg       $(U not) saving matched portion of text. Useful to speed up matching. )
179*760c2415Smrg     $(REG_ROW A|B, Matches subexpression A, or failing that, matches B. )
180*760c2415Smrg     $(REG_ROW (?P$(LT)name$(GT)regex), Matches named subexpression
181*760c2415Smrg         regex labeling it with name 'name'.
182*760c2415Smrg         When referring to a matched portion of text,
183*760c2415Smrg         names work like aliases in addition to direct numbers.
184*760c2415Smrg      )
185*760c2415Smrg     $(REG_TITLE Assertions, Match position rather than character )
    $(REG_ROW ^, Matches at the beginning of input or line (in multiline mode).)
187*760c2415Smrg     $(REG_ROW $, Matches at the end of input or line (in multiline mode). )
188*760c2415Smrg     $(REG_ROW \b, Matches at word boundary. )
189*760c2415Smrg     $(REG_ROW \B, Matches when $(U not) at word boundary. )
190*760c2415Smrg     $(REG_ROW (?=regex), Zero-width lookahead assertion.
191*760c2415Smrg         Matches at a point where the subexpression
192*760c2415Smrg         regex could be matched starting from the current position.
193*760c2415Smrg       )
194*760c2415Smrg     $(REG_ROW (?!regex), Zero-width negative lookahead assertion.
195*760c2415Smrg         Matches at a point where the subexpression
196*760c2415Smrg         regex could $(U not) be matched starting from the current position.
197*760c2415Smrg       )
198*760c2415Smrg     $(REG_ROW (?<=regex), Zero-width lookbehind assertion. Matches at a point
199*760c2415Smrg         where the subexpression regex could be matched ending
200*760c2415Smrg         at the current position (matching goes backwards).
201*760c2415Smrg       )
202*760c2415Smrg     $(REG_ROW  (?<!regex), Zero-width negative lookbehind assertion.
203*760c2415Smrg       Matches at a point where the subexpression regex could $(U not)
204*760c2415Smrg       be matched ending at the current position (matching goes backwards).
205*760c2415Smrg      )
206*760c2415Smrg   )
207*760c2415Smrg 
208*760c2415Smrg   $(REG_START Character classes )
209*760c2415Smrg   $(REG_TABLE
210*760c2415Smrg     $(REG_TITLE Pattern element, Semantics )
211*760c2415Smrg     $(REG_ROW Any atom, Has the same meaning as outside of a character class.)
212*760c2415Smrg     $(REG_ROW a-z, Includes characters a, b, c, ..., z. )
213*760c2415Smrg     $(REG_ROW [a||b]$(COMMA) [a--b]$(COMMA) [a~~b]$(COMMA) [a$(AMP)$(AMP)b],
214*760c2415Smrg      Where a, b are arbitrary classes, means union, set difference,
215*760c2415Smrg      symmetric set difference, and intersection respectively.
216*760c2415Smrg      $(I Any sequence of character class elements implicitly forms a union.) )
217*760c2415Smrg   )
218*760c2415Smrg 
219*760c2415Smrg   $(REG_START Regex flags )
220*760c2415Smrg   $(REG_TABLE
221*760c2415Smrg     $(REG_TITLE Flag, Semantics )
222*760c2415Smrg     $(REG_ROW g, Global regex, repeat over the whole input. )
223*760c2415Smrg     $(REG_ROW i, Case insensitive matching. )
224*760c2415Smrg     $(REG_ROW m, Multi-line mode, match ^, $ on start and end line separators
225*760c2415Smrg        as well as start and end of input.)
226*760c2415Smrg     $(REG_ROW s, Single-line mode, makes . match '\n' and '\r' as well. )
227*760c2415Smrg     $(REG_ROW x, Free-form syntax, ignores whitespace in pattern,
228*760c2415Smrg       useful for formatting complex regular expressions. )
229*760c2415Smrg   )
230*760c2415Smrg 
231*760c2415Smrg   $(SECTION Unicode support)
232*760c2415Smrg 
233*760c2415Smrg   This library provides full Level 1 support* according to
234*760c2415Smrg     $(HTTP unicode.org/reports/tr18/, UTS 18). Specifically:
235*760c2415Smrg   $(UL
236*760c2415Smrg     $(LI 1.1 Hex notation via any of \uxxxx, \U00YYYYYY, \xZZ.)
237*760c2415Smrg     $(LI 1.2 Unicode properties.)
238*760c2415Smrg     $(LI 1.3 Character classes with set operations.)
239*760c2415Smrg     $(LI 1.4 Word boundaries use the full set of "word" characters.)
240*760c2415Smrg     $(LI 1.5 Using simple casefolding to match case
241*760c2415Smrg         insensitively across the full range of codepoints.)
242*760c2415Smrg     $(LI 1.6 Respecting line breaks as any of
243*760c2415Smrg         \u000A | \u000B | \u000C | \u000D | \u0085 | \u2028 | \u2029 | \u000D\u000A.)
244*760c2415Smrg     $(LI 1.7 Operating on codepoint level.)
245*760c2415Smrg   )
246*760c2415Smrg   *With exception of point 1.1.1, as of yet, normalization of input
247*760c2415Smrg     is expected to be enforced by user.
248*760c2415Smrg 
249*760c2415Smrg     $(SECTION Replace format string)
250*760c2415Smrg 
251*760c2415Smrg     A set of functions in this module that do the substitution rely
252*760c2415Smrg     on a simple format to guide the process. In particular the table below
253*760c2415Smrg     applies to the $(D format) argument of
254*760c2415Smrg     $(LREF replaceFirst) and $(LREF replaceAll).
255*760c2415Smrg 
256*760c2415Smrg     The format string can reference parts of match using the following notation.
257*760c2415Smrg     $(REG_TABLE
258*760c2415Smrg         $(REG_TITLE Format specifier, Replaced by )
259*760c2415Smrg         $(REG_ROW $$(AMP), the whole match. )
260*760c2415Smrg         $(REG_ROW $(DOLLAR)$(BACKTICK), part of input $(I preceding) the match. )
261*760c2415Smrg         $(REG_ROW $', part of input $(I following) the match. )
262*760c2415Smrg         $(REG_ROW $$, '$' character. )
263*760c2415Smrg         $(REG_ROW \c $(COMMA) where c is any character, the character c itself. )
264*760c2415Smrg         $(REG_ROW \\, '\' character. )
265*760c2415Smrg         $(REG_ROW $(DOLLAR)1 .. $(DOLLAR)99, submatch number 1 to 99 respectively. )
266*760c2415Smrg     )
267*760c2415Smrg 
268*760c2415Smrg   $(SECTION Slicing and zero memory allocations orientation)
269*760c2415Smrg 
270*760c2415Smrg   All matches returned by pattern matching functionality in this library
271*760c2415Smrg     are slices of the original input. The notable exception is the $(D replace)
272*760c2415Smrg     family of functions  that generate a new string from the input.
273*760c2415Smrg 
274*760c2415Smrg     In cases where producing the replacement is the ultimate goal
275*760c2415Smrg     $(LREF replaceFirstInto) and $(LREF replaceAllInto) could come in handy
276*760c2415Smrg     as functions that  avoid allocations even for replacement.
277*760c2415Smrg 
278*760c2415Smrg     Copyright: Copyright Dmitry Olshansky, 2011-
279*760c2415Smrg 
280*760c2415Smrg   License: $(HTTP boost.org/LICENSE_1_0.txt, Boost License 1.0).
281*760c2415Smrg 
282*760c2415Smrg   Authors: Dmitry Olshansky,
283*760c2415Smrg 
284*760c2415Smrg     API and utility constructs are modeled after the original $(D std.regex)
285*760c2415Smrg   by Walter Bright and Andrei Alexandrescu.
286*760c2415Smrg 
287*760c2415Smrg   Source: $(PHOBOSSRC std/_regex/_package.d)
288*760c2415Smrg 
289*760c2415Smrg Macros:
290*760c2415Smrg     REG_ROW = $(TR $(TD $(I $1 )) $(TD $+) )
291*760c2415Smrg     REG_TITLE = $(TR $(TD $(B $1)) $(TD $(B $2)) )
292*760c2415Smrg     REG_TABLE = <table border="1" cellspacing="0" cellpadding="5" > $0 </table>
293*760c2415Smrg     REG_START = <h3><div align="center"> $0 </div></h3>
294*760c2415Smrg     SECTION = <h3><a id="$1" href="#$1" class="anchor">$0</a></h3>
295*760c2415Smrg     S_LINK = <a href="#$1">$+</a>
296*760c2415Smrg  +/
297*760c2415Smrg module std.regex;
298*760c2415Smrg 
299*760c2415Smrg import std.range.primitives, std.traits;
300*760c2415Smrg import std.regex.internal.ir;
301*760c2415Smrg import std.regex.internal.thompson; //TODO: get rid of this dependency
302*760c2415Smrg import std.typecons; // : Flag, Yes, No;
303*760c2415Smrg 
/++
    A $(D Regex) object holds a regular expression pattern in compiled form.

    Instances of this object are constructed via calls to $(D regex).
    This is the intended form for caching and storage of frequently
    used regular expressions.

    Example:

    Test whether this object contains a compiled pattern.
    ---
    Regex!char r;
    assert(r.empty);
    r = regex(""); // Note: "" is a valid regex pattern.
    assert(!r.empty);
    ---

    Getting a range of all the named captures in the regex.
    ----
    import std.range;
    import std.algorithm;

    auto re = regex(`(?P<name>\w+) = (?P<var>\d+)`);
    auto nc = re.namedCaptures;
    static assert(isRandomAccessRange!(typeof(nc)));
    assert(!nc.empty);
    assert(nc.length == 2);
    assert(nc.equal(["name", "var"]));
    assert(nc[0] == "name");
    assert(nc[1..$].equal(["var"]));
    ----
+/
public alias Regex(Char) = std.regex.internal.ir.Regex!(Char);
337*760c2415Smrg 
/++
    A $(D StaticRegex) is a $(D Regex) object that contains D code specially
    generated at compile-time to speed up matching.

    It is implicitly convertible to a normal $(D Regex),
    however doing so will result in losing this additional capability.
+/
public alias StaticRegex(Char) = std.regex.internal.ir.StaticRegex!(Char);
346*760c2415Smrg 
/++
    Compile regular expression pattern for the later execution.
    Returns: $(D Regex) object that works on inputs having
    the same character width as $(D pattern).

    Params:
    pattern = A single regular expression to match.
    patterns = An array of regular expression strings.
        The resulting `Regex` object will match any expression;
        use $(LREF whichPattern) to know which.
    flags = The _attributes (g, i, m, s and x, as listed in the
        $(I Regex flags) table of the module documentation)

    Throws: $(D RegexException) if there were any errors during compilation,
    or if $(D patterns) is empty.
+/
@trusted public auto regex(S)(S[] patterns, const(char)[] flags="")
if (isSomeString!(S))
{
    import std.array : appender;
    import std.functional : memoize;
    enum cacheSize = 8; //TODO: invent nice interface to control regex caching

    // With no patterns there is nothing to compile; report that as a
    // compilation error rather than indexing patterns[0] out of bounds.
    if (patterns.length == 0)
        throw new RegexException("no regular expression patterns were provided");

    S pat;
    if (patterns.length > 1)
    {
        // Join the alternatives as (?:p0\<t0>)\<t0>|(?:p1\<t1>)\<t1>|...
        // where <tN> is the private-use code point privateUseStart + N.
        auto app = appender!S();
        foreach (i, p; patterns)
        {
            if (i != 0)
                app.put("|");
            app.put("(?:");
            app.put(p);
            // terminator for the pattern
            // to detect if the pattern unexpectedly ends
            app.put("\\");
            app.put(cast(dchar)(privateUseStart+i));
            app.put(")");
            // another one to return correct whichPattern
            // for all of potential alternatives in the patterns[i]
            app.put("\\");
            app.put(cast(dchar)(privateUseStart+i));
        }
        pat = app.data;
    }
    else
        pat = patterns[0];

    // The memoization cache is bypassed during compile-time evaluation;
    // at run time up to cacheSize compiled patterns are cached per S.
    if (__ctfe)
        return regexImpl(pat, flags);
    return memoize!(regexImpl!S, cacheSize)(pat, flags);
}
396*760c2415Smrg 
///ditto
@trusted public auto regex(S)(S pattern, const(char)[] flags="")
if (isSomeString!(S))
{
    // Wrap the lone pattern in a one-element array and defer to the
    // multi-pattern overload, which performs the actual compilation.
    S[] single = [pattern];
    return regex(single, flags);
}
403*760c2415Smrg 
///
@system unittest
{
    // multi-pattern regex example:
    // whichPattern reports the 1-based index of the alternative that matched
    string input = "abc:43 12,34";
    auto multi = regex([`([a-z]+):(\d+)`, `(\d+),\d+`]);
    auto found = input.matchAll(multi);

    // first hit, "abc:43", comes from the first pattern
    auto first = found.front;
    assert(first.whichPattern == 1);
    assert(first[1] == "abc");
    assert(first[2] == "43");

    // second hit, "12,34", comes from the second pattern
    found.popFront();
    auto second = found.front;
    assert(second.whichPattern == 2);
    assert(second[1] == "12");
}
417*760c2415Smrg 
// Parses `pattern` with the given `flags` and returns the program produced
// by the parser. This is the uncached worker that `regex` memoizes.
public auto regexImpl(S)(S pattern, const(char)[] flags="")
if (isSomeString!(S))
{
    import std.regex.internal.parser : Parser, CodeGen;
    alias PatternParser = Parser!(Unqual!S, CodeGen);
    auto patternParser = PatternParser(pattern, flags);
    return patternParser.program;
}
426*760c2415Smrg 
427*760c2415Smrg 
// Implementation detail of ctRegex: compiles `pattern` during compilation,
// generates matcher source for it and wraps both into a StaticRegex
// exposed as the member `nr`.
template ctRegexImpl(alias pattern, string flags=[])
{
    import std.regex.internal.backtracking, std.regex.internal.parser;
    // Being an enum initializer, this call to regex is evaluated at compile time.
    enum r = regex(pattern, flags);
    alias Char = BasicElementOf!(typeof(pattern));
    // D source text of the specialized matcher, produced from the compiled regex.
    enum source = ctGenRegExCode(r);
    alias Matcher = BacktrackingMatcher!(true);
    // NOTE(review): the mixed-in code presumably refers to the `matcher`
    // parameter by name - confirm against ctGenRegExCode before renaming it.
    @trusted bool func(ref Matcher!Char matcher)
    {
        // Prints the generated source when built with -debug=std_regex_ctr.
        debug(std_regex_ctr) pragma(msg, source);
        mixin(source);
    }
    // The static regex: the compiled program paired with its native matcher.
    enum nr = StaticRegex!Char(r, &func);
}
442*760c2415Smrg 
/++
    Compile regular expression using CTFE
    and generate optimized native machine code for matching it.

    Returns: StaticRegex object for faster matching.

    Params:
    pattern = Regular expression
    flags = The _attributes (see the $(I Regex flags) table in the
        module documentation); defaults to no flags
+/
public enum ctRegex(alias pattern, alias flags=[]) = ctRegexImpl!(pattern, flags).nr;
454*760c2415Smrg 
// True when RegEx is the Regex or StaticRegex instantiation whose character
// type equals the basic element type of R.
template isRegexFor(RegEx, R)
{
    private alias Elem = BasicElementOf!R;
    enum isRegexFor = is(RegEx == StaticRegex!Elem) || is(RegEx == Regex!Elem);
}
457*760c2415Smrg 
458*760c2415Smrg 
459*760c2415Smrg /++
460*760c2415Smrg     $(D Captures) object contains submatches captured during a call
461*760c2415Smrg     to $(D match) or iteration over $(D RegexMatch) range.
462*760c2415Smrg 
463*760c2415Smrg     First element of range is the whole match.
464*760c2415Smrg +/
465*760c2415Smrg @trusted public struct Captures(R, DIndex = size_t)
466*760c2415Smrg if (isSomeString!R)
467*760c2415Smrg {//@trusted because of union inside
468*760c2415Smrg     alias DataIndex = DIndex;
469*760c2415Smrg     alias String = R;
470*760c2415Smrg private:
471*760c2415Smrg     import std.conv : text;
472*760c2415Smrg     R _input;
473*760c2415Smrg     int _nMatch;
474*760c2415Smrg     enum smallString = 3;
475*760c2415Smrg     enum SMALL_MASK = 0x8000_0000, REF_MASK= 0x1FFF_FFFF;
476*760c2415Smrg     union
477*760c2415Smrg     {
478*760c2415Smrg         Group!DataIndex[] big_matches;
479*760c2415Smrg         Group!DataIndex[smallString] small_matches;
480*760c2415Smrg     }
481*760c2415Smrg     uint _f, _b;
482*760c2415Smrg     uint _refcount; // ref count or SMALL MASK + num groups
483*760c2415Smrg     NamedGroup[] _names;
484*760c2415Smrg 
485*760c2415Smrg     this()(R input, uint n, NamedGroup[] named)
486*760c2415Smrg     {
487*760c2415Smrg         _input = input;
488*760c2415Smrg         _names = named;
489*760c2415Smrg         newMatches(n);
490*760c2415Smrg         _b = n;
491*760c2415Smrg         _f = 0;
492*760c2415Smrg     }
493*760c2415Smrg 
494*760c2415Smrg     this(alias Engine)(ref RegexMatch!(R,Engine) rmatch)
495*760c2415Smrg     {
496*760c2415Smrg         _input = rmatch._input;
497*760c2415Smrg         _names = rmatch._engine.re.dict;
498*760c2415Smrg         immutable n = rmatch._engine.re.ngroup;
499*760c2415Smrg         newMatches(n);
500*760c2415Smrg         _b = n;
501*760c2415Smrg         _f = 0;
502*760c2415Smrg     }
503*760c2415Smrg 
504*760c2415Smrg     @property inout(Group!DataIndex[]) matches() inout
505*760c2415Smrg     {
506*760c2415Smrg        return (_refcount & SMALL_MASK)  ? small_matches[0 .. _refcount & 0xFF] : big_matches;
507*760c2415Smrg     }
508*760c2415Smrg 
509*760c2415Smrg     void newMatches(uint n)
510*760c2415Smrg     {
511*760c2415Smrg         import core.stdc.stdlib : calloc;
512*760c2415Smrg         import std.exception : enforce;
513*760c2415Smrg         if (n > smallString)
514*760c2415Smrg         {
515*760c2415Smrg             auto p = cast(Group!DataIndex*) enforce(
516*760c2415Smrg                 calloc(Group!DataIndex.sizeof,n),
517*760c2415Smrg                 "Failed to allocate Captures struct"
518*760c2415Smrg             );
519*760c2415Smrg             big_matches = p[0 .. n];
520*760c2415Smrg             _refcount = 1;
521*760c2415Smrg         }
522*760c2415Smrg         else
523*760c2415Smrg         {
524*760c2415Smrg             _refcount = SMALL_MASK | n;
525*760c2415Smrg         }
526*760c2415Smrg     }
527*760c2415Smrg 
528*760c2415Smrg     bool unique()
529*760c2415Smrg     {
530*760c2415Smrg         return (_refcount & SMALL_MASK) || _refcount == 1;
531*760c2415Smrg     }
532*760c2415Smrg 
533*760c2415Smrg public:
534*760c2415Smrg     this(this)
535*760c2415Smrg     {
536*760c2415Smrg         if (!(_refcount & SMALL_MASK))
537*760c2415Smrg         {
538*760c2415Smrg             _refcount++;
539*760c2415Smrg         }
540*760c2415Smrg     }
541*760c2415Smrg     ~this()
542*760c2415Smrg     {
543*760c2415Smrg         import core.stdc.stdlib : free;
544*760c2415Smrg         if (!(_refcount & SMALL_MASK))
545*760c2415Smrg         {
546*760c2415Smrg             if (--_refcount == 0)
547*760c2415Smrg             {
548*760c2415Smrg                 free(big_matches.ptr);
549*760c2415Smrg                 big_matches = null;
550*760c2415Smrg             }
551*760c2415Smrg         }
552*760c2415Smrg     }
553*760c2415Smrg     ///Slice of input prior to the match.
554*760c2415Smrg     @property R pre()
555*760c2415Smrg     {
556*760c2415Smrg         return _nMatch == 0 ? _input[] : _input[0 .. matches[0].begin];
557*760c2415Smrg     }
558*760c2415Smrg 
559*760c2415Smrg     ///Slice of input immediately after the match.
560*760c2415Smrg     @property R post()
561*760c2415Smrg     {
562*760c2415Smrg         return _nMatch == 0 ? _input[] : _input[matches[0].end .. $];
563*760c2415Smrg     }
564*760c2415Smrg 
565*760c2415Smrg     ///Slice of matched portion of input.
566*760c2415Smrg     @property R hit()
567*760c2415Smrg     {
568*760c2415Smrg         assert(_nMatch, "attempted to get hit of an empty match");
569*760c2415Smrg         return _input[matches[0].begin .. matches[0].end];
570*760c2415Smrg     }
571*760c2415Smrg 
572*760c2415Smrg     ///Range interface.
573*760c2415Smrg     @property R front()
574*760c2415Smrg     {
575*760c2415Smrg         assert(_nMatch, "attempted to get front of an empty match");
576*760c2415Smrg         return _input[matches[_f].begin .. matches[_f].end];
577*760c2415Smrg     }
578*760c2415Smrg 
579*760c2415Smrg     ///ditto
580*760c2415Smrg     @property R back()
581*760c2415Smrg     {
582*760c2415Smrg         assert(_nMatch, "attempted to get back of an empty match");
583*760c2415Smrg         return _input[matches[_b - 1].begin .. matches[_b - 1].end];
584*760c2415Smrg     }
585*760c2415Smrg 
586*760c2415Smrg     ///ditto
587*760c2415Smrg     void popFront()
588*760c2415Smrg     {
589*760c2415Smrg         assert(!empty);
590*760c2415Smrg         ++_f;
591*760c2415Smrg     }
592*760c2415Smrg 
593*760c2415Smrg     ///ditto
594*760c2415Smrg     void popBack()
595*760c2415Smrg     {
596*760c2415Smrg         assert(!empty);
597*760c2415Smrg         --_b;
598*760c2415Smrg     }
599*760c2415Smrg 
600*760c2415Smrg     ///ditto
601*760c2415Smrg     @property bool empty() const { return _nMatch == 0 || _f >= _b; }
602*760c2415Smrg 
603*760c2415Smrg     ///ditto
604*760c2415Smrg     inout(R) opIndex()(size_t i) inout
605*760c2415Smrg     {
606*760c2415Smrg         assert(_f + i < _b,text("requested submatch number ", i," is out of range"));
607*760c2415Smrg         assert(matches[_f + i].begin <= matches[_f + i].end,
608*760c2415Smrg             text("wrong match: ", matches[_f + i].begin, "..", matches[_f + i].end));
609*760c2415Smrg         return _input[matches[_f + i].begin .. matches[_f + i].end];
610*760c2415Smrg     }
611*760c2415Smrg 
/++
    Explicit cast to bool: true if and only if the match succeeded.
    Handy as a condition in if and assert statements. The result does not
    depend on how many submatches have been popped off the range.

    ---
    import std.regex;

    assert(!matchFirst("nothing", "something"));
    ---
+/
@safe bool opCast(T:bool)() const nothrow
{
    immutable matched = _nMatch != 0;
    return matched;
}
624*760c2415Smrg 
/++
    Number of the pattern that matched, counting from 1 for the first pattern.
    Returns 0 on no match.
+/
@safe @property int whichPattern() const nothrow
{
    return _nMatch;
}
631*760c2415Smrg 
///
@system unittest
{
    import std.regex;
    // "abc" is matched by the second of the two supplied patterns
    auto m = matchFirst("abc", "[0-9]+", "[a-z]+");
    assert(m.whichPattern == 2);
}
638*760c2415Smrg 
/++
    Lookup named submatch.

    ---
    import std.regex;
    import std.range;

    auto c = matchFirst("a = 42;", regex(`(?P<var>\w+)\s*=\s*(?P<value>\d+);`));
    assert(c["var"] == "a");
    assert(c["value"] == "42");
    popFrontN(c, 2);
    //named groups are unaffected by range primitives
    assert(c["var"] =="a");
    assert(c.front == "42");
    ---
+/
R opIndex(String)(String i) /*const*/ //@@@BUG@@@
    if (isSomeString!String)
{
    // the slot comes straight from the name table and is not offset by _f,
    // which is why popping submatches does not disturb named lookups
    immutable size_t slot = lookupNamedGroup(_names, i);
    auto group = matches[slot];
    return _input[group.begin .. group.end];
}
661*760c2415Smrg 
///Number of submatches left in this object; 0 if there was no match.
@property size_t length() const
{
    if (_nMatch == 0)
        return 0;
    return _b - _f;
}
664*760c2415Smrg 
///A hook for compatibility with original std.regex.
@property ref captures()
{
    // the object serves as its own captures; returned by reference
    return this;
}
667*760c2415Smrg }
668*760c2415Smrg 
///
@system unittest
{
    import std.range.primitives : popFrontN;

    auto caps = matchFirst("@abc#", regex(`(\w)(\w)(\w)`));
    assert(caps.pre == "@"); // Part of input preceding match
    assert(caps.post == "#"); // Immediately after match
    // The whole match is available both as .hit and as submatch 0
    assert(caps.hit == caps[0]);
    assert(caps.hit == "abc");
    assert(caps[2] == "b");
    // Submatches can be consumed from either end
    assert(caps.front == "abc");
    caps.popFront();
    assert(caps.front == "a");
    assert(caps.back == "c");
    caps.popBack();
    assert(caps.back == "b");
    popFrontN(caps, 2);
    assert(caps.empty);

    // A failed match converts to false
    assert(!matchFirst("nothing", "something"));
}
690*760c2415Smrg 
/++
    A regex engine state, as returned by $(D match) family of functions.

    Effectively it's a forward range of Captures!R, produced
    by lazily searching for matches in a given input.

    $(D alias Engine) specifies an engine type to use during matching,
    and is automatically deduced in a call to $(D match)/$(D bmatch).
+/
@trusted public struct RegexMatch(R, alias Engine = ThompsonMatcher)
if (isSomeString!R)
{
private:
    import core.stdc.stdlib : malloc, free;
    alias Char = BasicElementOf!R;
    alias EngineType = Engine!Char;
    EngineType _engine;
    R _input;
    Captures!(R,EngineType.DataIndex) _captures;
    // malloc'ed block laid out as [size_t reference count][engine memory];
    // copies of this struct share one block until one of them calls popFront
    void[] _memory;//is ref-counted

    // Allocates the engine memory, builds the engine and immediately
    // searches for the first match, storing the outcome in _captures.
    this(RegEx)(R input, RegEx prog)
    {
        import std.exception : enforce;
        _input = input;
        immutable size = EngineType.initialMemory(prog)+size_t.sizeof;
        _memory = (enforce(malloc(size), "malloc failed")[0 .. size]);
        scope(failure) free(_memory.ptr);
        *cast(size_t*)_memory.ptr = 1;
        _engine = EngineType(prog, Input!Char(input), _memory[size_t.sizeof..$]);
        static if (is(RegEx == StaticRegex!(BasicElementOf!R)))
            _engine.nativeFn = prog.nativeFn;
        _captures = Captures!(R,EngineType.DataIndex)(this);
        _captures._nMatch = _engine.match(_captures.matches);
        debug(std_regex_allocation) writefln("RefCount (ctor): %x %d", _memory.ptr, counter);
    }

    // The reference count kept in the first size_t of _memory.
    @property ref size_t counter(){ return *cast(size_t*)_memory.ptr; }
public:
    // Copying shares the memory block and bumps its reference count.
    this(this)
    {
        if (_memory.ptr)
        {
            ++counter;
            debug(std_regex_allocation) writefln("RefCount (postblit): %x %d",
                _memory.ptr, *cast(size_t*)_memory.ptr);
        }
    }

    // Drops one reference; the last owner frees the block.
    ~this()
    {
        if (_memory.ptr && --*cast(size_t*)_memory.ptr == 0)
        {
            debug(std_regex_allocation) writefln("RefCount (dtor): %x %d",
                _memory.ptr, *cast(size_t*)_memory.ptr);
            free(cast(void*)_memory.ptr);
        }
    }

    ///Shorthands for front.pre, front.post, front.hit.
    @property R pre()
    {
        return _captures.pre;
    }

    ///ditto
    @property R post()
    {
        return _captures.post;
    }

    ///ditto
    @property R hit()
    {
        return _captures.hit;
    }

    /++
        Functionality for processing subsequent matches of global regexes via range interface:
        ---
        import std.regex;
        auto m = matchAll("Hello, world!", regex(`\w+`));
        assert(m.front.hit == "Hello");
        m.popFront();
        assert(m.front.hit == "world");
        m.popFront();
        assert(m.empty);
        ---
    +/
    @property auto front()
    {
        return _captures;
    }

    ///ditto
    void popFront()
    {
        import std.exception : enforce;
        if (counter != 1)
        {//do cow magic first
            // Build the private copy completely before touching the shared
            // reference count: if malloc or dupTo throws, this object must
            // still own exactly one reference to the old block, otherwise
            // its destructor would release that block a second time.
            immutable size = EngineType.initialMemory(_engine.re)+size_t.sizeof;
            void[] fresh = enforce(malloc(size), "malloc failed")[0 .. size];
            scope(failure) free(fresh.ptr);
            _engine = _engine.dupTo(fresh[size_t.sizeof .. size]);
            counter--;//we abandon this reference
            _memory = fresh;
            counter = 1;//points to new chunk
        }

        if (!_captures.unique)
        {
            // has external references - allocate new space
            _captures.newMatches(_engine.re.ngroup);
        }
        _captures._nMatch = _engine.match(_captures.matches);
    }

    ///ditto
    auto save(){ return this; }

    ///Test if this match object is empty.
    @property bool empty() const { return _captures._nMatch == 0; }

    ///Same as !(x.empty), provided for its convenience  in conditional statements.
    T opCast(T:bool)(){ return !empty; }

    /// Same as .front, provided for compatibility with original std.regex.
    @property auto captures() inout { return _captures; }

}
819*760c2415Smrg 
// Runs one match attempt of `re` over `input` with the given Engine and
// returns the resulting Captures (its _nMatch is whatever the engine reports).
private @trusted auto matchOnce(alias Engine, RegEx, R)(R input, RegEx re)
{
    import core.stdc.stdlib : malloc, free;
    import std.exception : enforce;
    alias Char = BasicElementOf!R;
    alias EngineType = Engine!Char;

    // working memory for the engine is released when this call returns
    immutable size_t scratchSize = EngineType.initialMemory(re);
    void* raw = enforce(malloc(scratchSize), "malloc failed");
    scope(exit) free(raw);
    void[] scratch = raw[0 .. scratchSize];

    auto result = Captures!(R, EngineType.DataIndex)(input, re.ngroup, re.dict);
    auto matcher = EngineType(re, Input!Char(input), scratch);
    static if (is(RegEx == StaticRegex!(BasicElementOf!R)))
        matcher.nativeFn = re.nativeFn;
    result._nMatch = matcher.match(result.matches);
    return result;
}
837*760c2415Smrg 
// Builds a RegexMatch over `input` with the global option forced on.
private auto matchMany(alias Engine, RegEx, R)(R input, RegEx re)
{
    alias Matcher = RegexMatch!(R, Engine);
    // `re` is received by value; the flag is set on this parameter only
    re.flags |= RegexOption.global;
    return Matcher(input, re);
}
843*760c2415Smrg 
@system unittest
{
    //sanity checks for new API
    auto pattern = regex("abc");
    auto first = "abc".matchOnce!(ThompsonMatcher)(pattern);
    assert(!first.empty);
    auto second = "abc".matchOnce!(ThompsonMatcher)(pattern);
    assert(second[0] == "abc");
}
851*760c2415Smrg 
852*760c2415Smrg 
// True when `fun` can be called with a single Captures!R argument.
private template isReplaceFunctor(alias fun, R)
{
    enum isReplaceFunctor = __traits(compiles, (Captures!R c) { fun(c); });
}
855*760c2415Smrg 
// the lowest level - writes pre, the replacement and post into the sink,
// or the untouched input when there is no match
private @trusted void replaceCapturesInto(alias output, Sink, R, T)
        (ref Sink sink, R input, T captures)
if (isOutputRange!(Sink, dchar) && isSomeString!R)
{
    if (!captures.empty)
    {
        sink.put(captures.pre);
        // a hack to get around bogus errors, should be simply output(captures, sink)
        // "is a nested function and cannot be accessed from"
        static if (isReplaceFunctor!(output, R))
            sink.put(output(captures)); //"mutator" type of function
        else
            output(captures, sink); //"output" type of function
        sink.put(captures.post);
    }
    else
    {
        sink.put(input);
    }
}
875*760c2415Smrg 
// same as replaceCapturesInto but for a whole range of captures: copies the
// text between consecutive matches and the replacement for each match
private void replaceMatchesInto(alias output, Sink, R, T)
        (ref Sink sink, R input, T matches)
if (isOutputRange!(Sink, dchar) && isSomeString!R)
{
    // end position (in input) of the previously handled match
    size_t consumed = 0;
    for (auto rest = matches; !rest.empty; rest.popFront())
    {
        auto cap = rest.front;
        // cap.pre starts at the beginning of input, skip what was already written
        sink.put(cap.pre[consumed .. $]);
        // same hack, see replaceCapturesInto
        static if (isReplaceFunctor!(output, R))
            sink.put(output(cap)); //"mutator" type of function
        else
            output(cap, sink); //"output" type of function
        consumed = cap.pre.length + cap.hit.length;
    }
    // tail after the last match (the whole input if there were none)
    sink.put(input[consumed .. $]);
}
894*760c2415Smrg 
//  a general skeleton of replaceFirst: returns input itself when nothing
//  matches, otherwise a newly built string
private R replaceFirstWith(alias output, R, RegEx)(R input, RegEx re)
if (isSomeString!R && isRegexFor!(RegEx, R))
{
    import std.array : appender;
    auto found = matchFirst(input, re);
    if (!found.empty)
    {
        auto buffer = appender!(R)();
        replaceCapturesInto!output(buffer, input, found);
        return buffer.data;
    }
    return input;
}
907*760c2415Smrg 
// a general skeleton of replaceAll: returns input itself when nothing
// matches, otherwise a newly built string
// the method parameter allows old API to ride on the back of the new one
private R replaceAllWith(alias output,
        alias method=matchAll, R, RegEx)(R input, RegEx re)
if (isSomeString!R && isRegexFor!(RegEx, R))
{
    import std.array : appender;
    auto found = method(input, re); //inout(C)[] fails
    if (!found.empty)
    {
        auto buffer = appender!(R)();
        replaceMatchesInto!output(buffer, input, found);
        return buffer.data;
    }
    return input;
}
922*760c2415Smrg 
923*760c2415Smrg 
/++
    Start matching $(D input) to regex pattern $(D re),
    using Thompson NFA matching scheme.

    The use of this function is $(RED discouraged) - use either of
    $(LREF matchAll) or $(LREF matchFirst).

    Delegating the kind of operation
    to "g" flag is soon to be phased out along with the
    ability to choose the exact matching scheme. The choice of
    matching scheme to use depends highly on the pattern kind and
    can be done automatically on a case by case basis.

    Returns: a $(D RegexMatch) object holding engine state after first match.
+/
public auto match(R, RegEx)(R input, RegEx re)
if (isSomeString!R && is(RegEx == Regex!(BasicElementOf!R)))
{
    import std.regex.internal.thompson : ThompsonMatcher;
    alias Str = Unqual!(typeof(input));
    alias Matcher = RegexMatch!(Str, ThompsonMatcher);
    return Matcher(input, re);
}
946*760c2415Smrg 
///ditto
public auto match(R, String)(R input, String re)
if (isSomeString!R && isSomeString!String)
{
    import std.regex.internal.thompson : ThompsonMatcher;
    alias Str = Unqual!(typeof(input));
    alias Matcher = RegexMatch!(Str, ThompsonMatcher);
    // the pattern string is compiled with regex() on every call
    return Matcher(input, regex(re));
}
954*760c2415Smrg 
// StaticRegex overload: runs on BacktrackingMatcher!true instead of ThompsonMatcher
public auto match(R, RegEx)(R input, RegEx re)
if (isSomeString!R && is(RegEx == StaticRegex!(BasicElementOf!R)))
{
    import std.regex.internal.backtracking : BacktrackingMatcher;
    alias Str = Unqual!(typeof(input));
    alias Matcher = RegexMatch!(Str, BacktrackingMatcher!true);
    return Matcher(input, re);
}
961*760c2415Smrg 
/++
    Find the first (leftmost) slice of the $(D input) that
    matches the pattern $(D re). This function picks the most suitable
    regular expression engine depending on the pattern properties.

    $(D re) parameter can be one of three types:
    $(UL
      $(LI Plain string(s), in which case it's compiled to bytecode before matching. )
      $(LI Regex!char (wchar/dchar) that contains a pattern in the form of
        compiled bytecode. )
      $(LI StaticRegex!char (wchar/dchar) that contains a pattern in the form of
        compiled native machine code. )
    )

    Returns:
    $(LREF Captures) containing the extent of a match together with all submatches
    if there was a match, otherwise an empty $(LREF Captures) object.
+/
public auto matchFirst(R, RegEx)(R input, RegEx re)
if (isSomeString!R && is(RegEx == Regex!(BasicElementOf!R)))
{
    import std.regex.internal.thompson : ThompsonMatcher;
    return input.matchOnce!ThompsonMatcher(re);
}
986*760c2415Smrg 
///ditto
public auto matchFirst(R, String)(R input, String re)
if (isSomeString!R && isSomeString!String)
{
    import std.regex.internal.thompson : ThompsonMatcher;
    // the pattern string is compiled with regex() on every call
    return input.matchOnce!ThompsonMatcher(regex(re));
}
994*760c2415Smrg 
///ditto
public auto matchFirst(R, String)(R input, String[] re...)
if (isSomeString!R && isSomeString!String)
{
    import std.regex.internal.thompson : ThompsonMatcher;
    // all supplied patterns are handed to regex() as one array
    return input.matchOnce!ThompsonMatcher(regex(re));
}
1002*760c2415Smrg 
// StaticRegex overload: runs on BacktrackingMatcher!true
public auto matchFirst(R, RegEx)(R input, RegEx re)
if (isSomeString!R && is(RegEx == StaticRegex!(BasicElementOf!R)))
{
    import std.regex.internal.backtracking : BacktrackingMatcher;
    return input.matchOnce!(BacktrackingMatcher!true)(re);
}
1009*760c2415Smrg 
/++
    Initiate a search for all non-overlapping matches to the pattern $(D re)
    in the given $(D input). The result is a lazy range of matches generated
    as they are encountered in the input going left to right.

    This function picks the most suitable regular expression engine
    depending on the pattern properties.

    $(D re) parameter can be one of three types:
    $(UL
      $(LI Plain string(s), in which case it's compiled to bytecode before matching. )
      $(LI Regex!char (wchar/dchar) that contains a pattern in the form of
        compiled bytecode. )
      $(LI StaticRegex!char (wchar/dchar) that contains a pattern in the form of
        compiled native machine code. )
    )

    Returns:
    $(LREF RegexMatch) object that represents matcher state
    after the first match was found or an empty one if not present.
+/
public auto matchAll(R, RegEx)(R input, RegEx re)
if (isSomeString!R && is(RegEx == Regex!(BasicElementOf!R)))
{
    import std.regex.internal.thompson : ThompsonMatcher;
    return input.matchMany!ThompsonMatcher(re);
}
1037*760c2415Smrg 
///ditto
public auto matchAll(R, String)(R input, String re)
if (isSomeString!R && isSomeString!String)
{
    import std.regex.internal.thompson : ThompsonMatcher;
    // the pattern string is compiled with regex() on every call
    return input.matchMany!ThompsonMatcher(regex(re));
}
1045*760c2415Smrg 
///ditto
public auto matchAll(R, String)(R input, String[] re...)
if (isSomeString!R && isSomeString!String)
{
    import std.regex.internal.thompson : ThompsonMatcher;
    // all supplied patterns are handed to regex() as one array
    return input.matchMany!ThompsonMatcher(regex(re));
}
1053*760c2415Smrg 
// StaticRegex overload: runs on BacktrackingMatcher!true
public auto matchAll(R, RegEx)(R input, RegEx re)
if (isSomeString!R && is(RegEx == StaticRegex!(BasicElementOf!R)))
{
    import std.regex.internal.backtracking : BacktrackingMatcher;
    return input.matchMany!(BacktrackingMatcher!true)(re);
}
1060*760c2415Smrg 
// another set of tests just to cover the new API
@system unittest
{
    import std.algorithm.comparison : equal;
    import std.algorithm.iteration : map;
    import std.conv : to;

    foreach (String; AliasSeq!(string, wstring, const(dchar)[]))
    {
        // pattern supplied as a plain string
        auto haystack = "blah-bleh".to!String();
        auto needle = "bl[ae]h".to!String();
        auto firstHit = matchFirst(haystack, needle);
        assert(firstHit.equal(["blah".to!String()]));
        auto allHits = matchAll(haystack, needle);
        assert(allHits.equal!((a,b) => a.equal(b))
            ([["blah".to!String()], ["bleh".to!String()]]));

        // run-time regex built from an array of patterns
        auto dates = "1/03/12 - 3/03/12".to!String();
        auto dateRe = regex([r"(\d+)/(\d+)/(\d+)".to!String(), "abc".to!String]);
        auto firstDate = matchFirst(dates, dateRe);
        assert(firstDate.equal(["1/03/12", "1", "03", "12"].map!(to!String)()));
        auto allDates = matchAll(dates, dateRe);
        assert(allDates.front.equal(firstDate));
        allDates.popFront();
        assert(allDates.front.equal(["3/03/12", "3", "03", "12"].map!(to!String)()));
        firstDate.popFrontN(3);
        assert(firstDate.equal(["12".to!String()]));

        // compile-time regex with named groups
        auto fractionRe = ctRegex!(`(?P<Quot>\d+)/(?P<Denom>\d+)`.to!String());
        auto expr = "2 + 34/56 - 6/1".to!String();
        auto firstFraction = matchFirst(expr, fractionRe);
        assert(firstFraction.equal(["34/56", "34", "56"].map!(to!String)()));
        assert(firstFraction["Quot"] == "34".to!String());
        assert(firstFraction["Denom"] == "56".to!String());

        auto allFractions = matchAll(expr, fractionRe);
        assert(allFractions.front.equal(firstFraction));
        allFractions.popFront();
        assert(allFractions.front.equal(["6/1", "6", "1"].map!(to!String)()));
    }
}
1102*760c2415Smrg 
/++
    Start matching of $(D input) to regex pattern $(D re),
    using traditional $(LINK2 https://en.wikipedia.org/wiki/Backtracking,
    backtracking) matching scheme.

    The use of this function is $(RED discouraged) - use either of
    $(LREF matchAll) or $(LREF matchFirst).

    Delegating the kind of operation
    to "g" flag is soon to be phased out along with the
    ability to choose the exact matching scheme. The choice of
    matching scheme to use depends highly on the pattern kind and
    can be done automatically on a case by case basis.

    Returns: a $(D RegexMatch) object holding engine
    state after first match.

+/
public auto bmatch(R, RegEx)(R input, RegEx re)
if (isSomeString!R && is(RegEx == Regex!(BasicElementOf!R)))
{
    import std.regex.internal.backtracking : BacktrackingMatcher;
    alias Str = Unqual!(typeof(input));
    alias Matcher = RegexMatch!(Str, BacktrackingMatcher!false);
    return Matcher(input, re);
}
1127*760c2415Smrg 
///ditto
public auto bmatch(R, String)(R input, String re)
if (isSomeString!R && isSomeString!String)
{
    import std.regex.internal.backtracking : BacktrackingMatcher;
    alias Str = Unqual!(typeof(input));
    alias Matcher = RegexMatch!(Str, BacktrackingMatcher!false);
    // the pattern string is compiled with regex() on every call
    return Matcher(input, regex(re));
}
1135*760c2415Smrg 
// StaticRegex overload: uses BacktrackingMatcher!true rather than BacktrackingMatcher!false
public auto bmatch(R, RegEx)(R input, RegEx re)
if (isSomeString!R && is(RegEx == StaticRegex!(BasicElementOf!R)))
{
    import std.regex.internal.backtracking : BacktrackingMatcher;
    alias Str = Unqual!(typeof(input));
    alias Matcher = RegexMatch!(Str, BacktrackingMatcher!true);
    return Matcher(input, re);
}
1142*760c2415Smrg 
// Produces the replacement string from format, using captures for substitution.
//
// Everything in format is copied to sink verbatim except these '$' sequences:
//   $N       submatch number N (decimal)
//   ${name}  named submatch; name may consist of ASCII letters, digits and '_'
//   $&       the whole match (submatch 0)
//   $`       captures.pre
//   $'       captures.post
//   $$       a literal '$'
// A '$' followed by any other character is dropped and that character is
// then copied as ordinary text.
//
// Params:
//   format = replacement template
//   captures = source of submatches; must offer length, indexing by number
//              and by name, pre and post
//   sink = output range that receives the expanded text
//   ignoreBadSubs = if true, an out-of-range $N expands to nothing
//                   instead of throwing
//
// Throws: via enforce on an out-of-range $N (unless ignoreBadSubs), on a
// malformed or empty ${...}, and on a '$' at the very end of format.
package void replaceFmt(R, Capt, OutR)
    (R format, Capt captures, OutR sink, bool ignoreBadSubs = false)
if (isOutputRange!(OutR, ElementEncodingType!R[]) &&
    isOutputRange!(OutR, ElementEncodingType!(Capt.String)[]))
{
    import std.algorithm.searching : find;
    import std.ascii : isAlphaNum, isDigit;
    import std.conv : text, parse;
    import std.exception : enforce;
    enum State { Normal, Dollar }
    auto state = State.Normal;
    size_t offset;
L_Replace_Loop:
    while (!format.empty)
        final switch (state)
        {
        case State.Normal:
            // copy literal text up to the next '$' (or to the end of format)
            for (offset = 0; offset < format.length; offset++)//no decoding
            {
                if (format[offset] == '$')
                {
                    state = State.Dollar;
                    sink.put(format[0 .. offset]);
                    format = format[offset+1 .. $];//ditto
                    continue L_Replace_Loop;
                }
            }
            sink.put(format[0 .. offset]);
            format = format[offset .. $];
            break;
        case State.Dollar:
            // format now starts right after a '$'
            if (isDigit(format[0]))
            {
                // parse consumes the digits from format
                uint digit = parse!uint(format);
                enforce(ignoreBadSubs || digit < captures.length, text("invalid submatch number ", digit));
                if (digit < captures.length)
                    sink.put(captures[digit]);
            }
            else if (format[0] == '{')
            {
                // Scan the group name. Digits and '_' are accepted in addition
                // to letters so that groups such as `var_1` can be referenced;
                // a letters-only scan rejects those with "no matching '}'".
                auto x = find!(a => !(isAlphaNum(a) || a == '_'))(format[1..$]);
                enforce(!x.empty && x[0] == '}', "no matching '}' in replacement format");
                auto name = format[1 .. $ - x.length];
                format = x[1..$];
                enforce(!name.empty, "invalid name in ${...} replacement format");
                sink.put(captures[name]);
            }
            else if (format[0] == '&')
            {
                sink.put(captures[0]);
                format = format[1 .. $];
            }
            else if (format[0] == '`')
            {
                sink.put(captures.pre);
                format = format[1 .. $];
            }
            else if (format[0] == '\'')
            {
                sink.put(captures.post);
                format = format[1 .. $];
            }
            else if (format[0] == '$')
            {
                sink.put(format[0 .. 1]);
                format = format[1 .. $];
            }
            // any other character: nothing is consumed here, it is copied
            // as plain text by the Normal state on the next iteration
            state = State.Normal;
            break;
        }
    // still in Dollar state means format ended with a dangling '$'
    enforce(state == State.Normal, "invalid format string in regex replace");
}
1216*760c2415Smrg 
/++
    Construct a new string from $(D input) by replacing the first match with
    a string generated from it according to the $(D format) specifier.

    To replace all matches use $(LREF replaceAll).

    Params:
    input = string to search
    re = compiled regular expression to use
    format = _format string to generate replacements from,
    see $(S_LINK Replace _format string, the _format string).

    Returns:
    A string of the same type with the first match (if any) replaced.
    If no match is found returns the input string itself.
+/
public R replaceFirst(R, C, RegEx)(R input, RegEx re, const(C)[] format)
if (isSomeString!R && is(C : dchar) && isRegexFor!(RegEx, R))
{
    // the callback expands format for the match and writes it to the sink
    return replaceFirstWith!((caps, sink) { replaceFmt(format, caps, sink); })(input, re);
}
1238*760c2415Smrg 
///
@system unittest
{
    // only the first "n" is replaced; $& stands for the matched text
    auto result = replaceFirst("noon", regex("n"), "[$&]");
    assert(result == "[n]oon");
}
1244*760c2415Smrg 
/++
    This is a general replacement tool that constructs a new string by replacing
    a match of pattern $(D re) in the $(D input). Unlike the other overload
    there is no format string; instead captures are passed
    to a user-defined functor $(D fun) that returns a new string
    to use as replacement.

    This version replaces the first match in $(D input),
    see $(LREF replaceAll) to replace all of the matches.

    Returns:
    A new string of the same type as $(D input) with the first match
    replaced by the return value of $(D fun). If no match is found
    returns the $(D input) itself.
+/
public R replaceFirst(alias fun, R, RegEx)(R input, RegEx re)
if (isSomeString!R && isRegexFor!(RegEx, R))
{
    // the callback writes whatever fun returns for the match to the sink
    return replaceFirstWith!((caps, sink) { sink.put(fun(caps)); })(input, re);
}
1265*760c2415Smrg 
///
@system unittest
{
    import std.conv : to;
    string list = "#21 out of 46";
    auto number = regex(`[0-9]+`);
    // increment the first number found, leave the rest of the text alone
    string newList = list.replaceFirst!(cap => to!string(to!int(cap.hit) + 1))(number);
    assert(newList == "#22 out of 46");
}
1275*760c2415Smrg 
/++
    A variation on $(LREF replaceFirst) that, instead of allocating a new
    string on each call, writes the result piece-wise to the $(D sink).
    In particular this enables efficient incremental construction
    of a final output.

    As with the $(LREF replaceFirst) family of functions there is an overload
    for substitution guided by the $(D format) string
    and one that takes a user-defined callback.
+/
public @trusted void replaceFirstInto(Sink, R, C, RegEx)
        (ref Sink sink, R input, RegEx re, const(C)[] format)
if (isOutputRange!(Sink, dchar) && isSomeString!R
    && is(C : dchar) && isRegexFor!(RegEx, R))
{
    // Locate the first match up front, then let the shared helper write
    // the result into the sink using the format-expanding callback.
    auto firstMatch = matchFirst(input, re);
    replaceCapturesInto!(
        (captures, output) => replaceFmt(format, captures, output)
    )(sink, input, firstMatch);
}
1293*760c2415Smrg 
///ditto
public @trusted void replaceFirstInto(alias fun, Sink, R, RegEx)
    (Sink sink, R input, RegEx re)
if (isOutputRange!(Sink, dchar) && isSomeString!R && isRegexFor!(RegEx, R))
{
    // The user callback is handed to the shared helper as is.
    auto firstMatch = matchFirst(input, re);
    replaceCapturesInto!fun(sink, input, firstMatch);
}
1301*760c2415Smrg 
///
@system unittest
{
    import std.array;
    string m1 = "first message\n";
    string m2 = "second message\n";
    auto wordBeforeMessage = regex(`([a-z]+) message`);
    auto result = appender!string();
    // substitution guided by a format string
    replaceFirstInto(result, m1, wordBeforeMessage, "$1");
    // equivalent of the above with user-defined callback
    replaceFirstInto!(cap => cap[1])(result, m2, wordBeforeMessage);
    // both calls appended to the same sink
    assert(result.data == "first\nsecond\n");
}
1314*760c2415Smrg 
//examples for replaceFirst
@system unittest
{
    import std.array;
    import std.conv;

    // callback flavour: increment the first number found
    string list = "#21 out of 46";
    auto number = regex(`[0-9]+`);
    string newList = list.replaceFirst!(cap => to!string(to!int(cap.hit) + 1))(number);
    assert(newList == "#22 out of 46");

    // sink flavour: both overloads append to the same appender
    string m1 = "first message\n";
    string m2 = "second message\n";
    auto wordBeforeMessage = regex(`([a-z]+) message`);
    auto result = appender!string();
    replaceFirstInto(result, m1, wordBeforeMessage, "$1");
    //equivalent of the above with user-defined callback
    replaceFirstInto!(cap => cap[1])(result, m2, wordBeforeMessage);
    assert(result.data == "first\nsecond\n");
}
1332*760c2415Smrg 
/++
    Builds a new string out of $(D input) in which every fragment that
    matches the pattern $(D re) is substituted by text generated
    from that match according to the $(D format) specifier.

    To replace only the first match use $(LREF replaceFirst).

    Params:
    input = string to search
    re = compiled regular expression to use
    format = _format string to generate replacements from,
    see $(S_LINK Replace _format string, the _format string).

    Returns:
    A string of the same type as $(D input) with all
    of the matches (if any) replaced.
    If no match is found returns the input string itself.
+/
public @trusted R replaceAll(R, C, RegEx)(R input, RegEx re, const(C)[] format)
if (isSomeString!R && is(C : dchar) && isRegexFor!(RegEx, R))
{
    // The callback expands the format string against the captures of
    // a match and writes the expansion to the output it is handed.
    return replaceAllWith!(
        (captures, output) => replaceFmt(format, captures, output)
    )(input, re);
}
1356*760c2415Smrg 
///
@system unittest
{
    // insert comma as thousands delimiter
    auto re = regex(r"(?<=\d)(?=(\d\d\d)+\b)","g");
    auto grouped = replaceAll("12000 + 42100 = 54100", re, ",");
    assert(grouped == "12,000 + 42,100 = 54,100");
}
1364*760c2415Smrg 
/++
    A general replacement tool that constructs a new string by substituting
    matches of pattern $(D re) in the $(D input). Unlike the overload that
    takes a format string, the captures are passed to a user-defined functor
    $(D fun) whose return value is used as the replacement.

    This version replaces all of the matches found in $(D input),
    see $(LREF replaceFirst) to replace the first match only.

    Returns:
    A new string of the same type as $(D input) with all matches
    replaced by return values of $(D fun). If no match is found
    returns the $(D input) itself.

    Params:
    input = string to search
    re = compiled regular expression
    fun = delegate to use
+/
public @trusted R replaceAll(alias fun, R, RegEx)(R input, RegEx re)
if (isSomeString!R && isRegexFor!(RegEx, R))
{
    // Adapt the user functor: whatever it returns for the captures is
    // written to the output.
    return replaceAllWith!(
        (captures, output) => output.put(fun(captures))
    )(input, re);
}
1390*760c2415Smrg 
///
@system unittest
{
    // replacement callback: the matched text in upper case
    string upcase(Captures!(string) m)
    {
        import std.string : toUpper;
        return m.hit.toUpper();
    }
    // Capitalize the letters 'a' and 'r':
    auto lettersAR = regex("[ar]");
    auto s = replaceAll!(upcase)("Strap a rocket engine on a chicken.", lettersAR);
    assert(s == "StRAp A Rocket engine on A chicken.");
}
1404*760c2415Smrg 
/++
    A variation on $(LREF replaceAll) that, instead of allocating a new
    string on each call, writes the result piece-wise to the $(D sink).
    In particular this enables efficient incremental construction
    of a final output.

    As with $(LREF replaceAll) there are 2 overloads - one with a format string,
    the other one with a user-defined functor.
+/
public @trusted void replaceAllInto(Sink, R, C, RegEx)
        (Sink sink, R input, RegEx re, const(C)[] format)
if (isOutputRange!(Sink, dchar) && isSomeString!R
    && is(C : dchar) && isRegexFor!(RegEx, R))
{
    // Collect the matches first, then let the shared helper write the
    // result into the sink using the format-expanding callback.
    auto allMatches = matchAll(input, re);
    replaceMatchesInto!(
        (captures, output) => replaceFmt(format, captures, output)
    )(sink, input, allMatches);
}
1421*760c2415Smrg 
///ditto
public @trusted void replaceAllInto(alias fun, Sink, R, RegEx)
        (Sink sink, R input, RegEx re)
if (isOutputRange!(Sink, dchar) && isSomeString!R && isRegexFor!(RegEx, R))
{
    // The user callback is handed to the shared helper as is.
    auto allMatches = matchAll(input, re);
    replaceMatchesInto!fun(sink, input, allMatches);
}
1429*760c2415Smrg 
///
@system unittest
{
    // insert comma as thousands delimiter in fifty randomly produced big numbers
    import std.array, std.conv, std.random, std.range;
    static re = regex(`(?<=\d)(?=(\d\d\d)+\b)`, "g");
    auto sink = appender!(char [])();
    enum ulong min = 10UL ^^ 10, max = 10UL ^^ 19;
    foreach (i; 0 .. 50)
    {
        // reuse the same buffer for every number
        sink.clear();
        auto digits = text(uniform(min, max));
        replaceAllInto(sink, digits, re, ",");
        // counting from the end, every fourth character has to be a comma
        auto commaPositions = iota(sink.data.length - 4, 0, -4);
        foreach (pos; commaPositions)
            assert(sink.data[pos] == ',');
    }
}
1446*760c2415Smrg 
// exercise all of the replace APIs
@system unittest
{
    import std.array : appender;
    import std.conv;
    // try and check first/all simple substitution
    // for every string width, in both immutable and mutable flavours
    foreach (S; AliasSeq!(string, wstring, dstring, char[], wchar[], dchar[]))
    {
        // inputs
        S src1 = "curt trial".to!S();
        S src2 = "round dome".to!S();
        // expected results of replacing the first match only
        S first1 = "court trial".to!S();
        S first2 = "hound dome".to!S();
        // expected results of replacing every match
        S every1 = "court trial".to!S();
        S every2 = "hound home".to!S();
        auto re1 = regex("curt".to!S());
        auto re2 = regex("[dr]o".to!S());

        // format-string overloads
        assert(replaceFirst(src1, re1, "court") == first1);
        assert(replaceFirst(src2, re2, "ho") == first2);
        assert(replaceAll(src1, re1, "court") == every1);
        assert(replaceAll(src2, re2, "ho") == every2);

        // callback overloads
        auto viaFunFirst = replaceFirst!(cap => cap[0][0]~"o".to!S()~cap[0][1..$])(src1, re1);
        assert(viaFunFirst == first1);
        auto viaFunFirst2 = replaceFirst!(cap => "ho".to!S())(src2, re2);
        assert(viaFunFirst2 == first2);
        auto viaFunAll = replaceAll!(cap => cap[0][0]~"o".to!S()~cap[0][1..$])(src1, re1);
        assert(viaFunAll == every1);
        auto viaFunAll2 = replaceAll!(cap => "ho".to!S())(src2, re2);
        assert(viaFunAll2 == every2);

        // sink overloads: every call appends to what is already there
        auto sink = appender!S();
        S expected;
        replaceFirstInto(sink, src1, re1, "court");
        expected ~= first1;
        assert(sink.data == expected);
        replaceFirstInto(sink, src2, re2, "ho");
        expected ~= first2;
        assert(sink.data == expected);
        replaceAllInto(sink, src1, re1, "court");
        expected ~= every1;
        assert(sink.data == expected);
        replaceAllInto(sink, src2, re2, "ho");
        expected ~= every2;
        assert(sink.data == expected);
    }
}
1487*760c2415Smrg 
/++
    Old API for replacement, operation depends on flags of pattern $(D re).
    With "g" flag it performs the equivalent of $(LREF replaceAll) otherwise it
    works the same as $(LREF replaceFirst).

    The use of this function is $(RED discouraged), please use $(LREF replaceAll)
    or $(LREF replaceFirst) explicitly.

    Params:
    scheme = matching function used to find the matches,
    $(LREF match) by default
    input = string to search
    re = compiled regular expression to use
    format = _format string to generate replacements from,
    see $(S_LINK Replace _format string, the _format string).
+/
public R replace(alias scheme = match, R, C, RegEx)(R input, RegEx re, const(C)[] format)
if (isSomeString!R && isRegexFor!(RegEx, R))
{
    // Forward the caller-selected matching scheme; it used to be ignored
    // in favour of a hard-coded `match`, which made the parameter a no-op.
    return replaceAllWith!((m, sink) => replaceFmt(format, m, sink), scheme)(input, re);
}
1501*760c2415Smrg 
///ditto
public R replace(alias fun, R, RegEx)(R input, RegEx re)
if (isSomeString!R && isRegexFor!(RegEx, R))
{
    // Old API: ride on the shared helper, with `match` as the matching
    // method in place of the helper's default.
    alias viaMatch = replaceAllWith!(fun, match);
    return viaMatch(input, re);
}
1508*760c2415Smrg 
/**
Splits a string `r` using a regular expression `pat` as a separator.

Params:
    keepSeparators = flag to specify if the matches should be in the resulting range
    r = the string to split
    pat = the pattern to split on
Returns:
    A lazy range of strings
*/
public struct Splitter(Flag!"keepSeparators" keepSeparators = No.keepSeparators, Range, alias RegEx = Regex)
if (isSomeString!Range && isRegexFor!(RegEx, Range))
{
private:
    // the whole string being split
    Range _input;
    // index into _input where the current piece starts
    size_t _offset;
    alias Rx = typeof(match(Range.init,RegEx.init));
    // separator matches that have not been consumed yet
    Rx _match;

    // true while front is a separator rather than the text before it
    static if (keepSeparators) bool onMatch = false;

    @trusted this(Range input, RegEx separator)
    {//@@@BUG@@@ generated opAssign of RegexMatch is not @trusted
        _input = input;
        // set the global flag on the local copy of the pattern
        separator.flags |= RegexOption.global;
        if (_input.empty)
        {
            //nothing to match at all: _offset > 0 makes the range empty
            _offset = 1;
            return;
        }

        _match = Rx(_input, separator);

        // nothing precedes the first match: step straight onto it
        static if (keepSeparators)
            if (_match.pre.empty)
                popFront();
    }

public:
    auto ref opSlice()
    {
        return save;
    }

    ///Forward range primitives.
    @property Range front()
    {
        import std.algorithm.comparison : min;

        assert(!empty && _offset <= _match.pre.length
                && _match.pre.length <= _input.length);

        static if (keepSeparators)
        {
            // currently positioned on a separator: hand out the match itself
            if (onMatch)
                return _match.hit();
        }

        // the text between _offset and the start of the current match
        return _input[_offset .. min($, _match.pre.length)];
    }

    ///ditto
    @property bool empty()
    {
        static if (keepSeparators)
        {
            return _offset >= _input.length;
        }
        else
        {
            return _offset > _input.length;
        }
    }

    ///ditto
    void popFront()
    {
        assert(!empty);
        if (_match.empty)
        {
            //no more separators: move _offset past the end, work is done
            _offset = _input.length + 1;
            return;
        }

        static if (keepSeparators)
        {
            if (onMatch)
            {
                //leave the separator behind and advance to the next match
                _offset += _match.hit.length;
                _match.popFront();
            }
            else
            {
                //move onto the separator that follows the current piece
                _offset = _match.pre.length;
            }

            // pieces and separators alternate
            onMatch = !onMatch;
        }
        else
        {
            //skip past the separator
            _offset = _match.pre.length + _match.hit.length;
            _match.popFront();
        }
    }

    ///ditto
    @property auto save()
    {
        return this;
    }
}
1626*760c2415Smrg 
/// ditto
public Splitter!(keepSeparators, Range, RegEx) splitter(
    Flag!"keepSeparators" keepSeparators = No.keepSeparators, Range, RegEx)(Range r, RegEx pat)
if (is(BasicElementOf!Range : dchar) && isRegexFor!(RegEx, Range))
{
    // construct the declared return type directly from the arguments
    return typeof(return)(r, pat);
}
1635*760c2415Smrg 
///
@system unittest
{
    import std.algorithm.comparison : equal;
    auto s1 = ", abc, de,  fg, hi, ";
    auto pieces = splitter(s1, regex(", *"));
    // separators at both ends produce empty pieces
    assert(pieces.equal(["", "abc", "de", "fg", "hi", ""]));
}
1644*760c2415Smrg 
/// Split on a pattern, but keep the matches in the resulting range
@system unittest
{
    import std.algorithm.comparison : equal;
    import std.typecons : Yes;

    auto pattern = regex(`([\.,])`);

    // separators show up between the pieces
    auto dateParts = "2003.04.05".splitter!(Yes.keepSeparators)(pattern);
    assert(dateParts.equal(["2003", ".", "04", ".", "05"]));

    // a separator at the very start comes first
    auto listParts = ",1,2,3".splitter!(Yes.keepSeparators)(pattern);
    assert(listParts.equal([",", "1", ",", "2", ",", "3"]));
}
1661*760c2415Smrg 
///An eager version of $(D splitter) that creates an array with split slices of $(D input).
public @trusted String[] split(String, RegEx)(String input, RegEx rx)
if (isSomeString!String  && isRegexFor!(RegEx, String))
{
    import std.array : appender;
    auto collected = appender!(String[])();
    // drain the lazy splitter, storing every slice it yields
    for (auto pieces = splitter(input, rx); !pieces.empty; pieces.popFront())
        collected.put(pieces.front);
    return collected.data;
}
1672*760c2415Smrg 
// Public name for the exception type declared in std.regex.internal.ir.
///Exception object thrown in case of errors during regex compilation.
public alias RegexException = std.regex.internal.ir.RegexException;
1675*760c2415Smrg 
/++
  A range that lazily produces a string output escaped
  to be used inside of a regular expression.
+/
auto escaper(Range)(Range r)
{
    import std.algorithm.searching : find;
    static immutable escapables = [Escapables];
    static struct Escaper // template to deduce attributes
    {
        Range r;
        // true when a backslash is due before the current element of r
        bool escaped;

        @property ElementType!Range front()
        {
            if (!escaped)
                return r.front;
            return '\\';
        }

        @property bool empty()
        {
            return r.empty;
        }

        void popFront()
        {
            if (escaped)
            {
                // the backslash is consumed, the element itself comes next
                escaped = false;
                return;
            }
            r.popFront();
            if (r.empty)
                return;
            // schedule a backslash if the new element is an escapable
            if (!escapables.find(r.front).empty)
                escaped = true;
        }

        @property auto save()
        {
            return Escaper(r.save, escaped);
        }
    }

    // the very first element may need a backslash in front of it as well
    bool startsEscaped = false;
    if (!r.empty)
        startsEscaped = !escapables.find(r.front).empty;
    return Escaper(r, startsEscaped);
}
1714*760c2415Smrg 
///
@system unittest
{
    import std.algorithm.comparison;
    import std.regex;
    string s = `This is {unfriendly} to *regex*`;
    // braces and asterisks get a backslash in front of them
    auto escapedText = s.escaper;
    assert(escapedText.equal(`This is \{unfriendly\} to \*regex\*`));
}
1723*760c2415Smrg 
// escaper over every string width, including the empty input
@system unittest
{
    import std.algorithm.comparison;
    import std.conv;
    foreach (S; AliasSeq!(string, wstring, dstring))
    {
      auto s = "^".to!S;
      assert(s.escaper.equal(`\^`));
      // convert the empty input as well: a bare "" literal is always a
      // `string`, so the wide variants were never exercised here
      auto s2 = "".to!S;
      assert(s2.escaper.equal(""));
    }
}
1736