1 /++
2   $(LINK2 https://en.wikipedia.org/wiki/Regular_expression, Regular expressions)
3   are a commonly used method of pattern matching
4   on strings, with $(I regex) being a catchy word for a pattern in this domain
5   specific language. Typical problems usually solved by regular expressions
6   include validation of user input and the ubiquitous find $(AMP) replace
7   in text processing utilities.
8 
9 $(SCRIPT inhibitQuickIndex = 1;)
10 $(BOOKTABLE,
11 $(TR $(TH Category) $(TH Functions))
12 $(TR $(TD Matching) $(TD
13         $(LREF bmatch)
14         $(LREF match)
15         $(LREF matchAll)
16         $(LREF matchFirst)
17 ))
18 $(TR $(TD Building) $(TD
19         $(LREF ctRegex)
20         $(LREF escaper)
21         $(LREF _regex)
22 ))
23 $(TR $(TD Replace) $(TD
24         $(LREF replace)
25         $(LREF replaceAll)
26         $(LREF replaceAllInto)
27         $(LREF replaceFirst)
28         $(LREF replaceFirstInto)
29 ))
30 $(TR $(TD Split) $(TD
31         $(LREF split)
32         $(LREF splitter)
33 ))
34 $(TR $(TD Objects) $(TD
35         $(LREF Captures)
36         $(LREF Regex)
37         $(LREF RegexException)
38         $(LREF RegexMatch)
39         $(LREF Splitter)
40         $(LREF StaticRegex)
41 ))
42 )
43 
44   $(SECTION Synopsis)
45   ---
46   import std.regex;
47   import std.stdio;
48   void main()
49   {
50       // Print out all possible dd/mm/yy(yy) dates found in user input.
51       auto r = regex(r"\b[0-9][0-9]?/[0-9][0-9]?/[0-9][0-9](?:[0-9][0-9])?\b");
52       foreach (line; stdin.byLine)
53       {
54         // matchAll() returns a range that can be iterated
55         // to get all subsequent matches.
56         foreach (c; matchAll(line, r))
57             writeln(c.hit);
58       }
59   }
60   ...
61 
62   // Create a static regex at compile-time, which contains fast native code.
63   auto ctr = ctRegex!(`^.*/([^/]+)/?$`);
64 
65   // It works just like a normal regex:
66   auto c2 = matchFirst("foo/bar", ctr);   // First match found here, if any
67   assert(!c2.empty);   // Be sure to check if there is a match before examining contents!
68   assert(c2[1] == "bar");   // Captures is a range of submatches: 0 = full match.
69 
70   ...
71   // multi-pattern regex
  auto multi = regex([`(\d+),\d+`,`([a-z]+):(\d+)`]);
73   auto m = "abc:43 12,34".matchAll(multi);
74   assert(m.front.whichPattern == 2);
75   assert(m.front[1] == "abc");
76   assert(m.front[2] == "43");
77   m.popFront();
78   assert(m.front.whichPattern == 1);
79   assert(m.front[1] == "12");
80   ...
81 
82   // The result of the `matchAll/matchFirst` is directly testable with if/assert/while.
83   // e.g. test if a string consists of letters:
84   assert(matchFirst("Letter", `^\p{L}+$`));
85   ---
86 
87   $(SECTION Syntax and general information)
88   The general usage guideline is to keep regex complexity on the side of simplicity,
89   as its capabilities reside in purely character-level manipulation.
90   As such it's ill-suited for tasks involving higher level invariants
91   like matching an integer number $(U bounded) in an [a,b] interval.
  Checks of this sort are better addressed by additional post-processing.
93 
94   The basic syntax shouldn't surprise experienced users of regular expressions.
95   For an introduction to $(D std.regex) see a
96   $(HTTP dlang.org/regular-expression.html, short tour) of the module API
97   and its abilities.
98 
99   There are other web resources on regular expressions to help newcomers,
100   and a good $(HTTP www.regular-expressions.info, reference with tutorial)
101   can easily be found.
102 
103   This library uses a remarkably common ECMAScript syntax flavor
104   with the following extensions:
105   $(UL
106     $(LI Named subexpressions, with Python syntax. )
    $(LI Unicode properties such as Scripts, Blocks and common binary properties e.g. Alphabetic, White_Space, Hex_Digit etc.)
    $(LI Arbitrary length and complexity lookbehind, including lookahead in lookbehind and vice-versa.)
109   )
110 
111   $(REG_START Pattern syntax )
112   $(I std.regex operates on codepoint level,
113     'character' in this table denotes a single Unicode codepoint.)
114   $(REG_TABLE
115     $(REG_TITLE Pattern element, Semantics )
116     $(REG_TITLE Atoms, Match single characters )
117     $(REG_ROW any character except [{|*+?()^$, Matches the character itself. )
118     $(REG_ROW ., In single line mode matches any character.
119       Otherwise it matches any character except '\n' and '\r'. )
120     $(REG_ROW [class], Matches a single character
121       that belongs to this character class. )
122     $(REG_ROW [^class], Matches a single character that
123       does $(U not) belong to this character class.)
124     $(REG_ROW \cC, Matches the control character corresponding to letter C)
125     $(REG_ROW \xXX, Matches a character with hexadecimal value of XX. )
126     $(REG_ROW \uXXXX, Matches a character  with hexadecimal value of XXXX. )
127     $(REG_ROW \U00YYYYYY, Matches a character with hexadecimal value of YYYYYY. )
128     $(REG_ROW \f, Matches a formfeed character. )
129     $(REG_ROW \n, Matches a linefeed character. )
130     $(REG_ROW \r, Matches a carriage return character. )
131     $(REG_ROW \t, Matches a tab character. )
132     $(REG_ROW \v, Matches a vertical tab character. )
133     $(REG_ROW \d, Matches any Unicode digit. )
134     $(REG_ROW \D, Matches any character except Unicode digits. )
135     $(REG_ROW \w, Matches any word character (note: this includes numbers).)
136     $(REG_ROW \W, Matches any non-word character.)
137     $(REG_ROW \s, Matches whitespace, same as \p{White_Space}.)
138     $(REG_ROW \S, Matches any character except those recognized as $(I \s ). )
139     $(REG_ROW \\, Matches \ character. )
140     $(REG_ROW \c where c is one of [|*+?(), Matches the character c itself. )
141     $(REG_ROW \p{PropertyName}, Matches a character that belongs
142         to the Unicode PropertyName set.
143       Single letter abbreviations can be used without surrounding {,}. )
144     $(REG_ROW  \P{PropertyName}, Matches a character that does not belong
145         to the Unicode PropertyName set.
146       Single letter abbreviations can be used without surrounding {,}. )
147     $(REG_ROW \p{InBasicLatin}, Matches any character that is part of
148           the BasicLatin Unicode $(U block).)
149     $(REG_ROW \P{InBasicLatin}, Matches any character except ones in
150           the BasicLatin Unicode $(U block).)
151     $(REG_ROW \p{Cyrillic}, Matches any character that is part of
152         Cyrillic $(U script).)
153     $(REG_ROW \P{Cyrillic}, Matches any character except ones in
154         Cyrillic $(U script).)
155     $(REG_TITLE Quantifiers, Specify repetition of other elements)
156     $(REG_ROW *, Matches previous character/subexpression 0 or more times.
157       Greedy version - tries as many times as possible.)
158     $(REG_ROW *?, Matches previous character/subexpression 0 or more times.
159       Lazy version  - stops as early as possible.)
160     $(REG_ROW +, Matches previous character/subexpression 1 or more times.
161       Greedy version - tries as many times as possible.)
162     $(REG_ROW +?, Matches previous character/subexpression 1 or more times.
163       Lazy version  - stops as early as possible.)
164     $(REG_ROW {n}, Matches previous character/subexpression exactly n times. )
165     $(REG_ROW {n$(COMMA)}, Matches previous character/subexpression n times or more.
166       Greedy version - tries as many times as possible. )
167     $(REG_ROW {n$(COMMA)}?, Matches previous character/subexpression n times or more.
168       Lazy version - stops as early as possible.)
169     $(REG_ROW {n$(COMMA)m}, Matches previous character/subexpression n to m times.
170       Greedy version - tries as many times as possible, but no more than m times. )
171     $(REG_ROW {n$(COMMA)m}?, Matches previous character/subexpression n to m times.
      Lazy version - stops as early as possible, but no less than n times.)
173     $(REG_TITLE Other, Subexpressions $(AMP) alternations )
174     $(REG_ROW (regex),  Matches subexpression regex,
175       saving matched portion of text for later retrieval. )
176     $(REG_ROW (?#comment), An inline comment that is ignored while matching.)
177     $(REG_ROW (?:regex), Matches subexpression regex,
178       $(U not) saving matched portion of text. Useful to speed up matching. )
179     $(REG_ROW A|B, Matches subexpression A, or failing that, matches B. )
180     $(REG_ROW (?P$(LT)name$(GT)regex), Matches named subexpression
181         regex labeling it with name 'name'.
182         When referring to a matched portion of text,
183         names work like aliases in addition to direct numbers.
184      )
185     $(REG_TITLE Assertions, Match position rather than character )
    $(REG_ROW ^, Matches at the beginning of input or line (in multiline mode).)
187     $(REG_ROW $, Matches at the end of input or line (in multiline mode). )
188     $(REG_ROW \b, Matches at word boundary. )
189     $(REG_ROW \B, Matches when $(U not) at word boundary. )
190     $(REG_ROW (?=regex), Zero-width lookahead assertion.
191         Matches at a point where the subexpression
192         regex could be matched starting from the current position.
193       )
194     $(REG_ROW (?!regex), Zero-width negative lookahead assertion.
195         Matches at a point where the subexpression
196         regex could $(U not) be matched starting from the current position.
197       )
198     $(REG_ROW (?<=regex), Zero-width lookbehind assertion. Matches at a point
199         where the subexpression regex could be matched ending
200         at the current position (matching goes backwards).
201       )
202     $(REG_ROW  (?<!regex), Zero-width negative lookbehind assertion.
203       Matches at a point where the subexpression regex could $(U not)
204       be matched ending at the current position (matching goes backwards).
205      )
206   )
207 
208   $(REG_START Character classes )
209   $(REG_TABLE
210     $(REG_TITLE Pattern element, Semantics )
211     $(REG_ROW Any atom, Has the same meaning as outside of a character class.)
212     $(REG_ROW a-z, Includes characters a, b, c, ..., z. )
213     $(REG_ROW [a||b]$(COMMA) [a--b]$(COMMA) [a~~b]$(COMMA) [a$(AMP)$(AMP)b],
214      Where a, b are arbitrary classes, means union, set difference,
215      symmetric set difference, and intersection respectively.
216      $(I Any sequence of character class elements implicitly forms a union.) )
217   )
218 
219   $(REG_START Regex flags )
220   $(REG_TABLE
221     $(REG_TITLE Flag, Semantics )
222     $(REG_ROW g, Global regex, repeat over the whole input. )
223     $(REG_ROW i, Case insensitive matching. )
224     $(REG_ROW m, Multi-line mode, match ^, $ on start and end line separators
225        as well as start and end of input.)
226     $(REG_ROW s, Single-line mode, makes . match '\n' and '\r' as well. )
227     $(REG_ROW x, Free-form syntax, ignores whitespace in pattern,
228       useful for formatting complex regular expressions. )
229   )
230 
231   $(SECTION Unicode support)
232 
233   This library provides full Level 1 support* according to
234     $(HTTP unicode.org/reports/tr18/, UTS 18). Specifically:
235   $(UL
236     $(LI 1.1 Hex notation via any of \uxxxx, \U00YYYYYY, \xZZ.)
237     $(LI 1.2 Unicode properties.)
238     $(LI 1.3 Character classes with set operations.)
239     $(LI 1.4 Word boundaries use the full set of "word" characters.)
240     $(LI 1.5 Using simple casefolding to match case
241         insensitively across the full range of codepoints.)
242     $(LI 1.6 Respecting line breaks as any of
243         \u000A | \u000B | \u000C | \u000D | \u0085 | \u2028 | \u2029 | \u000D\u000A.)
244     $(LI 1.7 Operating on codepoint level.)
245   )
246   *With exception of point 1.1.1, as of yet, normalization of input
247     is expected to be enforced by user.
248 
249     $(SECTION Replace format string)
250 
251     A set of functions in this module that do the substitution rely
252     on a simple format to guide the process. In particular the table below
253     applies to the $(D format) argument of
254     $(LREF replaceFirst) and $(LREF replaceAll).
255 
256     The format string can reference parts of match using the following notation.
257     $(REG_TABLE
258         $(REG_TITLE Format specifier, Replaced by )
259         $(REG_ROW $$(AMP), the whole match. )
260         $(REG_ROW $(DOLLAR)$(BACKTICK), part of input $(I preceding) the match. )
261         $(REG_ROW $', part of input $(I following) the match. )
262         $(REG_ROW $$, '$' character. )
263         $(REG_ROW \c $(COMMA) where c is any character, the character c itself. )
264         $(REG_ROW \\, '\' character. )
265         $(REG_ROW $(DOLLAR)1 .. $(DOLLAR)99, submatch number 1 to 99 respectively. )
266     )
267 
268   $(SECTION Slicing and zero memory allocations orientation)
269 
270   All matches returned by pattern matching functionality in this library
271     are slices of the original input. The notable exception is the $(D replace)
272     family of functions  that generate a new string from the input.
273 
274     In cases where producing the replacement is the ultimate goal
275     $(LREF replaceFirstInto) and $(LREF replaceAllInto) could come in handy
276     as functions that  avoid allocations even for replacement.
277 
278     Copyright: Copyright Dmitry Olshansky, 2011-
279 
280   License: $(HTTP boost.org/LICENSE_1_0.txt, Boost License 1.0).
281 
282   Authors: Dmitry Olshansky,
283 
284     API and utility constructs are modeled after the original $(D std.regex)
285   by Walter Bright and Andrei Alexandrescu.
286 
287   Source: $(PHOBOSSRC std/_regex/_package.d)
288 
289 Macros:
290     REG_ROW = $(TR $(TD $(I $1 )) $(TD $+) )
291     REG_TITLE = $(TR $(TD $(B $1)) $(TD $(B $2)) )
292     REG_TABLE = <table border="1" cellspacing="0" cellpadding="5" > $0 </table>
293     REG_START = <h3><div align="center"> $0 </div></h3>
294     SECTION = <h3><a id="$1" href="#$1" class="anchor">$0</a></h3>
295     S_LINK = <a href="#$1">$+</a>
296  +/
297 module std.regex;
298 
299 import std.range.primitives, std.traits;
300 import std.regex.internal.ir;
301 import std.regex.internal.thompson; //TODO: get rid of this dependency
302 import std.typecons; // : Flag, Yes, No;
303 
/++
    A $(D Regex) object holds a regular expression pattern in compiled form.

    Instances of this object are constructed via calls to $(D regex).
    This is an intended form for caching and storage of frequently
    used regular expressions.

    This name is a public alias of the implementation type declared in
    $(D std.regex.internal.ir).

    Example:

    Test if this object doesn't contain any compiled pattern.
    ---
    Regex!char r;
    assert(r.empty);
    r = regex(""); // Note: "" is a valid regex pattern.
    assert(!r.empty);
    ---

    Getting a range of all the named captures in the regex.
    ----
    import std.range;
    import std.algorithm;

    auto re = regex(`(?P<name>\w+) = (?P<var>\d+)`);
    auto nc = re.namedCaptures;
    static assert(isRandomAccessRange!(typeof(nc)));
    assert(!nc.empty);
    assert(nc.length == 2);
    assert(nc.equal(["name", "var"]));
    assert(nc[0] == "name");
    assert(nc[1..$].equal(["var"]));
    ----
+/
public alias Regex(Char) = std.regex.internal.ir.Regex!(Char);
337 
/++
    A $(D StaticRegex) is a $(D Regex) object that contains D code specially
    generated at compile-time to speed up matching.

    Implicitly convertible to normal $(D Regex),
    however doing so will result in losing this additional capability.

    This name is a public alias of the implementation type declared in
    $(D std.regex.internal.ir).
+/
public alias StaticRegex(Char) = std.regex.internal.ir.StaticRegex!(Char);
346 
/++
    Compile regular expression pattern for the later execution.
    Returns: $(D Regex) object that works on inputs having
    the same character width as $(D pattern).

    Params:
    pattern = A single regular expression to match.
    patterns = An array of regular expression strings.
        The resulting `Regex` object will match any expression;
        use $(LREF whichPattern) to know which.
        Must contain at least one pattern.
    flags = The _attributes (g, i, m, s and x accepted)

    Throws: $(D RegexException) if there were any errors during compilation
    or if $(D patterns) is empty.
+/
@trusted public auto regex(S)(S[] patterns, const(char)[] flags="")
if (isSomeString!(S))
{
    import std.array : appender;
    import std.functional : memoize;
    enum cacheSize = 8; //TODO: invent nice interface to control regex caching

    // Reject an empty list up front: reading patterns[0] below would be an
    // out-of-bounds access, which is not even checked in release builds
    // because this function is @trusted.
    if (patterns.length == 0)
        throw new RegexException("regex: at least one pattern is required");

    S pat;
    if (patterns.length > 1)
    {
        // Join the alternatives as (?:p1\T1)\T1|(?:p2\T2)\T2|... where Tn is
        // a private-use code point that is unique for pattern number n.
        auto app = appender!S();
        foreach (i, p; patterns)
        {
            if (i != 0)
                app.put("|");
            app.put("(?:");
            app.put(p);
            // terminator for the pattern
            // to detect if the pattern unexpectedly ends
            app.put("\\");
            app.put(cast(dchar)(privateUseStart+i));
            app.put(")");
            // another one to return correct whichPattern
            // for all of potential alternatives in the patterns[i]
            app.put("\\");
            app.put(cast(dchar)(privateUseStart+i));
        }
        pat = app.data;
    }
    else
        pat = patterns[0];

    // memoize cannot be used during compile-time evaluation,
    // so CTFE compiles the pattern directly.
    if (__ctfe)
        return regexImpl(pat, flags);
    return memoize!(regexImpl!S, cacheSize)(pat, flags);
}
396 
///ditto
@trusted public auto regex(S)(S pattern, const(char)[] flags="")
if (isSomeString!(S))
{
    // Delegate to the multi-pattern overload with a one-element list.
    S[] single = [pattern];
    return regex(single, flags);
}
403 
///
@system unittest
{
    // A regex built from several patterns reports which one matched.
    auto rx = regex([`([a-z]+):(\d+)`, `(\d+),\d+`]);
    auto found = matchAll("abc:43 12,34", rx);

    // "abc:43" is produced by the first pattern
    assert(found.front.whichPattern == 1);
    assert(found.front[1] == "abc");
    assert(found.front[2] == "43");

    // "12,34" is produced by the second pattern
    found.popFront();
    assert(found.front.whichPattern == 2);
    assert(found.front[1] == "12");
}
417 
// Runs the parser over `pattern` with the given `flags` and returns the
// program it produced. This is the uncached worker behind `regex`.
public auto regexImpl(S)(S pattern, const(char)[] flags="")
if (isSomeString!(S))
{
    import std.regex.internal.parser : CodeGen, Parser;
    alias PatternParser = Parser!(Unqual!S, CodeGen);
    auto p = PatternParser(pattern, flags);
    return p.program;
}
426 
427 
// Implementation detail of ctRegex: compiles `pattern` during compilation
// and pairs the result with a matcher function built from generated code.
template ctRegexImpl(alias pattern, string flags=[])
{
    import std.regex.internal.backtracking, std.regex.internal.parser;
    // `enum` forces the pattern to be compiled through CTFE.
    enum r = regex(pattern, flags);
    alias Char = BasicElementOf!(typeof(pattern));
    // Source text produced by ctGenRegExCode for `r`; it is mixed in below.
    enum source = ctGenRegExCode(r);
    alias Matcher = BacktrackingMatcher!(true);
    // The function body consists entirely of the generated source.
    @trusted bool func(ref Matcher!Char matcher)
    {
        debug(std_regex_ctr) pragma(msg, source);
        mixin(source);
    }
    // The compiled regex together with a pointer to its matcher function.
    enum nr = StaticRegex!Char(r, &func);
}
442 
/++
    Compile regular expression using CTFE
    and generate optimized native machine code for matching it.

    Returns: StaticRegex object for faster matching.

    Params:
    pattern = Regular expression
    flags = The _attributes (g, i, m, s and x accepted)
+/
public enum ctRegex(alias pattern, alias flags=[]) = ctRegexImpl!(pattern, flags).nr;
454 
// True when RegEx is exactly Regex or StaticRegex instantiated with
// BasicElementOf!R, i.e. a regex usable on input of type R.
enum isRegexFor(RegEx, R) = is(RegEx == Regex!(BasicElementOf!R))
     || is(RegEx == StaticRegex!(BasicElementOf!R));
457 
458 
/++
    $(D Captures) object contains submatches captured during a call
    to $(D match) or iteration over $(D RegexMatch) range.

    First element of range is the whole match.
+/
@trusted public struct Captures(R, DIndex = size_t)
if (isSomeString!R)
{//@trusted because of union inside
    alias DataIndex = DIndex;
    alias String = R;
private:
    import std.conv : text;
    R _input;
    int _nMatch;
    enum smallString = 3;
    enum SMALL_MASK = 0x8000_0000, REF_MASK= 0x1FFF_FFFF;
    // Number of bytes reserved in front of heap-allocated groups for the
    // shared reference count. Large enough for a size_t and a multiple of
    // the alignment of Group!DataIndex, so the groups stay aligned.
    enum heapHeader = size_t.sizeof > Group!DataIndex.alignof
        ? size_t.sizeof : Group!DataIndex.alignof;
    union
    {
        Group!DataIndex[] big_matches;
        Group!DataIndex[smallString] small_matches;
    }
    uint _f, _b;
    // SMALL_MASK + num groups while the groups are stored inline.
    // 0 while heap storage is in use: the reference count then lives in the
    // heap block itself, so that every copy sees the same counter.
    uint _refcount;
    NamedGroup[] _names;

    this()(R input, uint n, NamedGroup[] named)
    {
        _input = input;
        _names = named;
        newMatches(n);
        _b = n;
        _f = 0;
    }

    this(alias Engine)(ref RegexMatch!(R,Engine) rmatch)
    {
        _input = rmatch._input;
        _names = rmatch._engine.re.dict;
        immutable n = rmatch._engine.re.ngroup;
        newMatches(n);
        _b = n;
        _f = 0;
    }

    // The group storage currently in use: inline for small group counts,
    // the heap slice otherwise.
    @property inout(Group!DataIndex[]) matches() inout
    {
       return (_refcount & SMALL_MASK)  ? small_matches[0 .. _refcount & 0xFF] : big_matches;
    }

    // True when the groups live in a heap block (neither inline nor unset).
    bool onHeap() const
    {
        return !(_refcount & SMALL_MASK) && big_matches.ptr !is null;
    }

    // Reference count shared by all copies; only meaningful when onHeap.
    @property ref size_t sharedCount()
    {
        return *cast(size_t*)((cast(ubyte*) big_matches.ptr) - heapHeader);
    }

    // Gives up this object's reference to the heap block, freeing the block
    // when this was the last reference. Does nothing for inline storage.
    void releaseMatches()
    {
        import core.stdc.stdlib : free;
        if (onHeap)
        {
            if (--sharedCount == 0)
                free((cast(ubyte*) big_matches.ptr) - heapHeader);
            big_matches = null;
        }
    }

    // Switches this object to fresh storage for n groups. Any heap block it
    // referenced before is released, never overwritten, so other copies
    // keep their data.
    void newMatches(uint n)
    {
        import core.stdc.stdlib : calloc;
        import std.exception : enforce;
        if (n > smallString)
        {
            // allocate first so a failure leaves the current state intact
            auto raw = cast(ubyte*) enforce(
                calloc(1, heapHeader + n * Group!DataIndex.sizeof),
                "Failed to allocate Captures struct"
            );
            releaseMatches();
            *cast(size_t*) raw = 1;
            big_matches = (cast(Group!DataIndex*)(raw + heapHeader))[0 .. n];
            _refcount = 0;
        }
        else
        {
            releaseMatches();
            _refcount = SMALL_MASK | n;
        }
    }

    // True when no other copy shares this object's group storage.
    bool unique()
    {
        return (_refcount & SMALL_MASK) || (onHeap && sharedCount == 1);
    }

public:
    this(this)
    {
        // inline storage was copied bitwise; heap storage gains an owner
        if (onHeap)
            ++sharedCount;
    }
    ~this()
    {
        releaseMatches();
    }
    ///Slice of input prior to the match.
    @property R pre()
    {
        return _nMatch == 0 ? _input[] : _input[0 .. matches[0].begin];
    }

    ///Slice of input immediately after the match.
    @property R post()
    {
        return _nMatch == 0 ? _input[] : _input[matches[0].end .. $];
    }

    ///Slice of matched portion of input.
    @property R hit()
    {
        assert(_nMatch, "attempted to get hit of an empty match");
        return _input[matches[0].begin .. matches[0].end];
    }

    ///Range interface.
    @property R front()
    {
        assert(_nMatch, "attempted to get front of an empty match");
        return _input[matches[_f].begin .. matches[_f].end];
    }

    ///ditto
    @property R back()
    {
        assert(_nMatch, "attempted to get back of an empty match");
        return _input[matches[_b - 1].begin .. matches[_b - 1].end];
    }

    ///ditto
    void popFront()
    {
        assert(!empty);
        ++_f;
    }

    ///ditto
    void popBack()
    {
        assert(!empty);
        --_b;
    }

    ///ditto
    @property bool empty() const { return _nMatch == 0 || _f >= _b; }

    ///ditto
    inout(R) opIndex()(size_t i) inout
    {
        assert(_f + i < _b,text("requested submatch number ", i," is out of range"));
        assert(matches[_f + i].begin <= matches[_f + i].end,
            text("wrong match: ", matches[_f + i].begin, "..", matches[_f + i].end));
        return _input[matches[_f + i].begin .. matches[_f + i].end];
    }

    /++
        Explicit cast to bool.
        Useful as a shorthand for !(x.empty) in if and assert statements.

        ---
        import std.regex;

        assert(!matchFirst("nothing", "something"));
        ---
    +/

    @safe bool opCast(T:bool)() const nothrow { return _nMatch != 0; }

    /++
        Number of pattern matched counting, where 1 - the first pattern.
        Returns 0 on no match.
    +/

    @safe @property int whichPattern() const nothrow { return _nMatch; }

    ///
    @system unittest
    {
        import std.regex;
        assert(matchFirst("abc", "[0-9]+", "[a-z]+").whichPattern == 2);
    }

    /++
        Lookup named submatch.

        ---
        import std.regex;
        import std.range;

        auto c = matchFirst("a = 42;", regex(`(?P<var>\w+)\s*=\s*(?P<value>\d+);`));
        assert(c["var"] == "a");
        assert(c["value"] == "42");
        popFrontN(c, 2);
        //named groups are unaffected by range primitives
        assert(c["var"] =="a");
        assert(c.front == "42");
        ---
    +/
    R opIndex(String)(String i) /*const*/ //@@@BUG@@@
        if (isSomeString!String)
    {
        size_t index = lookupNamedGroup(_names, i);
        return _input[matches[index].begin .. matches[index].end];
    }

    ///Number of matches in this object.
    @property size_t length() const { return _nMatch == 0 ? 0 : _b - _f;  }

    ///A hook for compatibility with original std.regex.
    @property ref captures(){ return this; }
}
668 
///
@system unittest
{
    import std.range.primitives : popFrontN;

    auto caps = matchFirst("@abc#", regex(`(\w)(\w)(\w)`));

    // slices around and of the whole match
    assert(caps.pre == "@"); // Part of input preceding match
    assert(caps.post == "#"); // Immediately after match
    assert(caps.hit == caps[0] && caps.hit == "abc"); // The whole match
    assert(caps[2] == "b");

    // range primitives walk over the submatches, whole match first
    assert(caps.front == "abc");
    caps.popFront();
    assert(caps.front == "a");
    assert(caps.back == "c");
    caps.popBack();
    assert(caps.back == "b");
    popFrontN(caps, 2);
    assert(caps.empty);

    // a failed match converts to false
    assert(!matchFirst("nothing", "something"));
}
690 
/++
    A regex engine state, as returned by $(D match) family of functions.

    Effectively it's a forward range of Captures!R, produced
    by lazily searching for matches in a given input.

    $(D alias Engine) specifies an engine type to use during matching,
    and is automatically deduced in a call to $(D match)/$(D bmatch).
+/
@trusted public struct RegexMatch(R, alias Engine = ThompsonMatcher)
if (isSomeString!R)
{
private:
    import core.stdc.stdlib : malloc, free;
    alias Char = BasicElementOf!R;
    alias EngineType = Engine!Char;
    EngineType _engine;
    R _input;
    Captures!(R,EngineType.DataIndex) _captures;
    // malloc'ed chunk: a size_t reference count followed by engine memory
    void[] _memory;//is ref-counted

    // Allocates engine memory for `prog`, sets up the engine over `input`
    // and searches for the first match right away.
    this(RegEx)(R input, RegEx prog)
    {
        import std.exception : enforce;
        _input = input;
        immutable size = EngineType.initialMemory(prog)+size_t.sizeof;
        _memory = (enforce(malloc(size), "malloc failed")[0 .. size]);
        scope(failure) free(_memory.ptr);
        *cast(size_t*)_memory.ptr = 1;
        _engine = EngineType(prog, Input!Char(input), _memory[size_t.sizeof..$]);
        static if (is(RegEx == StaticRegex!(BasicElementOf!R)))
            _engine.nativeFn = prog.nativeFn;
        _captures = Captures!(R,EngineType.DataIndex)(this);
        _captures._nMatch = _engine.match(_captures.matches);
        debug(std_regex_allocation) writefln("RefCount (ctor): %x %d", _memory.ptr, counter);
    }

    // Reference count stored at the very start of _memory.
    @property ref size_t counter(){ return *cast(size_t*)_memory.ptr; }
public:
    this(this)
    {
        if (_memory.ptr)
        {
            ++counter;
            debug(std_regex_allocation) writefln("RefCount (postblit): %x %d",
                _memory.ptr, *cast(size_t*)_memory.ptr);
        }
    }

    ~this()
    {
        if (_memory.ptr && --*cast(size_t*)_memory.ptr == 0)
        {
            debug(std_regex_allocation) writefln("RefCount (dtor): %x %d",
                _memory.ptr, *cast(size_t*)_memory.ptr);
            free(cast(void*)_memory.ptr);
        }
    }

    ///Shorthands for front.pre, front.post, front.hit.
    @property R pre()
    {
        return _captures.pre;
    }

    ///ditto
    @property R post()
    {
        return _captures.post;
    }

    ///ditto
    @property R hit()
    {
        return _captures.hit;
    }

    /++
        Functionality for processing subsequent matches of global regexes via range interface:
        ---
        import std.regex;
        auto m = matchAll("Hello, world!", regex(`\w+`));
        assert(m.front.hit == "Hello");
        m.popFront();
        assert(m.front.hit == "world");
        m.popFront();
        assert(m.empty);
        ---
    +/
    @property auto front()
    {
        return _captures;
    }

    ///ditto
    void popFront()
    {
        import std.exception : enforce;
        if (counter != 1)
        {//do cow magic first
            immutable size = EngineType.initialMemory(_engine.re)+size_t.sizeof;
            // Allocate before giving up our reference: if malloc fails, this
            // object still points at the shared chunk, so its count must
            // not have been decremented yet.
            auto fresh = enforce(malloc(size), "malloc failed")[0 .. size];
            counter--;//we abandon this reference
            _memory = fresh;
            _engine = _engine.dupTo(_memory[size_t.sizeof .. size]);
            counter = 1;//points to new chunk
        }

        if (!_captures.unique)
        {
            // has external references - allocate new space
            _captures.newMatches(_engine.re.ngroup);
        }
        _captures._nMatch = _engine.match(_captures.matches);
    }

    ///ditto
    auto save(){ return this; }

    ///Test if this match object is empty.
    @property bool empty() const { return _captures._nMatch == 0; }

    ///Same as !(x.empty), provided for its convenience  in conditional statements.
    T opCast(T:bool)(){ return !empty; }

    /// Same as .front, provided for compatibility with original std.regex.
    @property auto captures() inout { return _captures; }

}
819 
// Performs one match attempt of `re` on `input` using the given engine and
// returns the resulting captures. The engine's working memory is released
// before returning.
private @trusted auto matchOnce(alias Engine, RegEx, R)(R input, RegEx re)
{
    import core.stdc.stdlib : malloc, free;
    import std.exception : enforce;
    alias Char = BasicElementOf!R;
    alias EngineType = Engine!Char;
    alias Result = Captures!(R, EngineType.DataIndex);

    size_t scratchSize = EngineType.initialMemory(re);
    void* raw = enforce(malloc(scratchSize), "malloc failed");
    scope(exit) free(raw);
    void[] scratch = raw[0 .. scratchSize];

    auto result = Result(input, re.ngroup, re.dict);
    auto matcher = EngineType(re, Input!Char(input), scratch);
    // a compile-time regex brings its own native matcher function
    static if (is(RegEx == StaticRegex!Char))
        matcher.nativeFn = re.nativeFn;
    result._nMatch = matcher.match(result.matches);
    return result;
}
837 
// Sets the global option on the local copy of `re` and returns a
// RegexMatch over `input` using the given engine.
private auto matchMany(alias Engine, RegEx, R)(R input, RegEx re)
{
    alias Matches = RegexMatch!(R, Engine);
    re.flags |= RegexOption.global;
    return Matches(input, re);
}
843 
@system unittest
{
    // smoke test: a literal pattern matches itself in a single attempt
    auto rx = regex("abc");
    string subject = "abc";
    assert(!matchOnce!(ThompsonMatcher)(subject, rx).empty);
    assert(matchOnce!(ThompsonMatcher)(subject, rx)[0] == "abc");
}
851 
852 
// True when `fun` can be called with a single Captures!R argument.
private template isReplaceFunctor(alias fun, R)
{
    enum isReplaceFunctor =
        __traits(compiles, (Captures!R caps) { fun(caps); });
}
855 
// Lowest-level replace step: writes to `sink` the text before the match,
// the replacement produced by `output`, and the text after the match.
// With empty captures the whole `input` is written unchanged.
private @trusted void replaceCapturesInto(alias output, Sink, R, T)
        (ref Sink sink, R input, T captures)
if (isOutputRange!(Sink, dchar) && isSomeString!R)
{
    if (!captures.empty)
    {
        sink.put(captures.pre);
        // a hack to get around bogus errors, should be simply output(captures, sink)
        // "is a nested function and cannot be accessed from"
        static if (isReplaceFunctor!(output, R))
        {
            // "mutator" flavour: output returns the replacement text
            sink.put(output(captures));
        }
        else
        {
            // "output" flavour: output writes into the sink itself
            output(captures, sink);
        }
        sink.put(captures.post);
    }
    else
    {
        sink.put(input);
    }
}
875 
// Same idea for a whole range of matches: copies the text between
// consecutive matches and the replacement for each match into `sink`,
// then the tail of `input` after the last match.
private void replaceMatchesInto(alias output, Sink, R, T)
        (ref Sink sink, R input, T matches)
if (isOutputRange!(Sink, dchar) && isSomeString!R)
{
    // index in the input just past the previously handled match
    size_t consumed = 0;
    for (auto rest = matches; !rest.empty; rest.popFront())
    {
        auto cap = rest.front;
        // text between the previous match and this one
        sink.put(cap.pre[consumed .. $]);
        // same hack, see replaceCapturesInto
        static if (isReplaceFunctor!(output, R))
        {
            // "mutator" flavour: output returns the replacement text
            sink.put(output(cap));
        }
        else
        {
            // "output" flavour: output writes into the sink itself
            output(cap, sink);
        }
        consumed = cap.pre.length + cap.hit.length;
    }
    sink.put(input[consumed .. $]);
}
894 
// Shared skeleton of replaceFirst: find the first match and, if there is
// one, build a new string with it replaced via `output`.
private R replaceFirstWith(alias output, R, RegEx)(R input, RegEx re)
if (isSomeString!R && isRegexFor!(RegEx, R))
{
    import std.array : appender;
    auto caps = matchFirst(input, re);
    if (!caps.empty)
    {
        auto buffer = appender!(R)();
        replaceCapturesInto!output(buffer, input, caps);
        return buffer.data;
    }
    // nothing matched: hand the input back as is
    return input;
}
907 
// Shared skeleton of replaceAll.
// `method` selects the matching function, which lets the old API
// ride on the back of the new one.
private R replaceAllWith(alias output,
        alias method=matchAll, R, RegEx)(R input, RegEx re)
if (isSomeString!R && isRegexFor!(RegEx, R))
{
    import std.array : appender;
    auto found = method(input, re); //inout(C)[] fails
    if (!found.empty)
    {
        auto buffer = appender!(R)();
        replaceMatchesInto!output(buffer, input, found);
        return buffer.data;
    }
    // nothing matched: hand the input back as is
    return input;
}
922 
923 
924 /++
925     Start matching $(D input) to regex pattern $(D re),
926     using Thompson NFA matching scheme.
927 
928     The use of this function is $(RED discouraged) - use either of
929     $(LREF matchAll) or $(LREF matchFirst).
930 
    Delegating the kind of operation
    to the "g" flag is soon to be phased out along with the
    ability to choose the exact matching scheme. The choice of
    matching scheme to use depends highly on the pattern kind and
    can be done automatically on a case-by-case basis.
936 
937     Returns: a $(D RegexMatch) object holding engine state after first match.
938 +/
939 
public auto match(R, RegEx)(R input, RegEx re)
if (isSomeString!R && is(RegEx == Regex!(BasicElementOf!R)))
{
    import std.regex.internal.thompson : ThompsonMatcher;
    // the match range is parameterized on the unqualified string type
    alias Matches = RegexMatch!(Unqual!R, ThompsonMatcher);
    return Matches(input, re);
}
946 
947 ///ditto
public auto match(R, String)(R input, String re)
if (isSomeString!R && isSomeString!String)
{
    import std.regex.internal.thompson : ThompsonMatcher;
    alias Matches = RegexMatch!(Unqual!R, ThompsonMatcher);
    // the pattern arrives as text: compile it, then match with the result
    auto compiled = regex(re);
    return Matches(input, compiled);
}
954 
public auto match(R, RegEx)(R input, RegEx re)
if (isSomeString!R && is(RegEx == StaticRegex!(BasicElementOf!R)))
{
    import std.regex.internal.backtracking : BacktrackingMatcher;
    // static regexes are matched with the backtracking engine instantiated with `true`
    alias Matches = RegexMatch!(Unqual!R, BacktrackingMatcher!true);
    return Matches(input, re);
}
961 
962 /++
963     Find the first (leftmost) slice of the $(D input) that
964     matches the pattern $(D re). This function picks the most suitable
965     regular expression engine depending on the pattern properties.
966 
967     $(D re) parameter can be one of three types:
968     $(UL
969       $(LI Plain string(s), in which case it's compiled to bytecode before matching. )
970       $(LI Regex!char (wchar/dchar) that contains a pattern in the form of
971         compiled  bytecode. )
972       $(LI StaticRegex!char (wchar/dchar) that contains a pattern in the form of
973         compiled native machine code. )
974     )
975 
976     Returns:
977     $(LREF Captures) containing the extent of a match together with all submatches
978     if there was a match, otherwise an empty $(LREF Captures) object.
979 +/
public auto matchFirst(R, RegEx)(R input, RegEx re)
if (isSomeString!R && is(RegEx == Regex!(BasicElementOf!R)))
{
    import std.regex.internal.thompson : ThompsonMatcher;
    // single match attempt with the Thompson engine
    return input.matchOnce!ThompsonMatcher(re);
}
986 
987 ///ditto
public auto matchFirst(R, String)(R input, String re)
if (isSomeString!R && isSomeString!String)
{
    import std.regex.internal.thompson : ThompsonMatcher;
    // the pattern arrives as text: compile it before matching
    auto compiled = regex(re);
    return input.matchOnce!ThompsonMatcher(compiled);
}
994 
995 ///ditto
public auto matchFirst(R, String)(R input, String[] re...)
if (isSomeString!R && isSomeString!String)
{
    import std.regex.internal.thompson : ThompsonMatcher;
    // several pattern strings: compile them into one regex, then match
    auto compiled = regex(re);
    return input.matchOnce!ThompsonMatcher(compiled);
}
1002 
public auto matchFirst(R, RegEx)(R input, RegEx re)
if (isSomeString!R && is(RegEx == StaticRegex!(BasicElementOf!R)))
{
    import std.regex.internal.backtracking : BacktrackingMatcher;
    // static regexes go through the backtracking engine instantiated with `true`
    return input.matchOnce!(BacktrackingMatcher!true)(re);
}
1009 
1010 /++
1011     Initiate a search for all non-overlapping matches to the pattern $(D re)
1012     in the given $(D input). The result is a lazy range of matches generated
1013     as they are encountered in the input going left to right.
1014 
1015     This function picks the most suitable regular expression engine
1016     depending on the pattern properties.
1017 
1018     $(D re) parameter can be one of three types:
1019     $(UL
1020       $(LI Plain string(s), in which case it's compiled to bytecode before matching. )
1021       $(LI Regex!char (wchar/dchar) that contains a pattern in the form of
1022         compiled  bytecode. )
1023       $(LI StaticRegex!char (wchar/dchar) that contains a pattern in the form of
1024         compiled native machine code. )
1025     )
1026 
1027     Returns:
1028     $(LREF RegexMatch) object that represents matcher state
1029     after the first match was found or an empty one if not present.
1030 +/
public auto matchAll(R, RegEx)(R input, RegEx re)
if (isSomeString!R && is(RegEx == Regex!(BasicElementOf!R)))
{
    import std.regex.internal.thompson : ThompsonMatcher;
    // range of matches produced by the Thompson engine
    return input.matchMany!ThompsonMatcher(re);
}
1037 
1038 ///ditto
public auto matchAll(R, String)(R input, String re)
if (isSomeString!R && isSomeString!String)
{
    import std.regex.internal.thompson : ThompsonMatcher;
    // the pattern arrives as text: compile it before matching
    auto compiled = regex(re);
    return input.matchMany!ThompsonMatcher(compiled);
}
1045 
1046 ///ditto
public auto matchAll(R, String)(R input, String[] re...)
if (isSomeString!R && isSomeString!String)
{
    import std.regex.internal.thompson : ThompsonMatcher;
    // several pattern strings: compile them into one regex, then match
    auto compiled = regex(re);
    return input.matchMany!ThompsonMatcher(compiled);
}
1053 
public auto matchAll(R, RegEx)(R input, RegEx re)
if (isSomeString!R && is(RegEx == StaticRegex!(BasicElementOf!R)))
{
    import std.regex.internal.backtracking : BacktrackingMatcher;
    // static regexes go through the backtracking engine instantiated with `true`
    return input.matchMany!(BacktrackingMatcher!true)(re);
}
1060 
// coverage for matchFirst/matchAll with string, runtime and compile-time patterns
@system unittest
{
    import std.algorithm.comparison : equal;
    import std.algorithm.iteration : map;
    import std.conv : to;

    // the same scenario is repeated for each of these string types
    foreach (String; AliasSeq!(string, wstring, const(dchar)[]))
    {
        // pattern supplied as a plain string
        auto subject1 = "blah-bleh".to!String();
        auto pattern1 = "bl[ae]h".to!String();
        auto first1 = matchFirst(subject1, pattern1);
        assert(first1.equal(["blah".to!String()]));
        auto all1 = matchAll(subject1, pattern1);
        assert(all1.equal!((a,b) => a.equal(b))
            ([["blah".to!String()], ["bleh".to!String()]]));

        // regex built at run time from an array of pattern strings
        auto subject2 = "1/03/12 - 3/03/12".to!String();
        auto pattern2 = regex([r"(\d+)/(\d+)/(\d+)".to!String(), "abc".to!String]);
        auto first2 = matchFirst(subject2, pattern2);
        assert(first2.equal(["1/03/12", "1", "03", "12"].map!(to!String)()));
        auto all2 = matchAll(subject2, pattern2);
        assert(all2.front.equal(first2));
        all2.popFront();
        assert(all2.front.equal(["3/03/12", "3", "03", "12"].map!(to!String)()));
        // captures are themselves a range: drop the first three entries
        first2.popFrontN(3);
        assert(first2.equal(["12".to!String()]));

        // compile-time regex with named groups
        auto staticPattern = ctRegex!(`(?P<Quot>\d+)/(?P<Denom>\d+)`.to!String());
        auto subject3 = "2 + 34/56 - 6/1".to!String();
        auto staticFirst = matchFirst(subject3, staticPattern);
        assert(staticFirst.equal(["34/56", "34", "56"].map!(to!String)()));
        assert(staticFirst["Quot"] == "34".to!String());
        assert(staticFirst["Denom"] == "56".to!String());

        auto staticAll = matchAll(subject3, staticPattern);
        assert(staticAll.front.equal(staticFirst));
        staticAll.popFront();
        assert(staticAll.front.equal(["6/1", "6", "1"].map!(to!String)()));
    }
}
1102 
1103 /++
1104     Start matching of $(D input) to regex pattern $(D re),
1105     using traditional $(LINK2 https://en.wikipedia.org/wiki/Backtracking,
1106     backtracking) matching scheme.
1107 
1108     The use of this function is $(RED discouraged) - use either of
1109     $(LREF matchAll) or $(LREF matchFirst).
1110 
    Delegating the kind of operation
    to the "g" flag is soon to be phased out along with the
    ability to choose the exact matching scheme. The choice of
    matching scheme to use depends highly on the pattern kind and
    can be done automatically on a case-by-case basis.
1116 
1117     Returns: a $(D RegexMatch) object holding engine
1118     state after first match.
1119 
1120 +/
public auto bmatch(R, RegEx)(R input, RegEx re)
if (isSomeString!R && is(RegEx == Regex!(BasicElementOf!R)))
{
    import std.regex.internal.backtracking : BacktrackingMatcher;
    // runtime regexes use the backtracking engine instantiated with `false`
    alias Matches = RegexMatch!(Unqual!R, BacktrackingMatcher!false);
    return Matches(input, re);
}
1127 
1128 ///ditto
public auto bmatch(R, String)(R input, String re)
if (isSomeString!R && isSomeString!String)
{
    import std.regex.internal.backtracking : BacktrackingMatcher;
    alias Matches = RegexMatch!(Unqual!R, BacktrackingMatcher!false);
    // the pattern arrives as text: compile it, then match with the result
    auto compiled = regex(re);
    return Matches(input, compiled);
}
1135 
public auto bmatch(R, RegEx)(R input, RegEx re)
if (isSomeString!R && is(RegEx == StaticRegex!(BasicElementOf!R)))
{
    import std.regex.internal.backtracking : BacktrackingMatcher;
    // static regexes use the backtracking engine instantiated with `true`
    alias Matches = RegexMatch!(Unqual!R, BacktrackingMatcher!true);
    return Matches(input, re);
}
1142 
// Produces a replacement string from `format`, using `captures` for
// substitution, and writes it piece by piece to `sink`.
//
// Substitutions recognized after a '$':
//   $N       (decimal digits)  - submatch number N
//   ${name}  (letters only)    - named submatch
//   $&                         - the whole match (captures[0])
//   $`                         - text before the match (captures.pre)
//   $'                         - text after the match (captures.post)
//   $$                         - a literal '$'
//
// Params:
//   format        = the replacement format string
//   captures      = captures of the match being replaced
//   sink          = output range receiving the generated text
//   ignoreBadSubs = when true, an out-of-range $N emits nothing instead of throwing
//
// Throws (via enforce): on an out-of-range submatch number unless
// ignoreBadSubs is set, on a malformed or empty ${...}, and when the
// format ends right after a '$'.
package void replaceFmt(R, Capt, OutR)
    (R format, Capt captures, OutR sink, bool ignoreBadSubs = false)
if (isOutputRange!(OutR, ElementEncodingType!R[]) &&
    isOutputRange!(OutR, ElementEncodingType!(Capt.String)[]))
{
    import std.algorithm.searching : find;
    import std.ascii : isDigit, isAlpha;
    import std.conv : text, parse;
    import std.exception : enforce;
    // Normal: copying literal text; Dollar: a '$' was just consumed
    enum State { Normal, Dollar }
    auto state = State.Normal;
    size_t offset;
L_Replace_Loop:
    while (!format.empty)
        final switch (state)
        {
        case State.Normal:
            // scan code units for the next '$'
            for (offset = 0; offset < format.length; offset++)//no decoding
            {
                if (format[offset] == '$')
                {
                    state = State.Dollar;
                    // flush the literal text in front of the '$'
                    sink.put(format[0 .. offset]);
                    // drop the literal text and the '$' itself
                    format = format[offset+1 .. $];//ditto
                    continue L_Replace_Loop;
                }
            }
            // no '$' left: the rest of the format is literal text
            sink.put(format[0 .. offset]);
            format = format[offset .. $];
            break;
        case State.Dollar:
            // format is non-empty here (loop condition), so format[0] is valid
            if (isDigit(format[0]))
            {
                // parse consumes the digits from the front of format
                uint digit = parse!uint(format);
                enforce(ignoreBadSubs || digit < captures.length, text("invalid submatch number ", digit));
                // with ignoreBadSubs an out-of-range number emits nothing
                if (digit < captures.length)
                    sink.put(captures[digit]);
            }
            else if (format[0] == '{')
            {
                // x starts at the first non-letter after '{'; it must be the closing '}'
                auto x = find!(a => !isAlpha(a))(format[1..$]);
                enforce(!x.empty && x[0] == '}', "no matching '}' in replacement format");
                auto name = format[1 .. $ - x.length];
                // continue after the '}'
                format = x[1..$];
                enforce(!name.empty, "invalid name in ${...} replacement format");
                sink.put(captures[name]);
            }
            else if (format[0] == '&')
            {
                sink.put(captures[0]);
                format = format[1 .. $];
            }
            else if (format[0] == '`')
            {
                sink.put(captures.pre);
                format = format[1 .. $];
            }
            else if (format[0] == '\'')
            {
                sink.put(captures.post);
                format = format[1 .. $];
            }
            else if (format[0] == '$')
            {
                // "$$" emits a single '$'
                sink.put(format[0 .. 1]);
                format = format[1 .. $];
            }
            // NOTE(review): for any other character no branch fires, so the '$'
            // is dropped and the character is then copied as normal text
            // ("$x" yields "x") - confirm this is intended.
            state = State.Normal;
            break;
        }
    // still in Dollar state means the format ended right after a '$'
    enforce(state == State.Normal, "invalid format string in regex replace");
}
1216 
1217 /++
1218     Construct a new string from $(D input) by replacing the first match with
1219     a string generated from it according to the $(D format) specifier.
1220 
1221     To replace all matches use $(LREF replaceAll).
1222 
1223     Params:
1224     input = string to search
1225     re = compiled regular expression to use
1226     format = _format string to generate replacements from,
1227     see $(S_LINK Replace _format string, the _format string).
1228 
1229     Returns:
1230     A string of the same type with the first match (if any) replaced.
1231     If no match is found returns the input string itself.
1232 +/
public R replaceFirst(R, C, RegEx)(R input, RegEx re, const(C)[] format)
if (isSomeString!R && is(C : dchar) && isRegexFor!(RegEx, R))
{
    // the replacement for the match is generated from `format` by replaceFmt
    return replaceFirstWith!((caps, dst) { replaceFmt(format, caps, dst); })(input, re);
}
1238 
1239 ///
@system unittest
{
    // "$&" stands for the matched text, so only the first "n" gets bracketed
    auto replaced = replaceFirst("noon", regex("n"), "[$&]");
    assert(replaced == "[n]oon");
}
1244 
1245 /++
    This is a general replacement tool that constructs a new string by replacing
    matches of pattern $(D re) in the $(D input). Unlike the other overload
    there is no format string; instead, captures are passed
    to a user-defined functor $(D fun) that returns a new string
    to use as replacement.
1251 
    This version replaces the first match in $(D input),
    see $(LREF replaceAll) to replace all of the matches.
1254 
    Returns:
    A new string of the same type as $(D input) with the first match
    replaced by the return value of $(D fun). If no match is found,
    returns the $(D input) itself.
1259 +/
public R replaceFirst(alias fun, R, RegEx)(R input, RegEx re)
if (isSomeString!R && isRegexFor!(RegEx, R))
{
    // whatever `fun` returns for the captures is written as the replacement
    return replaceFirstWith!((caps, dst) { dst.put(fun(caps)); })(input, re);
}
1265 
1266 ///
@system unittest
{
    import std.conv : to;
    string original = "#21 out of 46";
    // increment the first number found in the text
    auto digits = regex(`[0-9]+`);
    string bumped = replaceFirst!(cap => to!string(to!int(cap.hit)+1))
        (original, digits);
    assert(bumped == "#22 out of 46");
}
1275 
1276 /++
1277     A variation on $(LREF replaceFirst) that instead of allocating a new string
1278     on each call outputs the result piece-wise to the $(D sink). In particular
1279     this enables efficient construction of a final output incrementally.
1280 
1281     Like in $(LREF replaceFirst) family of functions there is an overload
1282     for the substitution guided by the $(D format) string
1283     and the one with the user defined callback.
1284 +/
public @trusted void replaceFirstInto(Sink, R, C, RegEx)
        (ref Sink sink, R input, RegEx re, const(C)[] format)
if (isOutputRange!(Sink, dchar) && isSomeString!R
    && is(C : dchar) && isRegexFor!(RegEx, R))
{
    // find the first match, then stream the replaced text into the sink
    auto caps = matchFirst(input, re);
    replaceCapturesInto!((m, dst) { replaceFmt(format, m, dst); })
        (sink, input, caps);
}
1293 
1294 ///ditto
public @trusted void replaceFirstInto(alias fun, Sink, R, RegEx)
    (Sink sink, R input, RegEx re)
if (isOutputRange!(Sink, dchar) && isSomeString!R && isRegexFor!(RegEx, R))
{
    // NOTE(review): `sink` is taken by value here while the format-string
    // overload takes it by ref - confirm the difference is intended.
    auto caps = matchFirst(input, re);
    replaceCapturesInto!fun(sink, input, caps);
}
1301 
1302 ///
@system unittest
{
    import std.array;
    string firstMsg = "first message\n";
    string secondMsg = "second message\n";
    auto pattern = regex(`([a-z]+) message`);
    auto output = appender!string();
    // format-string flavour
    replaceFirstInto(output, firstMsg, pattern, "$1");
    //equivalent of the above with user-defined callback
    replaceFirstInto!(cap=>cap[1])(output, secondMsg, regex(`([a-z]+) message`));
    // both results accumulate in the same appender
    assert(output.data == "first\nsecond\n");
}
1314 
// replaceFirst and replaceFirstInto exercised together
@system unittest
{
    import std.conv;
    // functor flavour of replaceFirst: bump the first number
    string original = "#21 out of 46";
    string bumped = replaceFirst!(cap => to!string(to!int(cap.hit)+1))
        (original, regex(`[0-9]+`));
    assert(bumped == "#22 out of 46");

    import std.array;
    string firstMsg = "first message\n";
    string secondMsg = "second message\n";
    auto output = appender!string();
    // format-string flavour of replaceFirstInto
    replaceFirstInto(output, firstMsg, regex(`([a-z]+) message`), "$1");
    //equivalent of the above with user-defined callback
    replaceFirstInto!(cap=>cap[1])(output, secondMsg, regex(`([a-z]+) message`));
    assert(output.data == "first\nsecond\n");
}
1332 
1333 /++
1334     Construct a new string from $(D input) by replacing all of the
1335     fragments that match a pattern $(D re) with a string generated
1336     from the match according to the $(D format) specifier.
1337 
1338     To replace only the first match use $(LREF replaceFirst).
1339 
1340     Params:
1341     input = string to search
1342     re = compiled regular expression to use
1343     format = _format string to generate replacements from,
1344     see $(S_LINK Replace _format string, the _format string).
1345 
1346     Returns:
    A string of the same type as $(D input) with all
    of the matches (if any) replaced.
1349     If no match is found returns the input string itself.
1350 +/
public @trusted R replaceAll(R, C, RegEx)(R input, RegEx re, const(C)[] format)
if (isSomeString!R && is(C : dchar) && isRegexFor!(RegEx, R))
{
    // each match's replacement is generated from `format` by replaceFmt
    return replaceAllWith!((caps, dst) { replaceFmt(format, caps, dst); })(input, re);
}
1356 
1357 ///
@system unittest
{
    // insert comma as thousands delimiter
    auto thousands = regex(r"(?<=\d)(?=(\d\d\d)+\b)","g");
    auto grouped = replaceAll("12000 + 42100 = 54100", thousands, ",");
    assert(grouped == "12,000 + 42,100 = 54,100");
}
1364 
1365 /++
    This is a general replacement tool that constructs a new string by replacing
    matches of pattern $(D re) in the $(D input). Unlike the other overload
    there is no format string; instead, captures are passed
    to a user-defined functor $(D fun) that returns a new string
    to use as replacement.
1371 
1372     This version replaces all of the matches found in $(D input),
1373     see $(LREF replaceFirst) to replace the first match only.
1374 
1375     Returns:
1376     A new string of the same type as $(D input) with all matches
1377     replaced by return values of $(D fun). If no matches found
1378     returns the $(D input) itself.
1379 
1380     Params:
1381     input = string to search
1382     re = compiled regular expression
1383     fun = delegate to use
1384 +/
public @trusted R replaceAll(alias fun, R, RegEx)(R input, RegEx re)
if (isSomeString!R && isRegexFor!(RegEx, R))
{
    // whatever `fun` returns for each match is written as its replacement
    return replaceAllWith!((caps, dst) { dst.put(fun(caps)); })(input, re);
}
1390 
1391 ///
@system unittest
{
    // replacement callback: the matched text in upper case
    string baz(Captures!(string) m)
    {
        import std.string : toUpper;
        auto matched = m.hit;
        return toUpper(matched);
    }
    // Capitalize the letters 'a' and 'r':
    auto lettersAR = regex("[ar]");
    auto s = replaceAll!(baz)("Strap a rocket engine on a chicken.",
            lettersAR);
    assert(s == "StRAp A Rocket engine on A chicken.");
}
1404 
1405 /++
1406     A variation on $(LREF replaceAll) that instead of allocating a new string
1407     on each call outputs the result piece-wise to the $(D sink). In particular
1408     this enables efficient construction of a final output incrementally.
1409 
1410     As with $(LREF replaceAll) there are 2 overloads - one with a format string,
1411     the other one with a user defined functor.
1412 +/
public @trusted void replaceAllInto(Sink, R, C, RegEx)
        (Sink sink, R input, RegEx re, const(C)[] format)
if (isOutputRange!(Sink, dchar) && isSomeString!R
    && is(C : dchar) && isRegexFor!(RegEx, R))
{
    // collect all matches, then stream the replaced text into the sink
    auto found = matchAll(input, re);
    replaceMatchesInto!((m, dst) { replaceFmt(format, m, dst); })
        (sink, input, found);
}
1421 
1422 ///ditto
public @trusted void replaceAllInto(alias fun, Sink, R, RegEx)
        (Sink sink, R input, RegEx re)
if (isOutputRange!(Sink, dchar) && isSomeString!R && isRegexFor!(RegEx, R))
{
    // collect all matches, then let `fun` produce each replacement
    auto found = matchAll(input, re);
    replaceMatchesInto!fun(sink, input, found);
}
1429 
1430 ///
@system unittest
{
    // insert comma as thousands delimiter in fifty randomly produced big numbers
    import std.array, std.conv, std.random, std.range;
    static thousands = regex(`(?<=\d)(?=(\d\d\d)+\b)`, "g");
    auto buffer = appender!(char [])();
    enum ulong lowest = 10UL ^^ 10, highest = 10UL ^^ 19;
    foreach (round; 0 .. 50)
    {
        // reuse the same buffer for every number
        buffer.clear();
        replaceAllInto(buffer, text(uniform(lowest, highest)), thousands, ",");
        // walking back from the end, every fourth character must be a comma
        foreach (pos; iota(buffer.data.length - 4, 0, -4))
            assert(buffer.data[pos] == ',');
    }
}
1446 
// runs every replace entry point over all six string types
@system unittest
{
    import std.array : appender;
    import std.conv;
    // try and check first/all simple substitution
    foreach (S; AliasSeq!(string, wstring, dstring, char[], wchar[], dchar[]))
    {
        // inputs
        S in1 = "curt trial".to!S();
        S in2 = "round dome".to!S();
        // expected results of replacing the first match only
        S first1 = "court trial".to!S();
        S first2 = "hound dome".to!S();
        // expected results of replacing all matches
        S all1 = "court trial".to!S();
        S all2 = "hound home".to!S();
        auto rx1 = regex("curt".to!S());
        auto rx2 = regex("[dr]o".to!S());

        // format-string flavour
        assert(replaceFirst(in1, rx1, "court") == first1);
        assert(replaceFirst(in2, rx2, "ho") == first2);
        assert(replaceAll(in1, rx1, "court") == all1);
        assert(replaceAll(in2, rx2, "ho") == all2);

        // functor flavour
        auto viaFunFirst = replaceFirst!(cap => cap[0][0]~"o".to!S()~cap[0][1..$])(in1, rx1);
        assert(viaFunFirst == first1);
        assert(replaceFirst!(cap => "ho".to!S())(in2, rx2) == first2);
        auto viaFunAll = replaceAll!(cap => cap[0][0]~"o".to!S()~cap[0][1..$])(in1, rx1);
        assert(viaFunAll == all1);
        assert(replaceAll!(cap => "ho".to!S())(in2, rx2) == all2);

        // sink flavour: results pile up in one appender
        auto collected = appender!S();
        replaceFirstInto(collected, in1, rx1, "court");
        assert(collected.data == first1);
        replaceFirstInto(collected, in2, rx2, "ho");
        assert(collected.data == first1~first2);
        replaceAllInto(collected, in1, rx1, "court");
        assert(collected.data == first1~first2~all1);
        replaceAllInto(collected, in2, rx2, "ho");
        assert(collected.data == first1~first2~all1~all2);
    }
}
1487 
1488 /++
1489     Old API for replacement, operation depends on flags of pattern $(D re).
1490     With "g" flag it performs the equivalent of $(LREF replaceAll) otherwise it
1491     works the same as $(LREF replaceFirst).
1492 
1493     The use of this function is $(RED discouraged), please use $(LREF replaceAll)
1494     or $(LREF replaceFirst) explicitly.
1495 +/
public R replace(alias scheme = match, R, C, RegEx)(R input, RegEx re, const(C)[] format)
if (isSomeString!R && isRegexFor!(RegEx, R))
{
    // Params:
    //   scheme = matching function used to find the matches (defaults to match)
    //   input  = string to search
    //   re     = compiled regular expression to use
    //   format = format string the replacements are generated from (see replaceFmt)
    // Returns: the string built by replaceAllWith; the input itself when
    //   nothing matches.
    //
    // Honor the `scheme` template parameter: previously `match` was hard-coded
    // here, so an explicitly requested scheme (e.g. replace!bmatch) was ignored.
    // With the default argument the behavior is unchanged.
    return replaceAllWith!((m, sink) => replaceFmt(format, m, sink), scheme)(input, re);
}
1501 
1502 ///ditto
public R replace(alias fun, R, RegEx)(R input, RegEx re)
if (isSomeString!R && isRegexFor!(RegEx, R))
{
    // old-API entry point: `fun` produces the replacements, `match` finds the matches
    return input.replaceAllWith!(fun, match)(re);
}
1508 
1509 /**
1510 Splits a string `r` using a regular expression `pat` as a separator.
1511 
1512 Params:
1513     keepSeparators = flag to specify if the matches should be in the resulting range
1514     r = the string to split
1515     pat = the pattern to split on
1516 Returns:
1517     A lazy range of strings
1518 */
public struct Splitter(Flag!"keepSeparators" keepSeparators = No.keepSeparators, Range, alias RegEx = Regex)
if (isSomeString!Range && isRegexFor!(RegEx, Range))
{
private:
    // the string being split
    Range _input;
    // index in _input where the current front starts;
    // values past the end of _input mark the range as exhausted (see empty)
    size_t _offset;
    // type of the match range that `match` produces for this Range/RegEx pair
    alias Rx = typeof(match(Range.init,RegEx.init));
    // separator matches; its front is the next separator to be handled
    Rx _match;

    // only with keepSeparators: true while front is the separator itself
    static if (keepSeparators) bool onMatch = false;

    // Stores the input and starts matching separators with the global flag set.
    @trusted this(Range input, RegEx separator)
    {//@@@BUG@@@ generated opAssign of RegexMatch is not @trusted
        _input = input;
        separator.flags |= RegexOption.global;
        if (_input.empty)
        {
            //there is nothing to match at all, make _offset > 0
            _offset = 1;
        }
        else
        {
            _match = Rx(_input, separator);

            // with keepSeparators, when nothing precedes the first match,
            // advance once so that the first front is the separator
            static if (keepSeparators)
                if (_match.pre.empty)
                    popFront();
        }
    }

public:
    // slicing the splitter yields a saved copy
    auto ref opSlice()
    {
        return this.save;
    }

    ///Forward range primitives.
    @property Range front()
    {
        import std.algorithm.comparison : min;

        assert(!empty && _offset <= _match.pre.length
                && _match.pre.length <= _input.length);

        static if (keepSeparators)
        {
            // alternate between the text before the separator and the separator itself
            if (!onMatch)
                return _input[_offset .. min($, _match.pre.length)];
            else
                return _match.hit();
        }
        else
        {
            // the piece runs from _offset up to the start of the next separator,
            // clamped to the end of the input
            return _input[_offset .. min($, _match.pre.length)];
        }
    }

    ///ditto
    @property bool empty()
    {
        // with keepSeparators the range ends once _offset reaches the input length;
        // otherwise _offset == _input.length still yields one last piece
        static if (keepSeparators)
            return _offset >= _input.length;
        else
            return _offset > _input.length;
    }

    ///ditto
    void popFront()
    {
        assert(!empty);
        if (_match.empty)
        {
            //No more separators, work is done here
            _offset = _input.length + 1;
        }
        else
        {
            static if (keepSeparators)
            {
                if (!onMatch)
                {
                    // move to the start of the separator; it becomes the next front
                    _offset = _match.pre.length;
                }
                else
                {
                    // step over the separator just returned and advance to the next match
                    _offset += _match.hit.length;
                    _match.popFront();
                }

                // piece and separator alternate
                onMatch = !onMatch;
            }
            else
            {
                //skip past the separator
                _offset = _match.pre.length + _match.hit.length;
                _match.popFront();
            }
        }
    }

    ///ditto
    @property auto save()
    {
        return this;
    }
}
1626 
1627 /// ditto
public Splitter!(keepSeparators, Range, RegEx) splitter(
    Flag!"keepSeparators" keepSeparators = No.keepSeparators, Range, RegEx)(Range r, RegEx pat)
if (
    is(BasicElementOf!Range : dchar) && isRegexFor!(RegEx, Range))
{
    // construct the declared return type directly from the arguments
    return typeof(return)(r, pat);
}
1635 
1636 ///
@system unittest
{
    import std.algorithm.comparison : equal;
    // leading and trailing separators produce empty pieces at both ends
    auto text = ", abc, de,  fg, hi, ";
    auto pieces = splitter(text, regex(", *"));
    assert(equal(pieces,
        ["", "abc", "de", "fg", "hi", ""]));
}
1644 
1645 /// Split on a pattern, but keep the matches in the resulting range
@system unittest
{
    import std.algorithm.comparison : equal;
    import std.typecons : Yes;

    auto separator = regex(`([\.,])`);

    // separators show up between the pieces
    auto dateParts = "2003.04.05".splitter!(Yes.keepSeparators)(separator);
    assert(dateParts.equal(["2003", ".", "04", ".", "05"]));

    // a leading separator comes first, with no empty piece in front of it
    auto listParts = ",1,2,3".splitter!(Yes.keepSeparators)(separator);
    assert(listParts.equal([",", "1", ",", "2", ",", "3"]));
}
1661 
///An eager version of $(D splitter) that creates an array with split slices of $(D input).
public @trusted String[] split(String, RegEx)(String input, RegEx rx)
if (isSomeString!String  && isRegexFor!(RegEx, String))
{
    import std.array : appender;
    auto collected = appender!(String[])();
    // drain the lazy splitter, keeping every piece in order
    for (auto parts = splitter(input, rx); !parts.empty; parts.popFront())
    {
        auto piece = parts.front;
        collected.put(piece);
    }
    return collected.data;
}
1672 
///Exception object thrown in case of errors during regex compilation.
// Public alias for the exception type declared in std.regex.internal.ir.
public alias RegexException = std.regex.internal.ir.RegexException;
1675 
1676 /++
1677   A range that lazily produces a string output escaped
1678   to be used inside of a regular expression.
1679 +/
auto escaper(Range)(Range r)
{
    import std.algorithm.searching : find;
    static immutable escapables = [Escapables];
    static struct Escaper // template to deduce attributes
    {
        Range r;
        // true while a backslash is pending in front of the current element of r
        bool escaped;

        @property ElementType!Range front()
        {
            if (!escaped)
                return r.front;
            return '\\';
        }

        @property bool empty()
        {
            return r.empty;
        }

        void popFront()
        {
            if (escaped)
            {
                // the backslash has been consumed; the escapable element comes next
                escaped = false;
                return;
            }
            r.popFront();
            // a backslash is owed if the new current element is escapable
            if (!r.empty && !escapables.find(r.front).empty)
                escaped = true;
        }

        @property auto save()
        {
            return Escaper(r.save, escaped);
        }
    }

    // the very first element may itself need a backslash
    bool startsEscaped = !r.empty && !escapables.find(r.front).empty;
    return Escaper(r, startsEscaped);
}
1714 
1715 ///
@system unittest
{
    import std.algorithm.comparison;
    import std.regex;
    // braces and asterisks each get a backslash in front
    string raw = `This is {unfriendly} to *regex*`;
    auto escapedText = raw.escaper;
    assert(escapedText.equal(`This is \{unfriendly\} to \*regex\*`));
}
1723 
@system unittest
{
    import std.algorithm.comparison;
    import std.conv;
    foreach (S; AliasSeq!(string, wstring, dstring))
    {
      // a lone escapable character
      auto caret = "^".to!S;
      assert(caret.escaper.equal(`\^`));
      // empty input yields an empty range
      auto nothing = "";
      assert(nothing.escaper.equal(""));
    }
}
1736